In [117]:
from owlready2 import *
import pandas as pd

# IRI identifying the ontology that the rest of the notebook builds and saves.
ONTOLOGY_IRI = "http://test.org/toxic-onto-bg.owl"
onto = get_ontology(ONTOLOGY_IRI)

In [119]:
# Declare the ontology schema. Everything defined inside `with onto:` is created
# in the `onto` ontology. Order matters: the two properties are declared before
# the classes whose bodies assign or reference them.
with onto:

    # Main classes: the two top-level concepts of the ontology.
    # A Context is a setting in which content appears (e.g. a forum).
    class Context(Thing):
        pass

    # A Content is a piece of language (the individuals added later are single words).
    class Content(Thing):
        pass

    # Properties
    # `Domain >> Range` is owlready2 shorthand for declaring a property:
    # here a data property from Content to a string value.
    class definition(Content >> str):
        """Official definitions for the main language categories"""
        pass

    # Object property linking a Context to the Content it blocks.
    class blocks(Context >> Content):
        """Describes the relationship between the Content and Context subclasses."""
        pass

    # Types of Content
    # In each class body below, `definition = ...` assigns a value for the
    # `definition` property declared above to the class itself.
    class ToxicLanguage(onto.Content):
        definition = """Toxic language is defined as rude, disrespectful, or unreasonable language that is likely to make someone leave a discussion."""

    class MedicalTerminology(onto.Content):
        definition = """Medical terminology is language that is used to describe anatomical structures, processes, conditions, medical procedures, and treatments."""

    class MinorityGroup(onto.Content):
        definition = """Minority group language refers to subgroups of the population with unique social, religious, ethnic, racial, and/or other characteristics that differ from those of a majority group."""

    class NonToxicLanguage(onto.Content):
        definition = """Non-toxic language refers to communication that is free from harmful, abusive or offensive content."""

    # Defined class: the intersection (`&`) of ToxicLanguage and NonToxicLanguage.
    class Ambiguous(onto.Content):
        equivalent_to = [onto.ToxicLanguage & onto.NonToxicLanguage]
        comment = "Ambiguous words are the ones that have at least one toxic and at least one non-toxic meaning."

    # Defined class: anything toxic, medical or minority-related (`|` is union)
    # that is not also NonToxicLanguage (`Not` is complement).
    class FamilyFriendlyContentBlocked(onto.Content):
        equivalent_to = [(onto.ToxicLanguage| onto.MedicalTerminology | onto.MinorityGroup) & Not(onto.NonToxicLanguage)]
        comment = "Words that would be blocked in family-friendly context."

    # Defined class: toxic content that belongs to none of the other three categories.
    class ForumContentBlocked(onto.Content):
        equivalent_to = [onto.ToxicLanguage & Not(onto.NonToxicLanguage) & Not(onto.MedicalTerminology) & Not(onto.MinorityGroup)]
        comment = "Words that would be blocked in forums."

    # Types of contexts
    # `blocks.only(X)` is a universal restriction: every `blocks` value must be an X.
    # NOTE(review): the annotation text below contains the typo "witch" (for "which");
    # it is saved with the ontology, so fix it deliberately if desired.
    class FamilyFriendly(onto.Context):
        equivalent_to = [onto.Context & blocks.only(FamilyFriendlyContentBlocked)]
        comment = """Content suitable for children. Excludes all words except those with multiple meanings, at least one of witch is strictly non-toxic (individuals from the  "NonToxicLanguage" classes)."""

    # NOTE(review): the annotation text below contains the typo "meaninig" (for "meaning").
    class Forum(onto.Context):
        equivalent_to = [onto.Context & blocks.only(ForumContentBlocked)]
        comment = """Internet content, typical for discussion forums. Excludes strictly toxic words and phrases (representatives of the "ToxicLanguage" class only), but allows those with other meaninig."""
        

In [120]:
# Sanity check: print every class declared in the ontology, one per line.
for ontology_class in onto.classes():
    print(ontology_class)

toxic-onto-bg.Context
toxic-onto-bg.Content
toxic-onto-bg.ToxicLanguage
toxic-onto-bg.MedicalTerminology
toxic-onto-bg.MinorityGroup
toxic-onto-bg.NonToxicLanguage
toxic-onto-bg.Ambiguous
toxic-onto-bg.FamilyFriendlyContentBlocked
toxic-onto-bg.ForumContentBlocked
toxic-onto-bg.FamilyFriendly
toxic-onto-bg.Forum


In [123]:
# Load the annotated word list; each row is a word plus 0/1 category flags.
file_path = "../data/toxic-words-new.csv"
toxic_words_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as the cell output.
toxic_words_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Word,Toxic,MedicalTerminology,NonToxic,MinorityGroup
0,1,акам,1,0,0,0
1,2,акаме,1,0,0,0
2,3,акат,1,0,0,0
3,4,акате,1,0,0,0
4,5,акото,1,0,0,0


In [124]:
# Create one ontology individual per row of `toxic_words_df` and assert the
# content classes that follow from the row's flag columns
# (`Toxic`, `MedicalTerminology`, `MinorityGroup`, `NonToxic`).
for _row_index, row in toxic_words_df.iterrows():
    word = row['Word']

    # The individual is named after the word itself.
    word_individual = onto.Content(word)
    # NOTE(review): no `text` property is declared in this ontology (only
    # `definition` and `blocks`), so this is presumably a plain Python attribute
    # that is not written by `onto.save()` - confirm whether a label was intended.
    word_individual.text = word

    # Name each flag once so the combined conditions below read as plain logic.
    is_toxic = row['Toxic'] == 1
    is_medical = row['MedicalTerminology'] == 1
    is_minority = row['MinorityGroup'] == 1
    is_non_toxic = row['NonToxic'] == 1
    lacks_non_toxic = row['NonToxic'] == 0

    if is_toxic:
        word_individual.is_a.append(onto.ToxicLanguage)

    if is_medical:
        word_individual.is_a.append(onto.MedicalTerminology)

    if is_minority:
        word_individual.is_a.append(onto.MinorityGroup)

    if is_non_toxic:
        word_individual.is_a.append(onto.NonToxicLanguage)

    if is_toxic and is_non_toxic:
        word_individual.is_a.append(onto.Ambiguous)

    # NB: the following can also be done with reasoning
    # Boolean `and`/`or` are used on purpose. The previous form
    # `(...) & row['NonToxic'] == 0` parsed as `((...) & row['NonToxic']) == 0`
    # because `&` binds tighter than `==`, so a row with none of the three
    # flags set was still classified as blocked.
    if (is_toxic or is_medical or is_minority) and lacks_non_toxic:
        word_individual.is_a.append(onto.FamilyFriendlyContentBlocked)

    if (
        is_toxic
        and row['MedicalTerminology'] == 0
        and row['MinorityGroup'] == 0
        and lacks_non_toxic
    ):
        word_individual.is_a.append(onto.ForumContentBlocked)

In [125]:
# Display the individuals that are instances of MedicalTerminology.
onto.MedicalTerminology.instances()

[toxic-onto-bg.вагина,
 toxic-onto-bg.вагината,
 toxic-onto-bg.вагини,
 toxic-onto-bg.вагините,
 toxic-onto-bg.гърди,
 toxic-onto-bg.гърдите,
 toxic-onto-bg.еякулации,
 toxic-onto-bg.еякулация,
 toxic-onto-bg.еякулирам,
 toxic-onto-bg.еякулират,
 toxic-onto-bg.еякулирате,
 toxic-onto-bg.изпразване,
 toxic-onto-bg.изпразването,
 toxic-onto-bg.мастурбатор,
 toxic-onto-bg.мастурбация,
 toxic-onto-bg.мастурбацията,
 toxic-onto-bg.мастурбирам,
 toxic-onto-bg.мастурбираме,
 toxic-onto-bg.мастурбират,
 toxic-onto-bg.пенис,
 toxic-onto-bg.пениса,
 toxic-onto-bg.пениси,
 toxic-onto-bg.пенисите,
 toxic-onto-bg.седалище,
 toxic-onto-bg.седалището,
 toxic-onto-bg.секс,
 toxic-onto-bg.содомия,
 toxic-onto-bg.сперма,
 toxic-onto-bg.спермата,
 toxic-onto-bg.сперми,
 toxic-onto-bg.спермите,
 toxic-onto-bg.хомо]

In [126]:
# Display the individuals that are instances of MinorityGroup.
onto.MinorityGroup.instances()

[toxic-onto-bg.наци,
 toxic-onto-bg.нацита,
 toxic-onto-bg.нацитата,
 toxic-onto-bg.нацито,
 toxic-onto-bg.негри,
 toxic-onto-bg.негро,
 toxic-onto-bg.негър,
 toxic-onto-bg.педераст,
 toxic-onto-bg.педерасти,
 toxic-onto-bg.хомо,
 toxic-onto-bg.чалми,
 toxic-onto-bg.чалмите]

In [127]:
# Display the individuals that are instances of NonToxicLanguage.
onto.NonToxicLanguage.instances()

[toxic-onto-bg.гърди,
 toxic-onto-bg.гърдите,
 toxic-onto-bg.изпразване,
 toxic-onto-bg.изпразването,
 toxic-onto-bg.кучка,
 toxic-onto-bg.кучката,
 toxic-onto-bg.кучки,
 toxic-onto-bg.кучките,
 toxic-onto-bg.кюнец,
 toxic-onto-bg.кюнци,
 toxic-onto-bg.лесбос,
 toxic-onto-bg.мангал,
 toxic-onto-bg.педал,
 toxic-onto-bg.педали,
 toxic-onto-bg.печка,
 toxic-onto-bg.свирка,
 toxic-onto-bg.свирки,
 toxic-onto-bg.свирките,
 toxic-onto-bg.седалище,
 toxic-onto-bg.седалището,
 toxic-onto-bg.труженичка,
 toxic-onto-bg.труженичката,
 toxic-onto-bg.труженички,
 toxic-onto-bg.труженичките,
 toxic-onto-bg.чалми,
 toxic-onto-bg.чалмите,
 toxic-onto-bg.чернилка,
 toxic-onto-bg.чернилката,
 toxic-onto-bg.чернилки,
 toxic-onto-bg.чернилките,
 toxic-onto-bg.чукам,
 toxic-onto-bg.чукат]

In [128]:
# Display the individuals that are instances of Ambiguous
# (asserted by the population loop for rows flagged both Toxic and NonToxic).
onto.Ambiguous.instances()

[toxic-onto-bg.гърди,
 toxic-onto-bg.гърдите,
 toxic-onto-bg.изпразване,
 toxic-onto-bg.изпразването,
 toxic-onto-bg.кучка,
 toxic-onto-bg.кучката,
 toxic-onto-bg.кучки,
 toxic-onto-bg.кучките,
 toxic-onto-bg.кюнец,
 toxic-onto-bg.кюнци,
 toxic-onto-bg.лесбос,
 toxic-onto-bg.мангал,
 toxic-onto-bg.педал,
 toxic-onto-bg.педали,
 toxic-onto-bg.печка,
 toxic-onto-bg.свирка,
 toxic-onto-bg.свирки,
 toxic-onto-bg.свирките,
 toxic-onto-bg.седалище,
 toxic-onto-bg.седалището,
 toxic-onto-bg.труженичка,
 toxic-onto-bg.труженичката,
 toxic-onto-bg.труженички,
 toxic-onto-bg.труженичките,
 toxic-onto-bg.чалми,
 toxic-onto-bg.чалмите,
 toxic-onto-bg.чернилка,
 toxic-onto-bg.чернилката,
 toxic-onto-bg.чернилки,
 toxic-onto-bg.чернилките,
 toxic-onto-bg.чукам,
 toxic-onto-bg.чукат]

In [129]:
# Display the individuals asserted as blocked in the family-friendly context.
onto.FamilyFriendlyContentBlocked.instances()

[toxic-onto-bg.акам,
 toxic-onto-bg.акаме,
 toxic-onto-bg.акат,
 toxic-onto-bg.акате,
 toxic-onto-bg.акото,
 toxic-onto-bg.анилингус,
 toxic-onto-bg.анилингуси,
 toxic-onto-bg.бозки,
 toxic-onto-bg.бозките,
 toxic-onto-bg.букаке,
 toxic-onto-bg.букакета,
 toxic-onto-bg.букакетата,
 toxic-onto-bg.вагина,
 toxic-onto-bg.вагината,
 toxic-onto-bg.вагини,
 toxic-onto-bg.вагините,
 toxic-onto-bg.втф,
 toxic-onto-bg.глупак,
 toxic-onto-bg.глупаци,
 toxic-onto-bg.глупости,
 toxic-onto-bg.гноемъд,
 toxic-onto-bg.говна,
 toxic-onto-bg.гъз,
 toxic-onto-bg.гъза,
 toxic-onto-bg.гъзобар,
 toxic-onto-bg.гъзоблиз,
 toxic-onto-bg.гъзобрък,
 toxic-onto-bg.гъзове,
 toxic-onto-bg.гъзовете,
 toxic-onto-bg.деба,
 toxic-onto-bg.дееба,
 toxic-onto-bg.дупара,
 toxic-onto-bg.дупарата,
 toxic-onto-bg.дупари,
 toxic-onto-bg.дупарите,
 toxic-onto-bg.дупе,
 toxic-onto-bg.дупета,
 toxic-onto-bg.дупетата,
 toxic-onto-bg.еба,
 toxic-onto-bg.ебан,
 toxic-onto-bg.ебана,
 toxic-onto-bg.ебане,
 toxic-onto-bg.ебано,
 toxic

In [130]:
# Display the individuals asserted as blocked in the forum context.
onto.ForumContentBlocked.instances()

[toxic-onto-bg.акам,
 toxic-onto-bg.акаме,
 toxic-onto-bg.акат,
 toxic-onto-bg.акате,
 toxic-onto-bg.акото,
 toxic-onto-bg.анилингус,
 toxic-onto-bg.анилингуси,
 toxic-onto-bg.бозки,
 toxic-onto-bg.бозките,
 toxic-onto-bg.букаке,
 toxic-onto-bg.букакета,
 toxic-onto-bg.букакетата,
 toxic-onto-bg.втф,
 toxic-onto-bg.глупак,
 toxic-onto-bg.глупаци,
 toxic-onto-bg.глупости,
 toxic-onto-bg.гноемъд,
 toxic-onto-bg.говна,
 toxic-onto-bg.гъз,
 toxic-onto-bg.гъза,
 toxic-onto-bg.гъзобар,
 toxic-onto-bg.гъзоблиз,
 toxic-onto-bg.гъзобрък,
 toxic-onto-bg.гъзове,
 toxic-onto-bg.гъзовете,
 toxic-onto-bg.деба,
 toxic-onto-bg.дееба,
 toxic-onto-bg.дупара,
 toxic-onto-bg.дупарата,
 toxic-onto-bg.дупари,
 toxic-onto-bg.дупарите,
 toxic-onto-bg.дупе,
 toxic-onto-bg.дупета,
 toxic-onto-bg.дупетата,
 toxic-onto-bg.еба,
 toxic-onto-bg.ебан,
 toxic-onto-bg.ебана,
 toxic-onto-bg.ебане,
 toxic-onto-bg.ебано,
 toxic-onto-bg.ебаняк,
 toxic-onto-bg.ебаси,
 toxic-onto-bg.ебат,
 toxic-onto-bg.ебач,
 toxic-onto-bg.

In [131]:
# Mean of every numeric column. For a column holding only 0/1 flags the mean
# is the share of rows that carry the flag.
toxic_words_df.describe().loc['mean']

Unnamed: 0            150.000000
Toxic                   1.000000
MedicalTerminology      0.107023
NonToxic                0.107023
MinorityGroup           0.040134
Name: mean, dtype: float64

In [132]:
# Report how many individuals each direct subclass of Content has.
for content_class in onto.Content.subclasses():
    instance_count = len(content_class.instances())
    print(content_class, instance_count)

toxic-onto-bg.ToxicLanguage 299
toxic-onto-bg.MedicalTerminology 32
toxic-onto-bg.MinorityGroup 12
toxic-onto-bg.NonToxicLanguage 32
toxic-onto-bg.Ambiguous 32
toxic-onto-bg.FamilyFriendlyContentBlocked 267
toxic-onto-bg.ForumContentBlocked 232


In [134]:
# Materialise the direct subclasses of Context so they show as the cell output.
[*onto.Context.subclasses()]

[toxic-onto-bg.FamilyFriendly, toxic-onto-bg.Forum]

In [135]:
# Sanity check: print every property declared in the ontology, one per line.
for ontology_property in onto.properties():
    print(ontology_property)

toxic-onto-bg.definition
toxic-onto-bg.blocks


In [136]:
# Write the ontology (schema plus the individuals added above) to disk.
updated_ontology_path = "../ontology/toxic-language-ontology-with-individuals-new.owl"
onto.save(file=updated_ontology_path)
print("Ontology updated")

Ontology updated
