In [1]:
from owlready2 import get_ontology, Thing, Property
import pandas as pd

# Load the toxic-language ontology; the path is relative to this notebook.
ontology_path = "../ontology/toxic-ontology.owl"
onto = get_ontology(ontology_path).load()

# Quick inventory: print every class the loaded ontology reports, one per line.
for onto_class in onto.classes():
    print(onto_class)


toxic-ontology.Context
toxic-ontology.Content
toxic-ontology.Definition
toxic-ontology.MedicalTerminology
toxic-ontology.MedicalTerminologyDefinition
toxic-ontology.MinorityGroup
toxic-ontology.MinorityGroupDefinition
toxic-ontology.NonToxicLanguage
toxic-ontology.NonToxicLanguageDefinition
toxic-ontology.ToxicLanguage
toxic-ontology.ToxicLanguageDefinition








In [2]:
# Pass 1: for each class, show what `subclasses()` yields and the contents of
# its `is_a` list (labelled "Superclasses" in the output).
for onto_class in onto.classes():
    print(f"Class: {onto_class}")
    children = list(onto_class.subclasses())
    print("Subclasses:", children)
    parents = list(onto_class.is_a)
    print("Superclasses:", parents)
    print()


# Pass 2: for each class, list whatever `instances()` returns for it.
# Kept as a separate loop so the hierarchy dump above is printed in full first.
for onto_class in onto.classes():
    print(f"Class: {onto_class}")
    for member in onto_class.instances():
        print(f" - Individual: {member}")

Class: toxic-ontology.Context
Subclasses: []
Superclasses: [owl.Thing]

Class: toxic-ontology.Content
Subclasses: [toxic-ontology.MedicalTerminology, toxic-ontology.MinorityGroup, toxic-ontology.NonToxicLanguage, toxic-ontology.ToxicLanguage]
Superclasses: [owl.Thing]

Class: toxic-ontology.Definition
Subclasses: [toxic-ontology.MedicalTerminologyDefinition, toxic-ontology.MinorityGroupDefinition, toxic-ontology.NonToxicLanguageDefinition, toxic-ontology.ToxicLanguageDefinition]
Superclasses: [owl.Thing]

Class: toxic-ontology.MedicalTerminology
Subclasses: []
Superclasses: [toxic-ontology.Content]

Class: toxic-ontology.MedicalTerminologyDefinition
Subclasses: []
Superclasses: [toxic-ontology.Definition]

Class: toxic-ontology.MinorityGroup
Subclasses: []
Superclasses: [toxic-ontology.Content]

Class: toxic-ontology.MinorityGroupDefinition
Subclasses: []
Superclasses: [toxic-ontology.Definition]

Class: toxic-ontology.NonToxicLanguage
Subclasses: []
Superclasses: [toxic-ontology.Conte

In [3]:
# Word list: a `Word` column plus one flag column per category
# (Toxic, MedicalTerminology, NonToxic, MinorityGroup).
file_path = "../data/toxic-words.csv"
toxic_words_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows.
toxic_words_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Word,Toxic,MedicalTerminology,NonToxic,MinorityGroup
0,1,акам,1,0,0,0
1,2,акаме,1,0,0,0
2,3,акат,1,0,0,0
3,4,акате,1,0,0,0
4,5,акото,1,0,0,0


In [4]:
# Short aliases for ontology entities, looked up by name on `onto`.

# The four word categories (listed as subclasses of Content above).
ToxicLanguage, MedicalTerminology, MinorityGroup, NonToxicLanguage = (
    onto.ToxicLanguage,
    onto.MedicalTerminology,
    onto.MinorityGroup,
    onto.NonToxicLanguage,
)

# The other two top-level classes.
Definition, Context = onto.Definition, onto.Context

# Presumably the properties linking a word to its definition and a context to
# its words; both are used as attributes on individuals further down.
hasDefinition, contains = onto.hasDefinition, onto.contains

In [5]:
# Definition entities that words are linked to through `hasDefinition`.
toxic_language_def = onto.ToxicLanguageDefinition
medical_term_def = onto.MedicalTerminologyDefinition
minority_group_def = onto.MinorityGroupDefinition
non_toxic_word_def = onto.NonToxicLanguageDefinition

# Context entities whose `contains` lists are filled when the words are loaded.
ambiguous_context = onto.Ambiguous
official_context = onto.Official
family_friendly_context = onto.FamilyFriendly
forum_context = onto.Forum
all_language_context = onto.AllLanguage

# Owlready2 resolves an unknown entity name to None instead of raising, so a
# missing or misspelled name would otherwise only show up later as an
# AttributeError on None (or as a None value stored on an individual).
# Fail here, naming exactly what is missing.
_required_entities = {
    "ToxicLanguageDefinition": toxic_language_def,
    "MedicalTerminologyDefinition": medical_term_def,
    "MinorityGroupDefinition": minority_group_def,
    "NonToxicLanguageDefinition": non_toxic_word_def,
    "Ambiguous": ambiguous_context,
    "Official": official_context,
    "FamilyFriendly": family_friendly_context,
    "Forum": forum_context,
    "AllLanguage": all_language_context,
}
_missing_entities = [name for name, entity in _required_entities.items() if entity is None]
if _missing_entities:
    raise LookupError(
        "Entities not found in the loaded ontology: " + ", ".join(_missing_entities)
    )

In [6]:

# Populate the ontology from the word list: one `Content` individual per CSV
# row, typed by its category flags, linked to every context that admits it,
# then saved to a new OWL file.
#
# The cell is safe to re-run and tolerates a word that appears more than once
# in the CSV: `onto.Content(word)` hands back the existing individual for a
# known name, so every class / definition / context link is added only when it
# is not already present.

# (flag column, Content subclass, definition) for each category, in the order
# the links are applied.
category_specs = (
    ('Toxic', onto.ToxicLanguage, toxic_language_def),
    ('MedicalTerminology', onto.MedicalTerminology, medical_term_def),
    ('MinorityGroup', onto.MinorityGroup, minority_group_def),
    ('NonToxic', onto.NonToxicLanguage, non_toxic_word_def),
)

# Per-context set of individuals already in `contains`, filled lazily on first
# use so the duplicate check stays O(1) per word instead of scanning the list.
context_members = {}


def _link_to_context(context, individual):
    """Append `individual` to `context.contains` unless it is already listed."""
    members = context_members.get(context)
    if members is None:
        members = context_members[context] = set(context.contains)
    if individual not in members:
        members.add(individual)
        context.contains.append(individual)


skipped_rows = 0

for _, row in toxic_words_df.iterrows():
    word = row['Word']

    # An empty CSV cell is read as NaN (a float) and would otherwise become an
    # individual literally named "nan"; blank strings make unusable names too.
    if not isinstance(word, str) or not word.strip():
        skipped_rows += 1
        continue

    word_individual = onto.Content(word)
    word_individual.text = word

    # Type the word and attach the matching definition for every raised flag.
    for flag_column, content_class, definition in category_specs:
        if row[flag_column] == 1:
            if content_class not in word_individual.is_a:
                word_individual.is_a.append(content_class)
            if definition not in word_individual.hasDefinition:
                word_individual.hasDefinition.append(definition)

    # `== 0` and `== 1` are tested separately on purpose: a flag holding any
    # other value (e.g. a missing one) satisfies neither, as in the original.
    is_toxic = row['Toxic'] == 1
    is_not_toxic = row['Toxic'] == 0
    is_medical = row['MedicalTerminology'] == 1
    is_not_medical = row['MedicalTerminology'] == 0
    is_minority = row['MinorityGroup'] == 1
    has_non_toxic_use = row['NonToxic'] == 1

    # Every word belongs to the all-language context.
    _link_to_context(all_language_context, word_individual)

    # Flagged both toxic and non-toxic: meaning depends on usage.
    if is_toxic and has_non_toxic_use:
        _link_to_context(ambiguous_context, word_individual)

    # blocks all toxic language, including ambiguous meaning - ex. family-friendly content
    if has_non_toxic_use and is_not_toxic:
        _link_to_context(family_friendly_context, word_individual)

    # block toxic language AND medical terms - ex. official content
    # NOTE(review): the second clause admits toxic words that also carry a
    # NonToxic or MinorityGroup flag, and the third admits medical terms with
    # those flags, which is broader than the comment above - confirm intended.
    non_toxic_or_minority = has_non_toxic_use or is_minority
    if (
        (is_not_toxic and is_not_medical)
        or (is_toxic and non_toxic_or_minority)
        or (is_not_toxic and non_toxic_or_minority)
    ):
        _link_to_context(official_context, word_individual)

    # block toxic language EXCEPT (medical terms AND minority groups and ambiguous) - forums
    if (is_toxic and (is_medical or is_minority or has_non_toxic_use)) or is_not_toxic:
        _link_to_context(forum_context, word_individual)

if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) without a usable word")

# Disabled alternative kept from the original notebook:
# Moves the hasDefinition up to the class level(defines that it need to have it)
# ToxicLanguage.hasDefinition.append(toxic_language_def)
# MedicalTerminology.hasDefinition.append(medical_term_def)
# MinorityGroup.hasDefinition.append(minority_group_def)
# NonToxicLanguage.hasDefinition.append(non_toxic_word_def)

# Save the updated ontology
updated_ontology_path = "./toxic-language-ontology-with-individuals.owl"
onto.save(file=updated_ontology_path)
print("Ontology updated")

Ontology updated and saved successful
