# Inference Pipeline

In [1]:
import pandas as pd
import torch
from modules.ontology_classes import CogafInstance, set_ontology
from modules.emotion import get_senticnet_response
from modules.engine import COGN_FUNC
from modules.model_builder import BaseModel
from modules.text_processing import load_text_files, get_most_important_words, get_event, get_capabilities





## Load and process texts

In [2]:
# Read the raw reports; the result is consumed as a mapping of file name -> text.
text_list = load_text_files('./../data/text/raw/')

# One row per document: the mapping's keys become "file", its values "text".
df = pd.DataFrame(list(text_list.items()), columns=["file", "text"])
df.head()

Unnamed: 0,file,text
0,00_Caida piso mojado.txt,Lecciones Aprendidas\n\nAccidente de trabajo\n...
1,01_Caída de altura.txt,LECCIONES APRENDIDAS\n\nTipo de Accidente: Caí...
2,02_ auxiliar_trafico_aprisionado_vehiculo.txt,Auxiliar de tráfico (Paletero - Señalelo) apri...
3,03_Caida de alturas_Lesiones Múltiples.txt,Lecciones aprendidas\n\nCaida de alturas \nLes...
4,04_Caida_alturas_montaje_estructura.txt,LECCIONES APRENDIDAS\n\nCaída de alturas en mo...


In [3]:
# Restore the fitted preprocessing pipeline from disk.
# NOTE(review): joblib artifacts are pickle-based, so only load files from a trusted source.
import joblib

MODELS_PATH = "./models/"
processor = joblib.load(f"{MODELS_PATH}preprocessor.pk")

In [4]:
# Step 0 of the saved pipeline cleans the raw text. The cleaned text is kept on
# the frame because emotion recognition needs it.
text_cleaner = processor[0]
df["clean_text"] = text_cleaner.transform(df["text"])

# Step 1 converts the cleaned text into the TF-IDF matrix.
tfidf_vectorizer = processor[1]
tfidf_matrix = tfidf_vectorizer.transform(df["clean_text"])

# The remaining pipeline steps (PCA for dimensionality reduction) are currently disabled:
# X_pca = pd.DataFrame(processor[2:].transform(tfidf_matrix))

In [5]:
# Densify the TF-IDF matrix into a DataFrame (rows follow the order of `df`).
X = pd.DataFrame(data=tfidf_matrix.toarray())
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2583,2584,2585,2586,2587,2588,2589,2590,2591,2592
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.069118,0.0,0.0,0.0


## Cognitive Function Inference

In [6]:
# Device-agnostic setup: use the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [7]:
# Build the model input directly from the TF-IDF matrix instead of converting
# the DataFrame `X` in place. The previous form rebound `X` from a DataFrame to
# a tensor, so running the cell a second time failed (tensors have no
# `.to_numpy`). The values are the same: `X` was created from this very matrix.
X = torch.from_numpy(tfidf_matrix.toarray()).type(torch.float)

# Input width is the number of TF-IDF columns; the classifier has 5 output classes.
in_features, out_features = X.shape[1], 5

In [8]:
# Rebuild the classifier: input width, a hidden width of 50 and the number of
# output classes. The layer shapes must match the saved checkpoint for
# load_state_dict to succeed.
clf = BaseModel(in_features, 50, out_features)

# Load model state dict.
# map_location remaps tensors that were saved from a GPU onto the current
# device, so the checkpoint also loads on CPU-only machines.
state_dict = torch.load("./models/mlp.pth", map_location=device)
clf.load_state_dict(state_dict)

# Put model on the target device (model and input data must live on the same device to predict).
clf.to(device)

BaseModel(
  (linear_stack): Sequential(
    (0): Linear(in_features=2593, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=5, bias=True)
  )
)

In [9]:
# Evaluation mode for the module, and no autograd tracking during the forward pass.
clf.eval()
with torch.inference_mode():
    logits = clf(X.to(device))
    # The index of the largest logit in each row is the predicted class;
    # the result is moved back to the CPU for the following cells.
    y_pred = torch.argmax(logits, dim=1).cpu()

In [10]:
# Display the predicted class index of every document.
y_pred

tensor([0, 2, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2,
        2, 1, 1, 3, 3, 1, 2, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 2,
        2, 1, 0, 2, 0, 0, 0, 1, 1, 1, 1, 1, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 2, 2])

In [11]:
# Translate each predicted class index into its cognitive-function name.
pred = [COGN_FUNC[class_idx] for class_idx in y_pred.tolist()]
pred[:5]

['Attention',
 'Perception',
 'WorkingMemory',
 'Perception',
 'CognitiveFlexibility']

## Emotion Recognition

### With 150 most important words as input

In [12]:
# Dense per-document TF-IDF scores and the vocabulary terms of the vectorizer step.
tfidf_scores = tfidf_matrix.toarray()
feature_names = processor[1].get_feature_names_out()

important_words_per_doc = get_most_important_words(
    feature_names,
    tfidf_scores,
    df["file"],
    150,  # number of most important words requested per document
)
# NOTE(review): the sample below ends in numeric tokens ("00 000 009 ..."), which
# suggests zero-score terms pad the list for short documents — confirm in the helper.
important_words_per_doc[0]


{'doc': '00_Caida piso mojado.txt',
 'words': 'piso companero mojado trabajo limpieza caida senalizacion comunicacion evitar alcance antebrazo antideslizante asertivo camin codo contribuir escoba evidenciado fluido funcionario generara limpiar liquido meta nuevamente regresara removedor suela trapero ugc universidad vea verter utilizar espalda mirar accidente elemento bajar causo guarda objetivo advertencia comunicar continuar organizacional cabeza limpio prestar encontrar desplazamiento direccion ocurrir reportar peligro ausencia calzado ocurrencia realizar deber madera paso realizacion actividad golpe mantener ocasionar cumplimiento ubicado labor verificar proteccion condicion momento instalacion adecuado desarrollo atencion inseguro falta pasar consecuencia leccion personal aprendido 00 000 009 0099 00pm 03 030 0312 04 05 055 07 08 09 10 100 1000 103 106 1072 108 11 1100 118 12 120 123 13 13200 14 1401 1409 141 142 146 15 16 164 17 170 171 175 18 180 182 187 1886 19 1950 20 200 2001

In [13]:
# One result record per document: file name, predicted cognitive function and
# the SenticNet response for that document's most important words.
results_with_top_words = []
for idx, doc_entry in enumerate(important_words_per_doc):
    results_with_top_words.append(
        {
            "file": doc_entry["doc"],
            "cognitive_function": COGN_FUNC[y_pred[idx]],
            "emotion": get_senticnet_response(doc_entry["words"]),
        }
    )

results_with_top_words[5]

{'file': '05_Caida_trabajador_foso_ascensor.txt',
 'cognitive_function': 'WorkingMemory',
 'emotion': {'status_code': 200,
  'content': 'ecstasy (75.29%) & bliss (4.81%) [INTROSPECTION=90.84%,TEMPER=67.59%,ATTITUDE=-1.27%,SENSITIVITY=-11.21%]\n',
  'introspection': {'value': 0.9084, 'emotion': 'Ecstasy'},
  'temper': {'value': 0.6759000000000001, 'emotion': 'Bliss'},
  'attitude': {'value': -0.0127, 'emotion': 'Dislike'},
  'sensitivity': {'value': -0.1121, 'emotion': 'Anxiety'}}}

### With entire clean text as input

In [14]:
# Disabled alternative: send the entire clean text of each document to SenticNet
# instead of only its most important words. Unlike the top-words variant, this
# version merges the response keys into the record itself rather than nesting
# them under an "emotion" key.
# results = []
# for i in range(len(df.clean_text)):
#     result = {"cognitive_function": COGN_FUNC[y_pred[i]]}
#     result.update(get_senticnet_response(df.clean_text[i]))
#     results.append(result)
# results[5]


## Event and Capabilities extraction

In [15]:
from modules.text_processing import normalize

# Normalized copy of the raw text, used below for event and capability extraction.
# NOTE(review): the meaning of the two False flags is defined by `normalize`;
# judging by the output they leave case and accents intact — confirm in the module.
df["norm_text"] = df["text"].map(lambda raw_text: normalize(raw_text, False, False))
df.head()

Unnamed: 0,file,text,clean_text,norm_text
0,00_Caida piso mojado.txt,Lecciones Aprendidas\n\nAccidente de trabajo\n...,leccion aprendido accidente trabajo caida piso...,Lecciones Aprendidas Accidente de trabajo Caíd...
1,01_Caída de altura.txt,LECCIONES APRENDIDAS\n\nTipo de Accidente: Caí...,leccion aprendido tipo accidente caida altura ...,LECCIONES APRENDIDAS Tipo de Accidente: Caída ...
2,02_ auxiliar_trafico_aprisionado_vehiculo.txt,Auxiliar de tráfico (Paletero - Señalelo) apri...,auxiliar trafico paletero senalelo aprisionado...,Auxiliar de tráfico (Paletero - Señalelo) apri...
3,03_Caida de alturas_Lesiones Múltiples.txt,Lecciones aprendidas\n\nCaida de alturas \nLes...,leccion aprendido caida altura lesion multiple...,Lecciones aprendidas Caida de alturas Lesiones...
4,04_Caida_alturas_montaje_estructura.txt,LECCIONES APRENDIDAS\n\nCaída de alturas en mo...,leccion aprendido caida altura montaje estruct...,LECCIONES APRENDIDAS Caída de alturas en monta...


In [16]:
# Enrich every result record with the event description and the capabilities
# extracted from the matching normalized text (records and rows share the same order).
for idx, norm_text in enumerate(df.norm_text):
    record = results_with_top_words[idx]
    record["event"] = get_event(norm_text)
    record["capabilities"] = get_capabilities(norm_text)


In [17]:
# Inspect the complete record of the first document (cognitive function, emotion, event, capabilities).
results_with_top_words[0]

{'file': '00_Caida piso mojado.txt',
 'cognitive_function': 'Attention',
 'emotion': {'status_code': 200,
  'content': 'delight (24.92%) & serenity (-61.21%) [INTROSPECTION=-61.3%,TEMPER=-20.2%,ATTITUDE=74.22%,SENSITIVITY=-48.0%]\n',
  'introspection': {'value': -0.613, 'emotion': 'Sadness'},
  'temper': {'value': -0.20199999999999999, 'emotion': 'Annoyance'},
  'attitude': {'value': 0.7422, 'emotion': 'Delight'},
  'sensitivity': {'value': -0.48, 'emotion': 'Fear'}},
 'event': '¿Qué pasó? Un funcionario de la UGC y su compañero de trabajo se encontraban realizando la limpieza de un piso de madera en las instalaciones de la Universidad. Mientras uno de ellos bajaba al primer piso por un trapero limpio, su compañero de trabajo vertió removedor en los guarda- escobas, ocasionando que el desplazamiento del líquido generara un peligro de caída al momento en que este regresara para continuar la actividad de limpieza.',
 'capabilities': ['comunicación']}

In [18]:
import owlready2 as owl

# Load the base Hourglass/COGAF ontology from its RDF file.
ONTOLOGY_PATH = "./../ontology/Hourglass_COGAF_Ontology.rdf"
onto = owl.get_ontology(ONTOLOGY_PATH).load()

In [19]:
# Keep a handle on the first result record.
# NOTE(review): `instance` is not referenced by any of the visible cells — confirm it is still needed.
instance = results_with_top_words[0]

In [20]:
# Hand the loaded ontology to the ontology helper module.
set_ontology(onto)

# Wrap each result record in a CogafInstance and let it populate the ontology.
for record in results_with_top_words:
    CogafInstance(record).populate_ontology()

In [21]:
# List the ontology entities matching is_a=Capability (the class itself is the first hit).
onto.search(is_a=onto.Capability)

[Hourglass_COGAF_Ontology.Capability, Hourglass_COGAF_Ontology.Comunicate, Hourglass_COGAF_Ontology.Focusing, Hourglass_COGAF_Ontology.Remember, Hourglass_COGAF_Ontology.ComprehendEmotions, Hourglass_COGAF_Ontology.ProcessVisualInfo, Hourglass_COGAF_Ontology.comunicación, Hourglass_COGAF_Ontology.fracción, Hourglass_COGAF_Ontology.evaluación, Hourglass_COGAF_Ontology.supervisión, Hourglass_COGAF_Ontology.identificación, Hourglass_COGAF_Ontology.contar, Hourglass_COGAF_Ontology.seguimiento, Hourglass_COGAF_Ontology.control, Hourglass_COGAF_Ontology.planeación, Hourglass_COGAF_Ontology.planificación, Hourglass_COGAF_Ontology.inspección, Hourglass_COGAF_Ontology.evacuación, Hourglass_COGAF_Ontology.relación, Hourglass_COGAF_Ontology.concentración, Hourglass_COGAF_Ontology.clasificación, Hourglass_COGAF_Ontology.acción, Hourglass_COGAF_Ontology.reducción, Hourglass_COGAF_Ontology.confianza, Hourglass_COGAF_Ontology.lección, Hourglass_COGAF_Ontology.autocuidado, Hourglass_COGAF_Ontology.sup

In [22]:
# List the ontology entities matching is_a=Event (the class itself is the first hit).
onto.search(is_a=onto.Event)

[Hourglass_COGAF_Ontology.Event, Hourglass_COGAF_Ontology.00_Caida piso mojado, Hourglass_COGAF_Ontology.01_Caída de altura, Hourglass_COGAF_Ontology.02_ auxiliar_trafico_aprisionado_vehiculo, Hourglass_COGAF_Ontology.03_Caida de alturas_Lesiones Múltiples, Hourglass_COGAF_Ontology.04_Caida_alturas_montaje_estructura, Hourglass_COGAF_Ontology.05_Caida_trabajador_foso_ascensor, Hourglass_COGAF_Ontology.12_Explosión mina subterranea de carbón_samaca_boyaca_4_febrero_2022, Hourglass_COGAF_Ontology.13_Explosión mina subterranea de carbón_tasco_Boyaca_26_02_2022, Hourglass_COGAF_Ontology.14_Explosion_mina mestiza_el zulia_NS_20-05-2021, Hourglass_COGAF_Ontology.20_Accidente por inundacion_socota_boyaca_19_04_2017, Hourglass_COGAF_Ontology.21_Accidente minero electrico_Barrancas_Guajira_2017, Hourglass_COGAF_Ontology.22_Accidente minero electrico_Socha_Boyaca_2018, Hourglass_COGAF_Ontology.23_Accidente minero por Inundacion _Amaga_Antioquia__2014, Hourglass_COGAF_Ontology.24_Accidente minero

In [23]:
# Spot-check one populated Emotion entity by reading its `attitude` value.
# NOTE(review): index 1 presumably skips the Emotion class itself at index 0,
# as in the Capability/Event searches — confirm.
onto.search(is_a=onto.Emotion)[1].attitude

0.7422

In [30]:
import os
import shutil

import owlready2
from owlready2 import sync_reasoner, OwlReadyInconsistentOntologyError

# Locate the Java executable needed by the HermiT reasoner without tying the
# notebook to a single machine. Resolution order:
#   1. the JAVA_EXE environment variable (explicit override),
#   2. the originally configured install location, when it exists,
#   3. whatever `java` is found on PATH.
# If none is found, Owlready2's own default is left untouched.
_ORIGINAL_JAVA_EXE = r"C:\java\java-21\jdk-21.0.2\bin\java.exe"

java_exe = os.environ.get("JAVA_EXE")
if not java_exe and os.path.isfile(_ORIGINAL_JAVA_EXE):
    java_exe = _ORIGINAL_JAVA_EXE
if not java_exe:
    java_exe = shutil.which("java")
if java_exe:
    owlready2.JAVA_EXE = java_exe  # Path to java

# Run the reasoner on the populated ontology; an inconsistent ontology is
# reported instead of aborting the notebook.
try:
    with onto:
        sync_reasoner()
except OwlReadyInconsistentOntologyError:
    print("Error! Inconsistent ontology.")

* Owlready2 * Running HermiT...
    C:\java\java-21\jdk-21.0.2\bin\java.exe -Xmx2000M -cp c:\Users\alejo\anaconda3\Lib\site-packages\owlready2\hermit;c:\Users\alejo\anaconda3\Lib\site-packages\owlready2\hermit\HermiT.jar org.semanticweb.HermiT.cli.CommandLine -c -O -D -I file:///C:/Users/alejo/AppData/Local/Temp/tmpfe6wnyqi
* Owlready2 * HermiT took 1.5726327896118164 seconds
* Owlready * (NB: only changes on entities loaded in Python are shown, other changes are done but not listed)


In [24]:
import os

# Make sure the target folder exists before writing; saving into a missing
# directory would otherwise fail.
os.makedirs("./output", exist_ok=True)
onto.save("./output/Populated_Ontology.rdf")

In [25]:
# Show the name of the loaded ontology.
onto.name

'Hourglass_COGAF_Ontology'