# Inference Pipeline

In [1]:
import joblib
import owlready2 as owl
import pandas as pd
import torch

from modules.emotion import get_senticnet_response
from modules.engine import COGN_FUNC
from modules.model_builder import BaseModel
from modules.ontology_classes import CogafInstance
from modules.text_processing import (
    get_event,
    get_most_important_words,
    load_text_files,
    normalize,
)





## Load and process texts

In [2]:
# Read the raw reports; despite its name, `text_list` is a mapping of file name -> text
text_list = load_text_files('./../data/text/raw/')

# One row per report: the file name next to its raw text
df = pd.DataFrame(list(text_list.items()), columns=["file", "text"])
df.head()

Unnamed: 0,file,text
0,00_Caida piso mojado.txt,Lecciones Aprendidas\n\nAccidente de trabajo\n...
1,01_Caída de altura.txt,LECCIONES APRENDIDAS\n\nTipo de Accidente: Caí...
2,02_ auxiliar_trafico_aprisionado_vehiculo.txt,Auxiliar de tráfico (Paletero - Señalelo) apri...
3,03_Caida de alturas_Lesiones Múltiples.txt,Lecciones aprendidas\n\nCaida de alturas \nLes...
4,04_Caida_alturas_montaje_estructura.txt,LECCIONES APRENDIDAS\n\nCaída de alturas en mo...


In [3]:
# Load the serialized preprocessing pipeline (joblib is imported in the top import cell).
# SECURITY: joblib.load unpickles the file and can therefore execute arbitrary code;
# only load artifacts that come from a trusted source.
MODELS_PATH = "./models/"
processor = joblib.load(f"{MODELS_PATH}preprocessor.pk")

In [4]:
# The pipeline is applied step by step: step 0 cleans the raw text, step 1 turns it
# into TF-IDF features. The cleaned text is stored on the frame because the
# emotion-recognition section needs it as well.
df["clean_text"] = processor[0].transform(df["text"])

# TF-IDF matrix of the cleaned documents
tfidf_matrix = processor[1].transform(df["clean_text"])

# NOTE: the remaining pipeline steps (processor[2:], PCA dimensionality reduction)
# are not applied; the classifier below is fed the TF-IDF features directly.

In [5]:
# Densify the TF-IDF matrix into a frame so the feature values can be inspected
X = pd.DataFrame(data=tfidf_matrix.toarray())
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2583,2584,2585,2586,2587,2588,2589,2590,2591,2592
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.069118,0.0,0.0,0.0


## Cognitive Function Inference

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [7]:
# Build the model input from the TF-IDF matrix instead of from `X` itself.
# The previous form (`X = torch.from_numpy(X.to_numpy())`) rebound `X` from a
# DataFrame to a tensor, so running the cell a second time raised AttributeError.
# Deriving it from `tfidf_matrix` yields the same float32 tensor and is idempotent.
X = torch.as_tensor(tfidf_matrix.toarray(), dtype=torch.float)

# Input width follows the TF-IDF vocabulary size; the classifier has 5 output classes
in_features, out_features = X.shape[1], 5

In [8]:
# Rebuild the architecture the checkpoint expects (hidden width 50)
clf = BaseModel(in_features, 50, out_features)

# Load the trained weights. `map_location` remaps the stored tensors onto the
# selected device, so a checkpoint saved from a GPU session still loads on a
# CPU-only machine (without it torch.load tries to restore onto the original device).
clf.load_state_dict(torch.load(MODELS_PATH + "mlp.pth", map_location=device))

# Model and inputs must be on the same device for the forward pass
clf.to(device)

BaseModel(
  (linear_stack): Sequential(
    (0): Linear(in_features=2593, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=5, bias=True)
  )
)

In [9]:
# Evaluation mode: disables training-only layer behaviour before scoring
clf.eval()

# No autograd bookkeeping is needed for pure inference
with torch.inference_mode():
    logits = clf(X.to(device))
    # Highest-scoring class per document, moved back to the CPU for later indexing
    y_pred = torch.argmax(logits, dim=1).cpu()

In [10]:
# Predicted class index for each document (argmax over the model logits)
y_pred

tensor([0, 2, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2,
        2, 1, 1, 3, 3, 1, 2, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 2,
        2, 1, 0, 2, 0, 0, 0, 1, 1, 1, 1, 1, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 2, 2])

In [11]:
# Translate each predicted class index into its cognitive-function name
pred = [COGN_FUNC[class_idx] for class_idx in y_pred.tolist()]
pred

['Attention',
 'Perception',
 'WorkingMemory',
 'Perception',
 'CognitiveFlexibility',
 'WorkingMemory',
 'CognitiveFlexibility',
 'Perception',
 'WorkingMemory',
 'Perception',
 'CognitiveFlexibility',
 'Perception',
 'Perception',
 'Perception',
 'Attention',
 'Attention',
 'Perception',
 'Perception',
 'Perception',
 'Attention',
 'Attention',
 'Attention',
 'Attention',
 'Perception',
 'Perception',
 'WorkingMemory',
 'WorkingMemory',
 'CognitiveFlexibility',
 'CognitiveFlexibility',
 'WorkingMemory',
 'Perception',
 'Attention',
 'Attention',
 'Attention',
 'Perception',
 'Perception',
 'Perception',
 'Attention',
 'Perception',
 'Attention',
 'Attention',
 'Attention',
 'Attention',
 'Attention',
 'Perception',
 'Perception',
 'Attention',
 'Perception',
 'Perception',
 'WorkingMemory',
 'Attention',
 'Perception',
 'Attention',
 'Attention',
 'Attention',
 'WorkingMemory',
 'WorkingMemory',
 'WorkingMemory',
 'WorkingMemory',
 'WorkingMemory',
 'Attention',
 'InhibitoryControl',

## Emotion Recognition

### With 200 most important words as input

In [14]:
# Dense TF-IDF scores and the vocabulary term behind each column
tfidf_scores = tfidf_matrix.toarray()
feature_names = processor[1].get_feature_names_out()

# Keep the 200 highest-ranked words of every document (keyed by file name)
important_words_per_doc = get_most_important_words(
    feature_names,
    tfidf_scores,
    df["file"],
    200,
)


In [15]:
# One record per document: the predicted cognitive function merged with the
# SenticNet response obtained for that document's top-ranked words.
results_with_top_words = [
    {
        "cognitive_function": COGN_FUNC[y_pred[doc_idx]],
        **get_senticnet_response(important_words_per_doc[doc_idx]["words"]),
    }
    for doc_idx in range(len(important_words_per_doc))
]

### With entire clean text as input

In [16]:
# Same record layout as the top-words run, but SenticNet receives the whole
# cleaned text of each document.
results = [
    {
        "cognitive_function": COGN_FUNC[y_pred[doc_idx]],
        **get_senticnet_response(df.clean_text[doc_idx]),
    }
    for doc_idx in range(len(df.clean_text))
]

In [17]:
# Spot-check one record from the full-text run (document at position 5)
results[5]

{'cognitive_function': 'WorkingMemory',
 'status_code': 200,
 'content': 'ecstasy (83.65%) & calmness (31.58%) [INTROSPECTION=93.61%,TEMPER=43.42%,ATTITUDE=-3.12%,SENSITIVITY=-23.61%]\n',
 'introspection': {'value': 0.9361, 'emotion': 'Ecstasy'},
 'temper': {'value': 0.43420000000000003, 'emotion': 'Calmness'},
 'attitude': {'value': -0.031200000000000002, 'emotion': 'Dislike'},
 'sensitivity': {'value': -0.2361, 'emotion': 'Anxiety'}}

In [18]:
# Same document from the top-words run, for comparison with the full-text result
results_with_top_words[5]

{'cognitive_function': 'WorkingMemory',
 'status_code': 200,
 'content': 'ecstasy (75.29%) & bliss (4.81%) [INTROSPECTION=90.84%,TEMPER=67.59%,ATTITUDE=-1.27%,SENSITIVITY=-11.21%]\n',
 'introspection': {'value': 0.9084, 'emotion': 'Ecstasy'},
 'temper': {'value': 0.6759000000000001, 'emotion': 'Bliss'},
 'attitude': {'value': -0.0127, 'emotion': 'Dislike'},
 'sensitivity': {'value': -0.1121, 'emotion': 'Anxiety'}}

## Ontology components inference

In [19]:
# Normalised copy of the raw text (`normalize` is imported in the top import cell).
# NOTE(review): the two positional False flags presumably switch off optional
# normalisation steps; confirm against modules.text_processing.normalize.
df["norm_text"] = [normalize(raw_text, False, False) for raw_text in df["text"]]
df.head()

Unnamed: 0,file,text,clean_text,norm_text
0,00_Caida piso mojado.txt,Lecciones Aprendidas\n\nAccidente de trabajo\n...,leccion aprendido accidente trabajo caida piso...,Lecciones Aprendidas Accidente de trabajo Caíd...
1,01_Caída de altura.txt,LECCIONES APRENDIDAS\n\nTipo de Accidente: Caí...,leccion aprendido tipo accidente caida altura ...,LECCIONES APRENDIDAS Tipo de Accidente: Caída ...
2,02_ auxiliar_trafico_aprisionado_vehiculo.txt,Auxiliar de tráfico (Paletero - Señalelo) apri...,auxiliar trafico paletero senalelo aprisionado...,Auxiliar de tráfico (Paletero - Señalelo) apri...
3,03_Caida de alturas_Lesiones Múltiples.txt,Lecciones aprendidas\n\nCaida de alturas \nLes...,leccion aprendido caida altura lesion multiple...,Lecciones aprendidas Caida de alturas Lesiones...
4,04_Caida_alturas_montaje_estructura.txt,LECCIONES APRENDIDAS\n\nCaída de alturas en mo...,leccion aprendido caida altura montaje estruct...,LECCIONES APRENDIDAS Caída de alturas en monta...


In [20]:
# Run get_event over every normalised report (one result per document)
events = list(map(get_event, df["norm_text"]))

In [21]:
# Load the ontology from its RDF file (owlready2 is imported as `owl` in the top import cell)
onto = owl.get_ontology("./../ontology/Hourglass_COGAF_Ontology.rdf").load()

In [22]:
# Create a test Emotion individual from the first document's top-words result,
# copying the numeric value of each of the four reported dimensions.
emotion_instance = onto.Emotion(
    "test_emotion",
    **{
        dimension: results_with_top_words[0][dimension]["value"]
        for dimension in ("introspection", "temper", "attitude", "sensitivity")
    },
)

In [23]:
# Read the value back through the ontology to confirm the individual was created
onto.test_emotion.introspection

-0.613