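# Inference Pipeline

This notebook loads raw text files, applies a saved preprocessing pipeline, predicts a cognitive function for each document with a saved classifier, queries SenticNet for emotions, and builds a `CogafInstance` ontology object from each successful result.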

In [1]:
import joblib
import pandas as pd

from modules.ontology_classes import CogafInstance
from modules.emotion import get_senticnet_response
from modules.engine import COGN_FUNC
from modules.text_processing import load_text_files, get_most_important_words





## Load and Process Texts

In [2]:
# Read the raw documents; keys become the "file" column and values the "text" column.
text_list = load_text_files('./../data/text/raw/')

# Materialize both views so the frame is built from plain lists, one row per document.
file_names = list(text_list.keys())
raw_texts = list(text_list.values())
df = pd.DataFrame({"file": file_names, "text": raw_texts})
df.head()

Unnamed: 0,file,text
0,0000_000_999-accidentes-trabajo.txt,Lecciones Aprendidas\n\nAccidente de trabajo\n...
1,31-5_Caida_desde_escalera_de_silo.txt,LECCIONES APRENDIDAS\n\nTipo de Accidente: Caí...
2,012015-Lecciones-aprendidas.txt,Descripción de caso\n\nEl 08 de octubre de 201...
3,auxiliar_trafico_aprisionado_vehiculo.txt,Auxiliar de tráfico (Paletero - Señalelo) apri...
4,caida_alturas.txt,Lecciones aprendidas\n\nCaida de alturas \nLes...


In [3]:
# Load the saved preprocessing pipeline used by the cells below.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that come from a trusted source.
MODELS_PATH = "./models/"
processor = joblib.load(MODELS_PATH + "preprocessor.pk")

In [4]:
# Split the saved pipeline into its first step, its second step, and the remainder.
text_cleaner = processor[0]
vectorizer = processor[1]
reducer = processor[2:]

# The cleaned text is stored on `df` because emotion recognition needs it later.
df["clean_text"] = text_cleaner.transform(df["text"])

# TF-IDF matrix of the cleaned documents.
tfidf_matrix = vectorizer.transform(df["clean_text"])

# The remaining steps reduce dimensionality (PCA, per the original author's note —
# TODO confirm against the saved pipeline); the result is wrapped in a DataFrame.
X = pd.DataFrame(reducer.transform(tfidf_matrix))

In [5]:
# Preview the first rows of the transformed feature matrix.
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,0.01839,0.153832,-0.036187,-0.096114,0.000189,0.008908,0.025292,-0.015457,-0.00255,0.051419,...,0.00452,-0.021593,0.034531,0.036302,0.009767,-0.002026,-0.014995,-0.014123,-0.044726,-0.007895
1,-0.026766,0.421393,-0.068882,0.105197,0.042154,0.029687,0.110361,-0.105812,-0.130303,0.160647,...,0.00557,-0.002325,-0.000482,-0.014414,-0.022651,-0.008666,-0.001102,0.011631,-0.00215,0.002202
2,0.011969,0.184345,-0.021444,-0.121609,-0.055329,-0.001112,0.37544,-0.127316,-0.15134,-0.158765,...,0.004352,0.011736,0.001587,0.00504,0.001057,0.003236,0.006027,-0.008859,-0.001709,0.000355
3,-0.081711,0.261403,-0.040354,-0.218305,0.017647,-0.012647,0.264102,0.090078,-0.115739,-0.180781,...,-0.002372,-0.005515,0.006456,0.006268,-0.002655,-0.004133,-0.000517,0.003722,0.007989,0.005342
4,-0.051404,0.201853,-0.044697,-0.084711,0.000786,0.010653,0.172421,-0.080946,-0.096627,0.077496,...,-0.011919,-0.002356,-0.002698,-0.006592,-0.002185,-0.00931,0.006778,-0.018198,-0.016595,0.014457


In [6]:
# Feature names come from the same pipeline step that produced `tfidf_matrix`
# (presumably aligned with its columns — verify against the vectorizer).
vectorizer = processor[1]
feature_names = vectorizer.get_feature_names_out()

# Convert the TF-IDF matrix to an array for the helper below.
tfidf_scores = tfidf_matrix.toarray()

# One entry per document, labelled with the file name.
important_words_per_doc = get_most_important_words(
    feature_names,
    tfidf_scores,
    df["file"],
)


In [7]:
# Inspect the entry for the first document (the recorded output shows "doc" and "words" keys).
important_words_per_doc[0]

{'doc': '0000_000_999-accidentes-trabajo.txt',
 'words': 'piso, trabajo, companero, mojado, evitar, senalizacion, limpieza, caida, contribuir, limpio, suela, verter, comunicacion, utilizar, causo, generara, liquido, objetivo, antideslizante, organizacional, comunicar, guarda, nuevamente, antebrazo, codo, elemento, limpiar, madera, accidente, alcance'}

In [9]:
# Example emotion lookup using the important words of the first document.
first_doc_words = important_words_per_doc[0]["words"]
get_senticnet_response(first_doc_words)

{'status_code': 200,
 'content': 'ecstasy (66.66%) & delight (50.87%) [INTROSPECTION=88.0%,TEMPER=-1.05%,ATTITUDE=82.79%,SENSITIVITY=-48.0%]\n',
 'introspection': {'value': 0.88, 'emotion': 'Ecstasy'},
 'temper': {'value': -0.0105, 'emotion': 'Annoyance'},
 'attitude': {'value': 0.8279000000000001, 'emotion': 'Delight'},
 'sensitivity': {'value': -0.48, 'emotion': 'Fear'}}

## Cognitive Function Inference

In [6]:
# Load model and predict cognitive function
clf = joblib.load(MODELS_PATH + "svm.pk")
y_pred = clf.predict(X)
y_pred

array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6], dtype=int64)

## Emotion Recognition

In [7]:
# Build one record per document: the predicted cognitive function name
# merged with the emotion response for the document's cleaned text.
results = []
for label, clean_text in zip(y_pred, df["clean_text"]):
    entry = {"cognitive_function": COGN_FUNC[label]}
    entry.update(get_senticnet_response(clean_text))
    results.append(entry)

In [8]:
# Spot-check one combined record: cognitive function plus the emotion response fields.
results[5]

{'cognitive_function': 'Praxia',
 'status_code': 200,
 'content': 'ecstasy (78.33%) & calmness (95.35%) [INTROSPECTION=91.85%,TEMPER=64.46%,ATTITUDE=-3.0%,SENSITIVITY=-7.1%]\n',
 'introspection': {'value': 0.9185, 'emotion': 'Ecstasy'},
 'temper': {'value': 0.6446, 'emotion': 'Calmness'},
 'attitude': {'value': -0.03, 'emotion': 'Dislike'},
 'sensitivity': {'value': -0.071, 'emotion': 'Anxiety'}}

## Ontology Components Inference

In [9]:
# TODO: Get strongest emotion(s) and map to basic emotion model
# For now the emotion reported on the "sensitivity" dimension is always used.
for result in results:
    # Records whose emotion request did not return 200 get no "cogaf_instance" key.
    if result["status_code"] != 200:
        continue
    result["cogaf_instance"] = CogafInstance(
        result["cognitive_function"],
        result["sensitivity"]["emotion"],
    )

In [10]:
# Show the raw attributes of the cognitive-function object built for record 2.
vars(results[2]["cogaf_instance"].cognitiveFunction)

{'name': 'Praxia',
 'isBasicFunction': False,
 'activities': [<modules.ontology_classes.ComplementaryActivity at 0x25b27331710>],
 'tasks': []}

In [12]:
# Dictionary view of the instance built for record 2 (cognitive function and emotion).
results[2]["cogaf_instance"].to_dict()

{'cognitiveFunction': {'name': 'Praxia',
  'isBasicFunction': False,
  'activities': [{'name': 'Karaoke', 'mechanics': ['Match', 'Vocalize']}],
  'tasks': []},
 'emotion': {'name': 'Eagerness', 'isBasicEmotion': None, 'state': None}}