# Inference Pipeline

In [1]:
import pandas as pd
from modules.ontology_classes import CogafInstance
from modules.emotion import get_senticnet_response
from modules.engine import COGN_FUNC
from modules.text_processing import load_text_files, get_most_important_words, get_event





## Load and process texts

In [2]:
# Read the raw text files. The returned mapping supplies the "file" column
# (its keys) and the "text" column (its values).
text_list = load_text_files('./../data/text/raw/')

# Materialise the dict views as plain lists before building the frame.
df = pd.DataFrame(
    {
        "file": list(text_list.keys()),
        "text": list(text_list.values()),
    }
)
df.head()

Unnamed: 0,file,text
0,leccion_aprendida_explo_sub_carbon_el_zulia_nt...,LECCIÓN APRENDIDA\n\n¿QUÉ PASÓ?\n\nSe presentó...
1,leccion_aprendida_explo_sub_carbon_cucunuba_cu...,LECCIÓN\tAPRENDIDA\n\n¿QUÉ PASÓ?\n\nEl día 4 d...
2,leccion_aprendida_explo_sub_carbon_cucunuba_cu...,LECCIÓN\tAPRENDIDA\n\n¿QUÉ PASÓ?\n\nEl día 30 ...
3,leccion_aprendida_explo_sub_carbon_buenos_aire...,LECCIÓN APRENDIDA\n\n¿QUÉ PASÓ?\n\nSe presentó...
4,leccion_aprendida_derrumbre_cali_valle_cauca_2...,¿QUÉ PASÓ?\n\nSe presentó un accidente minero ...


In [3]:
# Load the persisted preprocessing pipeline.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model artifacts that come from a trusted source.
import joblib

MODELS_PATH = "./models/"
processor = joblib.load(f"{MODELS_PATH}preprocessor.pk")

In [4]:
# Step 0 of the preprocessor cleans the raw text. The cleaned text is stored
# as its own column because emotion recognition needs it later on.
text_cleaner = processor[0]
df["clean_text"] = text_cleaner.transform(df["text"])

# Step 1 turns the cleaned text into the TF-IDF matrix.
tfidf_vectorizer = processor[1]
tfidf_matrix = tfidf_vectorizer.transform(df["clean_text"])

# The remaining steps (index 2 onwards) perform the dimensionality reduction
# (PCA, per the original note); the result is wrapped in a DataFrame.
reduced_features = processor[2:].transform(tfidf_matrix)
X = pd.DataFrame(reduced_features)

In [5]:
# Preview the first rows of the reduced feature matrix fed to the classifier.
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,-0.13606,-0.104221,-0.233905,-0.060897,-0.252483,0.07134,-0.100001,0.103503,-0.177102,0.094348,...,0.036496,0.064023,-0.023733,-0.21063,0.028497,-0.01472,0.005116,-0.010226,0.01228,-0.019039
1,-0.347631,-0.090123,0.443983,0.030019,-0.18767,0.13238,0.01062,-0.085053,0.104718,0.005958,...,-0.06441,-0.010888,0.018755,0.004703,0.003684,0.006932,-0.000483,-0.008082,-0.000163,-0.003191
2,-0.115998,-0.019555,-0.103137,-0.048152,-0.176678,0.057497,0.112956,-0.054637,0.213335,0.016338,...,0.009302,0.004723,-0.001934,0.004133,-0.004232,0.00159,0.010461,-0.000843,0.001069,-0.002502
3,-0.264719,-0.137008,-0.132105,0.001269,-0.121144,0.080024,-0.004796,0.031877,-0.005656,-0.008856,...,-0.002852,0.01541,-0.022148,-0.003718,0.005559,-0.021047,-0.00266,0.005138,0.027774,0.009579
4,-0.090512,-0.034195,-0.136822,-0.090105,-0.134375,0.01592,-0.001898,0.163685,0.185808,0.074291,...,0.008978,-0.017295,0.002429,0.013129,0.00153,-0.002752,-0.003509,0.001671,0.001095,2.3e-05


## Cognitive Function Inference

In [6]:
# Load the classifier stored in svm.pk and predict the cognitive-function
# label for every row of X.
# NOTE(review): the recorded output assigns label 6 to every document;
# confirm that this is expected for this corpus.
svm_path = f"{MODELS_PATH}svm.pk"
clf = joblib.load(svm_path)
y_pred = clf.predict(X)
y_pred

array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6], dtype=int64)

## Emotion Recognition

### With 100 most important words as input

In [7]:
# Number of top-scoring words requested per document.
N_TOP_WORDS = 100

# Vocabulary of the TF-IDF step and the TF-IDF scores as a dense array.
feature_names = processor[1].get_feature_names_out()
tfidf_scores = tfidf_matrix.toarray()

# Select the most important words of each document, passing the file names along.
important_words_per_doc = get_most_important_words(
    feature_names, tfidf_scores, df["file"], N_TOP_WORDS
)


In [9]:
# One entry per document: the predicted cognitive function merged with the
# SenticNet response obtained from that document's most important words.
results_with_top_words = [
    {
        "cognitive_function": COGN_FUNC[y_pred[doc_idx]],
        **get_senticnet_response(important_words_per_doc[doc_idx]["words"]),
    }
    for doc_idx in range(len(important_words_per_doc))
]

### With the entire clean text as input

In [10]:
# One entry per document: the predicted cognitive function merged with the
# SenticNet response obtained from the document's full cleaned text.
#
# Documents and predictions are paired positionally. Indexing the column with
# `df.clean_text[i]` is a label lookup, so it only returns row i while df
# keeps its default 0..n-1 index; zip() has no such dependency.
assert len(y_pred) == len(df), "expected exactly one prediction per document"
results = [
    {"cognitive_function": COGN_FUNC[label], **get_senticnet_response(clean_text)}
    for label, clean_text in zip(y_pred, df["clean_text"])
]

In [11]:
# Inspect the full-text result for the document at position 5.
results[5]

{'cognitive_function': 'Praxia',
 'status_code': 200,
 'content': 'ecstasy (78.33%) & calmness (95.35%) [INTROSPECTION=91.85%,TEMPER=64.46%,ATTITUDE=-3.0%,SENSITIVITY=-7.1%]\n',
 'introspection': {'value': 0.9185, 'emotion': 'Ecstasy'},
 'temper': {'value': 0.6446, 'emotion': 'Calmness'},
 'attitude': {'value': -0.03, 'emotion': 'Dislike'},
 'sensitivity': {'value': -0.071, 'emotion': 'Anxiety'}}

In [12]:
# Inspect the top-words result for the same document (position 5) for comparison.
results_with_top_words[5]

{'cognitive_function': 'Praxia',
 'status_code': 200,
 'content': 'ecstasy (82.24%) & enthusiasm (48.48%) [INTROSPECTION=93.14%,TEMPER=50.49%,ATTITUDE=30.04%,SENSITIVITY=82.0%]\n',
 'introspection': {'value': 0.9314, 'emotion': 'Ecstasy'},
 'temper': {'value': 0.5049, 'emotion': 'Calmness'},
 'attitude': {'value': 0.3004, 'emotion': 'Acceptance'},
 'sensitivity': {'value': 0.82, 'emotion': 'Enthusiasm'}}

## Ontology components inference

In [13]:
from modules.text_processing import normalize

# Normalise the raw text of every document and keep it as a separate column.
# NOTE(review): the meaning of the two positional False flags is defined in
# modules.text_processing; judging by the displayed output, casing, accents
# and punctuation are kept while whitespace is collapsed. TODO confirm.
normalized_texts = [normalize(raw_text, False, False) for raw_text in df["text"]]
df['norm_text'] = normalized_texts
df.head()

Unnamed: 0,file,text,clean_text,norm_text
0,leccion_aprendida_explo_sub_carbon_el_zulia_nt...,LECCIÓN APRENDIDA\n\n¿QUÉ PASÓ?\n\nSe presentó...,leccion aprendido pasar presentar accidente mi...,LECCIÓN APRENDIDA ¿QUÉ PASÓ? Se presentó un ac...
1,leccion_aprendida_explo_sub_carbon_cucunuba_cu...,LECCIÓN\tAPRENDIDA\n\n¿QUÉ PASÓ?\n\nEl día 4 d...,leccion aprendido pasar 4 abril ano 2020 5:00 ...,LECCIÓN APRENDIDA ¿QUÉ PASÓ? El día 4 de abril...
2,leccion_aprendida_explo_sub_carbon_cucunuba_cu...,LECCIÓN\tAPRENDIDA\n\n¿QUÉ PASÓ?\n\nEl día 30 ...,leccion aprendido pasar 30 mayo 2019 mina muni...,LECCIÓN APRENDIDA ¿QUÉ PASÓ? El día 30 de mayo...
3,leccion_aprendida_explo_sub_carbon_buenos_aire...,LECCIÓN APRENDIDA\n\n¿QUÉ PASÓ?\n\nSe presentó...,leccion aprendido pasar presentar accidente mi...,LECCIÓN APRENDIDA ¿QUÉ PASÓ? Se presentó un ac...
4,leccion_aprendida_derrumbre_cali_valle_cauca_2...,¿QUÉ PASÓ?\n\nSe presentó un accidente minero ...,pasar presentar accidente minero trabajador re...,¿QUÉ PASÓ? Se presentó un accidente minero cua...


In [14]:
# Apply get_event to every normalised text, keeping document order.
events = list(map(get_event, df["norm_text"]))

In [15]:
import owlready2 as owl

# Location of the ontology file, relative to the notebook's working directory.
ONTOLOGY_PATH = "./../ontology/Hourglass_COGAF_Ontology.rdf"

# Register the ontology and load its contents from the file.
onto = owl.get_ontology(ONTOLOGY_PATH).load()

In [16]:
# Create an Emotion individual named "test_emotion" from the four dimension
# values of the first top-words result.
first_result = results_with_top_words[0]
dimension_values = {
    dimension: first_result[dimension]["value"]
    for dimension in ("introspection", "temper", "attitude", "sensitivity")
}
emotion_instance = onto.Emotion("test_emotion", **dimension_values)

In [19]:
# Read the introspection value back from the ontology to confirm it was stored.
onto.test_emotion.introspection

-0.0075