# Importing Libraries

In [1]:
# Uncomment the following block and run it AFTER creating a new conda environment:
# a conda environment can be created as 
# conda create -n nlp_tools_2023 python=3.8
# and activated as
# conda activate nlp_tools_2023
'''
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install plotly
!pip install seaborn
!pip install scikit-learn
!pip install librosa
!pip install tensorflow
!pip install tqdm
!pip install torch
!pip install transformers
'''

# System Libraries
import os
import pickle
import random
import time

# Loop libraries
from tqdm import tqdm
from itertools import product

# Handling Data Libraries
import pandas as pd
import numpy as np

# display libraries
from IPython.display import Audio, clear_output

# Plot Libraries
import matplotlib.pyplot as plt
from matplotlib import cm
import plotly.express as px
import seaborn as sns

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files.
import librosa
import librosa.display

# text handling libraries
import re

# ML libraries
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

# DL libraries
import tensorflow as tf
import keras as ks

# Test routine
from test_model import *

# Importing Tensorflow models

In [2]:
# Map each emotion tag to its integer class id (ids follow the order listed here).
label2id = {
    emotion: class_id
    for class_id, emotion in enumerate(
        ('neu', 'fru', 'ang', 'sad', 'hap', 'exc', 'dis', 'fea', 'oth', 'sur', 'xxx')
    )
}

# Build the text-only model together with its train/validation/test inputs,
# targets and per-split label distributions.
# NOTE(review): presumably `train=False` means the weights stored at
# `weights_path` are used for inference only - confirm in test_model.
(
    model,
    model_name,
    batch_size,
    train_data,
    val_data,
    test_data,
    em_train,
    em_val,
    em_test,
    ignore_emotions,
    val_distr,
    test_distr,
) = build_model_and_dataset(
    weights_path='./Experiments NLP/text/conv_length/ALBERT/NLP_Models/conv1_TextTransformer_s42.h5',
    conv_length=1,
    use_audio=False,
    use_text=True,
    use_text_transformers=True,
    use_audio_transformers=False,
    train=False,
    label2id=label2id,
    verbose=False,
)

An error occurred: Inter op parallelism cannot be modified after initialization.
conv1_TextTransformer_s42


In [3]:
# Evaluate the model on the validation and test splits with masked F1 metrics
# (per-class, macro and weighted), time the test-set prediction, and write a
# plain-text report to "<model_name>_results.txt".

# Class ids excluded from the F1 computation; computed once and reused below.
ignored_label_ids = [label2id[emo] for emo in ignore_emotions]
num_labels = len(label2id)

# VALIDATION EVALUATION
# Each metric receives its own copy of the ignored ids, because Masked_F1 is
# defined outside this notebook and may keep a reference to the list.
f1s_metric = Masked_F1(num_labels, mode='none', name='f1s', ignore_indexes=list(ignored_label_ids))
f1_macro = Masked_F1(num_labels, mode='macro', name='f1_macro', ignore_indexes=list(ignored_label_ids))
val_f1_weighted_metric = Masked_F1(
    num_labels,
    mode='weighted',
    name='val_f1_weighted',
    ignore_indexes=list(ignored_label_ids),
    dataset_distribution=list(val_distr.values()),
)

val_preds = model.predict(val_data, batch_size=batch_size, verbose=0)

f1s_metric.update_state(em_val, val_preds)
f1_macro.update_state(em_val, val_preds)
val_f1_weighted_metric.update_state(em_val, val_preds)

# Read the results BEFORE resetting: f1s_metric and f1_macro are reused for the
# test split below, so their state must be cleared in between.
val_f1s = f1s_metric.result()
val_f1_macro = f1_macro.result()
val_f1_weighted = val_f1_weighted_metric.result()

f1s_metric.reset_state()
f1_macro.reset_state()
val_f1_weighted_metric.reset_state()

# TEST EVALUATION
# The weighted metric is rebuilt because it depends on the test-split distribution.
test_f1_weighted_metric = Masked_F1(
    num_labels,
    mode='weighted',
    name='test_f1_weighted',
    ignore_indexes=list(ignored_label_ids),
    dataset_distribution=list(test_distr.values()),
)

# Only the test-set predict call is timed (wall-clock seconds).
start = time.time()
test_preds = model.predict(test_data, batch_size=batch_size, verbose=0)
end = time.time()

f1s_metric.update_state(em_test, test_preds)
f1_macro.update_state(em_test, test_preds)
test_f1_weighted_metric.update_state(em_test, test_preds)

test_f1s = f1s_metric.result()
test_f1_macro = f1_macro.result()
test_f1_weighted = test_f1_weighted_metric.result()

f1s_metric.reset_state()
f1_macro.reset_state()
test_f1_weighted_metric.reset_state()

# Report layout: a label line followed by its value line, for every entry.
# The prediction-time value now sits on its own line like all other values
# (it used to be glued directly to its label).
log_lines = [
    'Validation f1s:',
    f'{val_f1s.numpy()}',
    'val f1 macro:',
    f'{val_f1_macro}',
    'val f1 weighted:',
    f'{val_f1_weighted}',
    'Test f1s:',
    f'{test_f1s.numpy()}',
    'test f1 macro:',
    f'{test_f1_macro}',
    'test f1 weighted:',
    f'{test_f1_weighted}',
    'test prediction time',
    f'{end - start}',
]
log_txt = '\n'.join(log_lines)

# The context manager guarantees the file is closed even if the write fails.
with open(f"{model_name}_results.txt", "w") as log_file:
    log_file.write(log_txt)

In [4]:
# Show the same report text that was written to "<model_name>_results.txt";
# print() is used so the embedded newlines render as line breaks.
print(log_txt)

Validation f1s:
[0.19301848 0.34571063 0.415      0.33608815 0.12283044 0.28505747
 0.         0.         0.         0.         0.        ]
val f1 macro:
0.28295086228418026
val f1 weighted:
0.31460240907952575
Test f1s:
[0.31623932 0.25069638 0.28380634 0.37401575 0.1318408  0.24315068
 0.         0.         0.         0.         0.        ]
test f1 macro:
0.26662487794996853
test f1 weighted:
0.27731441298947646
test prediction time4492.895488023758
