In [1]:
import ktrain
import tensorflow as tf
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report

# Evaluate the Ioya2Vec CNN
------

## Load the model

In [2]:
# Restore the saved Keras model, then unpickle the preprocessor stored
# under the predictor directory.
import pickle

model = tf.keras.models.load_model('./cnn_Ioya2Vec_model')

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point this at a .preproc file you produced yourself.
with open('./cnn_Ioya2Vec_predictor/tf_model.preproc', 'rb') as f:
    preproc = pickle.load(f)

In [3]:
# Combine the loaded model and its preprocessor into a ktrain predictor.
predictor = ktrain.get_predictor(model, preproc)

In [4]:
# Load the weights saved in the predictor directory's tf_model.h5 into the
# predictor's underlying model.
predictor.model.load_weights('./cnn_Ioya2Vec_predictor/tf_model.h5')

## Load the test set and confirm key values

In [5]:
# Read the test set CSV into a DataFrame.
test = pd.read_csv('../data/test.csv')

In [6]:
# Pull the text column and the label column out of the test frame as plain lists.
x_test = list(test['cleaned_contents'])
y_test = list(test['Discrimination_Label'])

In [7]:
# Run the predictor over every test text.
y_hat = predictor.predict(x_test)

In [8]:
# Per-class precision / recall / F1 of the predictions against the test labels.
print(classification_report(y_test, y_hat))

              precision    recall  f1-score   support

           0       0.64      0.74      0.69        65
           1       0.80      0.72      0.76        97

    accuracy                           0.73       162
   macro avg       0.72      0.73      0.72       162
weighted avg       0.74      0.73      0.73       162



## Examine the embeddings

In [9]:
def examine_embeddings(model, preproc, words, top_n=1):
    """Find the nearest neighbours of each word in the model's embedding matrix.

    Neighbours are ranked by cosine similarity between the word's embedding
    row and every row of the matrix. The query word itself is always
    excluded from its own neighbour list.

    Parameters
    ----------
    model : object
        Model whose ``weights[0].numpy()`` returns the embedding matrix.
        NOTE(review): this assumes the embedding layer is the model's first
        weight -- confirm before using it on a different architecture.
    preproc : object
        Preprocessor exposing a tokenizer as ``preproc.tok`` with
        ``texts_to_sequences`` and ``sequences_to_texts`` methods.
    words : iterable of str
        Words to look up. Only the first token id the tokenizer produces
        for each entry is used.
    top_n : int, default 1
        Number of neighbours to return per word.

    Returns
    -------
    list of list of str
        One list per input word, in input order, holding the ``top_n``
        closest words from most to least similar. A word the tokenizer
        cannot map to any token id yields an empty list.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    embeddings = model.weights[0].numpy()
    closest_matches = []
    for word in words:
        token_ids = preproc.tok.texts_to_sequences([word])[0]
        if not token_ids:
            # The tokenizer produced nothing for this word (e.g. it is out of
            # vocabulary); keep the output aligned with `words` instead of
            # failing with an IndexError.
            closest_matches.append([])
            continue
        tok = token_ids[0]

        sims = cosine_similarity(embeddings[tok, :].reshape(1, -1), embeddings)[0]
        # Rank from most to least similar and drop the query row explicitly:
        # with duplicate or tied embedding rows the query is not guaranteed
        # to sort into the top position.
        ranked = [int(idx) for idx in sims.argsort()[::-1] if idx != tok]

        # One single-id sequence per neighbour so each comes back as its own string.
        closest_matches.append(
            preproc.tok.sequences_to_texts([[idx] for idx in ranked[:top_n]])
        )
    return closest_matches

In [10]:
# Closest embedding neighbour for three probe words.
examine_embeddings(model, preproc, ['suva', 'accused', 'bread'])

[['lautoka'], ["accused's"], ['winner']]

# Evaluate the CNN with trainable weights
------

## Load the model

In [12]:
# Restore the second saved Keras model and unpickle the preprocessor stored
# under its predictor directory. This rebinds `model` and `preproc`.
import pickle

model = tf.keras.models.load_model('./cnn_model')

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point this at a .preproc file you produced yourself.
with open('./cnn_predictor/tf_model.preproc', 'rb') as f:
    preproc = pickle.load(f)

In [13]:
# Build a ktrain predictor from the model and preprocessor currently bound to
# `model` / `preproc`.
predictor = ktrain.get_predictor(model, preproc)
# Load the weights saved in the predictor directory's tf_model.h5.
predictor.model.load_weights('./cnn_predictor/tf_model.h5')
# Predict on the same test texts used for the first model.
y_hat = predictor.predict(x_test)

In [14]:
# Convert every prediction to an int before it is compared with y_test.
y_hat = list(map(int, y_hat))

In [15]:
# Per-class precision / recall / F1 for the second model's predictions.
print(classification_report(y_test, y_hat))

              precision    recall  f1-score   support

           0       0.66      0.82      0.73        65
           1       0.85      0.72      0.78        97

    accuracy                           0.76       162
   macro avg       0.76      0.77      0.76       162
weighted avg       0.78      0.76      0.76       162



## Examine the embeddings

In [16]:
# Same three probe words, now against the model loaded from './cnn_model'.
examine_embeddings(model, preproc, ['suva', 'accused', 'bread'])

[['default'], ['farm'], ['breadwinner']]