In [16]:
import keras
import pickle
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Loading the models
# Three previously saved Keras models are read from HDF5 files; the bare file
# names are resolved relative to the current working directory.
# NOTE(review): presumably the binary classifier decides which of the two
# cluster models a sentence belongs to — confirm against the training code.

cluster_1 = keras.models.load_model('cluster-1-model.h5')
cluster_2 = keras.models.load_model('cluster-2-model.h5')
main_classifier = keras.models.load_model('binary-classifier.h5')


In [5]:
def load(path):
    """Unpickle the file located at ``path`` and hand back its contents.

    The file is opened in binary mode and is closed again before the
    function returns. Unpickling can execute arbitrary code, so only point
    this at files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# Evaluation Cluster 1
# Names of the 14 topics handled by the cluster 1 model.
# NOTE(review): presumably position i names class index i of the cluster 1
# model — confirm against the label encoding used at training time.
cluster_1_topics = [
    'competition',
    'development',
    'economic_and_monetary_affairs',
    'energy',
    'enterprise',
    'external_trade',
    'external_relations',
    'fight_against_fraud',
    'foreign_and_security_policy',
    'human_rights',
    'humanitarian_aid',
    'maritime_affairs_and_fisheries',
    'research_innovation',
    'taxation',
]

In [8]:
# Loading data for cluster 1
# Each pickle is read with load(). test_sent_cluster_1 is later passed to the
# model as input and test_label_cluster_1 as the matching target.
# NOTE(review): the paths are absolute and specific to one machine; the train
# splits are not referenced by any evaluation cell shown in this notebook.

train_sent_cluster_1 = load('/home/jay/new-approach-16-jun-2018/cluster-1-train-sent.pkl')
train_label_cluster_1 = load('/home/jay/new-approach-16-jun-2018/cluster-1-train-label.pkl')
test_sent_cluster_1 = load('/home/jay/new-approach-16-jun-2018/cluster-1-test-sent.pkl')
test_label_cluster_1 = load('/home/jay/new-approach-16-jun-2018/cluster-1-test-label.pkl')

In [12]:
# Score the cluster 1 model on its test split.
# Entry 1 of the evaluate() result is reported as the accuracy
# (assumes accuracy is the first compiled metric — TODO confirm).
acc_cluster_1 = cluster_1.evaluate(
    x=np.array(test_sent_cluster_1),
    y=test_label_cluster_1,
)
print("Accuracy of classification for Cluster 1 is: {} %".format(acc_cluster_1[1] * 100))

Accuracy of classification for Cluster 1 is: 89.96405688370182 %


In [14]:
# Run the cluster 1 model over the test sentences. The raw outputs are kept
# so the report and confusion-matrix cells can reduce them with argmax.
predictions_cluster_1 = cluster_1.predict(x=np.array(test_sent_cluster_1))

In [17]:
# Per-class precision / recall / F1 for cluster 1. Targets and model outputs
# are both collapsed to class indices with argmax along axis 1 first.
print(classification_report(
    y_true=np.argmax(test_label_cluster_1, axis=1),
    y_pred=np.argmax(predictions_cluster_1, axis=1),
))

             precision    recall  f1-score   support

          0       0.92      0.94      0.93      4495
          1       0.84      0.85      0.84      8250
          2       0.98      0.98      0.98      7621
          3       0.88      0.86      0.87      6912
          4       0.91      0.88      0.90      4439
          5       0.90      0.94      0.92      8378
          6       0.73      0.65      0.69      2126
          7       0.93      0.93      0.93      2158
          8       0.85      0.84      0.84      2313
          9       0.92      0.92      0.92      3017
         10       0.86      0.83      0.85      1308
         11       0.93      0.91      0.92      4300
         12       0.87      0.89      0.88      4913
         13       0.96      0.96      0.96      3760

avg / total       0.90      0.90      0.90     63990



In [19]:
# Confusion matrix for cluster 1 (rows: true class, columns: predicted class),
# computed on the argmax-reduced targets and model outputs.
print(confusion_matrix(
    y_true=np.argmax(test_label_cluster_1, axis=1),
    y_pred=np.argmax(predictions_cluster_1, axis=1),
))

[[4235   10   17   16    6    4    3    1    0    2    0  151   44    6]
 [  10 7002   17  213   10  432  275    1  128   81   45   20   11    5]
 [  26   11 7486   17    9   16    2    3    4    4    2   10   24    7]
 [  14  267   19 5966  118  118   32    5   26    4    2   40  284   17]
 [  14   17   17  153 3921    8  105    1   14    0    2    8  173    6]
 [   6  169   11   66    3 7903   56   45   15   63    1   27   10    3]
 [   6  500    3   29  102   76 1376    1   17   12    2    0    2    0]
 [   3    4    5    9    2   23    1 2007    0    6    2    1    1   94]
 [   4  115    7   23   14   49   12    1 1934    4   84    1   64    1]
 [   3  109    7    9    2   60    6    2    6 2765   35    2    6    5]
 [   1   78    5    1    1    3    1    3   80   41 1092    1    1    0]
 [ 215   37   19   39   10   42    4    3    3    3    0 3894   19   12]
 [  46    8   16  248  104   13    2    1   57    2    2    7 4396   11]
 [  10    5    7   24    7    9    1   87    1    3

# Evaluation Cluster 2

In [20]:
# Loading data for cluster 2
# Each pickle is read with load(). test_sent_cluster_2 is later passed to the
# model as input and test_label_cluster_2 as the matching target.
# NOTE(review): the paths are absolute and specific to one machine; the train
# splits are not referenced by any evaluation cell shown in this notebook.

train_sent_cluster_2 = load('/home/jay/new-approach-16-jun-2018/cluster-2-train-sent.pkl')
train_label_cluster_2 = load('/home/jay/new-approach-16-jun-2018/cluster-2-train-label.pkl')
test_sent_cluster_2 = load('/home/jay/new-approach-16-jun-2018/cluster-2-test-sent.pkl')
test_label_cluster_2 = load('/home/jay/new-approach-16-jun-2018/cluster-2-test-label.pkl')

In [21]:
# Score the cluster 2 model on its test split.
# Entry 1 of the evaluate() result is reported as the accuracy
# (assumes accuracy is the first compiled metric — TODO confirm).
acc_cluster_2 = cluster_2.evaluate(
    x=np.array(test_sent_cluster_2),
    y=test_label_cluster_2,
)
print("Accuracy of classification for Cluster 2 is: {} %".format(acc_cluster_2[1] * 100))

Accuracy of classification for Cluster 2 is: 76.94794475770205 %


In [22]:
# Run the cluster 2 model over the test sentences. The raw outputs are kept
# so the report and confusion-matrix cells can reduce them with argmax.
predictions_cluster_2 = cluster_2.predict(x=np.array(test_sent_cluster_2))

In [23]:
# Per-class precision / recall / F1 for cluster 2. Targets and model outputs
# are both collapsed to class indices with argmax along axis 1 first.
print(classification_report(
    y_true=np.argmax(test_label_cluster_2, axis=1),
    y_pred=np.argmax(predictions_cluster_2, axis=1),
))

             precision    recall  f1-score   support

          0       0.64      0.73      0.68     12024
          1       0.67      0.64      0.65      4921
          2       0.59      0.95      0.73      3021
          3       0.52      0.78      0.63      9363
          4       0.53      0.90      0.67      3211
          5       0.54      0.94      0.69      5497
          6       0.85      0.90      0.87     19555
          7       0.87      0.72      0.79     43569
          8       0.77      0.78      0.78     15902
          9       0.82      0.74      0.78     27703
         10       0.69      0.87      0.77      9189
         11       0.71      0.82      0.76     12035
         12       0.80      0.85      0.83     15769
         13       0.80      0.66      0.72     30631
         14       0.91      0.76      0.83     48210
         15       0.69      0.86      0.77      5490
         16       0.72      0.78      0.75     16627
         17       0.75      0.77      0.76   

In [24]:
# Confusion matrix for cluster 2 (rows: true class, columns: predicted class),
# computed on the argmax-reduced targets and model outputs.
print(confusion_matrix(
    y_true=np.argmax(test_label_cluster_2, axis=1),
    y_pred=np.argmax(predictions_cluster_2, axis=1),
))

[[ 8740    10   718   237    14    19    14    86   236   338   500    63
     38    33    24     7   906    41]
 [    2  3128     2    19  1550     2    17     7    18    10     6   116
      9    12     7     4     6     6]
 [   48     4  2879     4     1     0     1     4     4     2     1     6
     40     3    11     2     7     4]
 [  114    63     2  7345     2     7    21    16     8    40   613   162
     11   278    35   245    90   311]
 [    2   228     4     2  2901     1    19     3    14     3     3     6
      2     6    11     3     0     3]
 [   11     4     2     8    13  5168    12    12    74    11    35     2
      7    49    75     2     1    11]
 [   22   136     9   161   238    34 17509   343   170    56    19   216
    128   168   134    24   121    67]
 [  823   159   223   481    40   186  1414 31250   586   698    95   604
    562  1722  1248   388  2325   765]
 [  382    39    14    10   215   891   121   134 12431   202    11   376
    125    43   376   

# Evaluation Main Classifier

In [25]:
# Loading data for the main classifier
# Each pickle is read with load(). test_sent_main is later passed to the
# model as input and test_label_main as the matching target.
# NOTE(review): the paths are absolute and specific to one machine; the train
# splits are not referenced by any evaluation cell shown in this notebook.

train_sent_main = load('/home/jay/new-approach-16-jun-2018/cluster-main-train-sent.pkl')
train_label_main = load('/home/jay/new-approach-16-jun-2018/cluster-main-train-label.pkl')
test_sent_main = load('/home/jay/new-approach-16-jun-2018/cluster-main-test-sent.pkl')
test_label_main = load('/home/jay/new-approach-16-jun-2018/cluster-main-test-label.pkl')

In [None]:
# Score the main classifier on its test split.
# NOTE(review): this cell was saved with the prompt "In [None]", i.e. without
# a recorded execution count, yet acc_main is printed afterwards — rerun the
# notebook top to bottom before relying on the reported figure.
acc_main = main_classifier.evaluate(
    x=np.array(test_sent_main),
    y=test_label_main,
)

In [27]:
# Entry 1 of the evaluate() result is reported as the accuracy, in percent
# (assumes accuracy is the first compiled metric — TODO confirm).
print(
    "Accuracy of classification for main classifier: {} %".format(acc_main[1] * 100)
)

Accuracy of classification for main classifier: 88.28240413188146 %


In [28]:
# Run the main classifier over the test sentences. The raw outputs are kept
# so the report and confusion-matrix cells can reduce them with argmax.
predictions_main = main_classifier.predict(x=np.array(test_sent_main))

In [29]:
# Per-class precision / recall / F1 for the main classifier. Targets and model
# outputs are both collapsed to class indices with argmax along axis 1 first.
print(classification_report(
    y_true=np.argmax(test_label_main, axis=1),
    y_pred=np.argmax(predictions_main, axis=1),
))

             precision    recall  f1-score   support

          0       0.74      0.94      0.83    128300
          1       0.97      0.86      0.91    305603

avg / total       0.90      0.88      0.89    433903



In [34]:
# Confusion matrix for the main classifier (rows: true class, columns:
# predicted class), computed on the argmax-reduced targets and model outputs.
print(confusion_matrix(
    y_true=np.argmax(test_label_main, axis=1),
    y_pred=np.argmax(predictions_main, axis=1),
))

[[120527   7773]
 [ 43070 262533]]
