In [1]:
import numpy as np
import pandas as pd
import torch
import random
import torch.nn as nn
import re
import tensorflow as tf

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
# Load the saved test-set results of the RNN and FFNN models (one CSV per model family).
df_test_RNN, df_test_FFNN = map(
    pd.read_csv,
    ("Test_Results_RNN.csv", "Test_Results_FFNN.csv"),
)

In [11]:
# Maps a model name to its accuracy; filled in by the evaluation cells below.
accuracy_models = {}

In [13]:
# Score the FFNN predictions against the labels stored in the same results frame,
# record the accuracy under 'FFNN', and print the per-class report.
prediction_ffnn = df_test_FFNN['prediction_ffnn']
accuracy_models['FFNN'] = accuracy = accuracy_score(
    y_true=df_test_FFNN['labels'],
    y_pred=prediction_ffnn,
)
print(f'Accuracy of FFNN = {accuracy}')
print(classification_report(y_true=df_test_FFNN['labels'], y_pred=prediction_ffnn))

Accuracy of FFNN = 0.9424083769633508
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        93
           1       0.92      0.96      0.94        76
           2       0.91      0.95      0.93        82
           3       0.98      0.95      0.96        42
           4       0.95      0.94      0.95        89

    accuracy                           0.94       382
   macro avg       0.95      0.94      0.94       382
weighted avg       0.94      0.94      0.94       382



In [22]:
# Score the 'prediction_ffnn_pre' column against the same labels, record the
# accuracy under 'FFNN_Pre', and print the per-class report.
prediction_ffnn_pre = df_test_FFNN['prediction_ffnn_pre']
accuracy_models['FFNN_Pre'] = accuracy = accuracy_score(
    y_true=df_test_FFNN['labels'],
    y_pred=prediction_ffnn_pre,
)
print(f'Accuracy of FFNN Fine Tuned= {accuracy}')
print(classification_report(y_true=df_test_FFNN['labels'], y_pred=prediction_ffnn_pre))

Accuracy of FFNN Fine Tuned= 0.9607329842931938
              precision    recall  f1-score   support

           0       0.97      0.92      0.95        93
           1       0.95      0.97      0.96        76
           2       0.94      0.96      0.95        82
           3       1.00      0.98      0.99        42
           4       0.97      0.98      0.97        89

    accuracy                           0.96       382
   macro avg       0.96      0.96      0.96       382
weighted avg       0.96      0.96      0.96       382



In [16]:
# Score the RNN predictions against the labels stored in the RNN results frame,
# record the accuracy under 'RNN', and print the per-class report.
prediction_rnn = df_test_RNN['prediction_rnn']
accuracy = accuracy_score(df_test_RNN['labels'], prediction_rnn)
accuracy_models['RNN'] = accuracy
# The label previously read "FFNN Fine Tuned" (copy-paste slip); this cell reports the RNN.
print(f'Accuracy of RNN = {accuracy}')
print(classification_report(df_test_RNN['labels'], prediction_rnn))

Accuracy of FFNN Fine Tuned= 0.5209424083769634
              precision    recall  f1-score   support

           0       0.78      0.56      0.65        93
           1       0.38      0.83      0.53        76
           2       0.44      0.48      0.46        82
           3       0.11      0.02      0.04        42
           4       0.81      0.49      0.62        89

    accuracy                           0.52       382
   macro avg       0.51      0.48      0.46       382
weighted avg       0.56      0.52      0.51       382



In [19]:
# Combine all model predictions in one frame.
# The column assignment below aligns on the index, so a row-count mismatch between
# the two result files would otherwise go unnoticed (NaNs or dropped rows).
if len(df_test_RNN) != len(df_test_FFNN):
  raise ValueError(
      f"RNN and FFNN result files differ in length: "
      f"{len(df_test_RNN)} vs {len(df_test_FFNN)}"
  )
# Work on a copy: a plain alias would make every later column assignment on df_test
# also modify the loaded df_test_FFNN frame.
df_test = df_test_FFNN.copy()
df_test['prediction_rnn'] = df_test_RNN['prediction_rnn']

# Ensembling

In [20]:
# Unweighted majority vote over the three models, one row at a time.
# Each model adds one vote to the class it predicted; np.argmax returns the first
# maximum, so ties (e.g. three different votes) resolve to the lowest class index.
prediction_ensemble = []
for row_votes in zip(prediction_rnn, prediction_ffnn, prediction_ffnn_pre):
  tally = np.zeros(5)  # one slot per class index 0..4
  for voted_class in row_votes:
    tally[voted_class] += 1
  prediction_ensemble.append(np.argmax(tally))
df_test['prediction_ensemble'] = prediction_ensemble

In [23]:
# Score the majority-vote ensemble, record the accuracy under 'Ensemble',
# and print the per-class report.
accuracy_models['Ensemble'] = accuracy = accuracy_score(
    y_true=df_test['labels'],
    y_pred=prediction_ensemble,
)
print(f'Accuracy of Ensembler = {accuracy}')
print(classification_report(y_true=df_test['labels'], y_pred=prediction_ensemble))

Accuracy of Ensembler = 0.9450261780104712
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        93
           1       0.88      0.97      0.93        76
           2       0.93      0.96      0.95        82
           3       1.00      0.93      0.96        42
           4       0.98      0.94      0.96        89

    accuracy                           0.95       382
   macro avg       0.95      0.94      0.95       382
weighted avg       0.95      0.95      0.95       382



# Weighted Ensembling

In [24]:
# Weighted vote: each model adds its recorded accuracy (instead of 1) to the class
# it predicted, and the class with the largest total wins.
# NOTE(review): the weights in accuracy_models were measured on this same test set,
# so the weighted-ensemble score is optimistically biased. Weights taken from a
# held-out validation split would avoid that (TODO confirm one is available).
vote_weights = (
    accuracy_models['RNN'],
    accuracy_models['FFNN'],
    accuracy_models['FFNN_Pre'],
)
weighted_ensemble = []
for row_votes in zip(prediction_rnn, prediction_ffnn, prediction_ffnn_pre):
  scores = np.zeros(5)  # one slot per class index 0..4
  for voted_class, weight in zip(row_votes, vote_weights):
    scores[voted_class] += weight
  weighted_ensemble.append(np.argmax(scores))
df_test['weighted_ensemble'] = weighted_ensemble

In [25]:
# Score the weighted ensemble, record the accuracy under 'Weighted_Ensembler',
# and print the per-class report.
accuracy_models['Weighted_Ensembler'] = accuracy = accuracy_score(
    y_true=df_test['labels'],
    y_pred=weighted_ensemble,
)
print(f'Accuracy of Weighted Ensembler = {accuracy}')
print(classification_report(y_true=df_test['labels'], y_pred=weighted_ensemble))

Accuracy of Weighted Ensembler = 0.9554973821989529
              precision    recall  f1-score   support

           0       0.98      0.91      0.94        93
           1       0.93      0.97      0.95        76
           2       0.93      0.96      0.95        82
           3       1.00      0.95      0.98        42
           4       0.97      0.98      0.97        89

    accuracy                           0.96       382
   macro avg       0.96      0.96      0.96       382
weighted avg       0.96      0.96      0.96       382



# Calculation

**Misclassified by at least one of the simple models but predicted correctly by the ensembler**

In [26]:
# Rows the majority-vote ensemble gets right although at least one base model gets them wrong.
ensemble_correct = df_test['prediction_ensemble'].eq(df_test['labels'])
any_base_wrong = (
    df_test[['prediction_rnn', 'prediction_ffnn_pre', 'prediction_ffnn']]
    .ne(df_test['labels'], axis=0)  # compare every prediction column with the label, row by row
    .any(axis=1)
)
df_temp = df_test[ensemble_correct & any_base_wrong]
len(df_temp)

167

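# **Misclassified by all of the simple models but predicted correctly by the ensembler = 0**

A majority vote can only output a class that at least one base model predicted, so when all three models are wrong the ensemble cannot be right.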

In [27]:
# Display the combined frame: the original columns plus each model's and each ensemble's prediction.
df_test

Unnamed: 0,Article,Class,labels,text,length,prediction_ffnn,prediction_ffnn_pre,prediction_rnn,prediction_ensemble,weighted_ensemble
0,Tsunami cost hits Jakarta shares\n\nThe stock ...,business,0,Tsunami cost hits Jakarta shares stock market ...,168,0,0,3,0,0
1,Microsoft sets sights on spyware\n\nWindows us...,tech,4,Microsoft sets sights spyware Windows users co...,210,4,4,2,4,4
2,Blair pledges unity to Labour MPs\n\nTony Blai...,politics,2,Blair pledges unity Labour MPs Tony Blair soug...,351,2,2,1,2,2
3,India's Deccan seals $1.8bn deal\n\nAir Deccan...,business,0,India Deccan seals 1 8bn deal Air Deccan order...,181,0,0,0,0,0
4,Blair told to double overseas aid\n\nTony Blai...,politics,2,Blair told double overseas aid Tony Blair urge...,277,2,2,0,2,2
...,...,...,...,...,...,...,...,...,...,...
377,Crucial decision on super-casinos\n\nA decisio...,politics,2,Crucial decision super casinos decision whethe...,108,2,2,2,2,2
378,Cyber crime booms in 2004\n\nThe last 12 month...,tech,4,Cyber crime booms 2004 last 12 months seen dra...,471,4,4,2,4,4
379,Howard rejects BNP's claim\n\nTory leader Mich...,politics,2,Howard rejects BNP claim Tory leader Michael H...,184,2,2,2,2,2
380,Card fraudsters 'targeting web'\n\nNew safegua...,business,0,Card fraudsters targeting web New safeguards c...,248,4,4,2,4,4


In [28]:
# Save the combined predictions; the row index is not written to the file.
df_test.to_csv("Test_Results_Overall.csv", index=False)

In [30]:
# Display the accuracy recorded for each individual model and each ensemble.
accuracy_models

{'Ensemble': 0.9450261780104712,
 'FFNN': 0.9424083769633508,
 'FFNN_Pre': 0.9607329842931938,
 'RNN': 0.5209424083769634,
 'Weighted_Ensembler': 0.9554973821989529}