# Results

In [1]:
import pickle
import pandas as pd
import numpy as np
from pprint import pprint
from sklearn.metrics import classification_report
from collections import defaultdict

In [2]:
model_names = ['multi_lr','decision_tree','multi_NB','BNB'] # <-- add yours

In [14]:
# Generating the metric dataframe from the above model names
df_results = pd.DataFrame()

for model_name in model_names:
    path = f'../data/pickles/df_results_{model_name}.pickle'
    with open(path, 'rb') as data:
        df = pickle.load(data)
    df_results = df_results.append(df)
    
df_results = df_results.reset_index().drop('index', axis=1)

metrics = {
    'accuracy': 'Accuracy',
    'precision_macro': 'Precision (macro)',
    'recall_macro': 'Recall (macro)',    
    'f1_macro': 'F1 (macro)',
    'precision': 'Precision (micro)',
    'recall': 'Recall (micro)',    
    'f1': 'F1 (micro)',
}

for metric in metrics:
    df_results[metrics[metric]] = df_results[f'mean_test_{metric}'].map('{:,.4f}'.format).astype(str) \
    + df_results[f'std_test_{metric}'].map(' ±{:,.3f}'.format).astype(str)
    
df_results.sort_values('mean_test_f1_macro', inplace=True, ascending=False)

## Cross-validation comparison

Cross-validation results on the training set for selected metrics, feature sets and implemented methods.
The following results show the mean validation and standard deviation value for each metric.

In [19]:
display(df_results[['model', *metrics.values()]])
print('Note: Precision, Recall and F1 scores exclude the \'IRRELEVANT\' class')
# print(df_results[['model', 'Accuracy', 'Precision (macro)', 'Recall (macro)', 'F1 (macro)']].to_string())

Unnamed: 0,model,Accuracy,Precision (macro),Recall (macro),F1 (macro),Precision (micro),Recall (micro),F1 (micro)
0,multi_lr,0.7728 ±0.006,0.6284 ±0.022,0.7092 ±0.018,0.6557 ±0.019,0.6552 ±0.009,0.7277 ±0.005,0.6895 ±0.006
2,multi_NB,0.6667 ±0.010,0.5512 ±0.015,0.7105 ±0.017,0.5848 ±0.016,0.6667 ±0.010,0.6667 ±0.010,0.6667 ±0.010
3,BNB,0.7081 ±0.005,0.5682 ±0.023,0.5658 ±0.025,0.5465 ±0.025,0.6017 ±0.007,0.6479 ±0.009,0.6240 ±0.007
1,decision_tree,0.6573 ±0.003,0.4310 ±0.005,0.5611 ±0.010,0.4815 ±0.002,0.5318 ±0.006,0.6242 ±0.009,0.5743 ±0.006


Note: Precision, Recall and F1 scores exclude the 'IRRELEVANT' class


## Final results

Final results for each class calculated on the whole test set using the final selected method with its hyper-parameters.

In [5]:
selected_model = 'multi_lr' # <-- we can change this to the best model

In [6]:
# Load the selected final model
path = f'../data/pickles/best_{selected_model}.pickle'
with open(path, 'rb') as data:
    final_model = pickle.load(data)
    
# Load the test set
df_test = pd.read_csv('../data/test.csv', index_col=0).reset_index()
X_test = df_test.article_words.values
y_test = df_test.topic.values

# Load labels
labels = sorted(list(set(y_test)))
labels.remove('IRRELEVANT')

Using TensorFlow backend.


In [8]:
y_pred = final_model.predict(X_test)
print(f'Final results using the \'{selected_model}\' model:\n')
print(classification_report(y_test, y_pred, labels=labels))

Final results using the 'multi_lr' model:

                                  precision    recall  f1-score   support

      ARTS CULTURE ENTERTAINMENT       0.38      1.00      0.55         3
BIOGRAPHIES PERSONALITIES PEOPLE       0.78      0.47      0.58        15
                         DEFENCE       0.75      0.92      0.83        13
                DOMESTIC MARKETS       0.40      1.00      0.57         2
                   FOREX MARKETS       0.57      0.77      0.65        48
                          HEALTH       0.69      0.79      0.73        14
                   MONEY MARKETS       0.62      0.59      0.61        69
          SCIENCE AND TECHNOLOGY       0.33      0.33      0.33         3
                  SHARE LISTINGS       0.60      0.86      0.71         7
                          SPORTS       0.95      0.98      0.97        60

                       micro avg       0.69      0.76      0.72       234
                       macro avg       0.61      0.77      0.65    

## Final Recommendations

In [9]:
# Get probability vector for predictions
y_pred_prob = final_model.predict_proba(X_test)

# Index of topic classes in our model
model_classes = final_model.classes_.tolist()

# List of correct article ids for each topic
topic_articles = df_test.groupby('topic')['article_number'].apply(lambda x: x.values.tolist()).to_dict()

# Count of correct articles for each topic
topic_counts = {x: len(topic_articles[x]) for x in topic_articles}

In [11]:
recommendations = {label: [] for label in sorted(list(set(y_test)))}

# Loop through articles
for index, article in enumerate(df_test.article_number.values):
    # The topic we predicted
    pred_class = y_pred[index]
    # The topic it belongs to
    true_class = y_test[index]
    # Index of predicted class
    pred_index = model_classes.index(pred_class)
    # Probability of prediction
    pred_prob = y_pred_prob[index][pred_index]
    # Save recommendation
    recommendations[pred_class].append((article, pred_prob))


results = []
    
for topic in recommendations:
    if topic == 'IRRELEVANT':
        continue
    top_10 = sorted(recommendations[topic][:10], key=lambda x: x[1], reverse=True)
    
    articles = [int(x[0]) for x in top_10]
    
    tp = len(set(articles).intersection(topic_articles[topic]))
    precision = tp / len(articles) if len(articles) > 0 else 0
    recall = tp / topic_counts[topic] if topic_counts[topic] > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    results.append({
        'Topic name': topic,
        'Suggested articles': ','.join([str(x) for x in articles]),
        'Precision': precision,
        'Recall': recall,
        'F1': f1
    })

df_recommend = pd.DataFrame(results, columns=['Topic name', 'Suggested articles', 'Precision', 'Recall', 'F1'])
print(df_recommend.to_string(index=False,formatters={'Precision':'{:,.2f}'.format, 'Recall':'{:,.2f}'.format, 'F1':'{:,.2f}'.format}))

                       Topic name                                 Suggested articles Precision Recall   F1
       ARTS CULTURE ENTERTAINMENT            9952,9703,9789,9830,9604,9933,9526,9834      0.38   1.00 0.55
 BIOGRAPHIES PERSONALITIES PEOPLE       9896,9878,9940,9695,9758,9988,9783,9854,9645      0.78   0.47 0.58
                          DEFENCE  9559,9576,9616,9607,9670,9721,9731,9713,9706,9739      0.80   0.62 0.70
                 DOMESTIC MARKETS                           9994,9796,9989,9640,9923      0.40   1.00 0.57
                    FOREX MARKETS  9551,9529,9530,9548,9555,9565,9525,9503,9539,9554      0.70   0.15 0.24
                           HEALTH  9873,9661,9807,9810,9887,9735,9609,9833,9621,9575      0.80   0.57 0.67
                    MONEY MARKETS  9516,9534,9509,9531,9547,9550,9560,9506,9564,9542      0.60   0.09 0.15
           SCIENCE AND TECHNOLOGY                                     9617,9982,9722      0.33   0.33 0.33
                   SHARE LISTINGS  95