# Hybrid Filtering Recommendation Model

Import libraries

In [1]:
import time
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict

Load data

In [2]:
dtrain_target = pd.read_csv('Results/Target_train.csv')
dtest_target = pd.read_csv('Results/Target_test.csv')

Get predictions and accuracies from the demographic and collaborative filtering methods

In [3]:
def get_demo_pred():
    pred_file_train = pd.read_csv('Results/Demographic_predictions_train.csv')
    pred_file_test = pd.read_csv('Results/Demographic_predictions_test.csv')
    return pred_file_train,pred_file_test

def get_col_pred():
    pred_file_train = pd.read_csv('Results/Collaborative_predictions_train.csv')
    pred_file_test = pd.read_csv('Results/Collaborative_predictions_test.csv')
    return pred_file_train,pred_file_test

In [4]:
demo_pred_train,demo_pred_test = get_demo_pred()
col_pred_train,col_pred_test = get_col_pred()

In [5]:
def get_demo_accuracy():
    accuracy_train = pd.read_csv('Results/Demographic_accuracy_train.csv')
    return accuracy_train

def get_col_accuracy():
    accuracy_train = pd.read_csv('Results/Collaborative_accuracy_train.csv')
    return accuracy_train

In [6]:
demo_accuracy = get_demo_accuracy()
col_accuracy = get_col_accuracy()

## Hybrid filtering with switching

Get predictions from the switching hybrid approach

In [7]:
def switching_hybrid_pred(demo_pred,col_pred,demo_accuracy,col_accuracy):
    t0 = time.time()
    n_users = len(demo_pred)
    n_items = len(demo_accuracy)
    pred = np.zeros((n_users,n_items))
    
    for u in range (n_users):
        for i in range (n_items):
            if (demo_accuracy.values[i] > col_accuracy.values[i]):
                pred[u,i] = demo_pred.values[u,i]
            else:
                pred[u,i] = col_pred.values[u,i]
    t1 = time.time()
    time_pred = t1-t0
    print('Prediction time %.4g' % time_pred)
    return time_pred,pred

Train set predictions 

In [8]:
time_pred_train,switching_pred_train = switching_hybrid_pred(demo_pred_train,col_pred_train,demo_accuracy,col_accuracy)

Prediction time 4.597


Calculate the accuracy for train set

In [9]:
def accuracy(target,pred,reports=False):
    print('\nModel Report: \n')
    i=0
    accuracy_lst=[]
    for col in target.columns:
        accuracy = metrics.accuracy_score(target.values[:,i],pred[:,i])
        accuracy_lst.append(accuracy)
        print (col, '%.4g' % accuracy)
        if(reports==True):
            report = classification_report(target.values[:,i],pred[:,i])
            print(report)
            cm = confusion_matrix(target.values[:,i],pred[:,i])
            print(cm)
        i+=1

    avg_accuracy = np.average(accuracy_lst)
    print('\nTotal accuracy %.4g' % avg_accuracy)
    return accuracy_lst,avg_accuracy

In [10]:
switching_accuracy_train,switching_avg_accuracy_train = accuracy(dtrain_target,switching_pred_train,reports=True)


Model Report: 

AntAktAvtBank 0.9477
             precision    recall  f1-score   support

          0       0.87      0.59      0.70       383
          1       0.95      0.99      0.97      3247

avg / total       0.94      0.95      0.94      3630

[[ 227  156]
 [  34 3213]]
AntAktAvtInnskudd 0.9785
             precision    recall  f1-score   support

          0       0.89      0.82      0.85       277
          1       0.99      0.99      0.99      3353

avg / total       0.98      0.98      0.98      3630

[[ 227   50]
 [  28 3325]]
AntAktAvtKredittkort 0.735
             precision    recall  f1-score   support

          0       0.61      0.91      0.73      1434
          1       0.91      0.62      0.74      2196

avg / total       0.79      0.73      0.74      3630

[[1303  131]
 [ 831 1365]]
AntKredittkortDebet 0.7545
             precision    recall  f1-score   support

          0       0.87      0.79      0.83      2726
          1       0.51      0.64      0.57       9

  'precision', 'predicted', average, warn_for)



AntBrukskonto 0.841
             precision    recall  f1-score   support

          0       0.63      0.77      0.70       852
          1       0.93      0.86      0.89      2778

avg / total       0.86      0.84      0.85      3630

[[ 659  193]
 [ 384 2394]]
AntAvtBSU 0.8923
             precision    recall  f1-score   support

          0       0.89      1.00      0.94      3239
          1       0.00      0.00      0.00       391

avg / total       0.80      0.89      0.84      3630

[[3239    0]
 [ 391    0]]
AntAktiveSkadefors 0.9964
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      3258
          1       1.00      0.97      0.98       372

avg / total       1.00      1.00      1.00      3630

[[3258    0]
 [  13  359]]
AntAktiv_AndreForsikringer 0.9964
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      3259
          1       1.00      0.97      0.98       371

avg / total    

Test set predictions 

In [11]:
time_pred_test,switching_pred_test = switching_hybrid_pred(demo_pred_test,col_pred_test,demo_accuracy,col_accuracy)

Prediction time 1.693


Calculate the accuracy for the test set

In [12]:
switching_accuracy_test,switching_avg_accuracy_test = accuracy(dtest_target,switching_pred_test,reports=True)


Model Report: 

AntAktAvtBank 0.9455
             precision    recall  f1-score   support

          0       0.87      0.56      0.68       126
          1       0.95      0.99      0.97      1084

avg / total       0.94      0.95      0.94      1210

[[  71   55]
 [  11 1073]]
AntAktAvtInnskudd 0.976
             precision    recall  f1-score   support

          0       0.82      0.85      0.83        85
          1       0.99      0.99      0.99      1125

avg / total       0.98      0.98      0.98      1210

[[  72   13]
 [  16 1109]]
AntAktAvtKredittkort 0.7058
             precision    recall  f1-score   support

          0       0.58      0.91      0.71       476
          1       0.91      0.57      0.70       734

avg / total       0.78      0.71      0.71      1210

[[432  44]
 [312 422]]
AntKredittkortDebet 0.7736
             precision    recall  f1-score   support

          0       0.89      0.81      0.85       937
          1       0.50      0.65      0.56       273



  'precision', 'predicted', average, warn_for)


## Weighted Hybrid filtering

Get predictions from the weighted hybrid approach

In [13]:
def weighted_hybrid_pred(demo_pred,col_pred,demo_accuracy,col_accuracy):
    t0 = time.time()
    n_users = len(demo_pred)
    n_items = len(demo_accuracy)
    pred = np.zeros((n_users,n_items))
    
    for i in range (n_items):
        alpha = demo_accuracy.values[i]/(demo_accuracy.values[i]+col_accuracy.values[i])
        for u in range (n_users):
            pred[u,i] = np.round(alpha*demo_pred.values[u,i] + (1-alpha)*col_pred.values[u,i])
    t1 = time.time()
    time_pred = t1-t0
    print('Prediction time %.4g' % time_pred)
    return time_pred,pred

Train set predictions

In [14]:
time_pred_train,weighted_pred_train = weighted_hybrid_pred(demo_pred_train,col_pred_train,demo_accuracy,col_accuracy)

Prediction time 6.393


Train set accuracy

In [15]:
weighted_accuracy_train,weighted_avg_accuracy_train = accuracy(dtrain_target,weighted_pred_train,reports=True)


Model Report: 

AntAktAvtBank 0.9477
             precision    recall  f1-score   support

          0       0.87      0.59      0.70       383
          1       0.95      0.99      0.97      3247

avg / total       0.94      0.95      0.94      3630

[[ 227  156]
 [  34 3213]]
AntAktAvtInnskudd 0.9785
             precision    recall  f1-score   support

          0       0.89      0.82      0.85       277
          1       0.99      0.99      0.99      3353

avg / total       0.98      0.98      0.98      3630

[[ 227   50]
 [  28 3325]]
AntAktAvtKredittkort 0.735
             precision    recall  f1-score   support

          0       0.61      0.91      0.73      1434
          1       0.91      0.62      0.74      2196

avg / total       0.79      0.73      0.74      3630

[[1303  131]
 [ 831 1365]]
AntKredittkortDebet 0.7545
             precision    recall  f1-score   support

          0       0.87      0.79      0.83      2726
          1       0.51      0.64      0.57       9

  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

          0       0.89      1.00      0.94      3239
          1       0.00      0.00      0.00       391

avg / total       0.80      0.89      0.84      3630

[[3239    0]
 [ 391    0]]
AntAktiveSkadefors 0.9964
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      3258
          1       1.00      0.97      0.98       372

avg / total       1.00      1.00      1.00      3630

[[3258    0]
 [  13  359]]
AntAktiv_AndreForsikringer 0.9964
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      3259
          1       1.00      0.97      0.98       371

avg / total       1.00      1.00      1.00      3630

[[3258    1]
 [  12  359]]
AntAktSpareforsikring 0.8388
             precision    recall  f1-score   support

          0       0.83      1.00      0.91      2899
          1       1.00      0.20      0.33       731

avg / total       0.87  

Test set predictions

In [16]:
time_pred_test,weighted_pred_test = weighted_hybrid_pred(demo_pred_test,col_pred_test,demo_accuracy,col_accuracy)

Prediction time 1.886


Test set accuracy

In [17]:
weighted_accuracy_test,weighted_avg_accuracy_test = accuracy(dtest_target,weighted_pred_test,reports=True)


Model Report: 

AntAktAvtBank 0.9455
             precision    recall  f1-score   support

          0       0.87      0.56      0.68       126
          1       0.95      0.99      0.97      1084

avg / total       0.94      0.95      0.94      1210

[[  71   55]
 [  11 1073]]
AntAktAvtInnskudd 0.976
             precision    recall  f1-score   support

          0       0.82      0.85      0.83        85
          1       0.99      0.99      0.99      1125

avg / total       0.98      0.98      0.98      1210

[[  72   13]
 [  16 1109]]
AntAktAvtKredittkort 0.7058
             precision    recall  f1-score   support

          0       0.58      0.91      0.71       476
          1       0.91      0.57      0.70       734

avg / total       0.78      0.71      0.71      1210

[[432  44]
 [312 422]]
AntKredittkortDebet 0.7736
             precision    recall  f1-score   support

          0       0.89      0.81      0.85       937
          1       0.50      0.65      0.56       273



  'precision', 'predicted', average, warn_for)


AntAktUtlanAvt 0.9942
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1004
          1       0.99      0.98      0.98       206

avg / total       0.99      0.99      0.99      1210

[[1001    3]
 [   4  202]]
AntAktBoliglan 0.9942
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1006
          1       0.98      0.99      0.98       204

avg / total       0.99      0.99      0.99      1210

[[1001    5]
 [   2  202]]
DNBAntAktBoliglan 0.9579
             precision    recall  f1-score   support

          0       0.96      1.00      0.98      1158
          1       0.67      0.04      0.07        52

avg / total       0.95      0.96      0.94      1210

[[1157    1]
 [  50    2]]
AntAktGaranterte 0.9521
             precision    recall  f1-score   support

          0       0.95      1.00      0.98      1151
          1       0.67      0.03      0.06        59

avg / total       0.

### Save result files
#### Switching Hybrid filtering results

Save the predictions to csv

In [18]:
switching_pred_train_file=pd.DataFrame(switching_pred_train)
switching_pred_test_file=pd.DataFrame(switching_pred_test)

In [19]:
switching_pred_train_file.to_csv('Results/Hybrid_switching_predictions_train.csv',index=False)
switching_pred_test_file.to_csv('Results/Hybrid_switching_predictions_test.csv',index=False)

Save train and test accuracies to csv

In [20]:
switching_train_accuracy_file=pd.DataFrame(switching_accuracy_train)
switching_test_accuracy_file=pd.DataFrame(switching_accuracy_test)

In [21]:
switching_train_accuracy_file.to_csv('Results/Hybrid_switching_accuracy_train.csv',index=False)
switching_test_accuracy_file.to_csv('Results/Hybrid_switching_accuracy_test.csv',index=False)

#### Weighted Hybrid filtering results

Save the predictions to csv

In [22]:
weighted_pred_train_file=pd.DataFrame(weighted_pred_train)
weighted_pred_test_file=pd.DataFrame(weighted_pred_test)

In [23]:
weighted_pred_train_file.to_csv('Results/Hybrid_weighted_predictions_train.csv',index=False)
weighted_pred_test_file.to_csv('Results/Hybrid_weighted_predictions_test.csv',index=False)

Save train and test accuracies to csv

In [24]:
weighted_train_accuracy_file=pd.DataFrame(weighted_accuracy_train)
weighted_test_accuracy_file=pd.DataFrame(weighted_accuracy_test)

In [25]:
weighted_train_accuracy_file.to_csv('Results/Hybrid_weighted_accuracy_train.csv',index=False)
weighted_test_accuracy_file.to_csv('Results/Hybrid_weighted_accuracy_test.csv',index=False)