## Computing the Optimal Weights for Blending

In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to the local model files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

Necessary imports:

In [None]:
import random

import numpy as np

from baselines import Baselines
from MF_SGD import MF_SGD
from MF_BSGD import MF_BSGD
from MF_ALS import MF_ALS
from surprise_models import SurpriseModels
from blending import Blending
from data import Data

Set the random seed to be able to reproduce the results.

In [None]:
# Single seed shared by every global random number generator used in this run.
SEED = 98

# Seed both the Python stdlib RNG and NumPy's legacy global RNG: libraries such
# as Surprise draw from both, so seeding only NumPy does not guarantee that a
# Restart & Run All reproduces the same results.
random.seed(SEED)
np.random.seed(SEED)

Load and prepare data.

In [None]:
# Build the shared data object that is handed to every model below.
# NOTE(review): test_purpose=True presumably makes Data hold out a labelled test
# split (data.test_df is read later when blending) — confirm in data.py.
data = Data(test_purpose=True)

Dictionary for the models to blend:

In [None]:
# Registry of the models to blend. Every entry starts as None and is filled in
# with that model's predicted ratings by the cells below. Insertion order is the
# order of the list.
models = dict.fromkeys([
    'baseline_global_mean',
    'baseline_user_mean',
    'baseline_item_mean',
    'mf_sgd',
    'mf_bsgd',
    'mf_als',
    'surprise_kNN_baseline_user',
    'surprise_kNN_baseline_item',
    # 'surprise_SVD',      (currently disabled)
    # 'surprise_SVDpp',    (currently disabled)
    'surprise_slope_one',
    'surprise_co_clustering',
])

Run Baseline models.

In [None]:
# One Baselines object serves all three mean-based predictors; each call's
# 'Rating' column is stored under the matching key of `models`.
baselines = Baselines(data=data, test_purpose=True)

print('\nModelling using baseline_global_mean:')
models['baseline_global_mean'] = baselines.baseline_global_mean()['Rating']

print('\nModelling using baseline_user_mean:')
models['baseline_user_mean'] = baselines.baseline_user_mean()['Rating']

# The progress label now names the model by its actual identifier
# ('baseline_item_mean'), matching the method and the key in `models`.
print('\nModelling using baseline_item_mean:')
models['baseline_item_mean'] = baselines.baseline_item_mean()['Rating']

Run Matrix Factorization model trained using Stochastic Gradient Descent.

In [None]:
# Matrix factorization fitted with plain SGD; keep only the 'Rating' column of
# whatever train() returns as this model's contribution to the blend.
mf_sgd = MF_SGD(data=data, test_purpose=True)

print('\nModelling using MF_SGD:')
mf_sgd_predictions = mf_sgd.train()
models['mf_sgd'] = mf_sgd_predictions['Rating']

Run Matrix Factorization model trained using Biased Stochastic Gradient Descent.

In [None]:
# Matrix factorization fitted with biased SGD; keep only the 'Rating' column of
# whatever train() returns as this model's contribution to the blend.
mf_bsgd = MF_BSGD(data=data, test_purpose=True)

print('\nModelling using MF_BSGD:')
mf_bsgd_predictions = mf_bsgd.train()
models['mf_bsgd'] = mf_bsgd_predictions['Rating']

Run Matrix Factorization model trained using Alternating Least Squares.

In [None]:
# Matrix factorization fitted with ALS; keep only the 'Rating' column of
# whatever train() returns as this model's contribution to the blend.
mf_als = MF_ALS(data=data, test_purpose=True)

print('\nModelling using MF_ALS:')
mf_als_predictions = mf_als.train()
models['mf_als'] = mf_als_predictions['Rating']

Run Models from Surprise Library.

In [None]:
# All Surprise-based predictors go through one wrapper built on the shared data.
surprise_models = SurpriseModels(data=data, test_purpose=True)

# Similarity settings for the two kNN-baseline variants (user- vs item-based).
user_based_sim = {'name': 'cosine', 'user_based': True}
item_based_sim = {'name': 'pearson_baseline', 'user_based': False}

print('\nModelling using user based Surprise kNN Baseline:')
knn_user_predictions = surprise_models.kNN_baseline(k=150, sim_options=user_based_sim)
models['surprise_kNN_baseline_user'] = knn_user_predictions['Rating']

print('\nModelling using item based Surprise kNN Baseline:')
knn_item_predictions = surprise_models.kNN_baseline(k=150, sim_options=item_based_sim)
models['surprise_kNN_baseline_item'] = knn_item_predictions['Rating']

print('\nModelling using Surprise SlopeOne:')
slope_one_predictions = surprise_models.slope_one()
models['surprise_slope_one'] = slope_one_predictions['Rating']

# SVD and SVD++ are currently disabled (they are also absent from `models`):
#print('\nModelling using Surprise SVD:')
#models['surprise_SVD'] = surprise_models.SVD()['Rating']

#print('\nModelling using Surprise SVD++:')
#models['surprise_SVDpp'] = surprise_models.SVDpp()['Rating']

print('\nModelling using Surprise Co-Clustering:')
co_clustering_predictions = surprise_models.co_clustering()
models['surprise_co_clustering'] = co_clustering_predictions['Rating']

Run the blending algorithm to find the optimal weights for the resulting blended (combined) model.

In [None]:
# Blend the per-model predictions collected in `models` against the 'Rating'
# column of the held-out test frame.
test_ratings = data.test_df['Rating']
blending = Blending(models, test_ratings)

print('\nModelling using weighted averaging of the previous models.')
optimal_weights = blending.optimize_weighted_average()

# Show the weights found by the optimizer.
print('\nOptimal weights: ', optimal_weights)