# Recommending Top-N movies - Testing recommendation algorithms

- Jupyter notebook comments
- Inline comments (+TTT)
- Add a conclusion to TTT: improvements to the formula used; rerunning the parameter tests with variation in each parameter; player 2 doesn't recognise when to block player 1's moves, so add a fudge factor to the move selector for this; etc.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display
from time import perf_counter
from MovieLensData import MovieLensData
from SplitData import SplitData
from tqdm.auto import tqdm
import sys
import importlib
import EvaluateResults
import Algorithms
import Tester

import warnings
# Blanket filter: every warning category from every module is silenced from
# this point on (presumably to keep cell output uncluttered).
# NOTE(review): this also hides deprecation and numerical warnings raised by
# pandas/numpy and the project modules - consider narrowing it by category or
# module once the noisy source is known.
warnings.filterwarnings('ignore')

In [2]:
# Dataset wrapper from the project's MovieLensData module.
ml = MovieLensData()
# Both ID selections are made before either reduce() call runs, so neither
# selection sees data already narrowed by the other. Keep this order.
# presumably minRatings=1000 keeps IDs with at least 1000 ratings - confirm in
# MovieLensData.filterIDs.
userIDs = ml.filterIDs('userId', minRatings=1000)
movieIDs = ml.filterIDs('movieId', minRatings=1000)
# The return values are discarded, so reduce() is relied on to narrow `ml`
# in place. presumably the third argument names the table being reduced -
# TODO confirm.
ml.reduce(userIDs, 'userId', 'ratings')
ml.reduce(movieIDs, 'movieId', 'movies')

# Pivot of the reduced data; with printStats=True the recorded output reports
# the retained users/movies/ratings and the sparsity. `ratingsData` is the
# input handed to SplitData in the next cell.
ratingsData = ml.buildPivot(printStats=True)

Loading data...[92m Done [0m
Building movies/ratings pivot df...[92m Done [0m
[94m2782 / 283228 users retained (0.98%)[0m
[94m3931 / 58098 movies retained (6.77%)[0m
[94m3161737 / 27753444 ratings retained (11.39%)[0m
[94m71.09% sparsity[0m


In [3]:
# One call yields all four data sets used by the rest of the notebook: the
# train / test / validation splits plus the leave-one-out data. randomState is
# pinned to 20.
train, test, validation, LOO = SplitData(ratingsData).buildAll(
    testSize=0.2,
    validationSize=0.2,
    randomState=20,
)

Building train/test/validation split by row...[92m Done [0m
Building LeaveOneOut-CrossValidation data...[92m Done [0m


In [4]:
# Algorithm wrapper constructed from the dataset object and the train/test splits.
algo = Algorithms.Algorithms(ml, train, test)
# Run before the KNN/SVD builds below, which read algo.userPivot_csr
# (presumably buildMatrix is what populates that attribute - confirm in Algorithms).
algo.buildMatrix()
# The 'CF' build takes no matrix and returns a pair, unpacked here as
# (item model, user model).
CF_itemModel, CF_userModel = algo.buildModel(modelType='CF')
# KNN is built twice on the same matrix: transposed for the item model and
# as-is for the user model.
# presumably rows of userPivot_csr are users and columns are movies - TODO confirm.
KNN_itemModel = algo.buildModel(modelType='KNN', matrix=algo.userPivot_csr.transpose())
KNN_userModel = algo.buildModel(modelType='KNN', matrix=algo.userPivot_csr)
# SVD uses the same (untransposed) matrix as the user-based KNN model.
SVD_model = algo.buildModel(modelType='SVD', matrix=algo.userPivot_csr)

Building sparse matrix...[92m Done [0m
Correlating ratings for all users...[92m Done [0m
Correlating ratings for all movies...[92m Done [0m
Correlating genres for all movies...[92m Done [0m
Correlating years for all movies...[92m Done [0m
Generating combined correlation...[92m Done [0m


In [None]:
# Metrics object built from the validation split and the leave-one-out data;
# the tester wraps it and collects the algorithms registered below.
evaluator = EvaluateResults.Metrics(
    validation,
    LOO,
    topN=10,
    moviesPerPage=5,
    thresholdRating=3.0,
    csvName='ML_RecommenderMetrics',
)
tester = Tester.Tester(evaluator)

neighbours = 100
sample = 100

# Keyword arguments shared by the four CF/KNN registrations. They are unpacked
# after model/modelType so the keyword order of each call is unchanged.
_neighbourhoodArgs = dict(neighbours=neighbours, sample=sample, pred='calc', buildTable=True)

# (display name, recommender callable, prebuilt model, model type), in
# registration order.
_neighbourhoodAlgos = [
    ('Item-Based CF', algo.itemBased, CF_itemModel, 'CF'),
    ('User-Based CF', algo.userBased, CF_userModel, 'CF'),
    ('Item-Based KNN', algo.itemBased, KNN_itemModel, 'KNN'),
    ('User-Based KNN', algo.userBased, KNN_userModel, 'KNN'),
]
for _label, _recommend, _model, _modelType in _neighbourhoodAlgos:
    tester.addAlgorithm(_label, _recommend, model=_model, modelType=_modelType, **_neighbourhoodArgs)

# SVD and the random control take their own, smaller argument sets.
tester.addAlgorithm('SVD', algo.SVD, model=SVD_model, buildTable=True)
tester.addAlgorithm('Random Control', algo.random, randomRatings=True)

tester.runBasicTest(test, sampleTest=10)

In [None]:
# Parameter test over the 'pred' keyword: pRange lists the values handed to
# runParameterTest (presumably each registered algorithm is rerun once per
# value - confirm in Tester.runParameterTest).
parameter = 'pred'
pRange = [
    'rand',
    'calc',
    'sims',
    'norm_sims',
]
# NOTE: this rebinds the notebook-level name `sample`. Here it is forwarded as
# sampleTest; the previous cell bound the same name to 100 and passed it as the
# `sample=` keyword of addAlgorithm.
sample = 10

tester.runParameterTest(
    test,
    param=parameter,
    pRange=pRange,
    sampleTest=sample,
    printResults=False,
)