In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

#Predictors
import Predictors as pred

In [85]:
class Recommender(object):
    """Movie recommender built around a pluggable rating predictor.

    The rating database is loaded with ``parse_moviebase`` into:

    * ``self.items``        -- list of item titles, in file order
    * ``self.users``        -- list of user names, in file order
    * ``self.user_ratings`` -- ``np.int8`` array with one row per user and
      one column per item; ``0`` marks an item the user has not rated.

    The predictor is any object offering ``fit(user_ratings)`` and
    ``predict(user_index)``. The result of ``predict`` is zipped with
    ``self.items``, so it is presumably one score per item.
    """

    def __init__(self, predictor=None):
        """Store the predictor and start with an empty rating database."""
        self.predictor = predictor
        # Define the database attributes up front so they always exist, even
        # if parse_moviebase() has not been called yet.
        self.items = []
        self.users = []
        self.user_ratings = np.empty((0, 0), dtype=np.int8)

    def learn(self):
        """Fit the predictor on the currently loaded rating matrix.

        Raises:
            ValueError: if no predictor was supplied.
        """
        if self.predictor is None:
            raise ValueError('No predictor')
        self.predictor.fit(self.user_ratings)

    def recommend(self, user, n=10, rec_seen=True):
        """Return the ``n`` best ``(score, title)`` pairs for ``user``.

        Args:
            user: user name as it appears in the database.
            n: maximum number of items to return.
            rec_seen: when falsy, items the user has already rated
                (rating >= 1) are left out of the result.

        Returns:
            A list of ``(score, title)`` tuples sorted in descending order
            (ties on score are broken by title, also descending).

        Raises:
            ValueError: if there is no predictor or the user is unknown.
        """
        if self.predictor is None:
            # Same error as learn() instead of an opaque AttributeError.
            raise ValueError('No predictor')
        try:
            userid = self.users.index(user)
        except ValueError:
            raise ValueError('No user with name ' + str(user))

        scored = list(zip(self.predictor.predict(userid), self.items))
        if not rec_seen:
            # A stored rating of 0 means "not rated", i.e. not seen yet.
            unseen = self.user_ratings[userid] < 1
            scored = [pair for pair, keep in zip(scored, unseen) if keep]

        scored.sort(reverse=True)
        return scored[:n]

    def parse_moviebase(self, file_name):
        """Load items, users and ratings from a moviebase text file.

        File format, as read by this parser:

        * blank lines and lines starting with ``%`` are ignored;
        * ``[items]`` starts the item section, one title per line;
        * ``[users]`` starts the user section, one comma-separated line per
          user: the name followed by one rating per item, ``?`` meaning
          "not rated" (stored as 0).

        A user line whose rating count differs from the number of items is
        reported on stdout and skipped, so the rating matrix stays
        rectangular. Lines that appear before any section header are
        reported as well.

        The instance attributes are only replaced once the whole file has
        been read successfully.
        """
        items = []
        users = []
        rows = []
        expected = 0    # ratings expected per user; fixed when [users] starts
        mode = 'none'
        # 'with' guarantees the file is closed, also when parsing fails.
        with open(file_name, 'rt', encoding='utf-8') as data:
            for line in data:
                ln = line.strip()
                if not ln or ln[0] == '%':
                    continue    # empty line or comment
                if ln == '[items]':
                    mode = 'items'
                    continue
                if ln == '[users]':
                    mode = 'users'
                    expected = len(items)
                    continue
                if mode == 'items':
                    items.append(ln)
                elif mode == 'users':
                    fields = ln.split(',')
                    if len(fields) != expected + 1:    # check DB consistency
                        print("User %s has invalid number of ratings (%d). Skipping user."
                              % (fields[0], len(fields) - 1))
                        continue
                    users.append(fields[0])
                    rows.append([0 if v.strip() == '?' else float(v)
                                 for v in fields[1:]])
                else:
                    print('Strange line in database:')
                    print(line)

        self.items = items
        self.users = users
        # NOTE: the int8 cast truncates fractional ratings (3.5 -> 3).
        # The reshape keeps the matrix 2-D even when there are no users.
        self.user_ratings = np.array(rows, dtype=np.int8).reshape(len(users), expected)

    def getuser(self, username):
        """Return the row index of ``username`` (ValueError if unknown)."""
        return self.users.index(username)

    def getitem(self, title):
        """Return the column index of ``title`` (ValueError if unknown)."""
        return self.items.index(title)

    def split(self, learnsizepercentage=0.75):
        """Randomly split the users into a learning set and a test set.

        Each user is put in the learning set independently with probability
        ``learnsizepercentage``, so the resulting sizes are random rather
        than an exact fraction.

        Returns:
            ``((learn_ratings, learn_names), (test_ratings, test_names))``
            where the ratings are row subsets of ``self.user_ratings`` and
            the names are numpy arrays of the matching user names.
        """
        usersNum = np.shape(self.user_ratings)[0]
        names = np.array(self.users)
        selector = np.random.rand(usersNum) < learnsizepercentage
        learn = (self.user_ratings[selector], names[selector])
        test = (self.user_ratings[~selector], names[~selector])
        return (learn, test)

    def splitKFold(self, slices=10):
        """Randomly partition the users into ``slices`` folds.

        Every user is assigned to exactly one fold, chosen uniformly at
        random; folds may therefore differ in size and can be empty.

        Returns:
            An object array of shape ``(slices, 2)``; row ``i`` holds
            ``(ratings_i, names_i)`` for fold ``i``.

        Raises:
            ValueError: if ``slices`` is smaller than 1.
        """
        if slices < 1:
            raise ValueError('slices must be a positive integer')
        usersNum = np.shape(self.user_ratings)[0]
        names = np.array(self.users)
        draws = np.random.rand(usersNum)
        # One fold index per user: unlike comparing against separately
        # rounded interval bounds, this can never put a user in two folds
        # or in none. The clip guards against draws * slices rounding up
        # to `slices`.
        fold_of_user = np.minimum((draws * slices).astype(int), slices - 1)

        # The folds are ragged, so fill an object array explicitly;
        # np.array() on ragged input raises on NumPy >= 1.24.
        folds = np.empty((slices, 2), dtype=object)
        for i in range(slices):
            s = fold_of_user == i
            folds[i, 0] = self.user_ratings[s]
            folds[i, 1] = names[s]
        return folds
            
    

In [82]:
# Registry of the available predictors. Every entry is a zero-argument
# factory, so only the selected predictor is actually constructed (before,
# all seven were instantiated in turn and six were immediately overwritten).
PREDICTOR_FACTORIES = {
    'random': lambda: pred.RandomPredictor(1, 5),
    'average': lambda: pred.AveragePredictor(10),
    'views': lambda: pred.ViewsPredictor(),
    'deviation': lambda: pred.DeviationPredictor(),
    'user_based': lambda: pred.UserBasedPredictor(0, 0.2),
    'item_based': lambda: pred.ItemBasedPredictor(0, 0.2),
    'slope_one': lambda: pred.SlopeOnePredictor(),
}

# Change this key to switch predictor. 'slope_one' is the one that the
# original sequence of assignments ended up with.
PREDICTOR_NAME = 'slope_one'
predictor = PREDICTOR_FACTORIES[PREDICTOR_NAME]()

In [87]:
# Build a recommender around the currently selected `predictor` and load the
# rating database (path is relative to the notebook's working directory).
r = Recommender(predictor)
r.parse_moviebase("moviebase2016.txt")
# Earlier experiments, left disabled:
#r.learn()
#r.recommend("GP", 20, 1)
#predictor.similarity(r.getitem("Pulp Fiction"), r.getitem("The godfather"))
#r.split(0.2)
# Last expression of the cell: the 3 random user folds are what the notebook
# shows as this cell's output.
r.splitKFold(3)


array([[ array([[4, 5, 0, 4, 0, 0, 4, 3, 1, 1, 0, 4, 0, 0, 3, 3, 5, 3],
       [3, 3, 0, 2, 4, 0, 5, 2, 0, 0, 0, 5, 0, 2, 2, 2, 0, 2],
       [4, 4, 2, 2, 5, 2, 5, 3, 0, 2, 3, 4, 2, 4, 0, 0, 5, 0],
       [0, 5, 5, 4, 5, 0, 0, 2, 0, 3, 5, 4, 5, 0, 3, 0, 0, 0],
       [3, 5, 0, 3, 4, 0, 5, 2, 0, 2, 4, 5, 5, 0, 3, 2, 4, 0],
       [5, 5, 4, 4, 5, 4, 5, 5, 0, 0, 0, 5, 4, 5, 3, 3, 4, 5],
       [3, 4, 5, 4, 5, 0, 4, 3, 1, 0, 0, 5, 4, 3, 0, 1, 5, 1],
       [3, 3, 0, 4, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 2, 0, 4],
       [4, 5, 5, 3, 5, 0, 5, 4, 0, 0, 5, 0, 4, 3, 3, 3, 5, 3],
       [0, 5, 0, 3, 4, 0, 0, 0, 0, 0, 5, 0, 3, 2, 0, 2, 0, 0],
       [4, 5, 4, 3, 3, 0, 2, 4, 0, 0, 4, 5, 5, 0, 3, 0, 4, 3],
       [3, 4, 5, 3, 0, 0, 5, 5, 0, 0, 0, 5, 4, 0, 0, 3, 5, 4],
       [5, 5, 5, 4, 4, 3, 5, 4, 2, 3, 5, 5, 5, 4, 4, 5, 4, 3],
       [3, 0, 0, 2, 5, 0, 5, 3, 3, 0, 0, 0, 0, 4, 3, 3, 5, 4],
       [3, 5, 4, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3],
       [3, 5, 5, 0, 5, 0, 5, 5, 0, 5, 4, 0, 5,

In [5]:


#tst = np.array([[1,2],[3,4],[6,7]])
#print(tst)
#tst[2, 1]