In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

#Predictors
import Predictors as pred

In [2]:
class Recommender(object):                                                       
    def __init__(self, predictor=None):                                          
        self.predictor = predictor
        
    def learn(self):
        if self.predictor is not None:
            self.predictor.fit(self.user_ratings)
        else:
            raise ValueError('No predictor')
    
    def recommend(self, user, n=10, rec_seen=True):
        #n - number of movies
        #rec_seen - already seen
        if user not in self.users:
            raise ValueError('No user with name ' + user)
        userid = self.users.index(user)
        #print(self.predictor.predict(userid))
        mitm = [tuple(a) for a in zip(self.predictor.predict(userid), self.items)]
        if rec_seen == False:
            
            watched = (self.user_ratings[userid]<1)
            mitm = [i for (i, v) in zip(mitm, watched) if v]

        mitm = sorted(mitm, reverse=True)
        return mitm[:n]
        
            
    def parse_moviebase(self, file_name):
        data = open(file_name, 'rt', encoding='utf-8')
        self.items = []
        self.users = []
        self.user_ratings = []
        mode = 'none'
        for line in data:
            ln = line.strip()
            if not ln or ln[0] == '%': continue    # empty line or comment
            if ln == '[items]':
                # switch to parsing item data
                mode = 'items'
                continue
            if  ln == '[users]':
                # switch to parsing user/rating data
                mode = 'users'
                iCount = len(self.items)
                continue
            if mode == 'items':
                self.items.append(ln)
            elif mode == 'users':
                ln = ln.split(',')
                if len(ln) != iCount+1:    # check DB consistency
                    print("User %s has invalid number of ratings (%d)." % (ln[0], len(self.ratings[ln[0]])))
                self.user_ratings.append([])
                self.users.append(ln[0])
                for v in ln[1:]:
                    v = v.strip()
                    if v == '?': 
                        self.user_ratings[-1].append(0)
                    else:
                        self.user_ratings[-1].append(float(v))
            else:
                print('Strange line in database:')
                print(line)
        self.user_ratings = np.array(self.user_ratings, dtype=np.int8)
        
    def getuser(self, username):
        return self.users.index(username)
    
    def getitem(self, title):
        return self.items.index(title)
    
    

In [3]:
predictor = pred.RandomPredictor(1, 5)
predictor = pred.AveragePredictor(10)
predictor = pred.ViewsPredictor()
predictor = pred.DeviationPredictor()
predictor = pred.UserBasedPredictor(0,0.2)
predictor = pred.ItemBasedPredictor(0,0.2)

In [84]:
class SlopeOnePredictor:
    def fit(self, data):
        self.data = data

    def predict(self, number):
        (usersNum, moviesNum) = np.shape(self.data)
        
        retscores = []
        for movieId in range(0,moviesNum):
            scores = []
            weight = []
            for m in range(0,moviesNum):
                if m!=movieId:
                    s, w = self.dev(m, movieId)
                    scores.append(s)
                    weight.append(w)
                else:
                    scores.append(0)
                    weight.append(0)

            pred = np.sum((self.data[number] - np.array(scores)) * np.array(weight))/np.sum(np.array(weight))
            retscores.append(pred)
        return np.array(retscores)
        
        
    def dev(self, m1, m2):
        v1 = self.data[:, m1]
        v2 = self.data[:, m2]

        selector = (v2>0) & (v1>0)
        v1 = v1[selector]
        v2 = v2[selector]
        return (np.sum(v1-v2)/len(v1), len(v1))

predictor = SlopeOnePredictor()

In [85]:
r = Recommender(predictor)
r.parse_moviebase("moviebase2016.txt")
r.learn()
r.recommend("GP", 20, 1)
#predictor.similarity(r.getitem("Pulp Fiction"), r.getitem("The godfather"))


[(4.1337448559670777, 'Pulp Fiction'),
 (3.9881422924901186, "Schindler's List"),
 (3.9547038327526134, 'Life of Brian'),
 (3.9471830985915495, 'The godfather'),
 (3.919178082191781, 'Matrix'),
 (3.7435424354243541, 'A Beautiful Mind'),
 (3.4981949458483754, 'WALL-E'),
 (3.4056224899598395, 'Pirates of the Carribean: Black Pearl'),
 (3.3824701195219125, 'Alien'),
 (3.2845188284518829, 'Star Trek Nemesis'),
 (3.1755102040816325, 'Men in Black'),
 (3.117820324005891, 'Ice Age: Dawn of the Dinosaurs'),
 (2.9583333333333335, 'Titanic'),
 (2.8841059602649008, 'Rocky'),
 (2.7642276422764227, 'Harry Potter and the Deathly Hallows 2'),
 (2.6732026143790848, 'Petelinji zajtrk'),
 (2.5862552594670407, 'American Pie'),
 (1.2625, 'Sex and the City (film)')]

In [None]:


#tst = np.array([[1,2],[3,4],[6,7]])
#print(tst)
#tst[2, 1]