In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

#Predictors
import Predictors as pred

In [2]:
class Recommender(object):
    """Recommend items to users by ranking scores produced by a pluggable predictor.

    Typical use: construct with a predictor, call ``parse_moviebase`` to load
    the data, ``learn`` to fit the predictor, then ``recommend``.

    The predictor must expose ``fit(user_ratings)`` and ``predict(userid)``.
    The sequence returned by ``predict`` is paired positionally with
    ``self.items``.

    Attributes set by ``parse_moviebase``:
        items: list of item names, in file order.
        users: list of user names, in file order.
        user_ratings: int8 array with one row per user and one column per
            item; 0 marks an item the user has not rated.
    """

    def __init__(self, predictor=None):
        self.predictor = predictor
        # Defined up front so that calling recommend() before
        # parse_moviebase() fails with a clear "No user" error instead of an
        # AttributeError.
        self.items = []
        self.users = []
        self.user_ratings = []

    def learn(self):
        """Fit the predictor on the loaded rating matrix.

        Raises:
            ValueError: if no predictor was supplied.
        """
        if self.predictor is None:
            raise ValueError('No predictor')
        self.predictor.fit(self.user_ratings)

    def recommend(self, user, n=10, rec_seen=True):
        """Return the top ``n`` items for ``user`` as ``(score, item)`` tuples.

        Args:
            user: user name as it appears in the database.
            n: maximum number of items to return.
            rec_seen: when false, items the user has already rated
                (rating >= 1) are excluded from the result.

        Returns:
            A list of ``(score, item)`` tuples sorted by descending score.

        Raises:
            ValueError: if ``user`` is unknown or no predictor was supplied.
        """
        if user not in self.users:
            raise ValueError('No user with name ' + str(user))
        if self.predictor is None:
            raise ValueError('No predictor')
        userid = self.users.index(user)

        scored = [tuple(pair) for pair in zip(self.predictor.predict(userid), self.items)]

        if not rec_seen:
            # The mask is in item order, so it must be applied BEFORE sorting;
            # applying it to the score-sorted list would drop the wrong items.
            unseen = self.user_ratings[userid] < 1
            scored = [pair for pair, keep in zip(scored, unseen) if keep]

        scored.sort(reverse=True)
        return scored[:n]

    def parse_moviebase(self, file_name):
        """Load items, users and ratings from a moviebase text file.

        Format, as read by this parser:
          * blank lines and lines starting with ``%`` are ignored;
          * ``[items]`` starts the item section (one item name per line);
          * ``[users]`` starts the user section, where each line is
            ``name, r1, r2, ...`` with one rating per item and ``?`` meaning
            "not rated" (stored as 0).

        A user row with the wrong number of ratings is reported on stdout and
        then padded with 0 / truncated to the item count so the rating matrix
        stays rectangular.

        ``self.items``, ``self.users`` and ``self.user_ratings`` are replaced
        only after the whole file has been parsed.
        """
        items = []
        users = []
        rows = []
        mode = 'none'
        iCount = 0
        # The context manager guarantees the file is closed even on a parse error.
        with open(file_name, 'rt', encoding='utf-8') as data:
            for line in data:
                ln = line.strip()
                if not ln or ln[0] == '%':
                    continue    # empty line or comment
                if ln == '[items]':
                    # switch to parsing item data
                    mode = 'items'
                    continue
                if ln == '[users]':
                    # switch to parsing user/rating data
                    mode = 'users'
                    iCount = len(items)
                    continue
                if mode == 'items':
                    items.append(ln)
                elif mode == 'users':
                    fields = ln.split(',')
                    name, raw = fields[0], fields[1:]
                    if len(raw) != iCount:    # check DB consistency
                        print("User %s has invalid number of ratings (%d)." % (name, len(raw)))
                    row = [0 if v.strip() == '?' else float(v) for v in raw]
                    # Keep the matrix rectangular: missing ratings count as
                    # "not rated", surplus ratings are dropped.
                    row = (row + [0] * iCount)[:iCount]
                    users.append(name)
                    rows.append(row)
                else:
                    print('Strange line in database:')
                    print(line)

        self.items = items
        self.users = users
        # int8 storage: fractional ratings are truncated by the cast.
        self.user_ratings = np.array(rows, dtype=np.int8)

In [3]:
# Available predictors, keyed by a short name. Each entry is a factory so that
# only the selected predictor is actually constructed (previously all four
# were built and the first three were immediately discarded).
PREDICTOR_FACTORIES = {
    'random': lambda: pred.RandomPredictor(1, 5),
    'average': lambda: pred.AveragePredictor(10),
    'views': lambda: pred.ViewsPredictor(),
    'deviation': lambda: pred.DeviationPredictor(),
}

# Change this name to switch predictor; 'deviation' matches the previous
# behaviour, where the last assignment won.
PREDICTOR_NAME = 'deviation'
predictor = PREDICTOR_FACTORIES[PREDICTOR_NAME]()

In [4]:
# Build the recommender around the selected predictor, load the rating
# database and fit the predictor on it.
r = Recommender(predictor=predictor)
r.parse_moviebase(file_name="moviebase2016.txt")
r.learn()

# Top-10 recommendations for user "GP"; as the last expression of the cell,
# the result is displayed as the cell output.
r.recommend(user="GP")

[(1.3184612971353746, 'Harry Potter and the Deathly Hallows 2'),
 (1.2307692307692308, 'Petelinji zajtrk'),
 (1.1450596061505673, 'American Pie'),
 (1.0895620944471014, 'WALL-E'),
 (1.0814574300500139, 'Rocky'),
 (1.0479767555024595, 'Pirates of the Carribean: Black Pearl'),
 (1.0290563230940146, 'Alien'),
 (0.99149158118238923, 'Ice Age: Dawn of the Dinosaurs'),
 (0.95095793159020503, 'Star Trek Nemesis'),
 (0.93755907186294896, 'The godfather')]