In [11]:
import math
import json
import os
import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default plot settings, then explicitly select the
# 'darkgrid' style for the plots produced by this notebook.
sns.set()
sns.set_style('darkgrid')

In [116]:
def ensure_dir(file_path):
    """Create the parent directory of ``file_path`` if it does not exist yet.

    Args:
        file_path: Path of a file that is about to be written. Only its
            directory component is created; the file itself is not touched.

    Returns:
        None.
    """
    directory = os.path.dirname(file_path)
    # A bare filename such as 'out.json' has an empty dirname, meaning the
    # current directory; os.makedirs('') would raise, and there is nothing
    # to create in that case.
    if directory:
        # exist_ok avoids the race between a separate exists() check and
        # the directory creation.
        os.makedirs(directory, exist_ok=True)

def saveJson(obj, path):
    """Write ``obj`` to ``path`` as JSON.

    The parent directory of ``path`` is prepared with ``ensure_dir`` before
    the file is opened.

    Args:
        obj: Object passed to ``json.dump``.
        path: Destination file path.
    """
    ensure_dir(path)
    with open(path, 'w+') as json_file:
        json.dump(obj, fp=json_file)

In [156]:
# Directory that holds the pre-processed MovieLens JSON files.
path='../../movielens/'


def _load_json(filename):
    """Read and parse the JSON file ``filename`` located under ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as parsing finishes instead of being left to the garbage collector.
    """
    with open(path+filename) as json_file:
        return json.load(json_file)


movies_dict=_load_json('movies-dictionary.json')
genres_dict=_load_json('genres-dictionary.json')
movies_query_dict=_load_json('movies-query-dictionary.json')

# Ratings indexed by movie first (movies_ratings[movie] is keyed by user below).
movies_ratings=_load_json('movie-ratings.json')

# Ratings indexed by user first (user_ratings[user][movie]['rating'] below).
user_ratings=_load_json('user-ratings.json')

users = _load_json('users.json')


In [88]:
def get_rating(ratings, user, movie):
    """Look up the ``'rating'`` value stored at ``ratings[user][movie]``.

    Args:
        ratings: Two-level mapping whose leaves are dicts with a
            ``'rating'`` key.
        user: First-level key.
        movie: Second-level key.

    Returns:
        The value stored under ``'rating'``.

    Raises:
        KeyError: If any of the three keys is missing.
    """
    entry = ratings[user][movie]
    return entry['rating']

In [13]:
def inverse_genre_frequency(movies, genres, genre):
    """Return ``log(N / n)`` for a genre.

    ``N`` is ``movies['meta']['number_of_movies']`` and ``n`` is
    ``genres[genre]['meta']['number_of_movies']``.

    Args:
        movies: Movies dictionary with a ``'meta'`` section.
        genres: Genres dictionary keyed by genre, each with a ``'meta'`` section.
        genre: Key of the genre to score.

    Returns:
        The natural logarithm of ``N / n`` as a float, or ``0.0`` when
        either count is not positive (the ratio is undefined there).
    """
    # number of movies
    total_movies = movies['meta']['number_of_movies']

    # number of genre movies
    genre_movies = genres[genre]['meta']['number_of_movies']

    # An empty genre would divide by zero and an empty catalogue would take
    # log(0); such a genre carries no weight.
    if genre_movies <= 0 or total_movies <= 0:
        return 0.0

    return math.log(total_movies/genre_movies)

In [139]:
def user_genre_frequency(ratings, users, user, genre):
    """Sum of a user's ratings within one genre, divided by their rating count.

    Args:
        ratings: Mapping read through ``get_rating(ratings, user, movie)``.
        users: Users dictionary; ``users[user]['meta']`` must provide
            ``'number_of_ratings'`` and a ``'movies'`` mapping keyed by genre.
        user: Key of the user.
        genre: Key of the genre.

    Returns:
        ``0`` when the user has no ratings or no movies in ``genre``;
        otherwise the sum of the user's ratings for the genre's movies
        divided by the user's total number of ratings.
    """
    rating_count = users[user]['meta']['number_of_ratings']

    # No ratings at all: avoid dividing by zero below.
    if rating_count == 0:
        return 0

    genre_movies = users[user]['meta']['movies'][genre]

    if len(genre_movies) == 0:
        return 0

    genre_ratings = np.fromiter(
        [get_rating(ratings, user, movie) for movie in genre_movies], float
    )

    return sum(genre_ratings)/rating_count


In [140]:
def user_genre_preference(ratings, users, movies, genres, user, genre):
    """Score a user's preference for a genre.

    The score is the product of ``user_genre_frequency`` (for the user and
    genre) and ``inverse_genre_frequency`` (for the genre).

    Args:
        ratings: Ratings mapping forwarded to ``user_genre_frequency``.
        users: Users dictionary forwarded to ``user_genre_frequency``.
        movies: Movies dictionary forwarded to ``inverse_genre_frequency``.
        genres: Genres dictionary forwarded to ``inverse_genre_frequency``.
        user: Key of the user.
        genre: Key of the genre.

    Returns:
        The product of the two factors.
    """
    return (
        user_genre_frequency(ratings, users, user, genre)
        * inverse_genre_frequency(movies, genres, genre)
    )

### Dictionary of genre preferences

In [158]:
# Build the user/genre preference dictionary:
#   ug_prefs['data'][user]['data'][genre] -> preference, normalised per user
#   ug_prefs['data'][user]['meta']        -> 'preference_weight' (raw total),
#                                            'fav_genre', 'genres_rated'
#   ug_prefs['meta'][genre]               -> 'appearances' (users with a
#                                            positive preference) and the
#                                            summed raw 'preference_weight'
ug_prefs={'data': {}, 'meta': {}}

for user in users:
    user_entry = ug_prefs['data'].setdefault(user, {})
    user_data = user_entry.setdefault('data', {})
    user_meta = user_entry.setdefault('meta', {})
    wmax=0
    user_genres=0

    for genre in genres_dict.keys():
        user_meta.setdefault('preference_weight', 0)

        genre_meta = ug_prefs['meta'].setdefault(genre, {})
        genre_meta.setdefault('appearances', 0)
        genre_meta.setdefault('preference_weight', 0)

        pref = user_genre_preference(user_ratings, users, movies_dict, genres_dict, user, genre)

        if pref>0:
            genre_meta['appearances']+=1
            user_genres+=1

        # Track the genre with the highest raw preference; 'fav_genre' stays
        # unset for users whose preferences are all zero.
        if wmax<pref:
            user_meta['fav_genre']=genre
            wmax=pref

        user_data[genre]=pref
        user_meta['preference_weight']+=pref

        genre_meta['preference_weight']+=pref

    # Normalise the user's preferences so they sum to 1. Users with no
    # ratings have a total weight of 0; their preferences are left at 0
    # instead of dividing by zero.
    total_weight = user_meta.get('preference_weight', 0)
    if total_weight:
        for genre,weight in user_data.items():
            user_data[genre]=weight/total_weight

    user_meta['genres_rated']=user_genres
    

In [159]:
# Persist the user/genre preference dictionary as JSON, at a path relative
# to the current working directory.
saveJson(ug_prefs, './user_genre_preferences.json')

In [148]:
def get_genre_preference(ug_prefs, user, genre):
    """Fetch the stored preference of ``user`` for ``genre``.

    Args:
        ug_prefs: Preference dictionary laid out as
            ``ug_prefs['data'][user]['data'][genre]``.
        user: Key of the user.
        genre: Key of the genre.

    Returns:
        The value stored for that user and genre.

    Raises:
        KeyError: If the user or genre is not present.
    """
    user_preferences = ug_prefs['data'][user]['data']
    return user_preferences[genre]

In [176]:
def movie_genre_weight(movie_ratings, movies,ug_prefs, genres, movie, genre):
    """Weight of ``genre`` for ``movie``, derived from its raters' preferences.

    The numerator sums, over every user who rated the movie, the rating
    multiplied by that user's preference for ``genre``. The denominator
    counts the (genre, user) pairs, over all genres and the same users, whose
    stored preference is non-zero. The quotient is scaled by
    ``inverse_genre_frequency`` for ``genre``.

    Args:
        movie_ratings: Ratings keyed by movie, then by user.
        movies: Movies dictionary forwarded to ``inverse_genre_frequency``.
        ug_prefs: Preference dictionary read via ``get_genre_preference``.
        genres: Genres dictionary; its keys are the genres counted in the
            denominator.
        movie: Key of the movie.
        genre: Key of the genre being weighted.

    Returns:
        The weight as a float, or ``0.0`` when no rater has any non-zero
        genre preference (including when the movie has no raters).
    """
    # Read from the `movie_ratings` argument rather than a notebook-level
    # global, so the function works on whatever ratings it is given.
    raters = list(movie_ratings[movie].keys())

    # get_rating indexes its first argument as [second][third], so passing
    # (movie, user) matches the movie-first layout of movie_ratings.
    weighted_ratings = sum(
        get_rating(movie_ratings, movie, user)*get_genre_preference(ug_prefs, user, genre)
        for user in raters
    )

    nonzero_preferences = sum(
        1
        for g in genres.keys()
        for user in raters
        if get_genre_preference(ug_prefs, user, g)
    )

    # Without any non-zero preference the quotient is undefined.
    if nonzero_preferences == 0:
        return 0.0

    return weighted_ratings/nonzero_preferences*inverse_genre_frequency(movies=movies, genres=genres, genre=genre)

### dictionary of genre weights in movies