In [None]:
import pandas as pd
from apyori import apriori

In [None]:
# Load the two raw CSV tables; the unpacking order matches the path tuple,
# so `ratings` is read first and `movies` second.
ratings, movies = (
    pd.read_csv(csv_path)
    for csv_path in ("data/ratings.csv", "data/movies.csv")
)

In [None]:
# Mean rating per user as a plain dict (userId -> mean) for fast lookups.
rating_mean_per_user_dict = (
    ratings
    .groupby("userId")["rating"]
    .mean()
    .to_dict()
)

# Mean rating per movie, turned from a Series into a one-column DataFrame
# indexed by movieId so it can be joined below.
rating_mean_per_movie = (
    ratings
    .groupby("movieId")["rating"]
    .mean()
    .to_frame()
)

# Attach the columns of `movies` by joining on the movieId index.
rating_mean_per_movie_joined = rating_mean_per_movie.join(
    movies.set_index("movieId")
)

# Nested dict {movieId: {column: value}}; a single field is then reachable as
# e.g. rating_mean_per_movie_dict[1]["title"] (assuming movies.csv has a
# "title" column -- confirm against the data file).
rating_mean_per_movie_dict = rating_mean_per_movie_joined.to_dict(orient="index")


In [None]:
from association_rules_helper import create_user_ratings_dict_set

# Build the per-user structure whose values are later fed to apriori.
# NOTE(review): the helper lives in a project module not shown here; judging
# by its name it returns a dict of sets keyed by user -- confirm.
user_ratings_dict_set = create_user_ratings_dict_set(
    ratings,
    rating_mean_per_movie_dict,
    rating_mean_per_user_dict,
)

In [None]:
# One transaction per dict entry: keep the values, drop the keys.
movie_sets = list(user_ratings_dict_set.values())

In [None]:
# Mine association rules over the transactions with the given support,
# confidence and lift thresholds, limited to item sets of at most two items.
# NOTE(review): apyori's `apriori` appears to return a lazy generator, so no
# mining work happens until the result is iterated -- confirm.
association_rules = apriori(
    movie_sets,
    min_support=0.0003,
    min_confidence=0.37,
    min_lift=1.6,
    max_length=2,
)

In [None]:
import gc

# Release the two large frames that are no longer needed. Running this cell
# a second time raises NameError because the names are already gone.
del ratings, rating_mean_per_movie_joined

# Force a collection pass; the returned count is shown as the cell output.
gc.collect()

In [None]:
import os
import pickle

# Location of the cached, fully materialised rule list.
association_results_path = "models/association_results_support_0003_confidence_37.pkl"

if os.path.isfile(association_results_path):
    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only load a cache file that this project wrote itself; never a file
    # obtained from an untrusted source.
    with open(association_results_path, "rb") as openfile:
        association_results = pickle.load(openfile)
else:
    # No cache yet (e.g. fresh checkout): materialise the rules produced by
    # the apriori cell instead of failing with FileNotFoundError, then store
    # them so the next run can take the fast path above.
    association_results = list(association_rules)
    os.makedirs(os.path.dirname(association_results_path), exist_ok=True)
    with open(association_results_path, "wb") as openfile:
        pickle.dump(association_results, openfile)

In [None]:
# Peek into the first rule record: take the `items_base` set of its first
# ordered statistic (presumably the rule's antecedent -- confirm with apyori).
item_base = (
    association_results[0]
    .ordered_statistics[0]
    .items_base
)
# Pull a single element out of that set.
base = next(iter(item_base))

# Show the complete first record as the cell output.
association_results[0]

In [None]:
import pickle

# Path of the pickle file holding the materialised rule list.
file_name = "models/association_results_support_0003_confidence_37.pkl"

# Uncomment the two lines below to (re)write that file from the in-memory list:
#with open(file_name, 'wb') as file:
#    pickle.dump(association_results, file)

# Number of rule records currently in memory (cell output).
len(association_results)

In [None]:
from importlib import reload

import association_rules_helper

# Re-import the helper module so edits made to it since the kernel started
# take effect without a restart.
reload(association_rules_helper)

# Turn the rule records into the lookup structure used by the cells below.
recommendationDict = association_rules_helper.create_movies_recommendation_dict(
    association_results
)

In [None]:
# Look up a batch of ';'-separated search terms (partial titles) and print
# what the helper finds for them in recommendationDict.
association_rules_helper.print_possible_matches(
    movies,
    "pulp fi;ghost in ;John wick;the transporter;Terminator;Fast and furious;lion king;hacksaw; Undisputed; fight club;Prison break; infinity avengers; inception",
    recommendationDict,
)

In [None]:
# Second batch of search terms; the trailing 3 is forwarded to the helper
# (presumably the number of recommendations to show -- confirm in the helper).
association_rules_helper.print_possible_matches(
    movies,
    "avatar;american pie;hobbit;top gun; karate kid",
    recommendationDict,
    3,
)

In [None]:
# Number of keys in recommendationDict (cell output).
len(recommendationDict)
#movies[movies["movieId"].isin(recommendationDict.keys())]

In [None]:
def filter_and_transform(x):
    """Return the (movieId, rating) pairs one user rated at or above their own mean.

    Intended to be used with ``ratings.groupby("userId").apply(...)``.

    Parameters
    ----------
    x : pandas.DataFrame
        All rating rows of a single user. Must contain the columns
        ``movieId`` and ``rating``. The user id is read from the ``userId``
        column when it is present; otherwise it is taken from ``x.name``,
        the group key that ``groupby.apply`` attaches to each group. The
        fallback is needed because newer pandas versions no longer pass the
        grouping column to the applied function.

    Returns
    -------
    list of tuple
        ``(movieId, rating)`` for every row whose rating is greater than or
        equal to the user's mean rating in ``rating_mean_per_user_dict``.

    Raises
    ------
    KeyError
        If the user id has no entry in ``rating_mean_per_user_dict``.
    """
    if "userId" in x.columns:
        user_id = x["userId"].iloc[0]
    else:
        # Grouping column was stripped by pandas; use the group key instead.
        user_id = x.name

    user_mean = rating_mean_per_user_dict[user_id]
    enjoyed_rows = x[x["rating"] >= user_mean]
    return list(zip(enjoyed_rows["movieId"], enjoyed_rows["rating"]))


# Reuse the in-memory ratings frame only when the name really holds a
# DataFrame. The name may be missing (deleted to free memory) or bound to
# something else, in which case the CSV is read again.
_ratings_source = globals().get("ratings")
if not isinstance(_ratings_source, pd.DataFrame):
    _ratings_source = pd.read_csv("data/ratings.csv")

# userId -> list of (movieId, rating) pairs the user rated at or above their mean.
user_ratings_enjoyed_dict = (
    _ratings_source
    .groupby("userId")
    .apply(filter_and_transform)
    .to_dict()
)

# Do not keep an extra reference to the large frame alive.
del _ratings_source

In [None]:
import numpy as np

# How many recommendations per movie are handed to the error helper.
number_of_recommendations = 5
# Users for whom not a single movie produced a matching recommendation.
users_with_error = 0
# Users that were actually evaluated (those with at least two enjoyed movies).
users = 0

for user_id, enjoyed_movies in user_ratings_enjoyed_dict.items():
    # Skip users with fewer than two enjoyed movies. Checking before the
    # unzip also covers an empty list, which would otherwise raise IndexError.
    if len(enjoyed_movies) < 2:
        continue

    # Unzip [(movieId, rating), ...] into two parallel tuples. The rating
    # tuple gets its own name so the notebook-level name `ratings` is never
    # rebound to something that is not the ratings DataFrame.
    movie_ids, movie_ratings = zip(*enjoyed_movies)
    movies_and_ratings_df = pd.DataFrame({"movieId": movie_ids, "rating": movie_ratings})

    # One error value per enjoyed movie that has an entry in recommendationDict.
    error_list = [
        association_rules_helper.calculate_prediction_error(
            user_movie_id,
            movies_and_ratings_df,
            recommendationDict,
            number_of_recommendations,
        )
        for user_movie_id in movie_ids
        if user_movie_id in recommendationDict
    ]

    # An error below 1.0 counts as "at least one recommendation matched"
    # (1.0 presumably being the helper's maximum error -- confirm in helper).
    recommendations_match = int(np.count_nonzero(np.asarray(error_list) < 1.0))
    if recommendations_match == 0:
        users_with_error += 1
    users += 1

    print("For %d out of %d user movies at least 1 out of %d recommendations matched" %
          (recommendations_match,
           len(error_list),
           number_of_recommendations))

In [None]:
# Share of evaluated users without any matching recommendation. Reported as
# NaN when no user was evaluated, instead of raising ZeroDivisionError.
no_match_percentage = 100 * users_with_error / users if users else float("nan")

print(
    "Users checked: %d\nUsers with no matching recommendations: %d\nin %%: %.2f%% of users don't have at least one matching recommendation." % (
        users, users_with_error, no_match_percentage))

In [None]:
# Print what the helper finds for the search term "insidious".
association_rules_helper.print_possible_matches(
    movies,
    "insidious",
    recommendationDict,
)

In [17]:
from importlib import reload

import association_rules_helper

# Reload so the latest version of the helper module is used.
reload(association_rules_helper)

# Build an extended variant of recommendationDict.
# NOTE(review): the meaning of the second argument (5) is defined in the
# helper module, which is not visible here -- confirm.
extended_recommendation_dict = association_rules_helper.extend_recommendation_dict_both_ways(
    recommendationDict,
    5,
)

In [None]:
# Run the same query against the original and then the extended dictionary
# so their outputs can be compared one after the other.
for candidate_dict in (recommendationDict, extended_recommendation_dict):
    association_rules_helper.print_possible_matches(
        movies,
        "Insidious chapter 3 2015",
        candidate_dict,
        5,
    )

In [None]:
# Key counts: extended dictionary first, original dictionary second.
print(len(extended_recommendation_dict), len(recommendationDict))

In [20]:

# movieId attribute of the first five entries stored under key 596 (cell output).
[entry.movieId for entry in extended_recommendation_dict[596][:5]]

[3759, 85316, 3964, 2102, 7302]