## Load the combined, processed metadata CSV so we can start building the system

In [1]:
from pathlib import Path

import pandas as pd

# Load the combined metadata table that the vectorizers are fitted on.
# The path is assembled from components (relative to the working directory)
# instead of a backslash-separated literal, so the notebook resolves the same
# file on Windows and on POSIX systems.
qualified_movies_df = pd.read_csv(Path('data') / 'processed_movies' / 'qualified_all_metadata.csv')

## Create the two vectorizers needed for the system

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Settings shared by both vectorizers: single-word tokens made of ASCII
# letters, digits, '+' and '_', lower-cased, accent-stripped, with English
# stop words removed.
shared_vectorizer_options = dict(
    analyzer='word',
    lowercase=True,
    stop_words='english',
    ngram_range=(1, 1),
    token_pattern=r'[a-zA-Z0-9+_]+',
    strip_accents='unicode',
)
metadata_corpus = qualified_movies_df['all_metadata']

# Term-frequency variant: inverse-document-frequency weighting is switched off.
tf_vectorizer = TfidfVectorizer(use_idf=False, **shared_vectorizer_options)
tf_vectorizer_matrix = tf_vectorizer.fit_transform(metadata_corpus)

# TF-IDF variant: identical settings, with `use_idf` left at the library default.
tfidf_vectorizer = TfidfVectorizer(**shared_vectorizer_options)
tfidf_vectorizer_matrix = tfidf_vectorizer.fit_transform(metadata_corpus)

## Save the matrices needed for evaluation

In [3]:
import pickle
from pathlib import Path

# Persist both fitted document-term matrices so they can be reloaded later.
# The output directory is built from path components (portable across
# operating systems) and created on demand, so a fresh checkout does not fail
# on a missing folder.
user_based_dir = Path('serialized_objects') / 'user_based'
user_based_dir.mkdir(parents=True, exist_ok=True)

for file_name, matrix in (('tf_vectorizer_matrix.pkl', tf_vectorizer_matrix),
                          ('tfidf_vectorizer_matrix.pkl', tfidf_vectorizer_matrix)):
    with open(user_based_dir / file_name, 'wb') as f:
        pickle.dump(matrix, f)

## Evaluate system accuracy/precision

In [3]:
from datetime import datetime
from utils.database_operations import DatabaseOperations

# Database operations object: the evaluation loop fetches each user's ratings
# through it and closes its session once the scan is over.
db_operations = DatabaseOperations()

In [4]:
# --- Evaluation settings and accumulators ---

# A user is evaluated only when their number of ratings lies strictly between
# these two bounds (both limits are exclusive in the evaluation loop).
min_ratings_threshold, max_ratings_threshold = 20, 100

# Running tallies filled in by the evaluation loop.
count_passed_users = 0
list_of_percentage_accuracy, list_of_percentage_precision = [], []

# Reference point for the elapsed-time figure in the progress report.
start_time = datetime.now()

In [5]:
from utils.recommendation_handler_external_user import RecommendationHandlerExternalUser

# Evaluate the recommender for every user id in 1..269999 whose rating count
# lies strictly between the two thresholds, printing running averages every
# 100 ids.
#
# The scan runs inside try/finally so the database session is closed even if
# a query or an evaluation raises part-way through this long loop.
try:
    for i in range(1, 270000):
        movie_ids_ratings = db_operations.get_movies_ids_ratings_by_user_id(user_id=i)
        if min_ratings_threshold < len(movie_ids_ratings) < max_ratings_threshold:
            # Build a handler from this user's ratings and evaluate it.
            rec_handler = RecommendationHandlerExternalUser(external_users_ratings=movie_ids_ratings)
            count_passed_users += 1
            accuracy_percentage, precision_percentage = rec_handler.evaluate_system()
            # NOTE(review): this truthiness test skips a user when either score
            # is falsy, which includes a legitimate 0.0 as well as None. If
            # evaluate_system reports "could not evaluate" with None, an
            # `is not None` check would avoid biasing the averages upward —
            # confirm the return contract before changing it.
            if accuracy_percentage and precision_percentage:
                list_of_percentage_accuracy.append(accuracy_percentage)
                list_of_percentage_precision.append(precision_percentage)

        # Progress report every 100 user ids, once at least one score exists.
        # NOTE(review): the printed user count is count_passed_users, while the
        # averages divide by the number of stored scores; the two differ
        # whenever a user was skipped by the check above.
        if i % 100 == 0 and len(list_of_percentage_accuracy) > 0:
            average_accuracy = sum(list_of_percentage_accuracy) / len(list_of_percentage_accuracy)
            average_precision = sum(list_of_percentage_precision) / len(list_of_percentage_precision)
            print('Average percentage accuracy for {} users: {}'.format(count_passed_users, average_accuracy))
            print('Average percentage precision for {} users: {}'.format(count_passed_users, average_precision))
            print('Time passed: {}'.format(datetime.now() - start_time))
            print('-----------------------------------------------')
finally:
    # Always release the database session, on success and on failure.
    db_operations.close_session()

Average percentage accuracy for 40 users: 0.8995312500000001
Average percentage precision for 40 users: 0.16066535572562363
Time passed: 0:00:08.036189
-----------------------------------------------


Average percentage accuracy for 82 users: 0.8885503472222223
Average percentage precision for 82 users: 0.17041268660241876
Time passed: 0:00:16.036429
-----------------------------------------------


Average percentage accuracy for 113 users: 0.8861566924066925
Average percentage precision for 113 users: 0.16223130443368547
Time passed: 0:00:22.846123
-----------------------------------------------
