In [4]:
import os
import random
from collections import Counter
from json import dump, load

import numpy as np
import pandas as pd

# Project root is the parent of the directory the notebook is run from.
base_path: str = os.path.dirname(os.getcwd())

# Data directories. Built with os.path.join so the separator matches the host
# OS (a literal '\\' only works on Windows). They stay plain strings because
# other cells build file paths from them by string concatenation.
CSV_PATH: str = os.path.join(base_path, 'csv')
SRC_PATH: str = os.path.join(base_path, 'src')
JSON_PATH: str = os.path.join(base_path, 'json')

In [5]:
# Per-user rating table; the prediction cell reads its 'USER_ID', 'RATED' and
# 'NAN_RATED' columns. os.path.join keeps the path valid on non-Windows hosts.
user_rating = pd.read_json(os.path.join(JSON_PATH, 'USER_RATING_DATA.json'))

In [7]:
# Per-user tuned thresholds; the prediction cell looks them up by user id
# (USER_HYPER_PARAMS[user_id]). os.path.join keeps the path portable.
USER_HYPER_PARAMS = pd.read_json(os.path.join(JSON_PATH, 'USER_HYPER_PARAMS.json'))

In [8]:
# Pairwise movie distances, indexed as movie_distance[movie_id][rated_movie_id];
# each entry is iterated as a sequence of per-feature distances by decision_tree.
# os.path.join keeps the path portable.
movie_distance = pd.read_json(os.path.join(JSON_PATH, 'movie_distance_graph.json'))

In [None]:
def decision_tree(movie_id, all_distances, training_ids, rating_data, threshold, required_matches: int = 4) -> int:
    """
    Predict a rating for ``movie_id`` from the ratings of "close" rated movies.

    For every movie in ``training_ids`` the per-feature distances to
    ``movie_id`` are read from ``all_distances[movie_id][int(rated_movie)]``.
    A feature counts as a match when its distance is strictly below the
    threshold at the same position. A rated movie contributes its rating when
    exactly ``required_matches`` of its features match. The prediction is the
    mean of the contributed ratings, rounded up.

    Args:
        movie_id: Key of the movie to predict (first index into ``all_distances``).
        all_distances: Two-level lookup ``[movie_id][rated_movie_id]`` yielding
            an iterable of per-feature distances.
        training_ids: Iterable of rated movie ids. Each id is converted with
            ``int()`` for the distance lookup and ``str()`` for the rating lookup.
        rating_data: Mapping from ``str(movie_id)`` to that movie's rating.
        threshold: Sequence of per-feature thresholds, indexed by feature
            position; it must be at least as long as each distance vector.
        required_matches: Exact number of features that must be below their
            threshold for a movie to contribute. Defaults to 4, the value that
            used to be hard-coded.

    Returns:
        The ceiling of the mean contributed rating as an ``int``, or 0 when no
        rated movie qualifies.
    """
    neighbour_ratings: list = []

    for rated_movie in training_ids:
        feature_distances = all_distances[movie_id][int(rated_movie)]

        # Count features whose distance is strictly below its own threshold.
        matches = sum(
            1
            for position, distance in enumerate(feature_distances)
            if distance < threshold[position]
        )

        if matches == required_matches:
            neighbour_ratings.append(rating_data[str(rated_movie)])

    if not neighbour_ratings:
        # No neighbour qualified: 0 is the "no prediction" value.
        return 0

    return int(np.ceil(np.average(neighbour_ratings)))

def test_user(user, best_weights, predict_ids, train_ids, num_of_trees, subset_fraction: float = 0.9) -> tuple[list, list]:
    """
    Predict ratings for one user's unrated movies by majority vote over
    several randomised ``decision_tree`` runs.

    For every movie in ``predict_ids``, ``num_of_trees`` predictions are made.
    Each run uses a perturbed copy of ``best_weights`` as thresholds and a
    random subset of ``train_ids``. The most frequent prediction wins; on a
    tie the value that was produced first wins.

    This function reads the module-level ``user_rating`` and
    ``movie_distance`` tables and calls ``decision_tree``.

    Args:
        user: Row key into ``user_rating["RATED"]`` selecting the user's
            known ratings.
        best_weights: Sized iterable of per-feature thresholds for this user.
        predict_ids: Movie ids to predict; each is converted with ``int()``
            before being passed to ``decision_tree``.
        train_ids: List of rated movie ids to sample from.
        num_of_trees: Number of randomised runs per movie. Must be at least 1,
            otherwise the vote has no entries and an ``IndexError`` is raised.
        subset_fraction: Fraction of ``train_ids`` sampled (without
            replacement) for each run. Defaults to 0.9, the value that used
            to be hard-coded.

    Returns:
        ``(movies, ratings)``: the predicted movie ids in input order and the
        winning rating for each.
    """

    def manipulate_weights(weights, min_percent=0.95, max_percent=1.05):
        """Return a copy of ``weights`` with one random entry set to 0 and the
        others scaled by a random factor in [min_percent, max_percent],
        rounded to 2 decimals."""
        zero_index = random.randint(0, len(weights) - 1)
        return [
            0 if i == zero_index else round(w * random.uniform(min_percent, max_percent), 2)
            for i, w in enumerate(weights)
        ]

    def select_random_subset(training_ids_inner, percentage):
        """Sample ``int(len * percentage)`` distinct ids from ``training_ids_inner``."""
        sample_size = int(len(training_ids_inner) * percentage)
        return random.sample(training_ids_inner, sample_size)

    # Loop-invariant: the user's known ratings do not change between runs,
    # so look them up once instead of once per tree per movie.
    known_ratings = user_rating["RATED"][user]

    ratings: list = []
    movies: list = []

    for predict_movie in predict_ids:
        votes: list = []

        for _ in range(num_of_trees):
            # Keep this order (weights first, then subset) so the sequence of
            # random draws is unchanged.
            weights_tree = manipulate_weights(best_weights)
            training_ids_tree = select_random_subset(training_ids_inner=train_ids, percentage=subset_fraction)

            votes.append(
                decision_tree(
                    movie_id=int(predict_movie),
                    all_distances=movie_distance,
                    training_ids=training_ids_tree,
                    rating_data=known_ratings,
                    threshold=weights_tree,
                )
            )

        # Majority vote; most_common keeps first-seen order among ties.
        movie_rating = Counter(votes).most_common(1)[0][0]

        ratings.append(movie_rating)
        movies.append(predict_movie)

    return (movies, ratings)

In [28]:
# Predict the missing ratings of every user and export them as JSON.
user_rating_data_predicted: list = []
user_rating_data_movies: list = []

# Number of users to process. Lower this (e.g. to 1) for a quick smoke run.
NUM_OF_USERS: int = len(user_rating)

# Output files, built with os.path.join so they also resolve on non-Windows hosts.
PREDICTED_RATINGS_FILE: str = os.path.join(JSON_PATH, 'USER_PREDICTED_RATINGS.json')
TRANSFORMED_RATINGS_FILE: str = os.path.join(JSON_PATH, 'USER_PREDICTED_RATINGS_TRANSFORMED.json')

for user in range(NUM_OF_USERS):

    user_id = user_rating['USER_ID'][user]
    best_weight = USER_HYPER_PARAMS[user_id]
    predict_ids: list = list(user_rating['NAN_RATED'][user].keys())
    train_ids: list = list(user_rating['RATED'][user].keys())

    returned_movies, returned_reviews = test_user(
        user=user,
        best_weights=best_weight,
        predict_ids=predict_ids,
        train_ids=train_ids,
        num_of_trees=100,
    )

    user_rating_data_movies.append(returned_movies)
    user_rating_data_predicted.append(returned_reviews)

# One entry per user: parallel lists of movie ids and predicted ratings.
# Movie ids are written shifted by +1 (presumably back to 1-based ids; TODO confirm).
user_predictions_by_id = {
    user_rating['USER_ID'][i]: {
        'MOVIES': [str(int(movie_id) + 1) for movie_id in user_rating_data_movies[i]],
        'RATINGS': user_rating_data_predicted[i],
    }
    for i in range(NUM_OF_USERS)
}

user_test_data_df = pd.DataFrame(user_predictions_by_id)
user_test_data_df.to_json(PREDICTED_RATINGS_FILE, indent=4)

# Read the file back so keys and values are plain JSON types (string keys,
# built-in numbers) before reshaping.
with open(PREDICTED_RATINGS_FILE, 'r', encoding='utf-8') as f:
    data = load(f)

# Reshape {user: {'MOVIES': [...], 'RATINGS': [...]}} into {user: {movie: rating}}.
user_test_data = {
    user_id: dict(zip(ratings_data['MOVIES'], ratings_data['RATINGS']))
    for user_id, ratings_data in data.items()
}

with open(TRANSFORMED_RATINGS_FILE, 'w', encoding='utf-8') as f:
    dump(user_test_data, f, indent=4)