In [70]:
from collections import Counter
import sqlite3
import pandas as pd
import json
from datetime import datetime
import numpy as np

def gini_index(array):
    """Calculate the Gini index of a numpy array."""
    array = np.array(array)
    if array.ndim == 0:
        return 0
    array = array.flatten()
    if np.amin(array) < 0:
        array -= np.amin(array)
    array += 0.0000001  # to avoid division by zero
    array = np.sort(array)
    index = np.arange(1, array.shape[0] + 1)
    n = array.shape[0]
    return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array)))

# Connect to the SQLite database
conn = sqlite3.connect('./instance/db.sqlite')

# Create a cursor object
cursor = conn.cursor()

# Get today's date
border = datetime.fromisoformat('2024-09-03').strftime('%Y-%m-%d')

# Query the interactions table for rows with interaction_type 'iteration-started' from today
query = """
SELECT data FROM interaction
WHERE interaction_type = 'iteration-started' AND date(time) > ?
"""
cursor.execute(query, (border,))
rows = cursor.fetchall()

# Close the connection
conn.close()

# Extract and parse the data
data_list = [json.loads(row[0]) for row in rows]
#print(data_list[0])

# Initialize dictionaries to store the shown counts for each algorithm
shown_counts = {'EASE': [], 'CNRS': [], 'CNDRS': [], 'CNFRS': [], 'CNDFRS': []}
shown_genres = {'EASE': [], 'CNRS': [], 'CNDRS': [], 'CNFRS': [], 'CNDFRS': []}

# Extract the shown field for each algorithm
for data in data_list:
    for algorithm in shown_counts.keys():
        if algorithm in data['movies']:
            shown_counts[algorithm].extend([float(item['movie_idx']) for item in data['movies'][algorithm]['movies']])
            shown_genres[algorithm].extend([genre for movie in data['movies'][algorithm]['movies'] for genre in movie['genres']])
            
#print(shown_counts)
#print(shown_genres)

genre_to_id = {}
current_id = 0.0
for algorithm, genres in shown_genres.items():
    for genre in genres:
        if genre not in genre_to_id:
            genre_to_id[genre] = current_id
            current_id += 1.0
    shown_genres[algorithm] = [genre_to_id[genre] for genre in genres]

# Calculate the Gini index for each algorithm
gini_indices = {algorithm: gini_index(shown_counts[algorithm]) for algorithm in shown_counts.keys()}
gini_indices_genres = {algorithm: gini_index(shown_genres[algorithm]) for algorithm in shown_genres.keys()}

# Print the Gini index for each algorithm
for algorithm, gini in gini_indices.items():
    print(f"{algorithm} - Gini Index: {gini:.4f}")
    
for algorithm, gini in gini_indices_genres.items():
    print(f"{algorithm} - Gini Index for genres: {gini:.4f}")

EASE - Gini Index: 0.3300
CNRS - Gini Index: 0.3569
CNDRS - Gini Index: 0.3024
CNFRS - Gini Index: 0.3379
CNDFRS - Gini Index: 0.3407
CNDRS - shift 0.2 - Gini Index: 0.3133
CNDFRS - shift 0.2 - Gini Index: 0.3479
EASE - Gini Index for genres: 0.3338
CNRS - Gini Index for genres: 0.4205
CNDRS - Gini Index for genres: 0.4338
CNFRS - Gini Index for genres: 0.3878
CNDFRS - Gini Index for genres: 0.3719
CNDRS - shift 0.2 - Gini Index for genres: 0.4208
CNDFRS - shift 0.2 - Gini Index for genres: 0.3874


In [71]:
conn = sqlite3.connect('./instance/db.sqlite')

# Create a cursor object
cursor = conn.cursor()

# Get today's date
border = datetime.fromisoformat('2024-09-03').strftime('%Y-%m-%d')

# Query the interactions table for rows with interaction_type 'iteration-started' from today
query = """
SELECT data FROM interaction
WHERE interaction_type = 'iteration-ended' AND date(time) > ?
"""
cursor.execute(query, (border,))
rows = cursor.fetchall()

# Close the connection
conn.close()

data_list = [json.loads(row[0]) for row in rows]
print(data_list[len(data_list)-1])

{'iteration': 5, 'selected': [[1174, 921, 1405, 1044, 1411, 1378, 60, 1178, 996], [314, 949, 292, 814, 77, 1534, 469, 1631, 610, 980, 504, 231, 357], [293, 1239, 1451, 1372, 327, 927, 628, 1104, 218], [86, 1041, 0, 1047, 202, 1296, 112, 1561, 1179]], 'selected_variants': [[0, 0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 1, 1, 1, 1, 1, 1, 1]], 'dont_like_anything': [False, False, False, False], 'algorithm_comparison': ['third', 'fourth', 'second', 'fourth'], 'ratings': [{'CNDRS - shift 0.2': 3, 'CNDFRS - shift 0.2': 3}, {'CNDRS - shift 0.2': 4, 'CNDFRS - shift 0.2': 3}, {'CNDRS - shift 0.2': 2, 'CNDFRS - shift 0.2': 3}, {'CNDRS - shift 0.2': 2, 'CNDFRS - shift 0.2': 3}]}


In [72]:
all_recommended_items = {algorithm: [] for algorithm in shown_counts.keys()}
all_selected_items = []

for data in data_list:
    all_selected_items.extend(data['selected'])
    
all_selected_items = set(np.hstack(np.array(all_selected_items, dtype=object)))

all_recommended_items = shown_counts
for algorithm, items in all_recommended_items.items():
    all_recommended_items[algorithm] = set(np.hstack(np.array(list(int(a) for a in items), dtype=object)))


In [73]:
def calculate_recall(recommended_items, selected_items):
    return len(recommended_items.intersection(selected_items)) / len(selected_items)

def calculate_precision(recommended_items, selected_items):
    return len(recommended_items.intersection(selected_items)) / len(recommended_items)

def calculate_f1_score(precision, recall):
    return 2 * (precision * recall) / (precision + recall)

min_recommended_items_count = min([len(items) for items in all_recommended_items.values()])

for algorithm in all_recommended_items.keys():
    recall = calculate_recall(set(list(all_recommended_items[algorithm])[:min_recommended_items_count]), all_selected_items)
    precision = calculate_precision(set(list(all_recommended_items[algorithm])[:min_recommended_items_count]), all_selected_items)
    f1_score = calculate_f1_score(precision, recall)
    print(f"{algorithm} - Recall: {recall:.4f}, Precision: {precision:.4f}, F1 Score: {f1_score:.4f}")

EASE - Recall: 0.2311, Precision: 0.9266, F1 Score: 0.3700
CNRS - Recall: 0.2311, Precision: 0.9266, F1 Score: 0.3700
CNDRS - Recall: 0.2082, Precision: 0.8349, F1 Score: 0.3333
CNFRS - Recall: 0.2403, Precision: 0.9633, F1 Score: 0.3846
CNDFRS - Recall: 0.2174, Precision: 0.8716, F1 Score: 0.3480
CNDRS - shift 0.2 - Recall: 0.2197, Precision: 0.8807, F1 Score: 0.3516
CNDFRS - shift 0.2 - Recall: 0.2265, Precision: 0.9083, F1 Score: 0.3626


In [80]:
import pandas as pd
items_popular = pd.read_csv('/Users/egorulanov/Studies/RecSys/EasyStudy/server/ratings_200_df.csv', header=0, index_col=0).to_dict()

items_high_rated = pd.read_csv('/Users/egorulanov/Studies/RecSys/EasyStudy/server/ratings_200_highest_df.csv', header=0, index_col=0).to_dict()

for algorithm in all_recommended_items.keys():
    algorithm_rec_items = set(list(all_recommended_items[algorithm])[:min_recommended_items_count])
    popular_items = set([int(item) for item in items_popular['count'].keys()])
    intersection_popular = algorithm_rec_items.intersection(popular_items)
    high_rated_items = set([int(item) for item in items_high_rated['rating'].keys()])
    intersection_high_rated = algorithm_rec_items.intersection(high_rated_items)
    print(f"{algorithm} - Popular items in recommendations: {len(intersection_popular)}")
    print(f"{algorithm} - High rated items in recommendations: {len(intersection_high_rated)}")


EASE - Popular items in recommendations: 61
EASE - High rated items in recommendations: 61
CNRS - Popular items in recommendations: 28
CNRS - High rated items in recommendations: 27
CNDRS - Popular items in recommendations: 14
CNDRS - High rated items in recommendations: 13
CNFRS - Popular items in recommendations: 49
CNFRS - High rated items in recommendations: 49
CNDFRS - Popular items in recommendations: 35
CNDFRS - High rated items in recommendations: 35
CNDRS - shift 0.2 - Popular items in recommendations: 21
CNDRS - shift 0.2 - High rated items in recommendations: 20
CNDFRS - shift 0.2 - Popular items in recommendations: 44
CNDFRS - shift 0.2 - High rated items in recommendations: 45
