In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix

In [2]:
df = pd.read_csv('datasets/archive/recommendations.csv')

In [None]:
df.head()

In [3]:
from scipy.sparse import csr_matrix

# Assuming df is your DataFrame
sample_size = 50000  # Adjust the sample size as needed

# Randomly select a subset of users
random_users = df['user_id'].sample(n=sample_size, random_state=42)

# Filter the DataFrame to include only the selected users
df_subset = df[df['user_id'].isin(random_users)]

# Create a user-item matrix from the subset
user_u = list(sorted(df_subset['user_id'].unique()))
item_u = list(sorted(df_subset['app_id'].unique()))

row = df_subset['user_id'].astype('category').cat.codes
col = df_subset['app_id'].astype('category').cat.codes

data = df_subset['is_recommended'].astype(int).tolist()

user_item_matrix = csr_matrix((data, (row, col)), shape=(len(user_u), len(item_u)))
# print(random_users)

In [None]:
len(df_subset)

In [None]:
len(item_u)

In [None]:
user_item_matrix.shape

In [None]:
print(user_item_matrix[:10, :10].toarray())

In [4]:
from sklearn.metrics.pairwise import cosine_similarity

user_similarity = cosine_similarity(user_item_matrix, dense_output=False)

In [29]:
print(user_similarity.shape)

(49442, 49442)


In [110]:
def get_top_k_neighbors(user_similarity_matrix, target_user_idx, k):
    # Extract the row of cosine similarities for the target user
    target_similarities = user_similarity_matrix[target_user_idx, :]

    # Use numpy's argpartition to get the indices of the top-K (excluding the target user)
    top_k_indices = np.argsort(target_similarities.data)[-k:][::-1]
    top_k_similarities = target_similarities.data[top_k_indices]

    print(top_k_indices)
    print(top_k_similarities)

    return top_k_indices, top_k_similarities

target_user_idx = 6  # Replace with the index of your target user
k_neighbors = 100  # Adjust the number of neighbors as needed

top_k_neighbors, top_k_similarities = get_top_k_neighbors(user_similarity, target_user_idx, k_neighbors)
print("Top K Neighbors:", top_k_neighbors)
print("Top K Similarities:", top_k_similarities)


[3692 2622 2228 1554  178 1584 2613 1579 1568  757 1557 1551  750  765
 3433 1523 1517  769 1500 3549  772 1480 3204 2583 3163 2581 1764  705
 1751 2502 1744 2518 1708 1703  149 1697 1008  151  156  159  397 1654
 1652  747 1625 3182 1437 1775 3511 2862  889  343  894  902 3080 1183
 1164 2925 1126 2949 3463  283 1062  315 1057  977 1032 3033 1238 1249
  817 2850 1432 2718 2736 1394 2745  222 3373 1360 2766 3145 1346 2775
 2790 1292 1287 3130 1270 1258 2846  134 1689 3652 3417 2019 2018 3661
  476 2011]
[1.         0.70710678 0.57735027 0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5        0.5        0.5        0.5        0.5        0.5
 0.5       

In [111]:
def predict_ratings(user_item_matrix, top_k_neighbors, top_k_similarities):
    # Get the ratings of the top-K neighbors
    neighbor_ratings = user_item_matrix[top_k_neighbors, :]
    print(neighbor_ratings)
    
    # Weighted sum of neighbor ratings based on similarity
    non_weighted_sum = neighbor_ratings.sum(axis=0)
    print(type(neighbor_ratings))
    print(non_weighted_sum)
    # print(weighted_sum.shape)

    # Sum of absolute similarities for normalization
    abs_sim_sum = np.sum(np.abs(top_k_similarities))

    # Predicted ratings for the target user
    # predicted_ratings = non_weighted_sum / abs_sim_sum

    return non_weighted_sum

predicted_ratings = predict_ratings(
                        user_item_matrix, 
                        top_k_neighbors, 
                        top_k_similarities)

  (0, 20)	1
  (0, 1136)	1
  (0, 1231)	1
  (0, 1883)	1
  (0, 4090)	1
  (0, 4520)	1
  (0, 23485)	0
  (1, 5065)	1
  (1, 9406)	1
  (2, 2505)	1
  (2, 7098)	1
  (2, 14765)	1
  (2, 17418)	1
  (3, 1581)	1
  (3, 8128)	1
  (4, 606)	1
  (4, 1501)	0
  (4, 5976)	1
  (4, 7127)	1
  (4, 8293)	0
  (4, 13048)	1
  (4, 18002)	1
  (4, 19404)	0
  (4, 19653)	0
  (4, 20789)	1
  :	:
  (97, 15724)	0
  (97, 16338)	1
  (97, 19230)	1
  (97, 19234)	1
  (97, 19358)	1
  (97, 20086)	0
  (97, 20135)	1
  (97, 20466)	1
  (97, 20468)	1
  (97, 20974)	1
  (97, 21281)	1
  (97, 22206)	1
  (97, 22537)	1
  (98, 250)	0
  (98, 1231)	1
  (98, 1389)	0
  (98, 1487)	1
  (98, 1510)	1
  (98, 1581)	1
  (98, 2040)	1
  (98, 2688)	1
  (98, 3352)	0
  (98, 9266)	0
  (98, 10155)	1
  (99, 1506)	1
<class 'scipy.sparse._csr.csr_matrix'>
[[1 0 0 ... 0 0 0]]


In [112]:
predicted_ratings.max()

7

In [120]:
# Example: Get top N recommendations
N = 10
top_recommendations = np.argsort(predicted_ratings.data)[::-1][:N]
print("Top Recommendations:", top_recommendations)


Top Recommendations: [[14346 19066 19065 ...  4705    16  1231]]


In [122]:
rec_indices = top_recommendations.tolist()

In [141]:
from operator import itemgetter

games = itemgetter(*rec_indices[0])(item_u)

In [145]:
import pickle

id_to_title = pd.read_pickle("datasets/id_to_title.pkl")

In [146]:
id_to_title

{13500: 'Prince of Persia: Warrior Within™',
 22364: 'BRINK: Agents of Change',
 113020: "Monaco: What's Yours Is Mine",
 226560: 'Escape Dead Island',
 249050: 'Dungeon of the ENDLESS™',
 250180: 'METAL SLUG 3',
 253980: 'Enclave',
 271850: 'Men of War: Assault Squad 2 - Deluxe Edition upgrade',
 282900: 'Hyperdimension Neptunia Re;Birth1',
 19810: 'The Sum of All Fears',
 15270: 'Cold Fear™',
 21130: 'LEGO® Harry Potter: Years 1-4',
 22130: 'Hearts of Iron 2 Complete',
 29180: 'Osmos',
 32750: 'Comanche 4',
 241620: 'Inquisitor',
 408520: 'FORM',
 244910: 'Homesick',
 245950: 'Borderlands 2: Headhunter 4: Wedding Day Massacre',
 250460: 'Bridge Constructor',
 278890: 'Angvik',
 305181: 'Sniper Elite 3 - Camouflage Weapons Pack',
 312200: 'Chasm',
 321290: 'Dandelion - Wishes brought to you -',
 329640: 'Eradicator',
 367670: 'Controller Companion',
 380810: 'Herald: An Interactive Period Drama - Book I & II',
 392330: 'Take Command - 2nd Manassas',
 437000: 'GUILTY GEAR 2 -OVERTURE-'

In [147]:
for game in games[:10]:
    print(id_to_title[game])

Casual Penalty
Dark Parables: Portrait of the Stained Princess Collector's Edition
MotoGP™20
Noosphere
Shisensho Solitaire
Freud Gate
Natsu no Iro no Nostalgia
The Rewinder
Talesshop Puzzle
Food Truck Simulator


: 

In [None]:
print(top_k_neighbors)

In [None]:
print("User Similarity Matrix (Subset):")
print(user_similarity[:5, :5].toarray())