In [8]:
import pandas as pd
import numpy as np

In [12]:
# Load preference data
preference_df = pd.read_csv("synthetic-dataset/preference.csv")
user_item_matrix = preference_df.pivot(index='user_id', columns='activity_id', values='preference').fillna(0)

# Adjust preference values
user_item_matrix[user_item_matrix == -1] = np.nan

user_item_matrix.head()


activity_id,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,0.481939,,,,,,,,...,,,,,,,0.222352,,,
1,,,,,0.285516,,,,,,...,,,,,0.071151,0.457692,,,,
2,,,,,,,,0.943149,,,...,0.822066,,,,,0.201957,,,,
3,,,,0.263499,,0.159453,,,,,...,0.236727,,,,0.354222,,,,,
4,,,,,,0.371497,,,,,...,,,,,,,,,,


In [14]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate user-user similarity matrix
user_similarity_matrix = cosine_similarity(user_item_matrix.fillna(0))

# Function to predict preferences using collaborative filtering
def predict_preferences(user_id, activity_id):
    # Find similar users
    similar_users = user_similarity_matrix[user_id - 1]  # user_id starts from 1
    # Get preferences of similar users for the activity
    similar_users_preferences = user_item_matrix.loc[:, activity_id] * similar_users
    # Calculate predicted preference
    predicted_preference = similar_users_preferences.sum() / (similar_users + 1e-9)
    return predicted_preference

# Example usage: Predict preference for user 1 and activity 5
predicted_preference = predict_preferences(1, 5)
print("Predicted preference:", predicted_preference)


Predicted preference: [2.18847527e-01 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 1.31247286e+00 1.96923074e+00 3.66099622e-01
 4.67446208e-01 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.60393553e-01 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 5.74886023e-01 8.27644892e-01
 8.07180020e+01 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08 2.18847528e+08 2.18847528e+08
 1.29847187e+00 2.18847528e+08 2.18847528e+08 2.18847528e+08
 2.18847528e+08 2.18847528e+08]


In [15]:
from sklearn.metrics import mean_squared_error

# Predict preferences for all user-item pairs
all_predictions = np.zeros_like(user_item_matrix.values)
for i in range(user_item_matrix.shape[0]):
    for j in range(user_item_matrix.shape[1]):
        all_predictions[i, j] = predict_preferences(i+1, j)

# Calculate MSE
mse = mean_squared_error(user_item_matrix.values.flatten(), all_predictions.flatten())
print("Mean Squared Error:", mse)


ValueError: setting an array element with a sequence.