In [56]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import ndcg_score


In [57]:
# Load the data into a pandas DataFrame
data = pd.read_csv(r'D:\Recommender Systems\Arbeit\project\implicit_data.csv')

print(data.head())
print(data.columns)



   user_id  item_id  implicit_feedback
0      196      242                  1
1      186      302                  1
2       22      377                  1
3      244       51                  1
4      166      346                  1
Index(['user_id', 'item_id', 'implicit_feedback'], dtype='object')


In [58]:
# Split the data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [59]:
# Creating a User-item matrix
# KNN Recommender
user_item_matrix = train_data.pivot(index='user_id', columns='item_id', values='implicit_feedback').fillna(0)

In [60]:
# Fit the Nearest Neighbors model
k_neighbors = 25  # You can adjust this parameter
knn_model = NearestNeighbors(n_neighbors=k_neighbors, metric='cosine', algorithm='brute')
knn_model.fit(user_item_matrix.values)

In [69]:
# Matrix Factorization
user_item_matrix_scaled = StandardScaler().fit_transform(user_item_matrix)
svd_model = TruncatedSVD(n_components=20, n_iter=100, random_state=42)  # Adjusted number of components
svd_matrix = svd_model.fit_transform(user_item_matrix_scaled)



In [70]:
knn_predictions = []
svd_predictions = []

for _, row in test_data.iterrows():
    user = row['user_id']
    item = row['item_id']

    # KNN prediction
    if user in user_item_matrix.index and item in user_item_matrix.columns:
        user_index = user_item_matrix.index.get_loc(user)
        _, neighbor_indices = knn_model.kneighbors([user_item_matrix.iloc[user_index].values], n_neighbors=k_neighbors)
        knn_prediction = user_item_matrix.iloc[neighbor_indices.flatten()].mean(axis=0)[item]
        knn_predictions.append(knn_prediction)
    else:
        knn_predictions.append(np.nan)  # Handle missing values

    # SVD prediction
    if user in user_item_matrix.index and item in user_item_matrix.columns:
        user_embedding = svd_matrix[user_item_matrix.index == user]
        item_embedding = svd_model.components_[:, user_item_matrix.columns == item]
        svd_prediction = (user_embedding @ item_embedding).item()
        svd_predictions.append(svd_prediction)
    else:
        svd_predictions.append(np.nan)  # Handle missing values


In [63]:
# Create new columns for predictions in test_data
test_data['knn_prediction'] = knn_predictions
test_data['svd_prediction'] = svd_predictions


In [64]:
# Assuming your original ratings are in the range [0, 1]
min_rating = 0
max_rating = 1

# Scale the KNN predictions to the rating range
knn_predictions_implicit = (knn_predictions - np.nanmin(knn_predictions)) / (np.nanmax(knn_predictions) - np.nanmin(knn_predictions)) * (max_rating - min_rating) + min_rating

# Scale the SVD predictions to the rating range
svd_predictions_implicit = (svd_predictions - np.nanmin(svd_predictions)) / (np.nanmax(svd_predictions) - np.nanmin(svd_predictions)) * (max_rating - min_rating) + min_rating

# Create new columns for implicit predictions in test_data
test_data['knn_prediction_implicit'] = knn_predictions_implicit
test_data['svd_prediction_implicit'] = svd_predictions_implicit

In [65]:
# Evaluate the models
test_labels = test_data['implicit_feedback'].values

# Filter out NaN values from predictions and true labels
knn_rmse = mean_squared_error(test_labels[~np.isnan(knn_predictions)], [pred for pred in knn_predictions if not np.isnan(pred)], squared=False)
knn_mae = mean_absolute_error(test_labels[~np.isnan(knn_predictions)], [pred for pred in knn_predictions if not np.isnan(pred)])

svd_rmse = mean_squared_error(test_labels[~np.isnan(svd_predictions)], [pred for pred in svd_predictions if not np.isnan(pred)], squared=False)
svd_mae = mean_absolute_error(test_labels[~np.isnan(svd_predictions)], [pred for pred in svd_predictions if not np.isnan(pred)])


In [66]:
# Filter out NaN values from true labels and predictions
valid_indices = ~np.isnan(svd_predictions)
y_true_valid = test_labels[valid_indices]
y_score_valid = np.array(svd_predictions)[valid_indices]

# NDCG for SVD
ndcg = ndcg_score([y_true_valid], [y_score_valid])


In [67]:
# Print results
print(f'KNN RMSE: {knn_rmse:.4f}')
print(f'KNN MAE: {knn_mae:.4f}')

print(f'SVD RMSE: {svd_rmse:.4f}')
print(f'SVD MAE: {svd_mae:.4f}')
print(f'SVD NDCG: {ndcg:.4f}')



KNN RMSE: 0.7375
KNN MAE: 0.7094
SVD RMSE: 0.7761
SVD MAE: 0.6696
SVD NDCG: 1.0000


In [68]:
# Print Implicit rating predictions
print("KNN Implicit Rating Predictions:")
print(test_data[['user_id', 'item_id', 'implicit_feedback', 'knn_prediction_implicit']])

print("\nSVD Implicit Rating Predictions:")
print(test_data[['user_id', 'item_id', 'implicit_feedback', 'svd_prediction_implicit']])

KNN Implicit Rating Predictions:
       user_id  item_id  implicit_feedback  knn_prediction_implicit
23056      431      327                  1                 0.391304
62043      468       55                  1                 0.652174
24332       10      513                  1                 0.521739
39217      851      240                  1                 0.347826
32164       51      144                  1                 0.347826
...        ...      ...                ...                      ...
49981      711       79                  1                 0.652174
58333      884      529                  1                 0.347826
9272       409     1176                  1                 0.000000
51783      447      716                  1                 0.086957
18144      354      604                  1                 0.217391

[13612 rows x 4 columns]

SVD Implicit Rating Predictions:
       user_id  item_id  implicit_feedback  svd_prediction_implicit
23056      431      327