<a href="https://colab.research.google.com/github/Vishu31k/IML-Project/blob/main/RECOMMENDATION_SYSTEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import svds
import numpy as np

# Toy ratings table used to demonstrate the pipeline; swap in real data here.
# Position i across the three lists describes one (user, item, rating) triple.
data = {
    'user_id': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
    'item_id': [101, 102, 103, 101, 104, 105, 102, 103, 105, 101, 104, 106],
    'rating': [5, 3, 4, 4, 5, 2, 5, 4, 3, 3, 4, 5],
}
df = pd.DataFrame(data)

print("Original Data:")
print(df)

# Distinct user and item identifiers present in the ratings table.
users = df['user_id'].unique()
items = df['item_id'].unique()

# Reshape the long table into a wide one: one row per user, one column per
# item. (user, item) pairs without a rating are filled with 0.
ratings_wide = df.pivot(index='user_id', columns='item_id', values='rating')
user_item_matrix = ratings_wide.fillna(0)

print("\nUser-Item Matrix:")
print(user_item_matrix)

# Plain numpy view of the ratings for the decomposition below.
R = user_item_matrix.values

# Number of latent factors (a tunable parameter): one less than the smaller
# matrix dimension, but never below 1 for very small matrices.
k = max(min(R.shape) - 1, 1)

# Truncated SVD; the singular values come back as a 1-D array, so they are
# expanded into a diagonal matrix before multiplying the factors back together.
U, singular_values, Vt = svds(R, k=k)
sigma = np.diag(singular_values)

# Rank-k approximation of R; these values serve as the predicted ratings.
all_user_predicted_ratings = U @ sigma @ Vt

# Re-attach the user/item labels to the reconstructed matrix.
predicted_ratings_df = pd.DataFrame(
    all_user_predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

print("\nPredicted Ratings Matrix:")
print(predicted_ratings_df)

# Function to recommend items for a user
def recommend_items(user_id, num_recommendations=5):
    """Return the top predicted items that ``user_id`` has not rated yet.

    Reads the module-level ``user_item_matrix``, ``predicted_ratings_df`` and
    ``df``.

    Args:
        user_id: Label of the user in ``user_item_matrix.index``.
        num_recommendations: Passed to ``head`` to cap how many items are
            returned.

    Returns:
        The user's row of ``predicted_ratings_df`` sorted by predicted rating
        in descending order, with every item the user has a row for in ``df``
        removed, truncated to ``num_recommendations`` entries.
    """
    # Locate the user's row by position and rank all items by prediction.
    row_position = user_item_matrix.index.get_loc(user_id)
    ranked = predicted_ratings_df.iloc[row_position].sort_values(ascending=False)

    # Items this user already has a rating for in the original table.
    seen_items = df.loc[df['user_id'] == user_id, 'item_id'].tolist()

    # Keep only unseen items; filtering after the sort preserves the ranking.
    is_unseen = ~ranked.index.isin(seen_items)
    return ranked[is_unseen].head(num_recommendations)

# Demonstrate the recommender on a single user.
user_id_to_recommend = 1
recommendations_for_user = recommend_items(user_id_to_recommend)
print(f"\nTop recommendations for User {user_id_to_recommend}:")
print(recommendations_for_user)

# Evaluation, for demonstration only: the error is measured on the same
# ratings that were fed into the SVD. A real evaluation would score a
# held-out test set instead.
original_ratings = df['rating'].values

# For every observed (user, item) pair, look up the reconstructed rating so
# the predictions line up one-to-one with the rows of df.
predicted_ratings_list = [
    predicted_ratings_df.loc[user, item]
    for user, item in zip(df['user_id'], df['item_id'])
]
predicted_ratings_array = np.array(predicted_ratings_list)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(original_ratings, predicted_ratings_array)
rmse = np.sqrt(mse)
print(f"\nRoot Mean Squared Error (RMSE) on the data used for SVD: {rmse:.2f}")

Original Data:
    user_id  item_id  rating
0         1      101       5
1         1      102       3
2         1      103       4
3         2      101       4
4         2      104       5
5         2      105       2
6         3      102       5
7         3      103       4
8         3      105       3
9         4      101       3
10        4      104       4
11        4      106       5

User-Item Matrix:
item_id  101  102  103  104  105  106
user_id                              
1        5.0  3.0  4.0  0.0  0.0  0.0
2        4.0  0.0  0.0  5.0  2.0  0.0
3        0.0  5.0  4.0  0.0  3.0  0.0
4        3.0  0.0  0.0  4.0  0.0  5.0

Predicted Ratings Matrix:
item_id       101       102       103       104       105       106
user_id                                                            
1        4.734861  2.982413  3.700779  0.731918  0.926397 -1.010584
2        4.458330  0.030401  0.517244  3.734777  0.398593  1.746936
3        0.153851  5.010205  4.173627 -0.424706  2.462445  0.5