In [None]:
%pip install scikit-surprise pandas



In [15]:
import pandas as pd
import json

# Load user activity data
with open("/content/drive/MyDrive/Phone Recommendation and Similarity/datasets/userActivity_500.json", "r") as f:
    user_activity = json.load(f)

# Convert JSON to a DataFrame
interaction_data = []

for user in user_activity:
    user_id = user["user"]

    # Track purchases in a dictionary { product_id: True } for quick lookup
    purchased_products = {p["product"]: True for p in user["purchasedProducts"]}

    # Add viewed products with a default rating
    for product in user["viewedProducts"]:
        interaction_data.append({
            "user": user_id,
            "product": product,
            "rating": 2.5  # Neutral rating for views
        })

    # Add rated products (keep actual ratings, even if purchased)
    rated_products = {}
    for rating in user["ratings"]:
        rated_products[rating["product"]] = rating["rating"]  # Store user's actual rating

    # Add purchases, but avoid duplicates if already rated
    for product in purchased_products:
        if product in rated_products:
            # If user has already rated, use their rating instead of 4.5
            interaction_data.append({
                "user": user_id,
                "product": product,
                "rating": rated_products[product]
            })
        else:
            # If no rating exists, assign a default rating for purchases
            interaction_data.append({
                "user": user_id,
                "product": product,
                "rating": 3.5
            })

df = pd.DataFrame(interaction_data)

# Check dataset structure
print(df.head())

     user                   product  rating
0  user_1  67c6fbf92680f82968f146a6     2.5
1  user_1  67c6fb322680f82968f14362     2.5
2  user_1  67c6fc002680f82968f146c3     2.5
3  user_1  67c6fb9e2680f82968f14527     2.5
4  user_1  67c6fb412680f82968f1439e     2.5


In [16]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Define rating scale (1 to 5)
reader = Reader(rating_scale=(1, 5))

# Load data into Surprise dataset format
data = Dataset.load_from_df(df[["user", "product", "rating"]], reader)

# Train-test split
trainset, testset = train_test_split(data, test_size=0.2)

# Train the SVD model
model = SVD()
model.fit(trainset)

# Evaluate model performance (Root Mean Squared Error - RMSE)
predictions = model.test(testset)
print("Model RMSE:", accuracy.rmse(predictions))

RMSE: 0.6281
Model RMSE: 0.6281119981249477


In [17]:
def get_top_recommendations(user_id, model, df, top_n=5):
    unique_products = df["product"].unique()  # Get all unique product IDs
    predictions = [model.predict(user_id, pid) for pid in unique_products]
    predictions.sort(key=lambda x: x.est, reverse=True)  # Sort by estimated rating

    recommended_products = [pred.iid for pred in predictions[:top_n]]
    return recommended_products

# Get recommendations for a user
recommended = get_top_recommendations("user_1", model, df, top_n=5)
print("Recommended Products:", recommended)

Recommended Products: ['67c6fb282680f82968f14338', '67c6fb512680f82968f143e1', '67c6fbcf2680f82968f145f7', '67c6fba72680f82968f1454c', '67c6fb572680f82968f143fe']


In [None]:
import pickle
# # Save the trained model to a file
with open("/content/drive/MyDrive/Phone Recommendation and Similarity/ML_Model/trained_cf_model.pkl", "wb") as f:
    pickle.dump(model, f)


# Load trained collaborative filtering model
with open("/content/drive/MyDrive/Phone Recommendation and Similarity/ML_Model/trained_cf_model.pkl", "rb") as f:
    model = pickle.load(f)

print("Model loaded successfully!")

Model loaded successfully!
