<a href="https://colab.research.google.com/github/Vasantha-Meghana/Predictive_Analytics_Projects/blob/Recommending_Products_to_Users/predictive7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load the dataset
df = pd.read_csv("Online Retail Data Set.csv", encoding='ISO-8859-1')

# Step 2: Preprocess the data
# The cleaning steps are chained and end in .assign(), which returns a new frame.
# Writing the CustomerID column onto an already-filtered frame in a separate
# statement is the pattern that raises pandas' SettingWithCopyWarning.
df = (
    df[df['Quantity'] > 0]                      # Keep only positive quantities (drops returns)
    .dropna(subset=['CustomerID'])              # Remove rows with missing customer IDs
    .assign(CustomerID=lambda frame: frame['CustomerID'].astype(int))  # Ensure CustomerID is integer
)

# Step 3: Create user-product matrix
# Rows are customers, columns are stock codes, cells hold the summed quantity;
# customer/product pairs with no purchases are filled with 0.
basket = df.pivot_table(index='CustomerID',
                        columns='StockCode',
                        values='Quantity',
                        aggfunc='sum',
                        fill_value=0)

# Step 4: Compute cosine similarity between users
# Every row of `basket` is compared with every other row; the result is wrapped
# in a DataFrame so it can be looked up by CustomerID on both axes.
similarity_matrix = cosine_similarity(basket)
user_similarity_df = pd.DataFrame(similarity_matrix, index=basket.index, columns=basket.index)

# Step 5: Recommendation function
def recommend_products(customer_id, top_n_similar_users=3, top_k_products=5):
    """Recommend products for a customer based on what similar customers bought.

    Reads the module-level ``basket`` (customer x product quantity matrix) and
    ``user_similarity_df`` (customer x customer similarity scores).

    Parameters
    ----------
    customer_id
        Index label of the target customer in ``basket``.
    top_n_similar_users : int, default 3
        Maximum number of most-similar customers to draw recommendations from.
    top_k_products : int, default 5
        Maximum number of products to return.

    Returns
    -------
    pandas.DataFrame or str
        A one-column DataFrame (``'Avg Quantity'``) indexed by product, sorted
        from highest to lowest mean quantity among the selected similar
        customers. It may hold fewer than ``top_k_products`` rows, or be empty,
        when the similar customers offer too few unseen products. If
        ``customer_id`` is not in ``basket.index``, a "not found" message
        string is returned instead.
    """
    if customer_id not in basket.index:
        return f"Customer ID {customer_id} not found in dataset."

    # Similarity of every other customer to the target (the target itself is dropped).
    similarity_scores = user_similarity_df[customer_id].drop(customer_id)

    # Keep only customers that actually share something with the target. Without
    # this, head() pads the neighbour list with zero-similarity (unrelated)
    # customers whenever fewer than top_n_similar_users related customers exist.
    similarity_scores = similarity_scores[similarity_scores > 0]
    top_users = similarity_scores.sort_values(ascending=False).head(top_n_similar_users).index

    # Mean quantity bought by the selected similar customers, per product.
    similar_users_data = basket.loc[top_users]
    mean_ratings = similar_users_data.mean()

    # Candidates are products the target has not purchased AND that at least one
    # similar customer did purchase; a mean of 0 (or NaN when there are no
    # neighbours) carries no recommendation signal.
    target_user_data = basket.loc[customer_id]
    products_to_recommend = mean_ratings[(target_user_data == 0) & (mean_ratings > 0)]

    # Recommend top products
    recommended_products = products_to_recommend.sort_values(ascending=False).head(top_k_products)

    # Return as a DataFrame with proper heading
    return pd.DataFrame({'Avg Quantity': recommended_products})

# Example usage: look up recommendations for a single customer and print them.
customer_id = 17850
print("Top product recommendations for Customer", customer_id)
# recommend_products returns either a DataFrame or a "not found" message; print() handles both.
print(recommend_products(customer_id=customer_id))

Top product recommendations for Customer 17850
           Avg Quantity
StockCode              
21733         21.333333
84077         16.000000
21754         13.000000
85066         10.000000
22457         10.000000
