In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Load your data
reviews_df = pd.read_csv("clothing_reviews.csv",low_memory=False)
products_df = pd.read_csv("clothing_description.csv")

customer_id=float(input("enter customer id"))

# Prepare ratings data
ratings_df = reviews_df[['Customer ID', 'product_id', 'Rating']].dropna()
user_item_matrix = ratings_df.pivot_table(index='Customer ID', columns='product_id', values='Rating').fillna(0)
sparse_matrix = csr_matrix(user_item_matrix.values)

# Build index maps
index_to_customer = dict(enumerate(user_item_matrix.index))
customer_to_index = {v: k for k, v in index_to_customer.items()}

# KNN model
knn_model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6, n_jobs=-1)
knn_model.fit(sparse_matrix)

# Item similarity
item_user_matrix = user_item_matrix.T
item_similarity = cosine_similarity(csr_matrix(item_user_matrix.values))
item_sim_df = pd.DataFrame(item_similarity, index=item_user_matrix.index, columns=item_user_matrix.index)

# Hybrid recommender function
def hybrid_recommendations(customer_id, top_n=5, alpha=0.5):
    if customer_id not in customer_to_index:
        return pd.DataFrame([{"error": f"Customer ID {customer_id} not found."}])

    cust_idx = customer_to_index[customer_id]
    distances, indices = knn_model.kneighbors(sparse_matrix[cust_idx], n_neighbors=6)
    neighbor_indices = indices[0][1:]
    neighbor_ids = [index_to_customer[idx] for idx in neighbor_indices]
    neighbor_ratings = user_item_matrix.loc[neighbor_ids]
    user_based_scores = neighbor_ratings.mean()

    user_ratings = user_item_matrix.loc[customer_id]
    rated_items = user_ratings[user_ratings > 0].index
    item_scores = pd.Series(dtype=float)
    for item in rated_items:
        item_scores = item_scores.add(item_sim_df[item] * user_ratings[item], fill_value=0)

    combined_scores = (alpha * user_based_scores.add((1 - alpha) * item_scores, fill_value=0))
    combined_scores = combined_scores.drop(rated_items, errors='ignore')
    top_items = combined_scores.sort_values(ascending=False).head(top_n).index.astype(float)

    return products_df[products_df['product_id'].isin(top_items)][[
        'product_id', 'product_name', 'product_brand', 'product_description', 'price'
    ]]

# User-based recommender function
def user_based_recommendations(customer_id, top_n=5):
    if customer_id not in customer_to_index:
        return pd.DataFrame([{"error": f"Customer ID {customer_id} not found."}])

    cust_idx = customer_to_index[customer_id]
    distances, indices = knn_model.kneighbors(sparse_matrix[cust_idx], n_neighbors=6)
    neighbor_indices = indices[0][1:]
    neighbor_ids = [index_to_customer[idx] for idx in neighbor_indices]
    neighbor_ratings = user_item_matrix.loc[neighbor_ids]
    mean_ratings = neighbor_ratings.mean().sort_values(ascending=False)

    user_rated_products = user_item_matrix.loc[customer_id]
    unrated_products = user_rated_products[user_rated_products == 0]

    recommendations = mean_ratings[unrated_products.index].dropna().head(top_n)
    recommended_product_ids = recommendations.index.astype(float)

    return products_df[products_df['product_id'].isin(recommended_product_ids)][[
        'product_id', 'product_name', 'product_brand', 'product_description', 'price'
    ]]

# Item-based recommender function
def item_based_recommendations(customer_id, top_n=5):
    if customer_id not in user_item_matrix.index:
        return pd.DataFrame([{"error": f"Customer ID {customer_id} not found."}])

    user_ratings = user_item_matrix.loc[customer_id]
    rated_items = user_ratings[user_ratings > 0].index

    scores = pd.Series(dtype=float)
    for item in rated_items:
        similar_scores = item_sim_df[item] * user_ratings[item]
        scores = scores.add(similar_scores, fill_value=0)

    scores = scores.drop(rated_items, errors='ignore')
    top_items = scores.sort_values(ascending=False).head(top_n).index.astype(float)

    return products_df[products_df['product_id'].isin(top_items)][[
        'product_id', 'product_name', 'product_brand', 'product_description', 'price','size','color','material','gender'

    ]]

# Example usage
print("\nHybrid Recommendations")
print(hybrid_recommendations(customer_id, top_n=2))  # Hybrid

print("\nUser-Based Recommendations")
print(user_based_recommendations(customer_id, top_n=2))  # User-based

print("\nItem-Based Recommendations")
print(item_based_recommendations(customer_id, top_n=2))  # Item-based



Hybrid Recommendations
     product_id                          product_name product_brand  \
262      750360  Oversized Blazer with Premium Fabric        Adidas   
276      932236  Oversized Jacket with Stretch Fabric          Zara   

                                product_description     price  
262  Designed for both casual and formal occasions.   1384.44  
276            Trendy and versatile clothing piece.  12436.72  

User-Based Recommendations
     product_id                         product_name product_brand  \
16       106958  Acid Wash T-Shirt with Denim Fabric        Levi's   
118      100978    Ripped Tank Top with Denim Fabric        Levi's   

                                product_description     price  
16   Designed for both casual and formal occasions.  12924.76  
118  Designed for both casual and formal occasions.  13766.38  

Item-Based Recommendations
     product_id                          product_name product_brand  \
262      750360  Oversized Blazer with P