In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Input and output locations used by the pipeline below.
CUSTOMERS_CSV = r"C:\Users\venka\Downloads\Customers.csv"
PRODUCTS_CSV = r"C:\Users\venka\Downloads\Products.csv"
TRANSACTIONS_CSV = r"C:\Users\venka\Downloads\Transactions.csv"
LOOKALIKE_CSV = r"C:\Users\venka\Downloads\Lookalike.csv"

# Number of lookalikes kept per customer, and number of customers scored.
TOP_N_LOOKALIKES = 3
MAX_CUSTOMERS = 20


def build_lookalike_map(similarity_matrix, customer_ids,
                        top_n=TOP_N_LOOKALIKES, limit=MAX_CUSTOMERS):
    """Map each of the first *limit* customers to their most similar peers.

    Args:
        similarity_matrix: Square 2-D array where entry ``[i, j]`` is the
            similarity between ``customer_ids[i]`` and ``customer_ids[j]``.
        customer_ids: Customer identifiers, in the same order as the rows
            and columns of ``similarity_matrix``.
        top_n: Maximum number of lookalikes returned per customer.
        limit: Only the first ``limit`` customers get an entry.

    Returns:
        Dict mapping a customer id to a list of ``(other_id, score)`` tuples,
        ordered from most to least similar. Scores are built-in floats
        rounded to 4 decimals. A customer never appears in their own list.
    """
    lookalikes = {}
    for i, cust_id in enumerate(customer_ids[:limit]):
        ranked = similarity_matrix[i].argsort()[::-1]
        # Exclude the customer by index rather than assuming their own column
        # sorts first: self-similarity is 0 for an all-zero feature row, and
        # ties or float round-off can also push another customer ahead.
        neighbours = [j for j in ranked if j != i][:top_n]
        lookalikes[cust_id] = [
            # float(): keep NumPy scalar reprs out of the stringified CSV cell.
            (customer_ids[j], round(float(similarity_matrix[i, j]), 4))
            for j in neighbours
        ]
    return lookalikes


# Running this file as a script or notebook cell executes the pipeline and
# defines the same module-level variables as before; a plain import only
# defines the constants and helper above.
if __name__ == "__main__":
    # Load datasets
    customers_df = pd.read_csv(CUSTOMERS_CSV)
    products_df = pd.read_csv(PRODUCTS_CSV)
    transactions_df = pd.read_csv(TRANSACTIONS_CSV)

    # Merge transactions with customers and products
    df = transactions_df.merge(customers_df, on='CustomerID', how='left')
    df = df.merge(products_df, on='ProductID', how='left')

    # Check for correct column names after merging
    print("Columns in merged dataset:", df.columns)

    # When both merged frames carry a 'Price' column, pandas suffixes them as
    # 'Price_x' / 'Price_y'; prefer 'Price_x', then 'Price_y', else 'Price'.
    price_column = 'Price'
    if 'Price_x' in df.columns:
        price_column = 'Price_x'
    elif 'Price_y' in df.columns:
        price_column = 'Price_y'

    # One row per customer: total value, total quantity, mean price.
    customer_features = df.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        price_column: 'mean',
    }).reset_index()

    # Normalize data (column 0 is CustomerID, so scale everything after it)
    scaler = StandardScaler()
    customer_features_scaled = scaler.fit_transform(customer_features.iloc[:, 1:])

    # Compute similarity matrix
    similarity_matrix = cosine_similarity(customer_features_scaled)

    # Create a dictionary mapping customers to their top similar customers
    customer_ids = customer_features['CustomerID'].tolist()
    lookalike_map = build_lookalike_map(similarity_matrix, customer_ids)

    # Convert to DataFrame and save
    lookalike_df = pd.DataFrame(list(lookalike_map.items()), columns=['CustomerID', 'Lookalikes'])
    lookalike_df.to_csv(LOOKALIKE_CSV, index=False)

    # Print example output
    print(lookalike_df.head())
