In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Lookalike pipeline: for each of the first SAMPLE_SIZE customers, find the
# TOP_K most similar other customers based on total spend, total quantity and
# one-hot encoded region, then write the result to Lookalike.csv.

TOP_K = 3          # lookalikes reported per customer
SAMPLE_SIZE = 20   # number of leading customers to produce lookalikes for

# Load Data
customers = pd.read_csv("/Intern Project/Customers.csv")
transactions = pd.read_csv("/Intern Project/Transactions.csv")

# Aggregate Transaction Data (one row per customer: summed value and quantity)
customer_spending = transactions.groupby("CustomerID").agg({
    "TotalValue": "sum",
    "Quantity": "sum"
}).reset_index()

# Merge with Customer Data
# The left join keeps customers that have no transactions. Only the aggregated
# spend columns are zero-filled, so missing values in other customer columns
# are not silently replaced by 0.
spend_columns = ["TotalValue", "Quantity"]
customer_profile = customers.merge(customer_spending, on="CustomerID", how="left")
customer_profile[spend_columns] = customer_profile[spend_columns].fillna(0)

# Encode Categorical Data
customer_profile = pd.get_dummies(customer_profile, columns=["Region"], drop_first=True)

# Feature Scaling
# Match the dummy columns by prefix rather than by substring so that an
# unrelated column merely containing "Region_" is not picked up as a feature.
region_columns = [col for col in customer_profile.columns if col.startswith("Region_")]
features = spend_columns + region_columns
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_profile[features])

# Train Nearest Neighbors Model
# One extra neighbour is requested because each query row is part of the
# fitted data and therefore matches itself at distance 0. The count is capped
# at the number of rows because kneighbors rejects n_neighbors > n_samples.
knn = NearestNeighbors(
    n_neighbors=min(TOP_K + 1, len(customer_profile)),
    metric="euclidean",
)
knn.fit(scaled_features)

# Find Similar Customers
# Query all sampled customers in a single call instead of one call per row.
customer_ids = customer_profile["CustomerID"].tolist()
distances, indices = knn.kneighbors(scaled_features[:SAMPLE_SIZE])

lookalike_results = {}
for row, cust_id in enumerate(customer_ids[:SAMPLE_SIZE]):
    # Drop the self-match by index, not by position: when several customers
    # share an identical feature vector the query row is not guaranteed to be
    # returned first among the zero-distance ties.
    #
    # NOTE: the score is 1 - Euclidean distance in scaled feature space. It is
    # not bounded below and becomes negative for distances greater than 1.
    # float() keeps built-in floats in the output so the CSV text does not
    # depend on how the installed NumPy version prints its scalar types.
    similar_customers = [
        (customer_ids[idx], round(float(1 - dist), 2))
        for idx, dist in zip(indices[row], distances[row])
        if idx != row
    ][:TOP_K]
    lookalike_results[cust_id] = similar_customers

# Convert to DataFrame and Save CSV
# Each row holds a customer ID and the list of (lookalike ID, score) tuples.
lookalike_df = pd.DataFrame(
    list(lookalike_results.items()),
    columns=["CustomerID", "SimilarCustomers"],
)
lookalike_df.to_csv("/Intern Project/Lookalike.csv", index=False)

# Print Sample Output
print(lookalike_df.head())


  CustomerID                               SimilarCustomers
0      C0001  [(C0107, 0.85), (C0137, 0.84), (C0184, 0.84)]
1      C0002  [(C0088, 0.85), (C0142, 0.74), (C0159, 0.66)]
2      C0003  [(C0147, 0.88), (C0190, 0.86), (C0174, 0.66)]
3      C0004  [(C0113, 0.66), (C0102, 0.47), (C0012, 0.35)]
4      C0005  [(C0186, 0.85), (C0159, 0.84), (C0140, 0.67)]
