In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")
products = pd.read_csv("Products.csv")

# ---- Data Preparation ----
# Merge datasets to create customer profiles
merged = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")

# Create customer-level aggregated features
customer_features = merged.groupby('CustomerID').agg(
    TotalSpend=('TotalValue', 'sum'),
    AverageSpend=('TotalValue', 'mean'),
    TotalTransactions=('TransactionID', 'count'),
    FavoriteCategory=('Category', lambda x: x.mode()[0] if not x.mode().empty else None)
).reset_index()

# One-hot encode the favorite product category
customer_features = pd.get_dummies(customer_features, columns=['FavoriteCategory'], drop_first=True)

# Normalize numerical features
scaler = StandardScaler()
numerical_columns = ['TotalSpend', 'AverageSpend', 'TotalTransactions']
customer_features[numerical_columns] = scaler.fit_transform(customer_features[numerical_columns])

# ---- Similarity Calculation ----
# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(customer_features[numerical_columns])

# Convert similarity matrix to a DataFrame for easy indexing
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features['CustomerID'], columns=customer_features['CustomerID'])

# ---- Generate Recommendations ----
# Function to get top N similar customers
def get_top_similar(customers_df, customer_id, top_n=3):
    scores = customers_df.loc[customer_id].sort_values(ascending=False)[1:top_n+1]  # Skip self-similarity
    return [(index, round(score, 2)) for index, score in scores.items()]

# Generate recommendations for first 20 customers (C0001 to C0020)
lookalike_recommendations = {}
for customer_id in customer_features['CustomerID'][:20]:
    lookalike_recommendations[customer_id] = get_top_similar(similarity_df, customer_id)

# ---- Save to CSV ----
# Convert recommendations to the required format
lookalike_list = []
for cust_id, recommendations in lookalike_recommendations.items():
    for similar_cust, score in recommendations:
        lookalike_list.append({'CustomerID': cust_id, 'SimilarCustomer': similar_cust, 'Score': score})

lookalike_df = pd.DataFrame(lookalike_list)
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)

print("Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'")


Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'


In [2]:
import pandas as pd

lookalike_output = pd.read_csv("FirstName_LastName_Lookalike.csv")
print(lookalike_output.head())


  CustomerID SimilarCustomer  Score
0      C0001           C0137   1.00
1      C0001           C0152   1.00
2      C0001           C0121   0.99
3      C0002           C0029   1.00
4      C0002           C0199   1.00
