## Task 2: Lookalike Model

In [2]:
# Importing the libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the three source tables (customers, products, transactions) from CSV
# files located in the current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [5]:
# Join each transaction to its customer record and then to its product record.
# Both joins use the default inner join, so transactions without a matching
# customer or product are dropped.
merged = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

In [7]:
# Feature Engineering - collapse the merged transactions to one row per customer
per_customer = merged.groupby('CustomerID')

feature_spec = {
    # Sum of TotalValue over all of the customer's transactions
    'TotalValue': 'sum',
    # Number of distinct products the customer appears with
    'ProductID': 'nunique',
    # Category that occurs most often in the customer's transactions
    'Category': lambda categories: categories.value_counts().idxmax(),
    # First Region value found in the customer's group
    'Region': 'first',
}

customer_features = per_customer.agg(feature_spec).reset_index()

In [9]:
# One-hot encode the categorical feature columns; drop_first=True omits the
# first level of each column from the generated indicator columns.
categorical_columns = ['Category', 'Region']
customer_features_encoded = pd.get_dummies(
    customer_features,
    columns=categorical_columns,
    drop_first=True,
)

In [10]:
# Standardize the feature columns. CustomerID is an identifier rather than a
# feature, so it is removed before fitting the scaler.
feature_frame = customer_features_encoded.drop(columns=['CustomerID'])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)

In [12]:
# Pairwise cosine similarity between every pair of rows in scaled_features;
# row/column i corresponds to row i of the scaled feature matrix.
similarity_matrix = cosine_similarity(X=scaled_features)

# Function to get top 3 similar customers
def get_top_3_similar(customers_df, similarity_matrix, customer_id, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customers_df : pandas.DataFrame
        Frame with a ``CustomerID`` column. Its row *positions* must line up
        with the rows/columns of ``similarity_matrix`` (row ``i`` of the frame
        is described by row ``i`` of the matrix); the frame's index labels are
        ignored.
    similarity_matrix : 2-D array-like
        Square matrix of pairwise similarity scores.
    customer_id :
        The ``CustomerID`` value to find lookalikes for.
    top_n : int, default 3
        Maximum number of lookalikes to return.

    Returns
    -------
    list of (CustomerID, score) tuples
        Sorted by descending score; ties keep their original row order. The
        queried customer is never part of the result.

    Raises
    ------
    IndexError
        If ``customer_id`` does not occur in ``customers_df['CustomerID']``.
    """
    ids = customers_df['CustomerID'].tolist()

    # Locate the customer by row position, not by index label, so the lookup
    # stays aligned with the matrix even when the frame has a non-default index.
    try:
        customer_pos = ids.index(customer_id)
    except ValueError:
        raise IndexError(
            f"CustomerID {customer_id!r} not found in customers_df"
        ) from None

    scores = similarity_matrix[customer_pos]

    # Exclude the customer explicitly instead of assuming it sorts first:
    # another row with an equal (or, through rounding, marginally higher)
    # score could otherwise push the customer into its own lookalike list.
    candidate_positions = [pos for pos in range(len(scores)) if pos != customer_pos]
    ranked = sorted(candidate_positions, key=lambda pos: scores[pos], reverse=True)

    return [(ids[pos], scores[pos]) for pos in ranked[:top_n]]

In [13]:
# Recommendations for top 20 customers
# The rows of similarity_matrix follow customer_features (one row per customer
# that appears in the merged transactions, in groupby order), not the raw
# customers frame. Lookups therefore go through customer_features so that row
# positions and CustomerIDs stay aligned.
customers_with_features = set(customer_features['CustomerID'])

recommendations = {}
for customer_id in customers['CustomerID'][:20]:
    if customer_id not in customers_with_features:
        # No transactions means no feature row, so no lookalikes can be computed.
        recommendations[customer_id] = []
        continue
    recommendations[customer_id] = get_top_3_similar(customer_features, similarity_matrix, customer_id)

In [14]:
# Build one row per customer and write the result to CSV.
# Scores are converted to plain Python floats first: the Lookalikes cell is
# written using the list's text representation, and NumPy scalar objects would
# otherwise appear as e.g. "np.float64(0.97)" under NumPy 2.
recommendations_df = pd.DataFrame([
    {
        'CustomerID': cust,
        'Lookalikes': [(lookalike_id, float(score)) for lookalike_id, score in rec],
    }
    for cust, rec in recommendations.items()
])
recommendations_df.to_csv("YourName_Lookalike.csv", index=False)