## Importing Libraries

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score

## Loading the datasets

In [4]:
# Read the three raw CSV exports in one pass; the file order matches the
# order of the names on the left-hand side.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

## Convert dates to datetime format

In [6]:
# Parse the date columns into pandas datetime values, overwriting the
# original columns on the loaded frames.
transactions_df["TransactionDate"] = transactions_df["TransactionDate"].pipe(pd.to_datetime)
customers_df["SignupDate"] = customers_df["SignupDate"].pipe(pd.to_datetime)

## Merging the transaction data with customer data for lookalike modeling

In [8]:
# Attach customer attributes to every transaction row. No `how` is given, so
# pandas performs its default inner join: only CustomerIDs present in both
# frames survive the merge.
customer_transactions = pd.merge(transactions_df, customers_df, on="CustomerID")

## Aggregating the purchase behavior for each customer

In [10]:
# One row per customer: total units bought and total amount spent.
# `as_index=False` keeps CustomerID as a regular column.
customer_features = (
    customer_transactions
    .groupby("CustomerID", as_index=False)[["Quantity", "TotalValue"]]
    .sum()
)

## Normalizing the data for similarity calculation

In [12]:
# Standardise the two behaviour columns so neither dominates the similarity
# computation purely because of its scale.
behaviour_columns = ["Quantity", "TotalValue"]
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(customer_features[behaviour_columns])

## Compute similarity matrix using cosine similarity

In [14]:
# Pairwise cosine similarity between every pair of customers; with Y left as
# None the rows of X are compared against each other, so entry [i, j] relates
# row i to row j of the scaled feature array.
similarity_matrix = cosine_similarity(X=customer_features_scaled, Y=None)

In [15]:
# Row/column i of similarity_matrix corresponds to customer_ids[i], because the
# matrix was computed from customer_features in the same row order.
customer_ids = customer_features["CustomerID"].tolist()

N_TARGET_CUSTOMERS = 20  # recommendations are produced for the first 20 customers
N_LOOKALIKES = 3         # number of lookalikes kept per customer

# Maps each target CustomerID to a list of (lookalike CustomerID, score) tuples,
# ordered from most to least similar.
lookalike_results = {}

for i, cust_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position instead of assuming it sorts
    # first: other customers can tie with (or, through floating-point rounding,
    # exceed) the self-similarity, in which case slicing [1:4] would keep the
    # customer as their own lookalike and drop a genuine neighbour.
    similar_customers = [
        (j, float(score))  # plain float keeps str() output independent of the NumPy version
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    similar_customers.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_results[cust_id] = [
        (customer_ids[j], score) for j, score in similar_customers[:N_LOOKALIKES]
    ]

# Flatten to a two-column frame for Lookalike.csv; each lookalike list is
# stored as its string representation.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_results.keys()),
    "Lookalikes": [str(v) for v in lookalike_results.values()],
})


## Saving the lookalike results

In [17]:
# Persist the recommendations; index=False omits the DataFrame's row index
# from the written file.
lookalike_file_path = "Lookalike.csv"
lookalike_df.to_csv(path_or_buf=lookalike_file_path, index=False)

In [18]:
print("Lookalike Recommendations for First 20 Customers:")
# Last expression of the cell, so the notebook renders it as a table;
# shows at most the first 20 rows.
lookalike_df.iloc[:20]


Lookalike Recommendations for First 20 Customers:


Unnamed: 0,CustomerID,Lookalikes
0,C0001,"[('C0085', 0.9999990504724361), ('C0042', 0.99..."
1,C0002,"[('C0157', 0.9999942410168485), ('C0166', 0.99..."
2,C0003,"[('C0111', 0.9940081095432594), ('C0160', 0.99..."
3,C0004,"[('C0162', 0.9999999965087093), ('C0165', 0.99..."
4,C0005,"[('C0080', 0.999982235548051), ('C0167', 0.999..."
5,C0006,"[('C0079', 0.9999656845154902), ('C0117', 0.99..."
6,C0007,"[('C0146', 0.9999895943808703), ('C0125', 0.99..."
7,C0008,"[('C0109', 0.9998419065580372), ('C0136', 0.99..."
8,C0009,"[('C0015', 0.9999998280836607), ('C0131', 0.99..."
9,C0010,"[('C0176', 0.9977495907269393), ('C0027', 0.99..."
