# Lookalike.csv


In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load the datasets
customers = pd.read_csv("C:\\Users\\Admin\\Downloads\\Customers.csv")
products = pd.read_csv("C:\\Users\\Admin\\Downloads\\Products.csv")
transactions = pd.read_csv("C:\\Users\\Admin\\Downloads\\Transactions.csv")


In [4]:
# Step 1: Data Preparation
# Merge customer and transaction data
customer_transactions = pd.merge(transactions, customers, on="CustomerID", how="inner")
customer_transactions = pd.merge(customer_transactions, products, on="ProductID", how="inner")

In [7]:
customer_transactions.

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,CustomerName,Region,SignupDate,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15,ComfortLiving Bluetooth Speaker,Electronics,300.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,T00630,C0031,P093,2024-10-08 23:58:14,2,609.88,304.94,Tina Miller,South America,2024-04-11,TechPro Vase,Home Decor,304.94
996,T00672,C0165,P044,2024-07-28 00:09:49,4,75.28,18.82,Juan Mcdaniel,South America,2022-04-09,ActiveWear Running Shoes,Clothing,18.82
997,T00711,C0165,P044,2024-06-11 15:51:14,4,75.28,18.82,Juan Mcdaniel,South America,2022-04-09,ActiveWear Running Shoes,Clothing,18.82
998,T00878,C0165,P044,2024-09-24 21:15:21,3,56.46,18.82,Juan Mcdaniel,South America,2022-04-09,ActiveWear Running Shoes,Clothing,18.82


In [8]:
print(customer_transactions.columns)


Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')


In [10]:
# Aggregate transaction data by customer
customer_profiles = customer_transactions.groupby("CustomerID").agg({
    "TotalValue": "sum",       # Total spending
    "TransactionID": "count", # Transaction frequency
    "Price_x": "mean"           # Average transaction value
}).reset_index()

In [11]:
# Add customer attributes
customer_profiles = pd.merge(customer_profiles, customers, on="CustomerID", how="inner")


In [12]:
# Encode categorical variables (e.g., Region)
customer_profiles = pd.get_dummies(customer_profiles, columns=["Region"], drop_first=True)


In [13]:
# Normalize numerical features for similarity calculation
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(customer_profiles.drop(columns=["CustomerID", "CustomerName", "SignupDate"]))


In [14]:
# Step 2: Compute Similarity
# Calculate pairwise cosine similarity
similarity_matrix = cosine_similarity(normalized_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_profiles["CustomerID"], columns=customer_profiles["CustomerID"])


In [15]:
# Step 3: Find Top 3 Lookalikes for Each Customer
def get_top_lookalikes(customer_id, n=3):
    similar_customers = similarity_df[customer_id].sort_values(ascending=False).iloc[1:n+1]
    return list(zip(similar_customers.index, similar_customers.values))


In [16]:
# Generate lookalike recommendations for the first 20 customers
lookalike_map = {}
for customer_id in customer_profiles["CustomerID"].iloc[:20]:
    lookalike_map[customer_id] = get_top_lookalikes(customer_id)


In [17]:
# Step 4: Create Lookalike.csv
lookalike_results = []
for customer_id, lookalikes in lookalike_map.items():
    row = {
        "CustomerID": customer_id,
        "SimilarCustomerID1": lookalikes[0][0],
        "Score1": lookalikes[0][1],
        "SimilarCustomerID2": lookalikes[1][0],
        "Score2": lookalikes[1][1],
        "SimilarCustomerID3": lookalikes[2][0],
        "Score3": lookalikes[2][1]
    }
    lookalike_results.append(row)

lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike.csv has been created successfully!")


Lookalike.csv has been created successfully!


In [18]:
lookalike_df

Unnamed: 0,CustomerID,SimilarCustomerID1,Score1,SimilarCustomerID2,Score2,SimilarCustomerID3,Score3
0,C0001,C0137,0.99986,C0152,0.999824,C0191,0.999594
1,C0002,C0088,0.996978,C0056,0.995522,C0142,0.995234
2,C0003,C0190,0.995968,C0052,0.995484,C0031,0.995471
3,C0004,C0113,0.999053,C0108,0.996941,C0012,0.996382
4,C0005,C0007,0.99811,C0186,0.997565,C0146,0.996693
5,C0006,C0158,0.996602,C0048,0.996445,C0171,0.99532
6,C0007,C0115,0.99917,C0005,0.99811,C0186,0.996402
7,C0008,C0109,0.995144,C0098,0.988798,C0139,0.984735
8,C0009,C0061,0.994927,C0132,0.994446,C0167,0.993385
9,C0010,C0121,0.994672,C0111,0.993962,C0119,0.990599


In [19]:
# Save the DataFrame as a CSV file
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike.csv has been created and saved.")

Lookalike.csv has been created and saved.


In [20]:
from IPython.display import FileLink

# Provide a download link
FileLink("Lookalike.csv")
