In [25]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [2]:
# Read the three input tables from CSV files in the working directory,
# in the order: customers, products, transactions.
customers, products, transactions = map(
    pd.read_csv,
    ("Customers.csv", "Products.csv", "Transactions.csv"),
)

In [6]:
# `transactions` already carries a Price column (it is the single Price column
# visible in the merged output), so the copy in `products` is dropped to avoid
# Price_x / Price_y duplicates after the merge.
# errors='ignore' keeps this cell re-runnable: `products` is reassigned here,
# so on a second run the column is already gone and a plain drop would raise
# KeyError.
products = products.drop(columns='Price', errors='ignore')

# Left join: keep every transaction and attach the product attributes that
# share its ProductID.
df = transactions.merge(products, on="ProductID", how='left')
df.head()

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price,ProductName,Category
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,ComfortLiving Bluetooth Speaker,Electronics
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,ComfortLiving Bluetooth Speaker,Electronics
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,ComfortLiving Bluetooth Speaker,Electronics


In [9]:
# Aggregate transactions per customer: total spend, total units, and a
# comma-joined string of the product categories (one entry per transaction row).
transaction_summary = (
    df.groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        # `df` is the result of a left merge, so Category is NaN for any
        # ProductID that had no match in the product table. ','.join raises
        # TypeError on those float NaNs, so they are skipped before joining.
        'Category': lambda x: ','.join(x.dropna().astype(str)),
    })
    .reset_index()
)
transaction_summary.head()

Unnamed: 0,CustomerID,TotalValue,Quantity,Category
0,C0001,3354.52,12,"Books,Home Decor,Electronics,Electronics,Elect..."
1,C0002,1862.74,10,"Home Decor,Home Decor,Clothing,Clothing"
2,C0003,2725.38,14,"Home Decor,Home Decor,Clothing,Electronics"
3,C0004,5354.88,23,"Books,Home Decor,Home Decor,Home Decor,Books,B..."
4,C0005,2034.24,7,"Home Decor,Electronics,Electronics"


In [12]:
# Attach the per-customer purchase summary to the customer table.
# The left join keeps every customer row; SignupDate is then removed
# from the combined frame.
data = (
    customers
    .merge(transaction_summary, on='CustomerID', how='left')
    .drop(columns='SignupDate')
)
data.head()


Unnamed: 0,CustomerID,CustomerName,Region,TotalValue,Quantity,Category
0,C0001,Lawrence Carroll,South America,3354.52,12.0,"Books,Home Decor,Electronics,Electronics,Elect..."
1,C0002,Elizabeth Lutz,Asia,1862.74,10.0,"Home Decor,Home Decor,Clothing,Clothing"
2,C0003,Michael Rivera,South America,2725.38,14.0,"Home Decor,Home Decor,Clothing,Electronics"
3,C0004,Kathleen Rodriguez,South America,5354.88,23.0,"Books,Home Decor,Home Decor,Home Decor,Books,B..."
4,C0005,Laura Weber,Asia,2034.24,7.0,"Home Decor,Electronics,Electronics"


In [15]:
# Preprocess and encode features.
# Missing values (the merge above is a left join, so customers without a row in
# transaction_summary have NaN there) are replaced with 0. The result is
# reassigned rather than using inplace=True, which pandas discourages and which
# offers no benefit here.
data = data.fillna(0)

# Replace each text column with pandas' integer category codes.
# NOTE(review): these codes are arbitrary labels, not magnitudes. Category holds
# the whole comma-joined purchase string, so customers end up with codes such as
# 53, 171, 173 that say nothing about how alike their purchases are, yet the
# later cells scale them and feed them to cosine similarity as numeric features.
# Consider one-hot encoding Region and using per-category purchase counts.
for column in ('Region', 'Category'):
    data[column] = data[column].astype('category').cat.codes
data.head()


Unnamed: 0,CustomerID,CustomerName,Region,TotalValue,Quantity,Category
0,C0001,Lawrence Carroll,3,3354.52,12.0,53
1,C0002,Elizabeth Lutz,0,1862.74,10.0,171
2,C0003,Michael Rivera,3,2725.38,14.0,173
3,C0004,Kathleen Rodriguez,3,5354.88,23.0,59
4,C0005,Laura Weber,0,2034.24,7.0,162


In [17]:
# Columns that make up each customer's feature vector for the similarity step.
features = [
    'Region',
    'TotalValue',
    'Quantity',
    'Category',
]
# Feature matrix: every row of `data`, restricted to the columns above.
X = data.loc[:, features]


In [18]:
# Standardise the feature matrix: learn the scaling parameters from X,
# then apply that same scaling to X.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [21]:
# Pairwise cosine similarity between the rows of the scaled feature matrix;
# row i holds the scores of customer i against every customer.
similarity_matrix = cosine_similarity(X=X_scaled)


In [23]:
# Create recommendations for the first 20 customers (C0001 to C0020).
N_TARGET_CUSTOMERS = 20   # how many leading customers get recommendations
TOP_N_LOOKALIKES = 3      # how many lookalikes are kept per customer
LOOKALIKE_CSV = "Gouri_Agrawal_Lookalike.csv"

# Rows of similarity_matrix are positional, so customer IDs are looked up by
# position in a plain list rather than by DataFrame index label.
customer_ids = data['CustomerID'].tolist()

lookalike_results = {}
for i, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Drop the customer's own entry by index. Relying on "self sorts first"
    # breaks when another customer ties at (or rounds above) the self-score.
    candidates = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[i])
        if idx != i
    ]
    # Highest similarity first; sorted() is stable, so ties keep row order.
    best_matches = sorted(
        candidates, key=lambda pair: pair[1], reverse=True
    )[:TOP_N_LOOKALIKES]
    # float() converts the NumPy scalar to a built-in float so that str() below
    # writes "0.9998" and not "np.float64(0.9998)" under NumPy >= 2.
    lookalike_results[customer_id] = [
        (customer_ids[idx], round(float(score), 4)) for idx, score in best_matches
    ]

# Save lookalike results: one row per customer, with the list of
# (lookalike ID, score) tuples stored as its string representation.
lookalike_df = pd.DataFrame(
    [
        {'CustomerID': cust_id, 'Lookalikes': str(lookalikes)}
        for cust_id, lookalikes in lookalike_results.items()
    ],
    columns=['CustomerID', 'Lookalikes'],  # keep the header even with no rows
)
lookalike_df.to_csv(LOOKALIKE_CSV, index=False)

# Report the path that was actually written.
print(f"Lookalike Model completed. Results saved in {LOOKALIKE_CSV}.")

Lookalike Model completed. Results saved in Lookalike.csv.
