# Task 2: Lookalike Model

## Importing required libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import csv

## Loading datasets

In [2]:
# Read the three input tables from the working directory, one DataFrame each.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

## Feature Engineering: Combining customer and transaction data to create a feature set.

In [3]:
# Attach product attributes (e.g. Category) to every transaction row.
transactions_products_df = transactions_df.merge(products_df, on="ProductID")

# Collapse to one row per customer: total spend, total units bought, and the
# category that occurs most often among that customer's transaction rows
# (ties resolve to the first value returned by Series.mode()).
customer_behavior_df = (
    transactions_products_df
    .groupby("CustomerID")
    .agg(
        TotalValue=("TotalValue", "sum"),
        Quantity=("Quantity", "sum"),
        Category=("Category", lambda categories: categories.mode()[0]),
    )
    .reset_index()
)

## Similarity Calculation: Using cosine similarity to compute similarity scores between customers

In [4]:
# Integer label for each customer's most frequent category. Kept as a column
# for inspection/compatibility only: the codes are arbitrary, so they are NOT
# used as a numeric feature below (that would imply an ordering and distance
# between categories that does not exist).
customer_behavior_df["Category_Encoded"] = customer_behavior_df["Category"].astype('category').cat.codes

# Merge with customer profiles. pd.merge defaults to an inner join, so only
# customers that appear in both tables are kept.
customer_profile_df = pd.merge(customers_df, customer_behavior_df, on="CustomerID")

# Standardize the numeric features (zero mean, unit variance). On the raw scale
# TotalValue dwarfs Quantity, so cosine similarity would effectively compare
# only spend-per-unit and report almost every pair of customers as ~1.0.
numeric_features = customer_profile_df[["TotalValue", "Quantity"]].astype(float)
# A constant column has zero spread; divide by 1 instead to avoid NaNs.
numeric_spread = numeric_features.std(ddof=0).replace(0, 1.0)
numeric_scaled = (numeric_features - numeric_features.mean()) / numeric_spread

# One-hot encode the category so that every pair of distinct categories is
# equally dissimilar.
category_indicators = pd.get_dummies(
    customer_profile_df["Category"], prefix="Category", dtype=float
)

# Prepare features for similarity calculation
features = pd.concat([numeric_scaled, category_indicators], axis=1).values

# Compute cosine similarity (rows and columns follow customer_profile_df order)
similarity_matrix = cosine_similarity(features)

## Generate Recommendations: For each customer, find the three most similar customers and record them together with their similarity scores

In [5]:
TOP_N = 3  # number of lookalikes kept per customer

# Extract the IDs once; row i of similarity_matrix corresponds to customer_ids[i].
customer_ids = customer_profile_df["CustomerID"].tolist()

# Maps each customer ID to a list of (lookalike customer ID, similarity score)
# tuples, ordered from most to least similar.
lookalike_results = {}
for idx, customer_id in enumerate(customer_ids):
    # Exclude the customer's own entry by position. Slicing off the first
    # sorted element is not safe: when another customer ties with (or, through
    # floating-point rounding, exceeds) the self-similarity score, the customer
    # could be listed as their own lookalike.
    candidates = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # Stable sort: equally similar customers keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_results[customer_id] = [
        (customer_ids[other_idx], score) for other_idx, score in candidates[:TOP_N]
    ]

## Saving results to a CSV file

In [7]:
# `csv` is already imported in the imports cell at the top of the notebook.
header = ["CustomerID", "Lookalike1", "Score1", "Lookalike2", "Score2", "Lookalike3", "Score3"]

# newline="" lets the csv module control line endings; the explicit encoding
# keeps the output identical across platforms.
with open("Akashnil_Garai_Lookalike.csv", "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for customer_id, lookalikes in lookalike_results.items():
        # Flatten [(id, score), ...] into [id, score, id, score, ...].
        row = [customer_id] + [item for sublist in lookalikes for item in sublist]
        # Pad with blanks when fewer than three lookalikes exist so every row
        # has the same number of cells as the header.
        row.extend([""] * (len(header) - len(row)))
        writer.writerow(row)

print("Lookalike results saved to 'Akashnil_Garai_Lookalike.csv'.")

Lookalike results saved to 'Akashnil_Garai_Lookalike.csv'.
