# Task 2: Lookalike Model 

### Import Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

### Load Datasets

In [2]:
# Directory that holds the three input CSV files. It is the single place to
# edit when the notebook is run on another machine.
DATA_DIR = "C:/Users/APURVA/ZeoTap-Assign"

# Read each raw table into its own DataFrame.
customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

### Merge Datasets

In [3]:
# Attach product attributes to every transaction row. The left join keeps
# transactions whose ProductID has no match in `products`.
transaction_product = transactions.merge(products, how='left', on='ProductID')

In [4]:
# Total spend and total units per (customer, category) pair, returned as a
# flat frame with CustomerID and Category as ordinary columns.
per_customer_category = transaction_product.groupby(['CustomerID', 'Category'])
customer_product_data = per_customer_category.agg(
    TotalValue=('TotalValue', 'sum'),
    Quantity=('Quantity', 'sum'),
).reset_index()

In [5]:
# Reshape to one row per customer with a (metric, category) column for each
# combination. Categories a customer never bought from are filled with 0.
customer_product_data_pivot = pd.pivot_table(
    customer_product_data,
    values=['TotalValue', 'Quantity'],
    index='CustomerID',
    columns='Category',
    fill_value=0,
)

In [6]:
# Collapse the two-level (metric, category) column labels into single
# "<metric>_<category>" strings.
customer_product_data_pivot.columns = [
    "{}_{}".format(*level_pair)
    for level_pair in customer_product_data_pivot.columns
]

In [7]:
# Move CustomerID out of the index and back into a regular (first) column so
# it can be used as a merge key.
customer_product_data_pivot = customer_product_data_pivot.reset_index()

### Merge with customer data

In [8]:
# Left join so that every customer is kept, including those with no rows in
# the pivoted purchase table (their purchase columns come back as NaN).
customer_data = customers.merge(customer_product_data_pivot, how='left', on='CustomerID')

In [9]:
# Customers without any transactions have NaN in the purchase-aggregate
# columns after the left join; for those columns 0 is the correct value.
# Only these columns are filled: a blanket fillna(0) would also replace missing
# customer attributes (e.g. Region) with the integer 0, which would then be
# one-hot encoded as if it were a real category.
purchase_columns = list(customer_product_data_pivot.columns[1:])
customer_data[purchase_columns] = customer_data[purchase_columns].fillna(0)

### Create a feature set

In [10]:
# Model inputs: the customer identifier, the region, and every pivoted
# purchase column (all pivot columns except the leading CustomerID).
feature_columns = ['CustomerID', 'Region', *customer_product_data_pivot.columns[1:]]
features = customer_data[feature_columns]

In [11]:
# One-hot encode Region. drop_first=True omits the indicator column of the
# first category level.
features = pd.get_dummies(data=features, drop_first=True, columns=['Region'])

In [12]:
# Standardise every feature column. CustomerID is an identifier, not a
# feature, so it is removed before scaling.
model_inputs = features.drop(columns=['CustomerID'])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(model_inputs)

In [13]:
# Pairwise cosine similarity between all rows of the scaled feature matrix.
similarity_matrix = cosine_similarity(X=scaled_features)

In [14]:
# Label both axes with CustomerID so similarities can be looked up by id.
customer_ids = features['CustomerID']
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

### Function to get top 3 lookalikes for a given customer

In [15]:
def get_top_lookalikes(customer_id, n=3, similarity=None):
    """Return the ``n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : hashable
        Label of the customer's column in the similarity table.
    n : int, default 3
        Number of lookalikes to return.
    similarity : pandas.DataFrame, optional
        Similarity table labelled by customer id on both axes. When omitted,
        the notebook-level ``similarity_df`` is used.

    Returns
    -------
    tuple[list, list]
        ``(lookalike_ids, scores)``, both ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of the similarity table.
    """
    if similarity is None:
        similarity = similarity_df

    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie with the self-similarity score, in which
    # case slicing off position 0 could discard a real match and return the
    # customer as its own lookalike.
    candidates = similarity[customer_id].drop(labels=customer_id, errors="ignore")

    # A stable sort keeps tied scores in their original order, so results are
    # reproducible between runs.
    similar_customers = candidates.sort_values(ascending=False, kind="stable").head(n)
    return similar_customers.index.tolist(), similar_customers.values.tolist()

In [16]:
# One record per (customer, lookalike) pair for the first 20 customers in the
# feature table.
lookalike_results = [
    {'CustomerID': source_id, 'LookalikeID': match_id, 'Score': match_score}
    for source_id in features['CustomerID'].head(20)
    for match_id, match_score in zip(*get_top_lookalikes(source_id))
]

### Collect the results in a DataFrame

In [17]:
# Tabulate the list of result records; the dict keys become the columns.
lookalike_df = pd.DataFrame(data=lookalike_results)

### Save the results to a CSV file

In [18]:
# Write the results without the row index, then confirm on stdout.
lookalike_df.to_csv(path_or_buf='Apurva_Waghmare_Lookalike.csv', index=False)
print("Lookalike model is generated successfully and saved to Apurva_Waghmare_Lookalike.csv!")

Lookalike model is generated successfully and saved to Apurva_Waghmare_Lookalike.csv!
