Task 2: Lookalike Model

In [10]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from google.colab import files
from sklearn.metrics.pairwise import cosine_similarity

# Read the customer profiles and the transaction log from the working directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

# Sanity check: print a title line followed by the head of each frame.
print("Customers DataFrame:", customers.head(), sep="\n")
print("\nTransactions DataFrame:", transactions.head(), sep="\n")

# Collapse the transaction log to one row per customer: total spend
# (TotalValue) and total units bought (Quantity) across all transactions.
customer_features = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Display the aggregated customer features
print("\nAggregated Customer Features:")
print(customer_features.head())

# Attach the aggregates to every customer profile. The left join keeps
# customers that have no transactions; their aggregates come back as NaN.
# Fill ONLY those two numeric columns with 0: a blanket fillna(0) would also
# overwrite missing profile fields (e.g. Region) with the number 0, which
# get_dummies would later turn into a spurious 'Region_0' category.
customer_profiles = (
    pd.merge(customers, customer_features, on='CustomerID', how='left')
    .fillna({'TotalValue': 0, 'Quantity': 0})
)

# Display the merged customer profiles
print("\nMerged Customer Profiles:")
print(customer_profiles.head())

# One-hot encode Region so it can sit alongside the numeric features.
customer_profiles_encoded = pd.get_dummies(customer_profiles, columns=['Region'])

print("\nEncoded Customer Profiles:", customer_profiles_encoded.head(), sep="\n")

# Model inputs: the two spend aggregates followed by every Region_* indicator.
features = ['TotalValue', 'Quantity']
features.extend(
    name for name in customer_profiles_encoded.columns if name.startswith('Region_')
)

# Standardize the selected columns before measuring similarity.
scaler = StandardScaler()
customer_profiles_scaled = scaler.fit_transform(customer_profiles_encoded[features])

# Pairwise cosine similarity between all customers' scaled feature vectors.
similarity_matrix = cosine_similarity(customer_profiles_scaled)

# Label both axes with CustomerID so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_profiles['CustomerID'],
    columns=customer_profiles['CustomerID'],
)

print("\nSimilarity Matrix:", similarity_df.head(), sep="\n")

# Function to get top N similar customers
def get_top_similar_customers(customer_id, top_n=3, similarity=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to find lookalikes for.
        top_n: Maximum number of lookalikes to return.
        similarity: Optional square similarity DataFrame whose index and
            columns are customer IDs. Defaults to the notebook-level
            ``similarity_df``.

    Returns:
        A Series of similarity scores indexed by customer ID, ordered from
        most to least similar and never containing ``customer_id`` itself.
        If ``customer_id`` is not in the matrix index, an empty dict is
        returned (kept for backward compatibility; it still supports
        ``.items()``).
    """
    if similarity is None:
        similarity = similarity_df
    if customer_id not in similarity.index:
        return {}
    # Remove the customer by label instead of skipping the first sorted row:
    # a customer with an identical (or rounding-equal) feature vector ties at
    # the top score, so position 0 is not guaranteed to be the customer itself.
    scores = similarity[customer_id].drop(customer_id)
    # mergesort is stable, so tied scores keep their original matrix order.
    return scores.sort_values(ascending=False, kind='mergesort').head(top_n)

# Collect one record per (customer, lookalike) pair for the first 20 customers.
lookalike_list = [
    {
        'CustomerID': customer_id,
        'SimilarCustomerID': similar_customer_id,
        'SimilarityScore': similarity_score,
    }
    for customer_id in customers['CustomerID'][:20]
    for similar_customer_id, similarity_score
    in get_top_similar_customers(customer_id).items()
]

# Tabulate the records so they can be previewed and exported.
lookalike_df = pd.DataFrame(lookalike_list)

print("\nLookalike Recommendations:", lookalike_df.head(), sep="\n")

# Write the recommendations to CSV, leaving out the DataFrame index column.
lookalike_df.to_csv('Abhiram_Shanmuga_Lookalike.csv', index=False)


Customers DataFrame:
  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15

Transactions DataFrame:
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue   Price  
0      300.68  300.68  
1      300.68  300.68  
2      300.68  300.68  
3      601.36  300.68  
4      902.04  300.68  

Aggregated Customer Fe

In [11]:
# Hand the exported recommendations file to Colab's `files` helper for download.
# `files` comes from `google.colab`, so this cell only works inside Colab.
files.download('Abhiram_Shanmuga_Lookalike.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>