In [40]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
# NOTE: machine-specific absolute folder; change DATA_DIR (one place) to run
# this cell against a copy of the data stored elsewhere.
DATA_DIR = r'C:\Users\shaik\Desktop\newintern'
customers_df = pd.read_csv(rf'{DATA_DIR}\Customers.csv')
products_df = pd.read_csv(rf'{DATA_DIR}\Products.csv')
transactions_df = pd.read_csv(rf'{DATA_DIR}\Transactions.csv')

# Merge datasets
# Left joins keep every transaction row even when its product or customer is
# missing from the lookup table. validate='many_to_one' makes pandas raise a
# MergeError if a ProductID / CustomerID occurs more than once on the right
# side; without the check such duplicates would silently duplicate transaction
# rows and inflate any totals computed from merged_data.
transactions_products = pd.merge(
    transactions_df, products_df, on='ProductID', how='left', validate='many_to_one'
)
merged_data = pd.merge(
    transactions_products, customers_df, on='CustomerID', how='left', validate='many_to_one'
)

# Feature Engineering
# Spending summary with one row per CustomerID: total and mean TotalValue,
# number of transactions, and how many distinct categories / products appear.
customer_features = merged_data.groupby('CustomerID', as_index=False).agg(
    total_spent=('TotalValue', 'sum'),
    total_transactions=('TransactionID', 'count'),
    avg_spending=('TotalValue', 'mean'),
    distinct_categories=('Category', 'nunique'),
    distinct_products=('ProductID', 'nunique'),
)

# Category preferences: one-hot encode the Category of every transaction row,
# then add the indicator columns up per customer to get per-category counts.
category_prefs = (
    pd.get_dummies(merged_data[['CustomerID', 'Category']], columns=['Category'])
    .groupby('CustomerID', as_index=False)
    .sum()
)

# Attach the per-category counts to the spending summary.
customer_features = customer_features.merge(category_prefs, on='CustomerID', how='left')

# Normalize features
# Every column except the CustomerID key is a model input; the key becomes the
# row index so it stays attached to the rows without being scaled.
numerical_features = customer_features.set_index('CustomerID')
scaler = StandardScaler()
normalized_features = scaler.fit_transform(numerical_features)

# Compute cosine similarity
# Pairwise customer-by-customer matrix, labelled with CustomerID on both axes
# so that scores can be looked up by ID.
similarity_matrix = cosine_similarity(normalized_features)
customer_ids = numerical_features.index
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Generate Lookalike Map for customers C0001 - C0020
N_LOOKALIKES = 3  # number of most-similar customers kept per target customer
target_customers = [f'C{i:04d}' for i in range(1, 21)]
lookalike_map = {}

for customer_id in target_customers:
    # Targets that have no row in similarity_df are skipped (no entry is
    # created for them in lookalike_map).
    if customer_id not in similarity_df.index:
        continue

    # Remove the customer's own score by label instead of assuming it sorts
    # first: when another customer ties with the self-similarity (identical or
    # proportional feature vector, or self-similarity rounding slightly below
    # 1.0), positional slicing could drop a real neighbour and keep the
    # customer as their own lookalike.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .nlargest(N_LOOKALIKES)
    )

    lookalike_map[customer_id] = [
        {
            "LookalikeID": similar_customer_id,
            # Plain Python float keeps the printed/serialised value a bare
            # number regardless of how the installed NumPy reprs its scalars.
            "SimilarityScore": round(float(score), 4),
        }
        for similar_customer_id, score in similar_customers.items()
    ]

# Display top 3 lookalikes for each of the first 20 customers
# One line per customer, in the insertion order of lookalike_map.
print("Top 3 lookalikes for each of the first 20 customers:")
for customer in lookalike_map:
    lookalikes = lookalike_map[customer]
    print(f"Customer {customer}: {lookalikes}")

# Create Lookalike.csv
# NOTE: machine-specific absolute path; change it here to write elsewhere.
LOOKALIKE_CSV_PATH = r'C:\Users\shaik\Desktop\newintern\Lookalike.csv'

# One row per customer in lookalike_map. The Lookalikes cell is a
# comma-separated list of dict-style entries, e.g.
# {'LookalikeID': 'C0069', 'SimilarityScore': 0.8732}
lookalike_data = {
    'CustomerID': list(lookalike_map),
    'Lookalikes': [
        ", ".join(
            f"{{'LookalikeID': '{item['LookalikeID']}', 'SimilarityScore': {item['SimilarityScore']}}}"
            for item in lookalikes
        )
        for lookalikes in lookalike_map.values()
    ],
}

lookalike_df = pd.DataFrame(lookalike_data)
try:
    lookalike_df.to_csv(LOOKALIKE_CSV_PATH, index=False)
except PermissionError:
    # Give an actionable hint, then let the original exception propagate so
    # the failure is not hidden and the success message below is not printed.
    print(
        f"Could not write {LOOKALIKE_CSV_PATH}: close the file if it is open in "
        "another program and check that the folder is writable, then re-run."
    )
    raise

print("Lookalike.csv has been generated.")


Top 3 lookalikes for each of the first 20 customers:
Customer C0001: [{'LookalikeID': 'C0069', 'SimilarityScore': 0.8732}, {'LookalikeID': 'C0127', 'SimilarityScore': 0.8294}, {'LookalikeID': 'C0091', 'SimilarityScore': 0.8074}]
Customer C0002: [{'LookalikeID': 'C0134', 'SimilarityScore': 0.8929}, {'LookalikeID': 'C0133', 'SimilarityScore': 0.8841}, {'LookalikeID': 'C0062', 'SimilarityScore': 0.8698}]
Customer C0003: [{'LookalikeID': 'C0166', 'SimilarityScore': 0.9754}, {'LookalikeID': 'C0031', 'SimilarityScore': 0.898}, {'LookalikeID': 'C0159', 'SimilarityScore': 0.7922}]
Customer C0004: [{'LookalikeID': 'C0065', 'SimilarityScore': 0.9105}, {'LookalikeID': 'C0017', 'SimilarityScore': 0.8593}, {'LookalikeID': 'C0075', 'SimilarityScore': 0.8567}]
Customer C0005: [{'LookalikeID': 'C0197', 'SimilarityScore': 0.998}, {'LookalikeID': 'C0007', 'SimilarityScore': 0.9457}, {'LookalikeID': 'C0095', 'SimilarityScore': 0.918}]
Customer C0006: [{'LookalikeID': 'C0185', 'SimilarityScore': 0.8794}, 

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\shaik\\Desktop\\newintern\\Lookalike.csv'