In [18]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import csv
import os

# Read the three input tables from the notebook's upload directory.
customers = pd.read_csv('/content/Customers.csv')
products = pd.read_csv('/content/Products.csv')
transactions = pd.read_csv('/content/Transactions.csv')

# Build one wide table: each transaction row gains its customer's attributes,
# then its product's attributes. Both merges use pandas' default join type.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
data = pd.merge(transactions_with_customers, products, on='ProductID')

# Show which columns survived the merges (useful for spotting suffixed duplicates).
print(data.columns)

# One row per customer, described by three numeric features:
#   TotalSpent       - sum of TotalValue over the customer's transaction rows
#   TransactionCount - number of transaction rows
#   AvgPrice         - mean of Price_y over those rows. Price_y is the suffixed
#                      duplicate price column created by the second merge
#                      (presumably the Products price - confirm against the CSVs).
customer_features = data.groupby('CustomerID').agg(
    TotalSpent=('TotalValue', 'sum'),
    TransactionCount=('TransactionID', 'count'),
    AvgPrice=('Price_y', 'mean'),
)

print("Customer features head:")
print(customer_features.head())  # quick visual check of the aggregated frame

# Standardize the feature columns so no single feature dominates the similarity.
scaler = StandardScaler()
customer_features_normalized = scaler.fit_transform(customer_features)

# Pairwise cosine similarity between the standardized customer rows.
similarity_matrix = cosine_similarity(customer_features_normalized)

# Customer IDs in the same order as the rows/columns of similarity_matrix.
customer_ids = customer_features.index.to_list()

def find_top_similar_customers(customer_id, similarity_matrix, customer_ids, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id :
        ID of the customer to find lookalikes for; must be present in
        ``customer_ids``.
    similarity_matrix :
        2-D NumPy array whose row ``i`` holds the similarity of
        ``customer_ids[i]`` to every customer (same ordering on both axes).
    customer_ids : list
        Customer IDs in the row/column order of ``similarity_matrix``.
    top_n : int, default 3
        Maximum number of lookalikes to return. Values <= 0 yield an empty list.

    Returns
    -------
    list of (customer_id, similarity_score) tuples, highest similarity first.
    The queried customer is never part of the result.

    Raises
    ------
    ValueError
        If ``customer_id`` is not in ``customer_ids`` (raised by ``list.index``).
    """
    # Position of the queried customer in the matrix ordering.
    customer_index = customer_ids.index(customer_id)
    # Similarity of the queried customer to every customer (including themselves).
    similarity_scores = similarity_matrix[customer_index]
    # All indices ordered from most to least similar.
    ranked_indices = similarity_scores.argsort()[::-1]
    # Drop the customer's own index explicitly instead of assuming it is ranked
    # first: another customer can tie at the maximum similarity (identical or
    # collinear feature vectors), in which case skipping position 0 would remove
    # a real neighbour and keep the customer as their own lookalike.
    similar_customers_indices = [
        i for i in ranked_indices if i != customer_index
    ][:max(top_n, 0)]
    # Map the indices to customer IDs and their similarity scores.
    return [(customer_ids[i], similarity_scores[i]) for i in similar_customers_indices]

# Create recommendations for the first 20 customers
lookalike_recommendations = {}
for customer_id in customer_ids[:20]:
    similar_customers = find_top_similar_customers(customer_id, similarity_matrix, customer_ids)
    # Store scores as built-in floats. The lists below are rendered with repr()
    # both when printed and when written to the CSV; NumPy >= 2.0 renders its
    # scalars as "np.float64(...)", which would otherwise end up in the file.
    lookalike_recommendations[customer_id] = [
        (similar_id, float(score)) for similar_id, score in similar_customers
    ]

# Print lookalike recommendations for the first 20 customers
print("Lookalike recommendations for the first 20 customers:")
for customer_id, recommendations in lookalike_recommendations.items():
    print(f"Customer {customer_id}: {recommendations}")

# Ensure the directory exists before saving the file.
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the check-then-create race of a separate os.path.exists() test.
output_dir = '/content/'
os.makedirs(output_dir, exist_ok=True)

# Save recommendations to a CSV file: one row per customer, with the list of
# (similar customer ID, score) tuples serialized as text in the second column.
output_file = os.path.join(output_dir, 'Lookalike.csv')
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'SimilarCustomers'])
    for customer_id, recommendations in lookalike_recommendations.items():
        writer.writerow([customer_id, recommendations])

print("Lookalike recommendations saved to Lookalike.csv")



Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')
Customer features head:
            TotalSpent  TransactionCount    AvgPrice
CustomerID                                          
C0001          3354.52                 5  278.334000
C0002          1862.74                 4  208.920000
C0003          2725.38                 4  195.707500
C0004          5354.88                 8  240.636250
C0005          2034.24                 3  291.603333
Lookalike recommendations for the first 20 customers:
Customer C0001: [('C0103', 0.9909419845675972), ('C0137', 0.9589202544085182), ('C0056', 0.954715953371692)]
Customer C0002: [('C0029', 0.9998519555573054), ('C0077', 0.9975987068385926), ('C0032', 0.9931786546349639)]
Customer C0003: [('C0060', 0.9877398568640355), ('C0142', 0.9851097417374808), ('C0151', 0.984571819667784)]
C

In [19]:
import numpy as np

# Raw per-customer aggregates, followed by their standardized counterparts.
print("Customer Features:")
print(customer_features.head())

print("Normalized Customer Features:")
print(customer_features_normalized[:5])

# Top-left 5x5 corner of the pairwise similarity matrix.
print("Similarity Matrix:")
print(similarity_matrix[:5, :5])

# Sanity check: show only the first stored recommendation entry, if there is one.
first_entry = next(iter(lookalike_recommendations.items()), None)
if first_entry is not None:
    customer_id, recommendations = first_entry
    print(f"Customer {customer_id}: {recommendations}")

# Compare one sample customer's features with those of each recommended lookalike.
sample_customer = 'C0001'
sample_recommendations = lookalike_recommendations[sample_customer]

print(f"Customer {sample_customer} features:")
print(customer_features.loc[sample_customer])

for rec_customer, score in sample_recommendations:
    print(f"\nRecommended Customer {rec_customer} with similarity score {score}:")
    print(customer_features.loc[rec_customer])

# Flatten every stored similarity score into a single list.
top_n_scores = []
for recs in lookalike_recommendations.values():
    top_n_scores.extend(score for _, score in recs)

# Summarize the collected scores: mean and (population) standard deviation.
scores_array = np.asarray(top_n_scores)
mean_score = scores_array.mean()
std_score = scores_array.std()

print(f"Mean Similarity Score: {mean_score:.4f}")
print(f"Standard Deviation of Similarity Scores: {std_score:.4f}")




Customer Features:
            TotalSpent  TransactionCount    AvgPrice
CustomerID                                          
C0001          3354.52                 5  278.334000
C0002          1862.74                 4  208.920000
C0003          2725.38                 4  195.707500
C0004          5354.88                 8  240.636250
C0005          2034.24                 3  291.603333
Normalized Customer Features:
[[-0.06170143 -0.01145819  0.09467022]
 [-0.87774353 -0.46749414 -0.90401592]
 [-0.40585722 -0.46749414 -1.09410928]
 [ 1.03254704  1.35664965 -0.44770193]
 [-0.78392861 -0.92353008  0.28558127]]
Similarity Matrix:
[[ 1.         -0.17077524 -0.51252328 -0.60755484  0.60827528]
 [-0.17077524  1.          0.92563832 -0.47944799  0.5151387 ]
 [-0.51252328  0.92563832  1.         -0.25427773  0.279593  ]
 [-0.60755484 -0.47944799 -0.25427773  1.         -0.99834599]
 [ 0.60827528  0.5151387   0.279593   -0.99834599  1.        ]]
Customer C0001: [('C0103', 0.9909419845675972), (