In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load datasets
customers = pd.read_csv('/content/drive/MyDrive/Customers.csv')
transactions = pd.read_csv('/content/drive/MyDrive/Transactions.csv')
products = pd.read_csv('/content/drive/MyDrive/Products.csv')

In [None]:
# Feature Engineering
def create_customer_profile(customers, transactions, products):
    # Aggregate transaction data for each customer
    customer_transactions = transactions.groupby('CustomerID').agg({
        'TransactionID': 'count',
        'TotalValue': 'sum',
        'Quantity': 'sum'
    }).reset_index()

     # Merge customer information with transaction data
    customer_profile = pd.merge(customers, customer_transactions, on='CustomerID')

     # Product category preferences
    product_category_counts = transactions.merge(products, on='ProductID') \
        .groupby('CustomerID')['Category'].value_counts().unstack(fill_value=0)

        # Normalize category counts
    product_category_prefs = product_category_counts.div(product_category_counts.sum(axis=1), axis=0)

    # Merge product category preferences
    customer_profile = customer_profile.merge(product_category_prefs, left_on='CustomerID', right_index=True)

    return customer_profile

In [None]:
def preprocess_features(customer_profile):
    # Preprocessing for different feature types
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['TotalValue', 'Quantity', 'TransactionID']),
            ('cat', OneHotEncoder(handle_unknown='ignore'), ['Region'])
        ])

    # Include product category columns
    category_columns = [col for col in customer_profile.columns if col.startswith(('Books', 'Electronics', 'Home', 'Clothing'))]

    # Combine numerical and categorical features for similarity calculation
    features = customer_profile[['CustomerID', 'TotalValue', 'Quantity', 'TransactionID', 'Region'] + category_columns]

    # Fit and transform features
    processed_features = preprocessor.fit_transform(features.drop('CustomerID', axis=1))

    return features['CustomerID'], processed_features

In [None]:
# Lookalike Model
def find_lookalikes(customer_profile, top_n=3):
    customer_ids, processed_features = preprocess_features(customer_profile)

    # Compute cosine similarity
    similarity_matrix = cosine_similarity(processed_features)

    # Store results
    lookalikes = {}


     # Find top N similar customers for first 20 customers
    for i, cust_id in enumerate(customer_ids[:20]):
        # Get similarity scores, excluding self
        sim_scores = list(enumerate(similarity_matrix[i]))
        sim_scores = [(idx, score) for idx, score in sim_scores if customer_ids[idx] != cust_id]

        # Sort and get top N
        sim_scores.sort(key=lambda x: x[1], reverse=True)
        top_similar = sim_scores[:top_n]

        lookalikes[cust_id] = [
            (customer_ids[idx], round(score, 4)) for idx, score in top_similar
        ]

    return lookalikes

In [None]:
# Create customer profile and find lookalikes
customer_profile = create_customer_profile(customers, transactions, products)
lookalike_results = find_lookalikes(customer_profile)

In [None]:
# Create Lookalike.csv
lookalike_df = pd.DataFrame([
    {'OriginalCustomer': orig,
     'LookalikeCustomer1': looks[0][0], 'Similarity1': looks[0][1],
     'LookalikeCustomer2': looks[1][0], 'Similarity2': looks[1][1],
     'LookaalikeCustomer3': looks[2][0], 'Similarity3': looks[2][1]}
    for orig, looks in lookalike_results.items()
])


In [None]:
# Save Lookalike.csv
lookalike_df.to_csv('/content/drive/MyDrive/Lookalike.csv', index=False)

print(lookalike_df)

   OriginalCustomer LookalikeCustomer1  Similarity1 LookalikeCustomer2  \
0             C0001              C0107       0.9894              C0137   
1             C0002              C0142       0.9886              C0177   
2             C0003              C0190       0.9172              C0133   
3             C0004              C0113       0.9943              C0165   
4             C0005              C0186       0.9975              C0159   
5             C0006              C0168       0.9538              C0048   
6             C0007              C0159       0.9883              C0005   
7             C0008              C0109       0.9264              C0156   
8             C0009              C0198       0.9874              C0060   
9             C0010              C0166       0.9645              C0199   
10            C0011              C0107       0.9854              C0048   
11            C0012              C0102       0.9878              C0155   
12            C0013              C0155

In [None]:
def recommend_similar_customers(input_customer_id, customer_profile, lookalike_results):
    """
    Recommends similar customers for a given customer ID

    Parameters:
    - input_customer_id: CustomerID to find lookalikes for
    - customer_profile: Preprocessed customer profile dataframe
    - lookalike_results: Dictionary of lookalike customers

    Returns:
    - Detailed recommendations with customer details
    """
    # Check if customer exists in lookalike results
    if input_customer_id not in lookalike_results:
        print(f"No recommendations found for CustomerID: {input_customer_id}")
        return None

    # Get lookalike customers
    lookalikes = lookalike_results[input_customer_id]

    # Fetch full details of similar customers
    recommendations = []
    for customer, similarity in lookalikes:
        customer_info = customer_profile[customer_profile['CustomerID'] == customer].iloc[0]
        recommendations.append({
            'Similar_CustomerID': customer,
            'Region': customer_info['Region'],
            'Similarity_Score': similarity,
            'Total_Transactions': customer_info['TransactionID'],
            'Total_Value': round(customer_info['TotalValue'], 2)
        })

    return recommendations

# Interactive recommendation function
def get_customer_recommendations(customer_id):
    """
    Interactive function to get customer recommendations

    Parameters:
    - customer_id: CustomerID to find lookalikes for

    Returns:
    - Prints and returns detailed recommendations
    """
    # Ensure the model is already run (lookalike_results exists)
    recommendations = recommend_similar_customers(customer_id, customer_profile, lookalike_results)

    if recommendations:
        print(f"\nRecommendations for Customer {customer_id}:")
        for idx, rec in enumerate(recommendations, 1):
            print(f"\nRecommendation {idx}:")
            for key, value in rec.items():
                print(f"{key}: {value}")

    return recommendations



In [18]:
print("\nExample: Get recommendations for first customer")
get_customer_recommendations('C0006')

# Optional: Interactive input
def interactive_recommendations():
    while True:
        customer_id = input("\nEnter CustomerID (or 'exit' to quit): ")
        if customer_id.lower() == 'exit':
            break
        get_customer_recommendations(customer_id)


Example: Get recommendations for first customer

Recommendations for Customer C0006:

Recommendation 1:
Similar_CustomerID: C0168
Region: South America
Similarity_Score: 0.9538
Total_Transactions: 4
Total_Value: 5053.83

Recommendation 2:
Similar_CustomerID: C0048
Region: South America
Similarity_Score: 0.9075
Total_Transactions: 5
Total_Value: 3850.94

Recommendation 3:
Similar_CustomerID: C0187
Region: South America
Similarity_Score: 0.9034
Total_Transactions: 5
Total_Value: 4612.44
