<a href="https://colab.research.google.com/github/YAsH12377777/YAsHDEEP/blob/main/YAsHDEEP_G_lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Read the three input tables from the current working directory
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Enrich every transaction with its customer and product attributes.
# merge() defaults to an inner join, so transactions whose CustomerID or
# ProductID has no match in the other table are left out.
merged_data = transactions.merge(customers, on='CustomerID')
merged_data = merged_data.merge(products, on='ProductID')

# Collapse the transactions into one profile row per customer
customer_profiles = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),                    # total spending
        TransactionID=('TransactionID', 'count'),            # number of transactions
        Category=('Category', lambda cats: ','.join(cats)),  # comma-joined purchased categories
        Region=('Region', 'first'),                          # first region value in the group
    )
    .reset_index()
)

# Expand the comma-joined Category column into per-category indicator columns
def get_category_dummies(df):
    """Return a copy of *df* with 'Category' replaced by indicator columns.

    The 'Category' column is expected to hold comma-separated category
    names. Each distinct name becomes its own 0/1 column, appended after
    the existing columns; the original 'Category' column is then removed.
    The input frame is not modified.
    """
    indicator_columns = df['Category'].str.get_dummies(sep=',')
    widened = pd.concat([df, indicator_columns], axis=1)
    return widened.drop(columns=['Category'])

# Swap the comma-joined Category string for one indicator column per category
customer_profiles = get_category_dummies(customer_profiles)

# Column groups fed to the preprocessing pipeline: the two aggregate
# columns are numeric, every remaining non-ID/non-Region column is
# treated as categorical.
# NOTE(review): Region is aggregated into the profile but is excluded here
# and dropped below, so it does not influence similarity - confirm intended.
numeric_features = ['TotalValue', 'TransactionID']
categorical_features = [
    column
    for column in customer_profiles.columns
    if column not in ('CustomerID', 'TotalValue', 'TransactionID', 'Region')
]

# Numeric columns: fill gaps with the column mean, then standardise
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the most frequent value, then one-hot encode.
# NOTE(review): these columns are already 0/1 indicators, so they are
# one-hot encoded a second time here - confirm this is intended.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
    ]
)

# Route each column group through its own transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Fit and apply the preprocessing on everything except the ID and Region columns
processed_data = preprocessor.fit_transform(
    customer_profiles.drop(columns=['CustomerID', 'Region'])
)

# Pairwise cosine similarity between all customer feature rows
similarity_matrix = cosine_similarity(processed_data)

# Function to recommend the most similar customers (top 3 by default)
def recommend_similar(customers_list, similarity_matrix, customer_ids, top_n=3):
    """Return the most similar customers for each requested customer.

    Args:
        customers_list: Iterable of customer IDs to produce recommendations for.
        similarity_matrix: Square, row-indexable matrix where entry [i][j] is
            the similarity between customer_ids[i] and customer_ids[j].
        customer_ids: Sequence of customer IDs in the same order as the rows
            of similarity_matrix.
        top_n: Maximum number of similar customers returned per customer.

    Returns:
        Dict mapping each requested customer ID to a list of
        (similar_customer_id, score) tuples, ordered by descending score.
        Ties keep the order of customer_ids.

    Raises:
        ValueError: If a requested ID does not appear in customer_ids.
    """
    # Build the ID -> row lookup once instead of scanning the list for every
    # customer. setdefault keeps the first occurrence, like list.index does.
    row_of = {}
    for position, known_id in enumerate(customer_ids):
        row_of.setdefault(known_id, position)

    limit = max(top_n, 0)
    recommendations = {}
    for cust_id in customers_list:
        try:
            idx = row_of[cust_id]
        except KeyError:
            raise ValueError(f"{cust_id!r} is not in customer_ids") from None

        # Remove the customer's own entry by index. Relying on it sorting
        # first is wrong when another customer has an identical (or, through
        # floating-point rounding, slightly higher) similarity score.
        candidates = [
            (i, score)
            for i, score in enumerate(similarity_matrix[idx])
            if i != idx
        ]
        # Stable sort: equally similar customers stay in customer_ids order
        candidates.sort(key=lambda pair: pair[1], reverse=True)

        recommendations[cust_id] = [
            (customer_ids[i], score) for i, score in candidates[:limit]
        ]
    return recommendations

# Recommend lookalikes for the first 20 customers, in profile order
customer_ids = customer_profiles['CustomerID'].tolist()
recommendations = recommend_similar(customer_ids[:20], similarity_matrix, customer_ids)

# Flatten {customer: [(similar_id, score), ...]} into one row per pair
lookalike_data = [
    [source_id, match_id, match_score]
    for source_id, matches in recommendations.items()
    for match_id, match_score in matches
]

# Write the pairs out as "Lookalike.csv"
lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'SimilarCustomerID', 'SimilarityScore'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike.csv has been generated.")

Lookalike.csv has been generated.
