<a href="https://colab.research.google.com/github/dipalishinde112003/Dipali_Shinde/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load the datasets
# The three CSVs are read in a fixed order (customers, products, transactions)
# from the current working directory and bound to one name each.
customer_data, product_data, transaction_data = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# Step 2: Preprocessing
# Builds `customer_features`: the identifying columns (CustomerID, CustomerName)
# followed by the numeric features used for similarity (one-hot Region columns
# and DaysSinceSignup).

# One-hot encode 'Region' (sparse_output=False requests a dense array so it can
# be wrapped in a DataFrame directly).
encoder = OneHotEncoder(sparse_output=False)
region_encoded = encoder.fit_transform(customer_data[['Region']])
region_columns = encoder.get_feature_names_out(['Region'])

# Convert 'SignupDate' to days since signup.
# NOTE(review): pd.Timestamp.now() makes the stored DaysSinceSignup values depend
# on the day the notebook is run; pin a fixed reference date here if the raw
# column ever needs to be reproducible.
customer_data['SignupDate'] = pd.to_datetime(customer_data['SignupDate'])
customer_data['DaysSinceSignup'] = (pd.Timestamp.now() - customer_data['SignupDate']).dt.days

# Combine all features.
# pd.concat(axis=1) aligns rows by index label, and a DataFrame built from a bare
# array gets a fresh 0..n-1 index. Reusing customer_data's index keeps each
# encoded row attached to its own customer even if customer_data was filtered,
# sorted, or re-indexed before this point.
region_features = pd.DataFrame(
    region_encoded,
    columns=region_columns,
    index=customer_data.index,
)
customer_features = pd.concat([
    customer_data[['CustomerID', 'CustomerName']],
    region_features,
    customer_data[['DaysSinceSignup']]
], axis=1)

# Step 3: Normalize features
# Only the numeric feature columns are scaled; the two identifier columns
# (CustomerID and CustomerName) are dropped by name.
numeric_features = customer_features.drop(columns=['CustomerID', 'CustomerName'])
scaler = StandardScaler()
scaler.fit(numeric_features)
normalized_features = scaler.transform(numeric_features)

# Step 4: Compute similarity
# Pairwise cosine similarity between every pair of customers; row i / column j
# follow the row order of customer_features.
similarity_matrix = cosine_similarity(normalized_features)

# Step 5: Find top 3 similar customers for each customer
# Produces `lookalike_map`: CustomerID -> list of (other CustomerID, score)
# tuples, ordered from most to least similar, never including the customer itself.
TOP_N_LOOKALIKES = 3

# Materialise the IDs once so each lookup is a plain list index instead of a
# repeated Series.iloc call; row i of similarity_matrix belongs to customer_ids[i].
customer_ids = customer_features['CustomerID'].tolist()

lookalike_map = {}
for i, customer_id in enumerate(customer_ids):
    # Pair every other customer's position with its score, skipping position i
    # (self-similarity), then keep the highest-scoring entries. sorted() is
    # stable, so ties keep their original row order.
    ranked = sorted(
        ((j, score) for j, score in enumerate(similarity_matrix[i]) if j != i),
        key=lambda pair: pair[1],
        reverse=True,
    )[:TOP_N_LOOKALIKES]

    # round() on a NumPy float yields a NumPy scalar; converting to a built-in
    # float keeps the list's text form clean (e.g. 0.9123 rather than
    # np.float64(0.9123) under NumPy >= 2) when the map is written to CSV.
    lookalike_map[customer_id] = [
        (customer_ids[j], float(round(score, 4)))
        for j, score in ranked
    ]

# Step 6: Create Lookalike.csv
# One row per customer for the first 20 rows of customer_features: the customer's
# ID and its list of (lookalike ID, score) pairs taken from lookalike_map.
first_customer_ids = customer_features['CustomerID'][:20]
lookalike_records = [
    dict(CustomerID=current_id, Lookalikes=lookalike_map[current_id])
    for current_id in first_customer_ids
]
lookalike_df = pd.DataFrame(lookalike_records)

# index=False keeps the DataFrame's row index out of the file.
lookalike_df.to_csv('Lookalike.csv', index=False)
print("Lookalike.csv generated successfully!")


Lookalike.csv generated successfully!
