# FirstName_LastName_Lookalike

# In [2]:
# Import necessary libraries for Lookalike Model
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import requests
import io

# Function to download CSV from Google Drive 'view' link
def download_csv_from_google_drive(url):
    """Download a CSV file from a Google Drive 'view' link and return its text.

    The file ID is extracted from the link and used to build a
    ``uc?export=download`` URL. If the first response is an HTML page that
    carries a ``confirm=`` token, the request is repeated once with that
    token appended.

    Args:
        url (str): The Google Drive 'view' link, e.g.
            ``https://drive.google.com/file/d/<file_id>/view``.

    Returns:
        str: The content of the CSV file, decoded as UTF-8.

    Raises:
        requests.HTTPError: If either request returns a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        UnicodeDecodeError: If the downloaded bytes are not valid UTF-8.
    """
    # Get the file ID from the URL. Prefer the segment right after '/d/',
    # which is unaffected by a trailing slash or a '?usp=sharing' suffix;
    # fall back to the second-to-last path segment for other link shapes.
    if '/d/' in url:
        file_id = url.split('/d/', 1)[1].split('/', 1)[0].split('?', 1)[0]
    else:
        file_id = url.split('/')[-2]
    download_url = f'https://drive.google.com/uc?export=download&id={file_id}'

    # One session for both requests so cookies set by the first response are
    # sent along with the confirmed request.
    with requests.Session() as session:
        response = session.get(download_url, timeout=30)
        # Fail loudly instead of handing an error page to the CSV parser.
        response.raise_for_status()

        # Only treat the response as a confirmation page when it is HTML and
        # actually contains a 'confirm=' token. Matching the bare word
        # 'confirm' would misfire on CSV data that happens to contain it and
        # then crash on the split below.
        content_type = response.headers.get('Content-Type', '')
        if 'text/html' in content_type and 'confirm=' in response.text:
            confirm_code = response.text.split('confirm=')[1].split('&')[0]
            download_url = f'https://drive.google.com/uc?export=download&id={file_id}&confirm={confirm_code}'
            response = session.get(download_url, timeout=30)
            response.raise_for_status()

    return response.content.decode('utf-8')


# Load the data: each table is downloaded from Google Drive as CSV text and
# then parsed into a DataFrame from an in-memory text buffer.
CUSTOMERS_URL = 'https://drive.google.com/file/d/1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE/view'
PRODUCTS_URL = 'https://drive.google.com/file/d/1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0/view'
TRANSACTIONS_URL = 'https://drive.google.com/file/d/1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF/view'

# Customers
customers_csv_data = download_csv_from_google_drive(CUSTOMERS_URL)
customers_df = pd.read_csv(filepath_or_buffer=io.StringIO(customers_csv_data))

# Products (the misspelled variable name is kept so existing references to it
# keep working)
prodcuts_csv_data = download_csv_from_google_drive(PRODUCTS_URL)
products_df = pd.read_csv(filepath_or_buffer=io.StringIO(prodcuts_csv_data))

# Transactions
transactions_csv_data = download_csv_from_google_drive(TRANSACTIONS_URL)
transactions_df = pd.read_csv(filepath_or_buffer=io.StringIO(transactions_csv_data))

# Join transactions to customers on CustomerID (pandas' default inner join,
# so only transactions whose CustomerID appears in customers_df are kept).
merged_df = transactions_df.merge(customers_df, on='CustomerID')

# Customer x product matrix: one row per CustomerID, one column per ProductID,
# each cell the summed Quantity, with 0 where a customer never bought a product.
purchase_data = pd.pivot_table(
    merged_df,
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

# Pairwise cosine similarity between customer rows; row/column i of the result
# corresponds to position i of purchase_data.index.
similarity_matrix = cosine_similarity(purchase_data)

# Get top 3 similar customers for each of the first 20 customers.
# Lookalike1/Score1 is the most similar customer, Lookalike3/Score3 the least
# similar of the three.
top_n = 3
lookalike_data = []

# Positions in purchase_data.index line up with rows of similarity_matrix,
# because the matrix was computed from purchase_data's rows.
for customer_idx, customer_id in enumerate(purchase_data.index[:20]):  # First 20 customers
    similarities = similarity_matrix[customer_idx]

    # Rank all customers from most to least similar. A stable sort keeps tied
    # scores in index order so the output is deterministic.
    ranked_indices = (-similarities).argsort(kind='stable')

    # Drop the customer itself by position rather than assuming it sorts last:
    # another customer with an identical purchase vector ties with self, and
    # a positional slice could then keep self and discard a real neighbour.
    similar_customer_indices = ranked_indices[ranked_indices != customer_idx][:top_n]
    similar_customers = purchase_data.index[similar_customer_indices]
    similarity_scores = similarities[similar_customer_indices]

    # Store results. When fewer than top_n other customers exist, the missing
    # slots are filled with None so every row has the same columns.
    row = {'CustomerID': customer_id}
    for rank in range(top_n):
        has_match = rank < len(similar_customers)
        row[f'Lookalike{rank + 1}'] = similar_customers[rank] if has_match else None
        row[f'Score{rank + 1}'] = similarity_scores[rank] if has_match else None
    lookalike_data.append(row)

# Create DataFrame and save as CSV
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)