In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [4]:
# Load the three input tables from the working directory, in a fixed order:
# customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)


In [5]:
# Attach customer and product attributes to every transaction by joining on
# the shared ID columns (pandas' default join type is used for both merges).
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)


In [6]:
# Build the customer x product matrix: one row per CustomerID, one column per
# ProductID, each cell holding the total Quantity purchased (0 when the
# customer never bought that product).
customer_product_matrix = pd.pivot_table(
    merged_data,
    values="Quantity",
    index="CustomerID",
    columns="ProductID",
    aggfunc="sum",
    fill_value=0,
)

In [7]:
# Add customer profile features.
# Region is a nominal category, so it is one-hot encoded rather than mapped to
# integer codes: integer codes would impose an arbitrary ordering/distance
# between regions once the features are scaled and compared by cosine similarity.
profile_data = customers.copy()
customer_features = pd.get_dummies(
    profile_data.set_index("CustomerID")["Region"],
    prefix="Region",
    dtype=float,  # numeric indicator columns so they scale like the quantity columns
)


In [8]:
# Append the profile features to the purchase matrix. The left join keeps
# exactly the customers present in the purchase matrix; any missing value
# after the join is replaced with 0.
combined_features = (
    customer_product_matrix
    .join(other=customer_features, how="left")
    .fillna(value=0)
)


In [9]:
# Standardize every feature column before computing similarities: fit the
# scaler on the combined features, then transform those same features.
scaler = StandardScaler()
scaler.fit(combined_features)
normalized_features = scaler.transform(combined_features)

In [10]:
# Step 4: Calculate Similarities
# Pairwise cosine similarity between every pair of customer feature rows;
# the result is a square array with one row and one column per customer.
similarity_matrix = cosine_similarity(X=normalized_features)


In [11]:
# Wrap the raw similarity array in a DataFrame labelled by customer on both
# axes, so scores can be looked up by customer ID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=combined_features.index,
    columns=combined_features.index,
)

In [12]:
# Step 5: Generate Lookalike Recommendations
def get_top_lookalikes(customer_id, top_n=3, similarity=None):
    """Return the customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up in the similarity table.
        top_n: Maximum number of lookalikes to return. Values <= 0 yield ``[]``.
        similarity: Square DataFrame of pairwise similarities, labelled by
            customer on both axes. Defaults to the notebook-level
            ``similarity_df`` when omitted.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered by descending
        score, with each score rounded to 4 decimals. The list is empty when
        ``customer_id`` is not present in the similarity table.
    """
    if similarity is None:
        similarity = similarity_df
    if customer_id not in similarity.index or top_n <= 0:
        return []

    # Remove the customer explicitly by label. Skipping "position 0 after
    # sorting" is not safe: another customer can tie at the maximum score and
    # sort ahead, which would return the customer as their own lookalike.
    scores = similarity[customer_id].drop(labels=customer_id, errors="ignore")

    # A stable sort keeps tied scores in their original order, so the result
    # is deterministic across runs.
    top_scores = scores.sort_values(ascending=False, kind="mergesort").head(top_n)
    return [(cust_id, round(score, 4)) for cust_id, score in top_scores.items()]

In [13]:
# Map each of the first 20 customers (by row order in the customers table) to
# its list of lookalikes. A customer missing from the similarity table maps
# to an empty list.
lookalikes = {
    customer_id: get_top_lookalikes(customer_id)
    for customer_id in customers["CustomerID"].iloc[:20]
}

In [14]:
# Step 6: Save Results
# One record per customer: the customer ID alongside its full list of
# (lookalike_id, score) pairs.
lookalike_list = [
    {"cust_id": cust, "lookalikes": matches}
    for cust, matches in lookalikes.items()
]
lookalike_df = pd.DataFrame(data=lookalike_list)

In [15]:
# Flatten the per-customer lookalike lists into long-format rows for the CSV:
# one row per (customer, lookalike) pair with its similarity score.
lookalike_csv_output = [
    {"cust_id": cust_id, "lookalike_id": lookalike_id, "score": score}
    for cust_id, lookalike_data in lookalikes.items()
    for lookalike_id, score in lookalike_data
]


In [16]:
# Build the output table with an explicit column set and order, so the CSV
# always carries the expected header even if no lookalike rows were produced.
lookalike_csv = pd.DataFrame(
    lookalike_csv_output,
    columns=["cust_id", "lookalike_id", "score"],
)
# index=False: the row index carries no information and is left out of the file.
lookalike_csv.to_csv("FirstName_LastName_Lookalike.csv", index=False)

In [17]:
# Confirmation message for the reader. The file name here is a hard-coded
# literal and must be kept in sync by hand with the path passed to to_csv.
print("Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'")

Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'
