In [3]:
pip install pandas numpy matplotlib seaborn scikit-learn



In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

print("Customers Data:")
print(customers.head(), '\n')

print("Products Data:")
print(products.head(), '\n')

print("Transactions Data:")
print(transactions.head(), '\n')

Customers Data:
  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15 

Products Data:
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31 

Transactions Data:
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C012

In [16]:
#Merge Transactions with Customers and Products
merged_data = transactions_df.merge(customers_df, on="CustomerID", how="inner")
merged_data = merged_data.merge(products_df, on="ProductID", how="inner")

#Create a Customer-Product Matrix
customer_product_matrix = merged_data.groupby(["CustomerID", "Category"]).size().unstack(fill_value=0)

#Calculate Cosine Similarity
similarity_matrix = cosine_similarity(customer_product_matrix)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_product_matrix.index, columns=customer_product_matrix.index)


In [17]:
#Extract Top 3 Lookalikes for the First 20 Customers
lookalike_map = {}
for customer_id in similarity_df.index[:20]:
    similar_customers = similarity_df.loc[customer_id].sort_values(ascending=False)[1:4]
    lookalike_map[customer_id] = [(sim_cust_id, score) for sim_cust_id, score in similar_customers.items()]

print("Top 3 Lookalikes for the first 20 customers:")
for customer, lookalikes in lookalike_map.items():
    print(f"{customer}: {lookalikes}")

Top 3 Lookalikes for the first 20 customers:
C0001: [('C0146', 0.9847319278346621), ('C0035', 0.9847319278346621), ('C0045', 0.9684002494125045)]
C0002: [('C0144', 0.9999999999999998), ('C0002', 0.9999999999999998), ('C0134', 0.9805806756909201)]
C0003: [('C0166', 1.0000000000000002), ('C0158', 1.0000000000000002), ('C0031', 1.0000000000000002)]
C0004: [('C0085', 0.984731927834662), ('C0047', 0.9782319760890368), ('C0172', 0.9686648999069226)]
C0005: [('C0007', 0.9999999999999999), ('C0197', 0.9999999999999999), ('C0069', 0.9761870601839527)]
C0006: [('C0135', 1.0000000000000002), ('C0147', 0.9797958971132715), ('C0139', 0.968962790249909)]
C0007: [('C0007', 0.9999999999999999), ('C0197', 0.9999999999999999), ('C0069', 0.9761870601839527)]
C0008: [('C0039', 0.9814954576223637), ('C0154', 0.9814954576223637), ('C0181', 0.9814954576223637)]
C0009: [('C0009', 0.9999999999999999), ('C0092', 0.9899494936611666), ('C0049', 0.9647638212377322)]
C0010: [('C0042', 0.9899494936611665), ('C0176',

In [18]:
#Prepare the Lookalike CSV
output = []
for cust_id, lookalikes in lookalike_map.items():
    for sim_cust_id, score in lookalikes:
        output.append([cust_id, sim_cust_id, score])

lookalike_df = pd.DataFrame(output, columns=["CustomerID", "SimilarCustomerID", "Score"])

In [19]:
#Save the Lookalike CSV
lookalike_csv_path = "Diva_Tejaswi_Lookalike.csv"
lookalike_df.to_csv(lookalike_csv_path, index=False)

print(f"Lookalike.csv created successfully! File path: {lookalike_csv_path}")

Lookalike.csv created successfully! File path: Diva_Tejaswi_Lookalike.csv
