## LookAlike

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the three input tables from the working directory:
#   df1 -> Customers.csv, df2 -> Transactions.csv, df3 -> Products.csv
df1, df2, df3 = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

In [6]:
# Attach the customer columns to every transaction row (default inner join,
# so only CustomerIDs present in both tables survive).
customer_transactions = df2.merge(df1, on='CustomerID')

# Collapse to one row per customer:
#   total_spending      - sum of TotalValue
#   num_transactions    - count of non-null TotalValue entries
#   product_preferences - number of distinct ProductIDs bought
# as_index=False keeps CustomerID as a regular column with a default index.
customer_data = customer_transactions.groupby('CustomerID', as_index=False).agg(
    total_spending=('TotalValue', 'sum'),
    num_transactions=('TotalValue', 'count'),
    product_preferences=('ProductID', 'nunique'),
)

In [8]:
# Bring the product columns (including Category) onto each transaction row.
product_data = customer_transactions.merge(df3, on='ProductID')

# Customer x Category table of purchase counts, indexed by CustomerID;
# customer/category pairs that never occur are filled with 0.
product_category_data = (
    product_data
    .groupby('CustomerID')['Category']
    .value_counts()
    .unstack(fill_value=0)
)

In [9]:
# Build one feature row per customer: spending aggregates + per-category counts.
#
# customer_data carries CustomerID as a column (default integer index), while
# product_category_data is indexed by CustomerID. A column-wise pd.concat
# aligns on the index, so those two frames would never line up: the result
# would hold the union of both indexes, with the category counts on separate
# rows from the spending aggregates and the gaps masked by fillna(0).
# Joining on the CustomerID key puts each customer's category counts on that
# customer's own row and keeps customer_data's row order.
combined_data = (
    customer_data
    .join(product_category_data, on='CustomerID')  # left join: column -> index
    .fillna(0)  # customers with no row in the category table get zero counts
)


In [12]:
# Keep only the float/int feature columns; non-numeric columns are dropped.
numeric_data = combined_data.select_dtypes(include=[float, int])

# Standardise every feature column, then wrap the resulting array back into a
# DataFrame. Only the column labels are passed on, so normalized_data gets a
# fresh default index whose positions follow numeric_data's row order.
scaler = StandardScaler().fit(numeric_data)
normalized_data = pd.DataFrame(
    scaler.transform(numeric_data),
    columns=numeric_data.columns,
)





In [13]:
# Pairwise cosine similarity between all rows of normalized_data.
# cosine_sim[i][j] is the similarity between row i and row j, so row/column
# positions follow normalized_data's row order.
cosine_sim = cosine_similarity(normalized_data)


In [16]:
# Map each of the first 20 customers in df1 to its most similar customers.
#
# Result: lookalike_map[customer_id] -> list of (other_customer_id, score),
# ordered from most to least similar, at most TOP_N entries.
#
# Rows/columns of cosine_sim follow the row order of combined_data (the frame
# the features were built from), NOT the row order of df1: customers without
# transactions are absent from the feature table and the groupby orders rows
# by CustomerID. Positions are therefore resolved through combined_data's
# CustomerID column rather than df1's index.
TOP_N = 3

row_customer_ids = combined_data['CustomerID'].tolist()
row_of_customer = {cust_id: pos for pos, cust_id in enumerate(row_customer_ids)}

lookalike_map = {}

for customer_id in df1['CustomerID'][:20]:
    customer_idx = row_of_customer.get(customer_id)
    if customer_idx is None:
        # No feature row for this customer (e.g. no transactions): nothing to
        # compare against, so record an empty lookalike list.
        lookalike_map[customer_id] = []
        continue

    similarity_scores = cosine_sim[customer_idx]

    # Rank all rows from most to least similar and drop the customer's own row
    # explicitly; relying on "self is always the single highest score" breaks
    # on ties, duplicate feature vectors, or an all-zero vector.
    ranked_idx = [
        idx for idx in similarity_scores.argsort()[::-1] if idx != customer_idx
    ][:TOP_N]

    lookalike_map[customer_id] = [
        (row_customer_ids[idx], similarity_scores[idx]) for idx in ranked_idx
    ]

In [17]:
# Flatten lookalike_map into a two-column table: one row per customer, with the
# lookalikes rendered as a single "id: score, id: score, ..." string
# (scores formatted to two decimals).
lookalike_rows = [
    (customer, ', '.join(f'{cust_id}: {score:.2f}' for cust_id, score in matches))
    for customer, matches in lookalike_map.items()
]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])

# Persist the table without the DataFrame index column.
lookalike_df.to_csv('Ravi_Kumar_Lookalike.csv', index=False)

print("Task Finished Boss!!!!!!!!.")

Task Finished Boss!!!!!!!!.
