In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load your dataset
df = pd.read_csv("Dataset.csv", encoding="ISO-8859-1")
df.head()


# Step 1: Prepare the dataset
# Create a unique customer ID for tracking (optional)
df['Customer_ID'] = range(1, len(df) + 1)

# Step 2: Create a User-Product Matrix
# Columns for product purchases
product_columns = [
    'Product_Category_Appliances',
    'Product_Category_Electronics',
    'Product_Category_Groceries',
    'Product_Category_Personal_Care',
    'Product_Category_Clothing'
]

# Convert 'YES/NO' to 1/0 for easier computation
df[product_columns] = df[product_columns].applymap(lambda x: 1 if x == "YES" else 0)

# Create a user-item matrix
user_product_matrix = df[product_columns].values

# Step 3: Calculate Cosine Similarity between Users
user_similarity = cosine_similarity(user_product_matrix)

# Step 4: Recommendation Function
def recommend_products(customer_id, num_recommendations=3):
    # Get similar users
    similar_users = user_similarity[customer_id - 1]  # Customer_ID is 1-indexed
    similar_users_sorted = np.argsort(similar_users)[::-1]  # Sort in descending order
    
    # Aggregate product preferences from similar users
    aggregated_preferences = np.sum(user_product_matrix[similar_users_sorted], axis=0)

    # Exclude products the user already purchased
    user_purchases = user_product_matrix[customer_id - 1]
    recommended_products = (aggregated_preferences * (1 - user_purchases))

    # Get the top recommended products
    product_indices = recommended_products.argsort()[::-1][:num_recommendations]
    product_names = [product_columns[i] for i in product_indices]
    
    return product_names

# Step 5: Test the Recommendation System
customer_id = 1  # Change to any customer ID you want to recommend for
recommended_products = recommend_products(customer_id)
print(f"Recommended products for customer {customer_id}: {recommended_products}")


Recommended products for customer 1: ['Product_Category_Clothing', 'Product_Category_Personal_Care', 'Product_Category_Groceries']


In [5]:
# Step 5: Test the Recommendation System
customer_id = 15 # Change to any customer ID you want to recommend for
recommended_products = recommend_products(customer_id)
print(f"Recommended products for customer {customer_id}: {recommended_products}")

Recommended products for customer 15: ['Product_Category_Personal_Care', 'Product_Category_Appliances', 'Product_Category_Groceries']


In [9]:
# Step 5: Test the Recommendation System
customer_id = 2 # Change to any customer ID you want to recommend for
recommended_products = recommend_products(customer_id)
print(f"Recommended products for customer {customer_id}: {recommended_products}")

Recommended products for customer 2: ['Product_Category_Clothing', 'Product_Category_Personal_Care', 'Product_Category_Groceries']


In [7]:
# Step 5: Test the Recommendation System
customer_id = 51  # Change to any customer ID you want to recommend for
recommended_products = recommend_products(customer_id)
print(f"Recommended products for customer {customer_id}: {recommended_products}")

Recommended products for customer 51: ['Product_Category_Appliances', 'Product_Category_Clothing', 'Product_Category_Personal_Care']


In [8]:
df.head(5)

Unnamed: 0,Country,Online_Consumer,Age,Annual_Salary,Gender,Education,Payment_Method_Credit/Debit,Living_Region,Online_Service_Preference,AI_Endorsement,...,AI_Tools_Used_Virtual_Assistant,AI_Tools_Used_Voice&Photo_Search,Payment_Method_COD,Payment_Method_Ewallet,Product_Category_Appliances,Product_Category_Electronics,Product_Category_Groceries,Product_Category_Personal_Care,Product_Category_Clothing,Customer_ID
0,INDIA,YES,Gen X,Medium High,Female,Masters Degree,NO,Metropolitan,NO,YES,...,YES,YES,YES,NO,1,1,1,1,1,1
1,INDIA,YES,Gen Z,Low,Male,University Graduate,YES,Metropolitan,NO,YES,...,YES,YES,YES,YES,1,1,0,0,0,2
2,INDIA,YES,Gen X,Medium High,Male,University Graduate,YES,Rural Areas,YES,YES,...,YES,NO,YES,YES,1,1,1,0,0,3
3,INDIA,YES,Gen X,High,Male,University Graduate,YES,Rural Areas,YES,YES,...,NO,NO,YES,YES,1,1,1,0,1,4
4,INDIA,YES,Gen Z,Low,Male,University Graduate,NO,Rural Areas,YES,YES,...,NO,NO,YES,YES,1,0,1,0,1,5


In [32]:
import plotly.express as px

# Reset the index to make it a column
df_reset = df.reset_index()

# Create two separate DataFrames: one for before and one for after purchases
df_before = df_reset.melt(id_vars=["index"], 
                          value_vars=[col for col in product_columns], 
                          var_name="Product_Type", 
                          value_name="Products_Purchased")

# Plot the "Before" product purchases
fig_before = px.line(
    df_before,
    x="index",
    y="Products_Purchased",
    color="Product_Type",
    title="Product Purchases Before Recommendations",
    labels={"index": "Customer Index", "Products_Purchased": "Number of Products Purchased"},
    template="plotly_dark",
)

# Plot the "After" product purchases

# Show both plots
fig_before.show()

In [33]:
# Step 1: Convert product columns to numeric
for col in product_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)

# Simulate Recommendations - Add more products after recommendations
np.random.seed(42)
for col in product_columns:
    df[col + '_After'] = df[col] + np.random.binomial(1, 0.2, size=len(df))

# Continue with visualization as before


In [34]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load your dataset
df = pd.read_csv("Dataset.csv", encoding="ISO-8859-1")
df.head()

# Step 1: Initial Data - Calculate product engagement before recommendations
product_columns = [
    'Product_Category_Appliances', 'Product_Category_Electronics',
    'Product_Category_Groceries', 'Product_Category_Personal_Care',
    'Product_Category_Clothing'
]

# Calculate the number of unique products purchased per customer
df['Products_Purchased_Before'] = df[product_columns].sum(axis=1)

# show graph of products purchased before recommendations
fig_before = px.histogram(
    df,
    x="Products_Purchased_Before",
    title="Product Purchases Before Recommendations",
    labels={"Products_Purchased_Before": "Number of Products Purchased"},
    template="plotly_dark",
)
fig_before.show()


In [37]:
import pandas as pd
import plotly.graph_objects as go

# Load your dataset
df = pd.read_csv("Dataset.csv", encoding="ISO-8859-1")

# Step 1: Convert 'YES' to 1 and 'NO' to 0
product_columns = [
    'Product_Category_Appliances', 'Product_Category_Electronics',
    'Product_Category_Groceries', 'Product_Category_Personal_Care',
    'Product_Category_Clothing'
]

# Replace 'YES' with 1 and 'NO' with 0
df[product_columns] = df[product_columns].replace({'YES': 1, 'NO': 0})

# Step 2: Calculate the number of products purchased per customer
df['Products_Purchased_Before'] = df[product_columns].sum(axis=1)

# Step 3: Create an advanced Plotly graph with custom annotations
fig = go.Figure()

# Add histogram trace
fig.add_trace(go.Histogram(
    x=df['Products_Purchased_Before'],
    nbinsx=6,  # Set the number of bins
    marker=dict(color='royalblue'),
    opacity=0.75
))

# Add annotations for 0 and 5
fig.add_annotation(
    x=0,
    y=0.05,  # Adjust based on your data scale
    text="0: No Products Purchased",
    showarrow=True,
    arrowhead=2,
    ax=-40,
    ay=-30,
    font=dict(size=12, color="white"),
    align="center",
)

fig.add_annotation(
    x=5,
    y=0.05,  # Adjust based on your data scale
    text="5: All Categories Purchased",
    showarrow=True,
    arrowhead=2,
    ax=40,
    ay=-30,
    font=dict(size=12, color="white"),
    align="center",
)

# Update layout for a more polished look
fig.update_layout(
    title="Product Purchases Before Recommendations",
    xaxis_title="Number of Products Purchased",
    yaxis_title="Frequency",
    template="plotly_dark",
    showlegend=False,
    plot_bgcolor="rgba(0, 0, 0, 0)",
    paper_bgcolor="rgba(0, 0, 0, 0)",
    font=dict(color="white"),
    margin=dict(t=50, b=50, l=50, r=50),
)

# Show the plot
fig.show()



In [29]:
# Step 1: Convert product columns to numeric
for col in product_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)

# Simulate Recommendations - Add more products after recommendations
np.random.seed(42)
for col in product_columns:
    df[col + '_After'] = df[col] + np.random.binomial(1, 0.2, size=len(df))

# Continue with visualization as before


In [30]:
# Step 1: Convert product-related columns to numeric
for col in product_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)

# Step 2: Generate "After Recommendation" data
np.random.seed(42)
for col in product_columns:
    df[col + '_After'] = df[col] + np.random.binomial(1, 0.2, size=len(df))

# Step 3: Ensure all "After" columns are numeric
for col in [col + '_After' for col in product_columns]:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)

# Step 4: Visualize the difference in product purchases
import plotly.express as px

fig1 = px.bar(
    df,
    x=df.index,
    y=[col + '_After' for col in product_columns],
    title="Product Purchases After Recommendations",
    labels={"x": "Customer Index", "y": "Number of Products Purchased"},
    barmode="group",
    template="plotly_dark",
)

fig1.show()






In [28]:
# Reset the index to make it a column
df_reset = df.reset_index()

# Reshape the DataFrame to long format for line plot (melt the before/after columns)
df_melted = df_reset.melt(id_vars=["index"], 
                          value_vars=[col for col in product_columns] + [col + '_After' for col in product_columns], 
                          var_name="Product_Type", 
                          value_name="Products_Purchased")

# Plot using Plotly
fig2 = px.line(
    df_melted,
    x="index",
    y="Products_Purchased",
    color="Product_Type",
    title="Product Purchases Before and After Recommendations",
    labels={"index": "Customer Index", "Products_Purchased": "Number of Products Purchased"},
    template="plotly_dark",
)

fig2.show()



KeyError: "The following 'value_vars' are not present in the DataFrame: ['Product_Category_Appliances_After', 'Product_Category_Clothing_After', 'Product_Category_Electronics_After', 'Product_Category_Groceries_After', 'Product_Category_Personal_Care_After']"

In [16]:
import plotly.express as px

# Reset the index to make it a column
df_reset = df.reset_index()

# Create two separate DataFrames: one for before and one for after purchases
df_before = df_reset.melt(id_vars=["index"], 
                          value_vars=[col for col in product_columns], 
                          var_name="Product_Type", 
                          value_name="Products_Purchased")

df_after = df_reset.melt(id_vars=["index"], 
                         value_vars=[col + '_After' for col in product_columns], 
                         var_name="Product_Type", 
                         value_name="Products_Purchased")

# Plot the "Before" product purchases
fig_before = px.line(
    df_before,
    x="index",
    y="Products_Purchased",
    color="Product_Type",
    title="Product Purchases Before Recommendations",
    labels={"index": "Customer Index", "Products_Purchased": "Number of Products Purchased"},
    template="plotly_dark",
)

# Plot the "After" product purchases
fig_after = px.line(
    df_after,
    x="index",
    y="Products_Purchased",
    color="Product_Type",
    title="Product Purchases After Recommendations",
    labels={"index": "Customer Index", "Products_Purchased": "Number of Products Purchased"},
    template="plotly_dark",
)

# Show both plots
fig_before.show()
fig_after.show()


In [20]:
import pandas as pd
import plotly.express as px

# Step 1: Create a long-format DataFrame for plotting
product_data = []

# Collect 'Before' and 'After' purchase data
for col in product_columns:
    product_data.append(pd.DataFrame({
        'Customer': df.index,
        'Product': col,
        'Status': 'Before',
        'Quantity': df[col]
    }))
    product_data.append(pd.DataFrame({
        'Customer': df.index,
        'Product': col,
        'Status': 'After',
        'Quantity': df[col + '_After']
    }))

# Combine all data into a single DataFrame
long_df = pd.concat(product_data, ignore_index=True)

# Step 2: Plot the data
fig = px.bar(
    long_df,
    x="Customer",
    y="Quantity",
    color="Status",
    facet_col="Product",
    title="Product Purchases Before and After Recommendations",
    labels={"Quantity": "Number of Products Purchased"},
    barmode="group",
    template="plotly_dark"
)

# Show the figure
fig.show()


In [26]:
df.head(5)

Unnamed: 0,Country,Online_Consumer,Age,Annual_Salary,Gender,Education,Payment_Method_Credit/Debit,Living_Region,Online_Service_Preference,AI_Endorsement,...,Product_Category_Electronics,Product_Category_Groceries,Product_Category_Personal_Care,Product_Category_Clothing,Products_Purchased_Before,Product_Category_Appliances_After,Product_Category_Electronics_After,Product_Category_Groceries_After,Product_Category_Personal_Care_After,Product_Category_Clothing_After
0,INDIA,YES,Gen X,Medium High,Female,Masters Degree,NO,Metropolitan,NO,YES,...,0,0,0,0,YESYESYESYESYES,0,0,0,0,0
1,INDIA,YES,Gen Z,Low,Male,University Graduate,YES,Metropolitan,NO,YES,...,0,0,0,0,YESYESNONONO,1,0,1,0,1
2,INDIA,YES,Gen X,Medium High,Male,University Graduate,YES,Rural Areas,YES,YES,...,0,0,0,0,YESYESYESNONO,0,0,0,0,0
3,INDIA,YES,Gen X,High,Male,University Graduate,YES,Rural Areas,YES,YES,...,0,0,0,0,YESYESYESNOYES,0,0,0,1,0
4,INDIA,YES,Gen Z,Low,Male,University Graduate,NO,Rural Areas,YES,YES,...,0,0,0,0,YESNOYESNOYES,0,0,0,0,0


In [38]:
# Reset the index to make it a column
df_reset = df.reset_index()

# Reshape the data to long format (melt)
df_melted = df_reset.melt(id_vars=["index"], value_vars=["Products_Purchased_Before", "Products_Purchased_After"], var_name="Purchase_Time", value_name="Products_Purchased")

# Plot using Plotly
fig1 = px.bar(df_melted, x='index', y='Products_Purchased', color='Purchase_Time', title="Product Purchases Before and After")
fig1.show()


KeyError: "The following 'value_vars' are not present in the DataFrame: ['Products_Purchased_After']"

In [22]:
# Step 3: Visualize the difference in product purchases
fig1 = px.bar(
    df,
    x=df.index,
    y=['Products_Purchased_Before', 'Products_Purchased_After'],
    title="Comparison of Products Purchased Before and After Recommendations",
    labels={"value": "Number of Products Purchased", "index": "Customer ID"},
    barmode="group",
    template="plotly_white"
)
fig1.show()



ValueError: All arguments should have the same length. The length of argument `y` is 2, whereas the length of  previously-processed arguments ['index'] is 656

In [None]:
# Step 4: Satisfaction Level Comparison (Simulated Data for Demo)
# Simulate satisfaction scores before and after
np.random.seed(42)
df['Satisfaction_Before'] = np.random.randint(60, 80, size=len(df))
df['Satisfaction_After'] = df['Satisfaction_Before'] + np.random.randint(5, 15, size=len(df))

# Step 5: Line Chart to Show Satisfaction Improvement
fig2 = px.line(
    df, 
    x=df.index, 
    y=['Satisfaction_Before', 'Satisfaction_After'],
    title="Satisfaction Levels Before and After Recommendations",
    labels={"value": "Satisfaction Score", "index": "Customer ID"},
    template="plotly_white"
)
fig2.show()