In [1]:
import os

import mysql.connector
import pandas as pd

# Database connection details.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be stored in the notebook; the fallbacks are the
# original local-development values.
HOST = os.environ.get("MYSQL_HOST", "localhost")
USER = os.environ.get("MYSQL_USER", "root")
PASSWORD = os.environ.get("MYSQL_PASSWORD", "root")
DATABASE = os.environ.get("MYSQL_DATABASE", "eco_friendly")

# Bound before the try block so the finally clause can always inspect it,
# even when mysql.connector.connect() itself raises.
connection = None

try:
    # Connect to MySQL database
    connection = mysql.connector.connect(
        host=HOST,
        user=USER,
        password=PASSWORD,
        database=DATABASE
    )

    if connection.is_connected():
        print("✅ Connected to MySQL Database")

        # Query to fetch all data from the Products table
        query = "SELECT * FROM Products"

        # Load data into a Pandas DataFrame
        df = pd.read_sql(query, connection)

        print("✅ Data successfully extracted from MySQL")
        print(df.head())  # Display first 5 rows

except mysql.connector.Error as e:
    print(f"❌ Error: {e}")

finally:
    # Only close a connection that was actually established.
    if connection is not None and connection.is_connected():
        connection.close()
        print("✅ MySQL connection is closed")


✅ Connected to MySQL Database


  df = pd.read_sql(query, connection)


✅ Data successfully extracted from MySQL
   Product_ID                 Product_Name    Category          Material  \
0           1            Bamboo Toothbrush  Toothbrush            Bamboo   
1           2  Recycled Plastic Toothbrush  Toothbrush  Recycled Plastic   
2           3        Cornstarch Toothbrush  Toothbrush        Cornstarch   
3           4          Silicone Toothbrush  Toothbrush          Silicone   
4           5  Charcoal-infused Toothbrush  Toothbrush  Charcoal-infused   

   Price                                        Description     Brand  \
0  45.67  Eco-friendly Toothbrush made from Bamboo. Sust...  EcoBrand   
1  24.41  Eco-friendly Toothbrush made from Recycled Pla...  EcoBrand   
2  16.19  Eco-friendly Toothbrush made from Cornstarch. ...  EcoBrand   
3  48.35  Eco-friendly Toothbrush made from Silicone. Su...  EcoBrand   
4  15.17  Eco-friendly Toothbrush made from Charcoal-inf...  EcoBrand   

   Availability  User_ID  Ratings  
0      In Stock      100   

### CONTENT-BASED FILTERING

In [2]:
# Show the column labels of the loaded products DataFrame.
df.columns

Index(['Product_ID', 'Product_Name', 'Category', 'Material', 'Price',
       'Description', 'Brand', 'Availability', 'User_ID', 'Ratings'],
      dtype='object')

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Ensure column names are clean
df.columns = df.columns.str.strip()

# Debugging: Check available categories
print("Available Categories:", df['Category'].unique())

# Step 2: Ask User to Select a Category
selected_category = input("Enter the category you want recommendations from: ").strip()

# Step 3: Filter Products by Selected Category.
# The comparison ignores surrounding whitespace and letter case so that
# e.g. "bag" selects the "Bag" category; missing categories never match.
category_mask = df['Category'].str.strip().str.casefold() == selected_category.casefold()
df_filtered = df[category_mask].reset_index(drop=True)

# Debugging: Check if filtering worked
print(f"\nNumber of products in '{selected_category}':", len(df_filtered))
if df_filtered.empty:
    # Raise instead of calling exit(): exit() is aimed at the interpreter/kernel
    # rather than at stopping this cell, whereas an exception aborts the cell
    # before TF-IDF is fitted on an empty frame and leaves the kernel usable.
    raise ValueError("No products found in this category. Please try again.")

# Debugging: Check if 'Product_Name' exists
print("\nColumns in df_filtered:", df_filtered.columns.tolist())
if 'Product_Name' not in df_filtered.columns:
    raise KeyError("Error: 'Product_Name' column is missing. Please check the dataset.")

# Step 5: Combine 'Category', 'Material', and 'Description' into a single feature.
# Missing values are replaced by empty strings first; otherwise one NaN would
# turn the whole combined text into NaN, which TfidfVectorizer cannot process.
text_columns = df_filtered[['Category', 'Material', 'Description']].fillna('').astype(str)
df_filtered['combined_features'] = (
    text_columns['Category'] + " " + text_columns['Material'] + " " + text_columns['Description']
)

# Step 6: Convert text data into numerical form (TF-IDF Vectorization)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_filtered['combined_features'])

# Step 7: Compute Similarity Matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Step 8: Create Recommendation Function
def recommend_products(product_name, num_recommendations=5):
    """Return the products most similar to ``product_name`` within the selected category.

    Reads the module-level ``df_filtered`` (products of the chosen category)
    and ``cosine_sim`` (pairwise similarity matrix whose rows follow the row
    order of ``df_filtered``).

    Args:
        product_name: Exact value to look up in the 'Product_Name' column.
        num_recommendations: Maximum number of products to return.

    Returns:
        A DataFrame with up to ``num_recommendations`` rows ordered from most
        to least similar, or ``None`` (after printing a message) when the
        product is not present in ``df_filtered``.
    """
    # Positional row(s) holding the requested product.
    matches = np.flatnonzero(df_filtered['Product_Name'].to_numpy() == product_name)
    if matches.size == 0:
        print("Product not found in the selected category.")
        return None

    idx = int(matches[0])

    # Similarity of the selected product to every product in the category.
    scores = np.asarray(cosine_sim[idx], dtype=float)

    # Rank by descending score. The stable sort keeps the original row order
    # among ties. The query product is removed explicitly: dropping "the first
    # ranked row" is wrong whenever another product ties with it, because the
    # query product is then not guaranteed to be ranked first.
    ranked = [int(i) for i in np.argsort(-scores, kind="stable") if i != idx]
    product_indices = ranked[:max(num_recommendations, 0)]

    # Return top recommended products
    return df_filtered[['Product_ID', 'User_ID', 'Product_Name', 'Category', 'Material',
                        'Price', 'Description', 'Brand', 'Availability', 'Ratings']].iloc[product_indices]

# Step 9: List the products of the chosen category, then read the seed product
print("\nAvailable Products in Category:", selected_category)
print(df_filtered['Product_Name'].tolist())

product_to_search = input(
    "Enter the product name for recommendations: "
).strip()

# Step 10: Rank the five most similar products (None means the name was unknown)
recommendations = recommend_products(product_to_search, num_recommendations=5)

if recommendations is not None:
    print("\nRecommended Products:")
    display(recommendations)


Available Categories: ['Toothbrush' 'Bag' 'Water Bottle' 'Clothing' 'Cutlery' 'Notebook' 'Shoes'
 'Toys' 'Straws' 'Phone Cases']


Enter the category you want recommendations from:  Bag



Number of products in 'Bag': 10

Columns in df_filtered: ['Product_ID', 'Product_Name', 'Category', 'Material', 'Price', 'Description', 'Brand', 'Availability', 'User_ID', 'Ratings']

Available Products in Category: Bag
['Jute Bag', 'Hemp Bag', 'Organic Cotton Bag', 'Recycled Plastic Bag', 'Cork Bag', 'Canvas Bag', 'Bamboo Fiber Bag', 'Paper Bag', 'Upcycled Denim Bag', 'Tyvek Bag']


Enter the product name for recommendations:  Organic Cotton Bag



Recommended Products:


Unnamed: 0,Product_ID,User_ID,Product_Name,Category,Material,Price,Description,Brand,Availability,Ratings
0,11,110,Jute Bag,Bag,Jute,8.28,Eco-friendly Bag made from Jute. Sustainable a...,EcoBrand,In Stock,8.77
1,12,111,Hemp Bag,Bag,Hemp,35.79,Eco-friendly Bag made from Hemp. Sustainable a...,EcoBrand,In Stock,8.47
4,15,114,Cork Bag,Bag,Cork,25.95,Eco-friendly Bag made from Cork. Sustainable a...,EcoBrand,In Stock,6.45
5,16,115,Canvas Bag,Bag,Canvas,16.58,Eco-friendly Bag made from Canvas. Sustainable...,EcoBrand,In Stock,8.64
7,18,117,Paper Bag,Bag,Paper,21.42,Eco-friendly Bag made from Paper. Sustainable ...,EcoBrand,In Stock,7.12


### COLLABORATIVE FILTERING

In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import jaccard
import ipywidgets as widgets
from IPython.display import display, clear_output

# This cell works on the existing `df`; the bare reference below raises
# NameError straight away if `df` has not been defined yet.
df

# Step 1: Read the user id from the keyboard
user_id = int(input("Enter User ID for recommendations: "))

# Step 2: Product picker listing every distinct product name
product_dropdown = widgets.Dropdown(options=df['Product_Name'].unique(), description="Select Product:", disabled=False)

# Step 3: Button that triggers the recommendation
recommend_button = widgets.Button(description="Get Recommendations", button_style="primary")

# Area in which the results are rendered
output = widgets.Output()

# Function to Calculate Jaccard Similarity
def calculate_jaccard_similarity(df):
    """Return the pairwise Jaccard similarity of products on binarised Price/Ratings.

    Each product is described by two binary flags: whether its Price and its
    Ratings lie strictly above the respective column median. The similarity of
    two products is |intersection| / |union| of their set flags. Two products
    with no flag set are treated as identical (similarity 1), which is what
    ``1 - scipy.spatial.distance.jaccard`` yields for two all-zero vectors in
    SciPy >= 1.2.

    Args:
        df: DataFrame with 'Price' and 'Ratings' columns. Values that cannot be
            parsed as numbers are treated as missing. ``df`` is not modified.

    Returns:
        A float ndarray of shape (len(df), len(df)).
    """
    # Coerce on a copy so text / decimal columns work and the caller's frame
    # stays untouched.
    numeric = df[['Price', 'Ratings']].apply(pd.to_numeric, errors='coerce')
    features = numeric.astype(float).to_numpy()

    n_items = len(features)
    if n_items == 0:
        return np.zeros((0, 0))

    # nanmedian: a single missing value must not turn the whole column median
    # into NaN. Missing values compare as "not above the median".
    binary_features = (features > np.nanmedian(features, axis=0)).astype(int)

    # Vectorised replacement for the nested per-pair loop.
    shared = binary_features @ binary_features.T          # |A ∩ B|
    active = binary_features.sum(axis=1)                  # |A|
    union = active[:, None] + active[None, :] - shared    # |A ∪ B|

    # Entries with an empty union keep the initial value 1.0.
    sim_matrix = np.ones((n_items, n_items))
    np.divide(shared, union, out=sim_matrix, where=union > 0)

    return sim_matrix

# Function to Get Recommendations
def recommend_products(user_id, product_name, num_recommendations=5):
    """Display the products most similar to ``product_name`` by Jaccard similarity.

    Reads the module-level ``df`` and renders into the module-level ``output``
    widget. Similarities come from ``calculate_jaccard_similarity(df)``.

    Args:
        user_id: Accepted for interface compatibility; it is not used in the
            ranking.
        product_name: Exact value to look up in the 'Product_Name' column.
        num_recommendations: Maximum number of products to show.

    Returns:
        None. The result (or a "not found" message) is written to ``output``.
    """
    # Positional row(s) of the selected product; positions rather than index
    # labels are needed because they address rows of the similarity matrix.
    matches = np.flatnonzero(df['Product_Name'].to_numpy() == product_name)
    if matches.size == 0:
        with output:
            clear_output()
            print("\n⚠️ Product not found in dataset.")
        return None

    product_idx = int(matches[0])

    # Compute Jaccard similarity
    similarity_matrix = calculate_jaccard_similarity(df)
    scores = np.asarray(similarity_matrix[product_idx], dtype=float)

    # Rank by descending score (stable, so ties keep row order) and drop the
    # selected product explicitly. With only two binary features many products
    # tie at similarity 1.0, so skipping "the first ranked row" would often
    # skip a different product and recommend the selected one itself.
    ranked = [int(i) for i in np.argsort(-scores, kind="stable") if i != product_idx]
    product_indices = ranked[:max(num_recommendations, 0)]

    # Display recommended products
    with output:
        clear_output()
        print("\n🔹 Recommended Products Based on:", product_name, " (Jaccard Similarity)")
        display(df.iloc[product_indices])

# Function to Handle Button Click
def on_button_click(b):
    """Button callback: recommend five products for the current dropdown choice.

    ``b`` is the clicked button passed in by the widget framework; it is not
    used. Nothing happens when the dropdown holds no (or an empty) value.
    """
    selected_product = product_dropdown.value
    if not selected_product:
        return
    recommend_products(user_id, selected_product, 5)

# Register on_button_click as the button's click handler
recommend_button.on_click(on_button_click)

# Render the product picker, the button and the output area below this cell
display(product_dropdown, recommend_button, output)


Enter User ID for recommendations:  102


Dropdown(description='Select Product:', options=('Bamboo Toothbrush', 'Recycled Plastic Toothbrush', 'Cornstar…

Button(button_style='primary', description='Get Recommendations', style=ButtonStyle())

Output()

### HYBRID FILTERING 

In [11]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import jaccard
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display, clear_output
# Input widgets: numeric user id, product picker, trigger button, result area
user_id_dropdown = widgets.IntText(
    value=1,
    description="User_ID:",
    disabled=False,
)
product_dropdown = widgets.Dropdown(
    options=df['Product_Name'].unique(),
    description="Select Product:",
    disabled=False,
)
recommend_button = widgets.Button(
    description="Get Recommendations",
    button_style="primary",
)
output = widgets.Output()
# Function to Compute Jaccard Similarity
def calculate_jaccard_similarity(df):
    """Return the pairwise Jaccard similarity of products on binarised Price/Ratings.

    Each product gets two binary flags: whether its Price and its Ratings lie
    strictly above the respective column median (missing or unparsable values
    count as "not above"). The similarity of two different products is
    |intersection| / |union| of their set flags; two products with no flag set
    are treated as identical (similarity 1). The diagonal is left at 0, i.e. a
    product's similarity to itself is not scored.

    Args:
        df: DataFrame with 'Price' and 'Ratings' columns. ``df`` is not
            modified; numeric coercion happens on a copy.

    Returns:
        A float ndarray of shape (len(df), len(df)).
    """
    # Work on a coerced copy rather than overwriting the caller's columns.
    numeric = df[['Price', 'Ratings']].apply(pd.to_numeric, errors='coerce')
    binary_features = np.where(numeric > numeric.median(), 1, 0)

    num_products = len(df)
    if num_products == 0:
        return np.zeros((0, 0))

    # Vectorised replacement for the nested per-pair loop.
    shared = binary_features @ binary_features.T          # |A ∩ B|
    active = binary_features.sum(axis=1)                  # |A|
    union = active[:, None] + active[None, :] - shared    # |A ∪ B|

    # Entries with an empty union keep the initial value 1.0.
    sim_matrix = np.ones((num_products, num_products))
    np.divide(shared, union, out=sim_matrix, where=union > 0)

    # Self-similarity is deliberately reported as 0.
    np.fill_diagonal(sim_matrix, 0.0)
    return sim_matrix
# Function to Compute Content Similarity
def calculate_content_similarity(df):
    """Return the pairwise TF-IDF cosine similarity of the products' text fields.

    For every row, 'Category', 'Material' and 'Description' are joined with
    single spaces into one document; the documents are vectorised with
    ``TfidfVectorizer(stop_words='english')`` and compared with
    ``cosine_similarity``.

    Args:
        df: DataFrame with 'Category', 'Material' and 'Description' columns.
            ``df`` is not modified.

    Returns:
        The result of ``cosine_similarity`` on the TF-IDF matrix with itself.
    """
    # Missing values become empty strings; otherwise a single NaN would make
    # the whole combined document NaN, which the vectoriser cannot process.
    text = df[['Category', 'Material', 'Description']].fillna('').astype(str)

    # Kept in a local Series instead of a new column on `df`, so the caller's
    # frame does not gain a helper column as a side effect.
    combined_features = text['Category'] + " " + text['Material'] + " " + text['Description']

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(combined_features)
    return cosine_similarity(tfidf_matrix, tfidf_matrix)
# Hybrid Recommendation Function
def recommend_products(user_id, product_name, num_recommendations=5):
    """Display the products most similar to ``product_name`` by a hybrid score.

    The hybrid score is the mean of the Jaccard similarity
    (``calculate_jaccard_similarity``) and the content similarity
    (``calculate_content_similarity``) between the selected product and every
    other product. Reads the module-level ``df`` and renders into the
    module-level ``output`` widget.

    Args:
        user_id: Accepted for interface compatibility; it is not used in the
            ranking.
        product_name: Exact value to look up in the 'Product_Name' column.
        num_recommendations: Maximum number of products to show.

    Returns:
        None. The result (or a "not found" message) is written to ``output``.
    """
    with output:
        clear_output()

        # Positional row(s) of the selected product; positions rather than
        # index labels are needed to address rows of the similarity matrices.
        matches = np.flatnonzero(df['Product_Name'].to_numpy() == product_name)
        if matches.size == 0:
            print(":warning: Product not found in dataset.")
            return
        product_idx = int(matches[0])

        jaccard_matrix = calculate_jaccard_similarity(df)
        content_matrix = calculate_content_similarity(df)

        jaccard_row = np.asarray(jaccard_matrix[product_idx], dtype=float)
        content_row = np.asarray(content_matrix[product_idx], dtype=float)
        hybrid_scores = (jaccard_row + content_row) / 2

        # Rank by descending score (stable, so ties keep row order) and remove
        # the selected product explicitly. Its own hybrid score is not
        # guaranteed to be the highest, so skipping "the first ranked row"
        # could discard the best match and recommend the product itself.
        ranked = [int(i) for i in np.argsort(-hybrid_scores, kind="stable") if i != product_idx]
        product_indices = ranked[:max(num_recommendations, 0)]

        print(":small_blue_diamond: Recommended Products Based on:", product_name)
        display(df.iloc[product_indices])
# Button Click Function
def on_button_click(b):
    """Button callback: recommend five products for the current widget values.

    ``b`` is the clicked button passed in by the widget framework; it is not
    used. Nothing happens when the dropdown holds no (or an empty) value.
    """
    user_id = user_id_dropdown.value
    selected_product = product_dropdown.value
    if not selected_product:
        return
    recommend_products(user_id, selected_product, 5)
# Register on_button_click as the button's click handler
recommend_button.on_click(on_button_click)
# Render the user-id field, product picker, button and output area below this cell
display(user_id_dropdown, product_dropdown, recommend_button, output)


IntText(value=1, description='User_ID:')

Dropdown(description='Select Product:', options=('Bamboo Toothbrush', 'Recycled Plastic Toothbrush', 'Cornstar…

Button(button_style='primary', description='Get Recommendations', style=ButtonStyle())

Output()