In [None]:
from flask import Flask, request, jsonify
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Flask application object; the route handler defined later in this notebook
# registers itself on it via the @app.route decorator.
app = Flask(__name__)

In [None]:
# Load the product catalogue and force `price` to a numeric dtype in one chain.
# errors='coerce' turns any value that cannot be parsed as a number into NaN
# rather than raising.
df = pd.read_csv("amazon_products_final.csv").assign(
    price=lambda products: pd.to_numeric(products['price'], errors='coerce')
)

In [None]:
# Product-type terms; a query title must contain at least one of these
# (as a substring) before any alternatives are looked up.
category_keywords = [
    'tshirt',
    't-shirt',
    'shirt',
    'toothbrush',
    'shelf',
    'box',
    'floss',
    'bag',
    'cup',
    'bottle',
]

# Sustainability terms mapped to the points each one contributes to a
# title's keyword score.
sustainability_keywords = dict([
    ('bamboo', 10),
    ('recycled', 8),
    ('organic', 7),
    ('eco', 7),
    ('eco-friendly', 7),
    ('biodegradable', 9),
    ('sustainable', 10),
    ('compostable', 8),
    ('natural', 5),
    ('plant-based', 6),
    ('fair trade', 7),
    ('vegan', 6),
    ('reusable', 7),
])

# Sustainability score calculator
def calculate_sustainability_score(title):
    """Return a heuristic sustainability score for a product title.

    The title is lower-cased and every entry of ``sustainability_keywords``
    that occurs as a whole word/phrase (regex ``\\b`` boundaries) contributes
    its points once. The result is ``40 + 3 * (sum of matched points)``, so a
    title with no matches scores 40 and there is no upper cap.

    Note that a hyphen counts as a word boundary, so a title containing
    "eco-friendly" matches both the 'eco' and the 'eco-friendly' entries.

    Args:
        title: Product title; must support ``.lower()`` (i.e. be a string).

    Returns:
        The integer score.
    """
    lowered = title.lower()
    matched_points = sum(
        points
        for term, points in sustainability_keywords.items()
        if re.search(r'\b' + re.escape(term) + r'\b', lowered) is not None
    )
    return 40 + matched_points * 3



In [10]:
# Vectorizer setup: build the TF-IDF model (English stop words removed) and
# fit it on every product title in a single expression; `fit` returns the
# fitted vectorizer itself.
vectorizer = TfidfVectorizer(stop_words="english").fit(df['title'])

In [11]:
def _nan_to_none(record):
    """Return a copy of ``record`` with missing scalar values replaced by None.

    NaN is serialised as a bare ``NaN`` token, which is not valid JSON and is
    rejected by strict parsers such as the browser's ``JSON.parse``.
    """
    return {
        key: None if (pd.api.types.is_scalar(value) and pd.isna(value)) else value
        for key, value in record.items()
    }


@app.route("/api/alternatives", methods=["POST"])
def get_alternatives():
    """Suggest more sustainable alternatives for a product title.

    Request body (JSON object): ``{"title": "<product title>"}``.

    Responses:
        400 ``{"error": ...}`` when the body is not a JSON object, or the
        title is missing, empty, or not a string.
        200 ``{"alternatives": [...], "sustainability_score": <int>}``
        otherwise. Each alternative carries ``title``, ``sustainability_score``,
        ``price`` and ``productURL``; missing values are returned as ``null``.
        ``sustainability_score`` at the top level is the score of the
        submitted title.

    Matching is substring based: a category keyword such as 'cup' also
    matches titles that merely contain those letters.
    """
    # silent=True makes a malformed or non-JSON body come back as None instead
    # of raising; anything that is not a JSON object is treated as "no fields".
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    title = data.get("title", "")
    print("🔍 Received title:", title)

    if not title:
        return jsonify({"error": "Title is required"}), 400
    if not isinstance(title, str):
        # e.g. {"title": 123} would otherwise crash on .lower() below.
        return jsonify({"error": "Title must be a string"}), 400

    # The query's own score does not depend on the catalogue, so compute it
    # once and return it on every successful path.
    sus_score = calculate_sustainability_score(title)

    # Step 1: Detect which product categories the title mentions
    title_lower = title.lower()
    matched_keywords = [kw for kw in category_keywords if kw in title_lower]
    print("📌 Matched keywords:", matched_keywords)

    if not matched_keywords:
        print("❌ No matching keywords found.")
        return jsonify({"alternatives": [], "sustainability_score": sus_score})

    # Step 2: Filter relevant products
    def has_keyword(text):
        # Non-string cells (e.g. NaN for a missing title) can never match.
        if not isinstance(text, str):
            return False
        lowered = text.lower()
        return any(kw in lowered for kw in matched_keywords)

    # .copy() so the similarity column is added to an independent frame,
    # never to the shared catalogue.
    filtered_df = df[df['title'].apply(has_keyword)].copy()
    print(f"🔎 Filtered products count: {len(filtered_df)}")

    if filtered_df.empty:
        print("⚠️ No products matched the keyword filter.")
        return jsonify({"alternatives": [], "sustainability_score": sus_score})

    # Step 3: Similarity calculation
    title_vec = vectorizer.transform([title])
    product_vecs = vectorizer.transform(filtered_df['title'])
    filtered_df['similarity'] = cosine_similarity(title_vec, product_vecs).flatten()

    # Log similarity scores
    print("🧪 Top 5 similar products by score:")
    print(filtered_df[['title', 'similarity']].sort_values(by='similarity', ascending=False).head(5))

    # Step 4: Apply sustainability threshold
    # NOTE(review): 'sustainability_score' is presumably a column precomputed
    # in the CSV; nothing in this notebook creates it — confirm.
    min_score = 100
    better_alternatives = filtered_df[filtered_df['sustainability_score'] > min_score]
    print(f"🌱 Better alternatives count: {len(better_alternatives)}")

    if better_alternatives.empty:
        print("⚠️ No better alternatives found. Using top similar products as fallback.")
        better_alternatives = filtered_df

    # Most similar first; sustainability score breaks ties.
    top_matches = better_alternatives.sort_values(
        ['similarity', 'sustainability_score'], ascending=[False, False]
    ).head(5)

    print("✅ Top matches:", top_matches['title'].tolist())

    records = top_matches[['title', 'sustainability_score', 'price', 'productURL']].to_dict(orient='records')
    # Prices coerced to NaN at load time must become null to keep the JSON valid.
    result = [_nan_to_none(record) for record in records]
    return jsonify({"alternatives": result,
                    "sustainability_score": sus_score
                    })



In [12]:
# Start Flask's built-in development server, but only when this code runs as
# the main module (which is the case inside a notebook kernel).
# use_reloader=False turns off the auto-reloader.
# NOTE(review): debug=True turns on Flask debug mode, which includes an
# interactive debugger — keep this for local development only; confirm it is
# never exposed beyond localhost.
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


🔍 Received title: T-Shirt
📌 Matched keywords: ['t-shirt', 'shirt']
🔎 Filtered products count: 29468


127.0.0.1 - - [19/Apr/2025 22:17:26] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                title  similarity
921025        T-Shirt    1.000000
88882   Kids' T-Shirt    0.830013
227745  Girls T-Shirt    0.823093
227951  Girls T-Shirt    0.823093
227626  Girls T-Shirt    0.823093
🌱 Better alternatives count: 5
✅ Top matches: ['[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopping Bags | Green Eco Plastic Bags (100 per Pack) | T-Shirt Carryout Bags 100 count Restaurant Quality, Durable, Reusable and Econ Friendly | Measures 11. 5" X 6. 25" X 21"( large size 1/6) , 16 Mic (0. 63 Mil)', '[200 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopping Bags | Green Eco Plastic Bags (200 per Pack) | T-Shirt Carryout Bags 200 count Restaurant Quality, Durable, Reusable and Econ Friendly | Measures 11. 5" X 6. 25" X 21"( large size 1/6) , 16 Mic (0. 63 Mil)', '[500 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocer

127.0.0.1 - - [19/Apr/2025 22:17:45] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                    title  similarity
8888                Men's Polo Shirt Nylon/Elastane Blend    0.642639
823724   Women's Slim-Fit Half Sleeve Square Neck T-Shirt    0.582106
88950   Girls Turtleneck Shirt Cotton Turtle Neck T Sh...    0.571591
1861    Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
11045   Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
🌱 Better alternatives count: 5
✅ Top matches: ["Men's Organic Bamboo Polo T-Shirt  Plain Solid Regular Fit Collar Neck Half Sleeves  Eco-Friendly Breathable Comfort Perfect for Casual and Everyday Wear", '[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopping Bags | Green Eco Plastic Bags (100 per Pack) | T-Shirt Carryout Bags 100 count Restaurant Quality, Durable, Reusable and Econ Friendly | Measures 11. 5" X 6. 25" X 21"( large size 1/6) , 16 Mic (0. 63 Mil)', '[200 per box] | Recyclable Co

127.0.0.1 - - [19/Apr/2025 22:18:46] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                    title  similarity
8888                Men's Polo Shirt Nylon/Elastane Blend    0.642639
823724   Women's Slim-Fit Half Sleeve Square Neck T-Shirt    0.582106
88950   Girls Turtleneck Shirt Cotton Turtle Neck T Sh...    0.571591
1861    Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
11045   Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
🌱 Better alternatives count: 5
✅ Top matches: ["Men's Organic Bamboo Polo T-Shirt  Plain Solid Regular Fit Collar Neck Half Sleeves  Eco-Friendly Breathable Comfort Perfect for Casual and Everyday Wear", '[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopping Bags | Green Eco Plastic Bags (100 per Pack) | T-Shirt Carryout Bags 100 count Restaurant Quality, Durable, Reusable and Econ Friendly | Measures 11. 5" X 6. 25" X 21"( large size 1/6) , 16 Mic (0. 63 Mil)', '[200 per box] | Recyclable Co

127.0.0.1 - - [19/Apr/2025 22:19:09] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                    title  similarity
567534  Small Storage Containers, 4 Packs Rectangular ...    0.470639
568609  Clear Plastic Beads Storage Containers Empty M...    0.469225
567877  Clear Plastic Beads Storage Containers Empty M...    0.453825
567077  Clear Plastic Beads Storage Containers Empty M...    0.449915
570987  30pcs Small Clear Plastic Beads Storage Contai...    0.431752
🌱 Better alternatives count: 12
✅ Top matches: ["100% Home Compostable 50 Pack Disposable Clamshell Take Out Food Containers, 8X8'' 1-Compartment to go Containers, Heavy-Duty to go Boxes, Eco-Friendly Biodegradable, Made of Sugarcane Fibers", '100% Compostable Clamshell To Go Boxes For Food [8X8 3-Compartment 50-Pack] Disposable Take Out Containers, Made of Biodegradable Sugar Cane, Eco-Friendly Bagasse, Heavy-Duty ToGo Containers', 'Bamboo Facial Tissues Box by Cloud Paper - 12 Bamboo Tissue Boxes, 100 Hypoallergenic Facial Tissues per Bo

127.0.0.1 - - [19/Apr/2025 22:19:41] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                  title  similarity
1424  Men's Organic Bamboo Polo T-Shirt  Plain Solid...    1.000000
2663  Men's Dress Shirt Regular Fit Casual Long Slee...    0.395155
6239  Men's Dress Shirt Regular Fit Stretch Solid Bu...    0.389496
2504                Men's Dress Shirt Regular Fit Solid    0.386531
2551  Men's Dress Shirt Regular Fit Flex Collar Stre...    0.380635
🌱 Better alternatives count: 5
✅ Top matches: ["Men's Organic Bamboo Polo T-Shirt  Plain Solid Regular Fit Collar Neck Half Sleeves  Eco-Friendly Breathable Comfort Perfect for Casual and Everyday Wear", 'Eco-Friendly Plastic T-Shirt Bags | 100 per Box | Recyclable, Compostable, Biodegradable & Reusable | Grocery Shopping Bags | Restaurant Quality | Durable & Economical | Large Size 1/6 (11.5" x 6.25" x 21") | 16 Mic (0.63 Mil) Thickness | Pack of 100', '[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopp

127.0.0.1 - - [19/Apr/2025 22:20:14] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                  title  similarity
1424  Men's Organic Bamboo Polo T-Shirt  Plain Solid...    1.000000
2663  Men's Dress Shirt Regular Fit Casual Long Slee...    0.395155
6239  Men's Dress Shirt Regular Fit Stretch Solid Bu...    0.389496
2504                Men's Dress Shirt Regular Fit Solid    0.386531
2551  Men's Dress Shirt Regular Fit Flex Collar Stre...    0.380635
🌱 Better alternatives count: 5
✅ Top matches: ["Men's Organic Bamboo Polo T-Shirt  Plain Solid Regular Fit Collar Neck Half Sleeves  Eco-Friendly Breathable Comfort Perfect for Casual and Everyday Wear", 'Eco-Friendly Plastic T-Shirt Bags | 100 per Box | Recyclable, Compostable, Biodegradable & Reusable | Grocery Shopping Bags | Restaurant Quality | Durable & Economical | Large Size 1/6 (11.5" x 6.25" x 21") | 16 Mic (0.63 Mil) Thickness | Pack of 100', '[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopp

127.0.0.1 - - [19/Apr/2025 22:21:28] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                    title  similarity
567534  Small Storage Containers, 4 Packs Rectangular ...    0.470639
568609  Clear Plastic Beads Storage Containers Empty M...    0.469225
567877  Clear Plastic Beads Storage Containers Empty M...    0.453825
567077  Clear Plastic Beads Storage Containers Empty M...    0.449915
570987  30pcs Small Clear Plastic Beads Storage Contai...    0.431752
🌱 Better alternatives count: 12
✅ Top matches: ["100% Home Compostable 50 Pack Disposable Clamshell Take Out Food Containers, 8X8'' 1-Compartment to go Containers, Heavy-Duty to go Boxes, Eco-Friendly Biodegradable, Made of Sugarcane Fibers", '100% Compostable Clamshell To Go Boxes For Food [8X8 3-Compartment 50-Pack] Disposable Take Out Containers, Made of Biodegradable Sugar Cane, Eco-Friendly Bagasse, Heavy-Duty ToGo Containers', 'Bamboo Facial Tissues Box by Cloud Paper - 12 Bamboo Tissue Boxes, 100 Hypoallergenic Facial Tissues per Bo

127.0.0.1 - - [19/Apr/2025 22:21:45] "POST /api/alternatives HTTP/1.1" 200 -


🧪 Top 5 similar products by score:
                                                    title  similarity
8888                Men's Polo Shirt Nylon/Elastane Blend    0.642639
823724   Women's Slim-Fit Half Sleeve Square Neck T-Shirt    0.582106
88950   Girls Turtleneck Shirt Cotton Turtle Neck T Sh...    0.571591
1861    Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
11045   Men's Slim-Fit Short-Sleeve V-Neck T-Shirt, Pa...    0.537630
🌱 Better alternatives count: 5
✅ Top matches: ["Men's Organic Bamboo Polo T-Shirt  Plain Solid Regular Fit Collar Neck Half Sleeves  Eco-Friendly Breathable Comfort Perfect for Casual and Everyday Wear", '[100 per box] | Recyclable Compostable Reusable Biodegradable Plastic T-Shirt Bags | Grocery Shopping Bags | Green Eco Plastic Bags (100 per Pack) | T-Shirt Carryout Bags 100 count Restaurant Quality, Durable, Reusable and Econ Friendly | Measures 11. 5" X 6. 25" X 21"( large size 1/6) , 16 Mic (0. 63 Mil)', '[200 per box] | Recyclable Co