# Smart Gift Planner

## Objective

Create and design a High-Fidelity Mobile App Prototype for a Smart Gift Planner. The prototype must demonstrate the user flow from defining a gift recipient to viewing the personalized, algorithmic recommendations.

## Import Libraries

In [10]:
# Dataframes
import pandas as pd
pd.set_option('display.float_format', '{:.2f}'.format)

# AI
from google import genai
from dotenv import load_dotenv
import time
import os
import math
import ast

# Visualizations
import plotly.express as px

## Import Data

In [2]:
products = pd.read_csv("amazon_products.csv")
categories = pd.read_csv("amazon_categories.csv")

In [3]:
products.head()

Unnamed: 0,asin,title,imgUrl,productURL,stars,reviews,price,listPrice,category_id,isBestSeller,boughtInLastMonth
0,B014TMV5YE,"Sion Softside Expandable Roller Luggage, Black...",https://m.media-amazon.com/images/I/815dLQKYIY...,https://www.amazon.com/dp/B014TMV5YE,4.5,0,139.99,0.0,104,False,2000
1,B07GDLCQXV,Luggage Sets Expandable PC+ABS Durable Suitcas...,https://m.media-amazon.com/images/I/81bQlm7vf6...,https://www.amazon.com/dp/B07GDLCQXV,4.5,0,169.99,209.99,104,False,1000
2,B07XSCCZYG,Platinum Elite Softside Expandable Checked Lug...,https://m.media-amazon.com/images/I/71EA35zvJB...,https://www.amazon.com/dp/B07XSCCZYG,4.6,0,365.49,429.99,104,False,300
3,B08MVFKGJM,Freeform Hardside Expandable with Double Spinn...,https://m.media-amazon.com/images/I/91k6NYLQyI...,https://www.amazon.com/dp/B08MVFKGJM,4.6,0,291.59,354.37,104,False,400
4,B01DJLKZBA,Winfield 2 Hardside Expandable Luggage with Sp...,https://m.media-amazon.com/images/I/61NJoaZcP9...,https://www.amazon.com/dp/B01DJLKZBA,4.5,0,174.99,309.99,104,False,400


In [4]:
categories.head()

Unnamed: 0,id,category_name
0,1,Beading & Jewelry Making
1,2,Fabric Decorating
2,3,Knitting & Crochet Supplies
3,4,Printmaking Supplies
4,5,Scrapbooking & Stamping Supplies


In [5]:
# Merge the 2 dfs on category ID
merged = products.merge(
    categories,
    left_on="category_id",
    right_on="id",
    how="left"
)

# Remove numerical category identifiers
merged = merged.drop(columns=["id", "category_id"])

# Save to new .csv
merged.to_csv("merged_products.csv", index=False, encoding="utf-8")

# Save to JSON for SE/UI/UX
merged.to_json("merged_products.json", orient="records", lines=True)

merged.head()

Unnamed: 0,asin,title,imgUrl,productURL,stars,reviews,price,listPrice,isBestSeller,boughtInLastMonth,category_name
0,B014TMV5YE,"Sion Softside Expandable Roller Luggage, Black...",https://m.media-amazon.com/images/I/815dLQKYIY...,https://www.amazon.com/dp/B014TMV5YE,4.5,0,139.99,0.0,False,2000,Suitcases
1,B07GDLCQXV,Luggage Sets Expandable PC+ABS Durable Suitcas...,https://m.media-amazon.com/images/I/81bQlm7vf6...,https://www.amazon.com/dp/B07GDLCQXV,4.5,0,169.99,209.99,False,1000,Suitcases
2,B07XSCCZYG,Platinum Elite Softside Expandable Checked Lug...,https://m.media-amazon.com/images/I/71EA35zvJB...,https://www.amazon.com/dp/B07XSCCZYG,4.6,0,365.49,429.99,False,300,Suitcases
3,B08MVFKGJM,Freeform Hardside Expandable with Double Spinn...,https://m.media-amazon.com/images/I/91k6NYLQyI...,https://www.amazon.com/dp/B08MVFKGJM,4.6,0,291.59,354.37,False,400,Suitcases
4,B01DJLKZBA,Winfield 2 Hardside Expandable Luggage with Sp...,https://m.media-amazon.com/images/I/61NJoaZcP9...,https://www.amazon.com/dp/B01DJLKZBA,4.5,0,174.99,309.99,False,400,Suitcases


## Exploratory Data Analysis

In [6]:
merged.info()
print(merged.isna().sum())
merged.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1426337 entries, 0 to 1426336
Data columns (total 11 columns):
 #   Column             Non-Null Count    Dtype  
---  ------             --------------    -----  
 0   asin               1426337 non-null  object 
 1   title              1426336 non-null  object 
 2   imgUrl             1426337 non-null  object 
 3   productURL         1426337 non-null  object 
 4   stars              1426337 non-null  float64
 5   reviews            1426337 non-null  int64  
 6   price              1426337 non-null  float64
 7   listPrice          1426337 non-null  float64
 8   isBestSeller       1426337 non-null  bool   
 9   boughtInLastMonth  1426337 non-null  int64  
 10  category_name      1426337 non-null  object 
dtypes: bool(1), float64(3), int64(2), object(5)
memory usage: 110.2+ MB
asin                 0
title                1
imgUrl               0
productURL           0
stars                0
reviews              0
price                0
lis

Unnamed: 0,stars,reviews,price,listPrice,boughtInLastMonth
count,1426337.0,1426337.0,1426337.0,1426337.0,1426337.0
mean,4.0,180.75,43.38,12.45,141.98
std,1.34,1761.45,130.29,46.11,836.27
min,0.0,0.0,0.0,0.0,0.0
25%,4.1,0.0,11.99,0.0,0.0
50%,4.4,0.0,19.95,0.0,0.0
75%,4.6,0.0,35.99,0.0,50.0
max,5.0,346563.0,19731.81,999.99,100000.0


In [7]:
# How many products have reviews?
print((merged['reviews'] != 0).sum())

# How many products are best sellers?
print((merged['isBestSeller'] == True).sum())

295834
8520


In [8]:
# Drop row with missing product title
merged = merged.dropna(subset=['title'])

In [20]:
# Check for unique product categories before using AI to categorize them
print(merged['category_name'].unique())
print(len(merged['category_name'].unique()))

['Suitcases' "Men's Clothing" 'Xbox 360 Games, Consoles & Accessories'
 "Men's Shoes" "Men's Accessories" 'Vacuum Cleaners & Floor Care'
 'Televisions & Video Products' 'Additive Manufacturing Products'
 'Headphones & Earbuds' 'PlayStation Vita Games, Consoles & Accessories'
 'Wii U Games, Consoles & Accessories'
 'PlayStation 4 Games, Consoles & Accessories' "Boys' Watches"
 "Girls' Clothing" "Boys' Clothing" 'Pregnancy & Maternity Products'
 'Shaving & Hair Removal Products' 'Fabric Decorating'
 'Industrial Materials' 'Smart Home: Security Cameras and Systems'
 'Office Electronics' 'Sports & Outdoor Play Toys' "Kids' Play Tractors"
 'Slot Cars, Race Tracks & Accessories' 'Video Games'
 'Smart Home: Voice Assistants and Hubs' 'Light Bulbs' 'Toys & Games'
 "Kids' Furniture" 'Automotive Tires & Wheels'
 'Wellness & Relaxation Products' 'Automotive Tools & Equipment'
 'Baby & Toddler Toys' "Kids' Play Boats" 'Computer Monitors'
 "Girls' Jewelry" 'Luggage' 'Printmaking Supplies' "Women's 

The merged df only contains products from 248 out of a total of 270 possible product categories. Regardless, I will categorize all of them with Gemini.

## Broad Product Categorization (Gemini)

In [23]:
# -----------------------------
# Setup client
# -----------------------------
load_dotenv()
api_key_env = os.getenv("API_KEY")  # from local environment variable
client = genai.Client(api_key=api_key_env)

MODEL_NAME = "gemini-2.5-flash"

# -----------------------------
# Free tier batching parameters
# -----------------------------
batch_size = 5                # number of articles per batch (≤ free tier limit)
sleep_time = 35               # seconds to sleep between batches

# List of 270 Amazon categories
amazon_categories = merged['category_name'].unique()
total_categories = len(amazon_categories)
total_batches = (total_categories + batch_size - 1) // batch_size

# Function to categorize into broader categories
def categorize_batch(categories):
    prompt = f"""
You are an AI assistant. I have a list of Amazon product categories. 
Please categorize each of the following categories into a broader, high-level category such as 'Electronics', 'Clothing', 'Home', 'Beauty', 'Food', 'Sports', 'Toys', etc. 

Return the result as a Python dictionary where keys are the original categories and values are the broad category.
Categories:
{categories}
"""
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=prompt
    )
    
# Access the text
    text_response = response.text.strip()
    print(text_response)
    try:
        return ast.literal_eval(text_response)  # convert string dict to Python dict
    except:
        print("Error parsing batch, returning raw text")
        return text_response

# Loop over batches
all_results = {}
for i in range(total_batches):
    start_idx = i * batch_size
    end_idx = min(start_idx + batch_size, total_categories)
    batch = amazon_categories[start_idx:end_idx].tolist()
    
    print(f"Processing batch {i+1}/{total_batches}...")
    batch_result = categorize_batch(batch)
    
    if isinstance(batch_result, dict):
        all_results.update(batch_result)
    else:
        print(f"Batch {i+1} returned invalid format, saving as text")
        all_results[f"batch_{i+1}"] = batch_result
    
    if i < total_batches - 1:
        time.sleep(sleep_time)  # avoid exceeding free-tier rate limit

# Final results
print("Categorization complete!")
print(all_results)

# Use all_results to create a new df column
merged['broad_category'] = merged['category_name'].map(all_results)

Processing batch 1/50...


ServerError: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}

## Visualizations for Dashboard

### # Items per Category

In [None]:
category_counts = merged['category_name'].value_counts().reset_index()

fig = px.bar(category_counts, 
             x='category_name',
             y='count',
             color='category_name',
             title="Products per Category")
#fig.show()
fig.write_html("items_per_category.html")

### Price Distribution

In [None]:
max_price = 300
filtered = merged[merged['price'] <= max_price]

fig = px.histogram(filtered, 
                   x='price', 
                   nbins=100, 
                   title=f"Price Distribution (<= ${max_price})")
#fig.show()
fig.write_html("price_distribution.html")

### Rating vs. Reviews

In [None]:
fig = px.scatter(
    merged,
    x='stars',
    y='reviews',
    size='reviews',
    color='category_name',
    hover_data=['title', 'price', 'stars', 'reviews'],
    title="Product Ratings vs Review Count",
    size_max=30
)

fig.update_layout(yaxis_type='log')  # optional, if reviews range widely
#fig.show()
fig.write_html("rating_vs_reviews.html")