# Data Prep
This first section is used for reading in raw data files and creating subsets of data that can be used for development

In [1]:
import pandas as pd
import json
import random
import ast
import requests
import io
import os
import platform

In [2]:
# set folder path to data when using Linux or Windows

if os.name == 'posix' or platform.system() == 'Linux':  # For Ubuntu/Linux
    path_char = "/"
elif os.name == 'nt' or platform.system() == 'Windows':  # For Windows
    path_char = "\\"

In [4]:
# read in jsonl files for all Appliances reviews
data = []
file = '..'+path_char+'..'+path_char+'Appliances.jsonl'
with open(file, 'r') as fp:
    for line in fp:
        data.append(json.loads(line.strip()))

In [5]:
# read in jsonl files for meta_Appliances for all products
meta_data = []
file = '..'+path_char+'..'+path_char+'meta_Appliances.jsonl'
with open(file, 'r') as fp:
    for line in fp:
        meta_data.append(json.loads(line.strip()))

In [6]:
# look at size of datasets
print("data length: ", len(data))
print("meta data lenth: ", len(meta_data))

data length:  2128605
meta data lenth:  94327


In [7]:
# define function to split data into chunks
def split_list(data, chunk_size):
    # Shuffle the data randomly
    random.shuffle(data)
    
    # Split the list into chunks of specified size
    return [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

# spit data into smaller chunks
smaller_lists_appliances = split_list(data, 200000)
smaller_lists_meta = split_list(meta_data, 30000)


In [8]:
# save smaller appliance files
for i, chunk in enumerate(smaller_lists_appliances):
    # Define the filename for each chunk (e.g., "list_1.txt", "list_2.txt", etc.)
    filename = 'data'+path_char+f'Appliances_{i+1}.txt'
    with open(filename, 'w') as file:
        # Write each item from the list to a new line in the file
        file.write('\n'.join(map(str, chunk)))

In [9]:
# save smaller meta files 
for i, chunk in enumerate(smaller_lists_meta):
    # Define the filename for each chunk (e.g., "list_1.txt", "list_2.txt", etc.)
    filename = 'data'+path_char+f'meta_{i+1}.txt'
    with open(filename, 'w') as file:
        # Write each item from the list to a new line in the file
        file.write('\n'.join(map(str, chunk)))

In [10]:
# # read in smaller Appliance and meta files
# with open('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliances_1.txt', 'r') as file:
#     appliances = file.read().splitlines()  
# with open('..'+path_char+'Project Main'+path_char+'data'+'path_char'+path_char+'meta_4.txt', 'r') as file:
#     meta = file.read().splitlines()  

# # put Appliance and meta data into data frames
# parsed_data = [ast.literal_eval(item) for item in appliances]
# df_appliance = pd.DataFrame(parsed_data)

# parsed_data = [ast.literal_eval(item) for item in meta]
# df_meta = pd.DataFrame(parsed_data)


In [11]:
# read in all files and create a dataframe, then save to parquet file
df_appliances = []
df_metas = []

#combine all appliance files
for i in range(1, 11 + 1):
    print(f"Appliance file: {i}")
    filename = '..'+path_char+'Project Main'+path_char+'data'+path_char+f'Appliances_{i}.txt'
    with open(filename, 'r') as file:
        appliances = file.read().splitlines()  

    parsed_data = [ast.literal_eval(item) for item in appliances]
    df_appliance = pd.DataFrame(parsed_data)

    df_appliances.append(df_appliance)

# combine all meta files
for i in range(1, 4 + 1):
    print(f"meta file: {i}")
    filename = '..'+path_char+'Project Main'+path_char+'data'+path_char+f'meta_{i}.txt'
    with open(filename, 'r') as file:
        meta = file.read().splitlines()  

    parsed_data = [ast.literal_eval(item) for item in meta]
    df_meta = pd.DataFrame(parsed_data)

    df_metas.append(df_meta)

combined_df_appliance = pd.concat(df_appliances, ignore_index=True)
combined_df_meta = pd.concat(df_metas, ignore_index=True)

#write large data frames to parquet files
combined_df_appliance.to_parquet('..'+path_char+'..'+path_char+'Appliance_file_large.parquet',compression='Brotli', engine='pyarrow')
combined_df_meta.to_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', compression='Brotli',engine='pyarrow')



Appliance file: 1
Appliance file: 2
Appliance file: 3
Appliance file: 4
Appliance file: 5
Appliance file: 6
Appliance file: 7
Appliance file: 8
Appliance file: 9
Appliance file: 10
Appliance file: 11
meta file: 1
meta file: 2
meta file: 3
meta file: 4


In [None]:
# this code can be used to pull the combined large datafiles from google drive.  The file format is parquet.

import gdown  # for downloading from google drive

# pull large Appliance files from google drive
file_id = '17-NNDAjDtI-PhQJ2P8U-O-l9sZhI71rp'
download_url = f'https://drive.google.com/uc?id={file_id}'
gdown.download(download_url, 'downloaded_file.parquet', quiet=False)
Appliance_file_large = pd.read_parquet('downloaded_file.parquet')

# Display the dataframe
Appliance_file_large.head()

# pull large meta file from google drive
file_id = '19F34NIW3QVfzd1g7BuhR6DBiFHbSVRbH'
download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
response = requests.get(download_url)
response.raise_for_status()  
file_content = io.BytesIO(response.content)
meta_file_large = pd.read_parquet(file_content)

# Display the dataframe
meta_file_large.head()

In [13]:
# read in large dataset from parquet to confirm correct
meta_df_read_large = pd.read_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', engine='pyarrow')
meta_df_read_large

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Amazon Home,Upgraded Lifetime Appliance W10752646 Defrost ...,4.6,23,"[✅ Part Numbers: W10752646, AP5956381, C894670...","[Part Numbers: W10752646, AP5956381, C8946703,...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],Lifetime Appliance Parts,"[Appliances, Parts & Accessories]","{'': None, 'AC Adapter Current': None, 'Access...",B075FF3DT7,,,
1,Industrial & Scientific,Supplying Demand WR60X162 WR60X0162 Refrigerat...,4.3,12,[Please see Model Number fitment information a...,[],22.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'How To Find The Model Number On Yo...,Supplying Demand,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B08BJC7BVQ,,,
2,Tools & Home Improvement,Kitchen Basics 101 WD15X10014 WD15X10011 Dishw...,4.6,195,[【DESCRIPTION】This water inlet valve replaces ...,[],19.41,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Supplying Demand WD15X10011 Produc...,KITCHEN BASICS 101,"[Appliances, Parts & Accessories, Dishwasher P...","{'': None, 'AC Adapter Current': None, 'Access...",B07K3P1BNZ,,,
3,Appliances,Lorenz Butterfly 811195 Range Hood Vent Knob R...,3.9,16,[❤[Overview] 811195 Ventilation Hood Knob for ...,"[❤811195 Vent Hood Knob for Sub-Zero Wolf Pro,...",11.55,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': '811195 Wolf Hood Knob', 'url': 'ht...",Lorenz Butterfly,"[Appliances, Parts & Accessories, Range Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B083WJBZS1,,,
4,Tools & Home Improvement,Whirlpool 4453909 Surface Element for Range,3.6,25,[Works with the following models: Whirlpool GJ...,"[Product Description, This is a Genuine Origin...",,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Whirlpool,"[Appliances, Parts & Accessories, Range Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B004XL1KLU,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94322,,Replacement Water Filter Cartridge for Samsung...,4.2,35,"[Filter Capacity: 300 Gallon, Filter Life: 6-M...",[Replacement Filter for: Samsung DA29-00020BFi...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],BLOSSOMZ,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B074QQ54Q7,,,
94323,Amazon Home,Range Kleen Stove/Counter Mat 8.5 in. x 20 in....,5.0,1,[],[Range Kleen Stove/Counter Mat 8.5 in. x 20 in...,39.36,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Range Kleen,"[Appliances, Parts & Accessories, Range Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B01N3PKU9U,,,
94324,Industrial & Scientific,"AGLUCKY Nugget Ice Maker Countertop, Portable ...",4.2,86,[【CHEWABLE NUGGET ICE IN MINUTES】Cold drinks r...,[],229.99,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'Demo - Fast Icemaker, Ice within m...",AGLUCKY,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0CB5FKT63,,,
94325,Amazon Home,2-Pack Replacement for Whirlpool GI5FSAXVY00 R...,4.2,7,[Replacement for UKF8001 Filter],"[This is a Denali Pure Brand replacement part,...",21.99,[{'hi_res': 'https://m.media-amazon.com/images...,[],Upstart Battery,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B00YD3931W,,,


In [14]:
#randomly select a subset of appliance data that can be used for development.  A larger dataset will be used once model is developed

#choose random subset
subset_appliances = split_list(data, 50000)
subset_appliances_df = pd.DataFrame(subset_appliances[0])

# find unique asin's from subset of data
asin_unique= subset_appliances_df['parent_asin'].unique()
subset_meta_df = meta_df_read_large.loc[meta_df_read_large['parent_asin'].isin(asin_unique)]

# save subset to parquet files
subset_meta_df.to_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'meta_file_subset.parquet', compression='Brotli',engine='pyarrow')
subset_appliances_df.to_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliance_file_subset.parquet', compression='Brotli',engine='pyarrow')
subset_meta_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
2,Tools & Home Improvement,Kitchen Basics 101 WD15X10014 WD15X10011 Dishw...,4.6,195,[【DESCRIPTION】This water inlet valve replaces ...,[],19.41,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Supplying Demand WD15X10011 Produc...,KITCHEN BASICS 101,"[Appliances, Parts & Accessories, Dishwasher P...","{'': None, 'AC Adapter Current': None, 'Access...",B07K3P1BNZ,,,
3,Appliances,Lorenz Butterfly 811195 Range Hood Vent Knob R...,3.9,16,[❤[Overview] 811195 Ventilation Hood Knob for ...,"[❤811195 Vent Hood Knob for Sub-Zero Wolf Pro,...",11.55,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': '811195 Wolf Hood Knob', 'url': 'ht...",Lorenz Butterfly,"[Appliances, Parts & Accessories, Range Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B083WJBZS1,,,
9,Tools & Home Improvement,Samsung Cap Drain Pp - - Cntk-P- DC61-10673A,4.8,123,"[Samsung Cap Drain Pp, Cntk-P, DC61-10673A]",[Samsung Cap Drain Pp - Cntk-P- DC61-10673A],5.42,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Blutoget creates a happy life for ...,SAMSUNG,"[Appliances, Parts & Accessories, Washer Parts...","{'': None, 'AC Adapter Current': None, 'Access...",B00ZPMMEJS,,,
11,Amazon Home,Wood Texture Floral Kitchen Decor Dishwasher M...,3.0,1,[★Material:PVC and PET films and magnets.Durab...,[Do you want simple stickers to make your kitc...,37.85,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Appliance Art Magnetic Dishwasher ...,FRES,"[Appliances, Parts & Accessories, Dishwasher P...","{'': None, 'AC Adapter Current': None, 'Access...",B0B24S35KG,,,
12,Amazon Home,Think Crucial 6 Replacements for Honeywell HC-...,3.8,13,[],[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],Crucial Air,"[Appliances, Parts & Accessories, Humidifier P...","{'': None, 'AC Adapter Current': None, 'Access...",B00K88G7YC,,,


# Reviews prep for model
This section is used to prep the data from evaluation in the model

In [11]:
# load libraries
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')

# define characters to remove and stop words
regex = re.compile("[^a-zA-Z ]")
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/laserlon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/laserlon/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
# function to clean text
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = regex.sub(' ', text) # Substitute everything that is not a letter with an empty string
    words = word_tokenize(text) # tokenize text
    words = [word for word in words if word not in stop_words]  # Remove stopwords
    return words


# read in subset of data files
meta_subset_df = pd.read_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'meta_file_subset.parquet', engine='pyarrow')
Appliance_subset_df = pd.read_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliance_file_subset.parquet', engine='pyarrow')


# Apply the preprocessing function to your text column
Appliance_subset_df['text_clean'] = Appliance_subset_df['text'].apply(preprocess_text)

In [17]:
Appliance_subset_df.iloc[10]

rating                                                             5.0
title                Best filter. Greatly improved indoor air quali...
text                 Works perfectly. No more dust. Very much impro...
images                                                              []
asin                                                        B008RNPOIU
parent_asin                                                 B008RNPOIU
user_id                                   AFAD5KA7XL33L2JNJHAYTM5LKGIQ
timestamp                                                1518204752796
helpful_vote                                                         0
verified_purchase                                                 True
text_clean           [works, perfectly, dust, much, improves, air, ...
Name: 10, dtype: object

In [18]:
# view raw vs processed reviews
Appliance_subset_df[['text','text_clean']].iloc[5]

text          Filters came really fast and were a great pric...
text_clean    [filters, came, really, fast, great, price, wo...
Name: 5, dtype: object

# flatten meta data
This section flattens the meta data.  Categores is a list of all products and needs to be flattened in order to use.

In [19]:
# define a list of products to be used as part of the app
my_list = ['Dishwashers','Freezers','Ranges','Ice Makers','Cooktops','Refrigerators','Beverage Refrigerators','Dryers','Washers']

# remove parts and accessories from data
mask = ~meta_subset_df.apply(lambda col: col.map(lambda x: 'Parts & Accessories' in str(x))).any(axis=1)
filtered_meta_df = meta_subset_df[mask]

# select specific items
mask = filtered_meta_df.apply(lambda col: col.map(lambda x: any(item in str(x) for item in my_list))).any(axis=1)
 
# create a dataframe of filtered dataset
filtered_meta_df = filtered_meta_df[mask]
filtered_meta_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
50,Industrial & Scientific,"Kndko Nugget Ice Maker with Chewy Ice,High Ice...",4.1,20,"[Good Taste, Soft, Crushed Ice for freezing dr...",[],299.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Silonn Nugget Ice Maker Countertop...,Kndko,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0BZH3YWD7,,,
149,Health & Personal Care,O3 Pure Professional Eco Laundry Washer System...,4.5,594,[Don't settle for older technology. The O3 Pur...,[],297.0,"[{'hi_res': None, 'large': 'https://m.media-am...","[{'title': 'Lasted about a year', 'url': 'http...",O3 Pure,"[Appliances, Laundry Appliances, Washers & Dry...","{'': None, 'AC Adapter Current': None, 'Access...",B00AUWTYN2,,,
270,Appliances,Maytag Bravos Series MVWB850WL 28 5 cu. Ft. To...,1.9,100,"[Maytag, Maytag Bravos Series MVWB850WL 28 Top...",[Maytag Bravos Series MVWB850WL 28 TopLoader W...,,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Maytag,"[Appliances, Laundry Appliances, Washers & Dry...","{'': None, 'AC Adapter Current': None, 'Access...",B002LSO9WE,,,
336,Appliances,Air King QZ2368 Quiet Zone 36-Inch Stainless S...,2.9,19,[Lets your hood be noticed for the stylized at...,"[From the Manufacturer, Let your range hood be...",,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Air King,"[Appliances, Ranges, Ovens & Cooktops, Range H...","{'': None, 'AC Adapter Current': None, 'Access...",B001561CNC,,,
491,Tools & Home Improvement,"AKDY 30"" Under Cabinet Stainless Steel Kitchen...",4.0,131,"[567 CFM, 65dB at Max Speed, Dishwasher Friend...","[Beautiful, sleek, and modern. This AKDY RH030...",,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'Great Strong fan.', 'url': 'https:...",AKDY,"[Appliances, Ranges, Ovens & Cooktops, Range H...","{'': None, 'AC Adapter Current': None, 'Access...",B00OBXXD42,,,


In [20]:
# flatten the categories field into individual columns

# create a df of just categories
df = pd.DataFrame(filtered_meta_df['categories'])
 
# Convert the list in the categories column to strings to work with the values
df['categories'] = df['categories'].apply(lambda x: ', '.join(x))

# Split the lists into unique categories
unique_categories = set([item for sublist in df['categories'].str.split(', ') for item in sublist])

# Create new columns for each unique category
for category in unique_categories:
    df[category] = df['categories'].apply(lambda x: 1 if category in x else 0)

#concat the two, original and flattened
meta_subset_flat_df = pd.concat([filtered_meta_df,df],axis=1)
meta_subset_flat_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,...,Range Hoods,Washers,Built-In Dishwashers,Slide-In Ranges,Kegerators,Chest Freezers,Washers & Dryers,Double Wall Ovens,Beverage Refrigerators,Single Wall Ovens
50,Industrial & Scientific,"Kndko Nugget Ice Maker with Chewy Ice,High Ice...",4.1,20,"[Good Taste, Soft, Crushed Ice for freezing dr...",[],299.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Silonn Nugget Ice Maker Countertop...,Kndko,...,0,0,0,0,0,0,0,0,0,0
149,Health & Personal Care,O3 Pure Professional Eco Laundry Washer System...,4.5,594,[Don't settle for older technology. The O3 Pur...,[],297.0,"[{'hi_res': None, 'large': 'https://m.media-am...","[{'title': 'Lasted about a year', 'url': 'http...",O3 Pure,...,0,1,0,0,0,0,1,0,0,0
270,Appliances,Maytag Bravos Series MVWB850WL 28 5 cu. Ft. To...,1.9,100,"[Maytag, Maytag Bravos Series MVWB850WL 28 Top...",[Maytag Bravos Series MVWB850WL 28 TopLoader W...,,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Maytag,...,0,1,0,0,0,0,1,0,0,0
336,Appliances,Air King QZ2368 Quiet Zone 36-Inch Stainless S...,2.9,19,[Lets your hood be noticed for the stylized at...,"[From the Manufacturer, Let your range hood be...",,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Air King,...,1,0,0,0,0,0,0,0,0,0
491,Tools & Home Improvement,"AKDY 30"" Under Cabinet Stainless Steel Kitchen...",4.0,131,"[567 CFM, 65dB at Max Speed, Dishwasher Friend...","[Beautiful, sleek, and modern. This AKDY RH030...",,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'Great Strong fan.', 'url': 'https:...",AKDY,...,1,0,0,0,0,0,0,0,0,0


In [21]:
# look at columns in flattened meta dataframe
meta_subset_flat_df.columns

Index(['main_category', 'title', 'average_rating', 'rating_number', 'features',
       'description', 'price', 'images', 'videos', 'store', 'categories',
       'details', 'parent_asin', 'bought_together', 'subtitle', 'author',
       'categories', '', 'Cooktops', 'Appliances', 'Dishwashers',
       'Freezers & Ice Makers', 'Dryers', 'Portable Dryers',
       'Countertop Dishwashers', 'Portable Washers', 'Ice Makers', 'Freezers',
       'Laundry Appliances', 'Wall Ovens', 'Portable Dishwashers',
       'Combination Microwave & Wall Ovens', 'Ovens & Cooktops',
       'All-in-One Combination Washers & Dryers', 'Refrigerators',
       'Upright Freezers', 'Stacked Washer & Dryer Units',
       'Freestanding Ranges', 'Ranges', 'Range Hoods', 'Washers',
       'Built-In Dishwashers', 'Slide-In Ranges', 'Kegerators',
       'Chest Freezers', 'Washers & Dryers', 'Double Wall Ovens',
       'Beverage Refrigerators', 'Single Wall Ovens'],
      dtype='object')

In [22]:
# function to find top 3 and bottom 3 reviews
def find_top_bottom(Appliance_df,meta_flat_df,product_type):
    # find meta data that match product type
    selected_product = meta_flat_df[(meta_flat_df[product_type]==1)]['parent_asin'].unique()
    # find reviews that match product type
    reviews_with_match_product = Appliance_df[Appliance_df['parent_asin'].isin(selected_product)]
    # calculate the mean rating for each parent_asin or product type
    product_ratings_means = reviews_with_match_product.groupby('parent_asin')['rating'].mean().sort_values(ascending=False).reset_index()
    # find top 3 and bottom 3
    top3 = product_ratings_means[:].head(3)['parent_asin']
    bottom3 = product_ratings_means[:].tail(3)['parent_asin']
    # return top 3 and bottom 3 products
    return meta_subset_df[meta_subset_df['parent_asin'].isin(top3)], meta_subset_df[meta_subset_df['parent_asin'].isin(bottom3)]


top3,bottom3 = find_top_bottom(Appliance_subset_df,meta_subset_flat_df,product_type='Chest Freezers')

In [23]:
# view top 3
top3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
14348,Appliances,Haier HF50CM23NW 5.0 cu. ft. Capacity Chest Fr...,4.4,239,"[Holds up to 175lbs of frozen food, Adjustable...",[Storage for up to 175 lbs. Storage space to a...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],Haier,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B00C4R0AL6,,,
57594,Appliances,"Midea Single Door Chest Freezer, 2.1 Cubic Fee...",3.7,53,[Mechanical Control with Adjustable Thermostat...,[],,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Midea,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B00SUV4OJE,,,
79166,Appliances,"Avanti CF6216E Chest Freezer, 2.1 cu. ft, White",3.0,8,"[Top Loading Flip Up Lid, Stackable Storage, A...",[CF6216E Features: -White color. Product Type:...,,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Avanti,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B00P23529O,,,


In [24]:
# view bottom 3
bottom3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
41966,Appliances,Haier HF50CM23NW 5.0 cu. ft. Capacity Chest Fr...,4.4,237,"[Holds up to 175lbs of frozen food, Adjustable...",[Storage for up to 175 lbs. Storage space to a...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],Haier,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0198ZXHEI,,,
52333,Office Products,Magic Chef 3.5 Cf Chest Freezer White - MCCF35W3,4.2,61,"[Manual defrost with bottom front water drain,...",[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],Magic Chef,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B07QBM85V6,,,
73990,Appliances,"Hisense FC72D7AWD 7.2 cu. ft. Chest Freezer,...",1.9,19,"[Convenient Center-located handle, Low Noise o...",[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],Hisense,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B01FH0JKI0,,,


# Run dataset against model
This section runs all the reviews agains the newly trained roberta model

In [16]:
# # load libraries
# import tensorflow as tf
# #from tensorflow.keras.layers import TFSMLayer
# from transformers import TFRobertaForSequenceClassification

In [17]:

# # # Load the model layer
# # model_layer = TFSMLayer('..'+path_char+'..'+path_char+'trained_model', call_endpoint='serving_default')

# # # Wrap in a Keras model
# # class WrappedModel(tf.keras.Model):
# #     def __init__(self, model_layer):
# #         super(WrappedModel, self).__init__()
# #         self.model_layer = model_layer

# #     def call(self, inputs):
# #         return self.model_layer(inputs)
    

# # # Initialize the wrapped model
# # wrapped_model = WrappedModel(model_layer)

# #model = tf.keras.models.load_model('..'+path_char+'..'+path_char+'trained_model')

# model = TFRobertaForSequenceClassification.from_pretrained('..'+path_char+'..'+path_char+'trained_model_undersample')

In [18]:

# text_test = pd.DataFrame(["i really hate this product"], columns=["text"])

In [19]:
# text_test

In [20]:


# import re
# import nltk
# from nltk.corpus import stopwords
# from nltk.tokenize import word_tokenize
# from transformers import RobertaTokenizer
# nltk.download('stopwords')
# nltk.download('punkt')

# # define characters to remove and stop words
# regex = re.compile("[^a-zA-Z ]")
# stop_words = set(stopwords.words('english'))

# # function to clean text
# def preprocess_text(text):
#     text = text.lower()  # Convert to lowercase
#     text = regex.sub(' ', text) # Substitute everything that is not a letter with an empty string
#     words = word_tokenize(text) # tokenize text
#     words = [word for word in words if word not in stop_words]  # Remove stopwords
#     return ' '.join(words)


# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


In [21]:

# #text_test = pd.DataFrame(['this is the best product ever'], columns=["text"])
# #text_test = pd.DataFrame(["Melted after first use and did not fit."], columns=["text"])
# #text_test = pd.DataFrame(["really hated the product"], columns=["text"])
# text_test = pd.DataFrame(["loved the product"], columns=["text"])



# text_test['cleaned_text'] = text_test['text'].apply(preprocess_text)

# X_test_tokenized = tokenizer(
#     text_test['cleaned_text'].tolist(),
#     padding='max_length',
#     truncation=True,
#     max_length=128,
#     return_tensors="tf"
# )

# result = model({
#     'input_ids': X_test_tokenized['input_ids'], 
#     'attention_mask': X_test_tokenized['attention_mask']
# })


# probabilities = tf.nn.softmax(result.logits, axis=-1)

# # Output the results
# print("Logits:", result.logits)
# print("Probabilities:", probabilities)
# predicted_class = tf.argmax(probabilities, axis=-1).numpy()[0]
# print(f"Predicted class: {predicted_class}")

In [7]:
# read in entire dataset

meta_df_read_large = pd.read_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', engine='pyarrow')
appliance_df_read_large = pd.read_parquet('..'+path_char+'..'+path_char+'Appliance_file_large.parquet', engine='pyarrow')


In [13]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/laserlon/nltk_data...


True

In [14]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Initialize the SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

# function to clean text
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = regex.sub(' ', text) # Substitute everything that is not a letter with an empty string
    words = word_tokenize(text) # tokenize text
    return ' '.join(words)

# combine title and text
appliance_df_read_large['combined_text'] = [preprocess_text(title + " " + text) for title, text in zip(appliance_df_read_large['title'], appliance_df_read_large['text'])]


In [15]:
# Score all records in the data set

reviews = appliance_df_read_large['combined_text']

# Calculate sentiment scores for each review
sentiment_scores = []
for review in reviews:
    score = sia.polarity_scores(review)
    sentiment_scores.append(score)

scores = []
# Display sentiment scores
for review, score in zip(reviews, sentiment_scores):
    scores.append({'review': review, 'score':score})
    #print(f"Review: {review}\nScore: {score}\n")
scores

[{'review': 'five stars work well',
  'score': {'neg': 0.0, 'neu': 0.588, 'pos': 0.412, 'compound': 0.2732}},
 {'review': 'junk parts these rollers for our maytag neptune dryer only lasted months they simply do not hold up under normal usage the original factory rollers lasted about years i have just replaced these cheap broken rollers with factory parts this time',
  'score': {'neg': 0.071, 'neu': 0.876, 'pos': 0.053, 'compound': -0.2023}},
 {'review': 'great little machine decided to get this portable washer and dryer for my apartment it works great with three kids in the house its so convenient to have delivery was fast came sooner than delivery date',
  'score': {'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'compound': 0.8481}},
 {'review': 'it fits fits right in working great ever since the old one has been broken for years glad to find this replacement online and could save the money and time to call the repair guy',
  'score': {'neg': 0.07, 'neu': 0.698, 'pos': 0.232, 'compound': 0.8

In [16]:
# flatten the data and create a dataframe

flattened_data = []

for entry in scores:
    # Extract review text and scores
    review_text = entry['review']
    scored = entry['score']
    
    # Create a flat dictionary combining review text with scores
    flat_entry = {
        'review': review_text,
        'neg': scored['neg'],
        'neu': scored['neu'],
        'pos': scored['pos'],
        'compound': scored['compound']
    }
    flattened_data.append(flat_entry)

# Convert to DataFrame
scored_df = pd.DataFrame(flattened_data)
scored_df

Unnamed: 0,review,neg,neu,pos,compound
0,five stars work well,0.000,0.588,0.412,0.2732
1,junk parts these rollers for our maytag neptun...,0.071,0.876,0.053,-0.2023
2,great little machine decided to get this porta...,0.000,0.806,0.194,0.8481
3,it fits fits right in working great ever since...,0.070,0.698,0.232,0.8020
4,great deal for a cappuccino pitcher works well...,0.000,0.857,0.143,0.8176
...,...,...,...,...,...
2128600,great product best bang for buck great filters...,0.000,0.640,0.360,0.9246
2128601,easy to use no mold no smells easy to use,0.272,0.370,0.358,0.3400
2128602,just what i needed easy to use,0.000,0.633,0.367,0.4404
2128603,four stars it was well priced and shipped quickly,0.000,0.792,0.208,0.2732


In [17]:
combined_df = pd.concat([appliance_df_read_large[['parent_asin']], scored_df], axis=1)
combined_df

Unnamed: 0,parent_asin,review,neg,neu,pos,compound
0,B00LLM6ZFK,five stars work well,0.000,0.588,0.412,0.2732
1,B07VW4CF2C,junk parts these rollers for our maytag neptun...,0.071,0.876,0.053,-0.2023
2,B01ALBMIEI,great little machine decided to get this porta...,0.000,0.806,0.194,0.8481
3,B00DM8JUIU,it fits fits right in working great ever since...,0.070,0.698,0.232,0.8020
4,B08PNT3BLH,great deal for a cappuccino pitcher works well...,0.000,0.857,0.143,0.8176
...,...,...,...,...,...,...
2128600,B07MC5TBNS,great product best bang for buck great filters...,0.000,0.640,0.360,0.9246
2128601,B000CMHLMG,easy to use no mold no smells easy to use,0.272,0.370,0.358,0.3400
2128602,B09SHVMDY8,just what i needed easy to use,0.000,0.633,0.367,0.4404
2128603,B0C7CKLVXW,four stars it was well priced and shipped quickly,0.000,0.792,0.208,0.2732


In [18]:
# write scores to file
combined_df.to_csv('..'+path_char+'..'+path_char+'scored.csv')

# Evaluate model using pre-evaluated sentiment scores from vader model

In [3]:
# flatten the categories field into individual columns

meta_df_read_large = pd.read_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', engine='pyarrow')

# create a df of just categories
df = pd.DataFrame(meta_df_read_large['categories'])
 
# Convert the list in the categories column to strings to work with the values
df['categories'] = df['categories'].apply(lambda x: ', '.join(x))

# Split the lists into unique categories
unique_categories = set([item for sublist in df['categories'].str.split(', ') for item in sublist])

# Create new columns for each unique category
category_columns = {category: df['categories'].str.contains(rf'\b{category}\b').astype(int) for category in unique_categories}
category_df = pd.DataFrame(category_columns)

#concat the two, original and flattened
meta_flat_df = pd.concat([meta_df_read_large,category_df],axis=1)
meta_flat_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,...,Oven Parts & Accessories,Shrink Wrappers,Coffee & Espresso Machine Parts & Accessories,Ice Makers,Garbage Disposals & Compactors,Wall Ovens,Filters,Combination Microwave & Wall Ovens,Air Conditioner Parts & Accessories,Permanent Filters
0,Appliances,5304514664 Range Oven Control Board (Replaces ...,5.0,1,[],[],307.95,[{'hi_res': 'https://m.media-amazon.com/images...,[],Frigidaire,...,0,0,0,0,0,0,0,0,0,0
1,Tools & Home Improvement,GE WH01X20826 Washer Shock Damper,4.8,10,[The GE WH01X20826 is a genuine OEM Shock Damp...,[The GE WH01X20826 is a genuine OEM Shock Damp...,26.23,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'Welcome to TOMOON', 'url': 'http...",GE,...,0,0,0,0,0,0,0,0,0,0
2,,Refrigerator Ice Maker Assembly for Samsung DA...,3.0,1,[],[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],wshrapplns,...,0,0,0,1,0,0,0,0,0,0
3,Appliances,"La Cuisine 30"" Wall Mount Stainless Steel Glas...",4.0,4,[5 Layer Stainless Steel Baffle Filters. Dishw...,"[This 30"" wall mount range hood Glass features...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],LA CUISINE,...,0,0,0,0,0,0,0,0,0,0
4,Appliances,4 Pack W10311524 Air Filter Replacement For Wh...,4.5,129,"[Part Number: W10311524, Replaces: 2319308, W1...","[Specifications:, Dimension: 3 x 1.8 x 0.5 inc...",14.99,[{'hi_res': 'https://m.media-amazon.com/images...,[],Discount Parts Direct,...,0,0,0,0,0,0,1,0,0,0


In [4]:
# read Appliance data with pre-evaluated scores
combined_df = pd.read_csv('..'+path_char+'..'+path_char+'scored.csv')
combined_df


Unnamed: 0.1,Unnamed: 0,parent_asin,review,neg,neu,pos,compound
0,0,B08YWXWBZV,heat bag sealer videoid ae bc ae ca f fe e the...,0.000,0.795,0.205,0.9081
1,1,B07PVQ89Z9,miserable dud give me an old agitator unimagin...,0.087,0.890,0.022,-0.8323
2,2,B01I3NCJAE,five stars seems to work very well,0.000,0.715,0.285,0.3384
3,3,B0C9SC79TF,good product my bathroom has really reduced th...,0.000,0.861,0.139,0.4404
4,4,B071RQTP86,definitely not new part didn t work definitely...,0.141,0.738,0.121,-0.2263
...,...,...,...,...,...,...,...
2128600,2128600,B0C6K9Z29X,stopped working after one use i bought this fo...,0.137,0.801,0.062,-0.8957
2128601,2128601,B07XLLXQHN,defective this valve came to me defective,0.537,0.463,0.000,-0.7003
2128602,2128602,B09KS68V2N,did the job replace the old switch on my washe...,0.000,0.760,0.240,0.6901
2128603,2128603,B00GPXTA0C,great product love this thing,0.000,0.265,0.735,0.8519


In [10]:

# function to find top 3 and bottom 3 reviews
def find_top_bottom(Appliance_df,meta_flat_df,product_type):
    # find meta data that match product type
    selected_product = meta_flat_df[(meta_flat_df[product_type]==1)]['parent_asin'].unique()
    # find reviews that match product type
    reviews_with_match_product = Appliance_df[Appliance_df['parent_asin'].isin(selected_product)]
    # calculate the mean rating for each parent_asin or product type
    product_ratings_means = (reviews_with_match_product.groupby('parent_asin').agg(mean_compound=('compound', 'mean'), review_count=('compound', 'size')).reset_index())
    #print(product_ratings_means)
    filtered_ratings_means = product_ratings_means[product_ratings_means['review_count'] > 100].sort_values(by='mean_compound', ascending=False)
    #print(filtered_ratings_means)
    
    # find top 3 and bottom 3
    top3 = filtered_ratings_means.head(3)['parent_asin']
    bottom3 = filtered_ratings_means.tail(3)['parent_asin']
    
    # return top 3 and bottom 3 products
    return meta_flat_df[meta_flat_df['parent_asin'].isin(top3)], meta_flat_df[meta_flat_df['parent_asin'].isin(bottom3)]

# find top and bottom 3 products with best and worst scores.
top3,bottom3 = find_top_bottom(combined_df,meta_flat_df,product_type='Chest Freezers')

In [6]:
top3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,...,Oven Parts & Accessories,Shrink Wrappers,Coffee & Espresso Machine Parts & Accessories,Ice Makers,Garbage Disposals & Compactors,Wall Ovens,Filters,Combination Microwave & Wall Ovens,Air Conditioner Parts & Accessories,Permanent Filters
38029,Appliances,Kismile 3.5 Cubic Feet Chest Freezer with Remo...,4.6,1428,[【Compact Design with a Big Capacity】The space...,[],164.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Perfect Size and Versatile Freezer...,Kismile,...,0,0,0,1,0,0,0,0,0,0
87528,Appliances,"Midea MRC050S0AWW Chest Freezer, 5.0 Cubic Fee...",4.6,9279,[Mechanical Control with Adjustable Thermostat...,[],279.0,[{'hi_res': 'https://m.media-amazon.com/images...,[],Midea,...,0,0,0,1,0,0,0,0,0,0
87896,Appliances,Northair Low temperature Chest Freezer - 3.5 C...,4.3,1017,[EASY TEMPERATURE ADJUSTMENT - Intelligent dig...,[],249.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'I've never had an extra freezer be...,Northair,...,0,0,0,1,0,0,0,0,0,0


In [7]:
bottom3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,...,Oven Parts & Accessories,Shrink Wrappers,Coffee & Espresso Machine Parts & Accessories,Ice Makers,Garbage Disposals & Compactors,Wall Ovens,Filters,Combination Microwave & Wall Ovens,Air Conditioner Parts & Accessories,Permanent Filters
32957,Appliances,Arctic King 7.0 cu ft Chest Freezer in Black,4.2,1592,[],"[Arctic King 7.0 cu ft Chest Freezer, Black.]",,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Midea Chest Freezer! This Thing i...,Arctic King,...,0,0,0,1,0,0,0,0,0,0
38938,Appliances,"RCA RFRF450-AMZ, 5.1 Cubic Foot Chest, Deep Fr...",4.0,1133,[Chest Freezer with 5.1 Inch Cubic Foot Capaci...,"[Introducing this loaded up, 5.1 Inch CU FT Ch...",410.2,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'I am disgusted! But glad I read th...,RCA,...,0,0,0,1,0,0,0,0,0,0
48193,Appliances,RCA RFRF454-BLACK 5.1 Cubic Feet Chest Freezer...,4.1,837,"[Adjustable Thermostat, Power on Indicator, Re...",[RFRF454-BLACK],410.51,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'WANAI 5.0 CU.FT Freezer- Meet your...,RCA,...,0,0,0,1,0,0,0,0,0,0


In [11]:
# Imports for loading environment variables.
from dotenv import load_dotenv
import os

In [12]:
def generate_chatgpt_prompt(good_product_name, bad_product_name, product_category, good_rating):
    # Create the prompt
    prompt = f"""
    Choose a unique style for a salesperson character, such as Pirate, Shakespearean, Robot, Southern Drawl, 
    Epic Movie Trailer, Yoda, Salesperson Stereotype, Detective Noir, Sports Announcer, Valley Girl, 
    Sci-Fi Space Captain, Poetic, Elderly Grandparent, Excited Kid, Formal Business Pitch, or Haunted Ghost.
    
    Select a name for yourself that fits the chosen style. Introduce yourself in character and provide a brief, 
    engaging product pitch in one or two paragraphs.

    Product details:
    - **Good Product Name**: {good_product_name}
    - **Bad Product Name**: {bad_product_name}
    - **Product Category**: {product_category}
    - **Good Product Rating**: {good_rating}
    
    Make up good features about the good product.

    For the best-reviewed product, explain why it’s the finest choice, focusing on its rating and key feature. 
    For the worst-reviewed product, tactfully suggest why it may not be ideal. 
    Keep your response to one paragraph.
    """
    
    return prompt

# Example usage:
good_product_name = "Craftsman Blender"
bad_product_name = "Acme Blender"
product_category = "Kitchen Appliances"
rating = "4.8/5"

# Generate the prompt
chatgpt_prompt = generate_chatgpt_prompt(good_product_name, bad_product_name, product_category, rating)
print(chatgpt_prompt)



    Choose a unique style for a salesperson character, such as Pirate, Shakespearean, Robot, Southern Drawl, 
    Epic Movie Trailer, Yoda, Salesperson Stereotype, Detective Noir, Sports Announcer, Valley Girl, 
    Sci-Fi Space Captain, Poetic, Elderly Grandparent, Excited Kid, Formal Business Pitch, or Haunted Ghost.
    
    Select a name for yourself that fits the chosen style. Introduce yourself in character and provide a brief, 
    engaging product pitch in one or two paragraphs.

    Product details:
    - **Good Product Name**: Craftsman Blender
    - **Bad Product Name**: Acme Blender
    - **Product Category**: Kitchen Appliances
    - **Good Product Rating**: 4.8/5
    
    Make up good features about the good product.

    For the best-reviewed product, explain why it’s the finest choice, focusing on its rating and key feature. 
    For the worst-reviewed product, tactfully suggest why it may not be ideal. 
    Keep your response to one paragraph.
    


In [13]:
# Load environment variables.
#load_dotenv()
load_dotenv('..'+path_char+'..'+path_char+'OpenAI.env')

# Set the model name for our LLMs.
OPENAI_MODEL = "gpt-4o"
# Store the API key in a variable.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [14]:
from openai import OpenAI
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

client = OpenAI()

client.api_key = OPENAI_API_KEY

def get_chatgpt_recommendations(prompt):
    try:
        response = client.chat.completions.create( model=OPENAI_MODEL,
                                                    messages=[ {"role": "user", "content": prompt} ] ) 
        return response.choices[0].message.content 
    except Exception as e: 
        print(f"An unexpected error occurred: {e}")

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def analyze_review_sentiment(review_text):
    analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(review_text)['compound']

In [15]:
review = "I absolutely love these shoes, they are the most comfortable sneakers I have ever owned."
print("Sentiment Analysis:", analyze_review_sentiment(review))
print("Product Recommendation:", get_chatgpt_recommendations("Recommend me the best sneakers."))

Sentiment Analysis: 0.8437
Product Recommendation: Selecting the "best" sneakers can depend on your specific needs and preferences such as style, comfort, purpose, and budget. However, here are some popular and well-regarded options across different categories as of the latest trends:

### 1. **Running Shoes:**
   - **Nike Air Zoom Pegasus 39**: Known for its comfort, durability, and responsive cushioning.
   - **Adidas Ultraboost 22**: Offers a plush feel and energy return, great for both running and everyday wear.
   - **New Balance Fresh Foam 1080v11**: Provides excellent cushioning and support, ideal for long-distance running.

### 2. **Lifestyle Sneakers:**
   - **Nike Air Force 1**: A timeless classic that goes well with a variety of outfits.
   - **Adidas Stan Smith**: Known for its clean, minimalist style.
   - **Converse Chuck Taylor All Star**: Another iconic option that's versatile in casual settings.

### 3. **Basketball Shoes:**
   - **Air Jordan 36**: Offers great support

In [16]:
p_type = 'Range Hoods'
top3,bottom3 = find_top_bottom(combined_df,meta_flat_df,product_type=p_type)
prompt = generate_chatgpt_prompt(top3.iloc[0]['title'], bottom3.iloc[2]['title'], p_type, top3.iloc[0]['average_rating'])
print(get_chatgpt_recommendations(prompt))

**Character Style: Pirate**

**Name: Captain Gustav the Bold**

Ahoy there, landlubbers! I be Captain Gustav the Bold, sailin' the seven seas in search of treasures untold. Today, I've dropped anchor to share with ye the gleamin' gem of the culinary world—the COSMO QB90 36 in. Under Cabinet Range Hood! With its shiny stainless steel exterior, it's as robust as a pirate ship, fittin' snugly beneath yer cabinets like it was meant to be. With push-button controls as easy to manage as hoistin' a sail and permanent filters that'll outlast Davy Jones himself, this beauty converts from ducted to ductless with nary a complaint. Illuminate yer galley with LED lights that shine like a lighthouse on a stormy night.

Now, if I may steer yer gaze towards the treasure map of reviews, this COSMO QB90 holds a mighty 4.4 stars, a true testimony to its prowess in the kitchen seas. It boasts versatile ventilation options and handy LED lights, perfect for navigating the murky depths of cookin' with ease. 

In [17]:

# function to generate highly-rated and poorly-rated products and sales pitch
def get_pitch(p_type):
    top3,bottom3 = find_top_bottom(combined_df,meta_flat_df,product_type=p_type)
    prompt = generate_chatgpt_prompt(top3.iloc[0]['title'], bottom3.iloc[2]['title'], p_type, top3.iloc[0]['average_rating'])
    pitch = get_chatgpt_recommendations(prompt)
    
    highly_rated = "\n\n".join(top3['title'].tolist())
    poorly_rated = "\n\n".join(bottom3['title'].tolist())
    
    return highly_rated, poorly_rated, pitch

# test the Get_pitch Function
a,b,c = get_pitch('Wall Ovens')

print (a)
print (b)
print (c)

IndexError: single positional indexer is out-of-bounds

In [18]:
import gradio as gr

# Product types dropdown list
product_types = [
    'All-in-One Combination Washers & Dryers', 'Dishwashers', 'Slide-In Ranges', 
    'Upright Freezers', 'Kegerators', 'Freezers', 'Countertop Dishwashers', 
    'Ranges', 'Single Wall Ovens', 'Laundry Appliances', 'Portable Dryers', 
    'Range Hoods', 'Double Wall Ovens', 'Cooktops', 'Ice Makers', 
    'Ovens & Cooktops', 'Combination Microwave & Wall Ovens', 
    'Freestanding Ranges', 'Portable Dishwashers', 'Chest Freezers', 
    'Refrigerators', 'Beverage Refrigerators', 'Washers', 'Dryers', 
    'Built-In Dishwashers', 'Washers & Dryers', 'Appliances', 'Portable Washers', 
    'Stacked Washer & Dryer Units', 'Freezers & Ice Makers', 'Wall Ovens'
]

# We used https://color.adobe.com/create/color-wheel for color selection in the code below

# Creating the Gradio interface with the layout and color scheme
with gr.Blocks(css = """
    .gradio-container {background-color: #2470DC;}
    .gradio-container .label {font-weight: bold; color: #1B2A35 !important;}
""") as demo:
    gr.Markdown("# Product Recommendations and Sales Pitch")

    with gr.Row():
        product_dropdown = gr.Dropdown(label="Select Product Type", choices=product_types)
    
    with gr.Row():
        highly_rated_textbox = gr.Textbox(label="Highly Rated Products", lines=4)
        poorly_rated_textbox = gr.Textbox(label="Poorly Rated Products", lines=4)
        
    summary_textbox = gr.Textbox(label="Sales Pitch", lines=6)

    product_dropdown.change(
        fn=get_pitch,
        inputs=product_dropdown,
        outputs=[highly_rated_textbox, poorly_rated_textbox, summary_textbox]
    )

demo.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


