# Data Prep
This first section is used for reading in raw data files and creating subsets of data that can be used for development

In [2]:
import pandas as pd
import json
import random
import ast
import requests
import io
import os
import platform

In [11]:
# set folder path to data when using Linux or Windows
# if os.name == 'posix' or platform.system() == 'Linux':  # For Ubuntu/Linux
#     folder_path = "../Project Main/data/"
# elif os.name == 'nt' or platform.system() == 'Windows':  # For Windows
#     folder_path = "..\\Project Main\\data\\"

if os.name == 'posix' or platform.system() == 'Linux':  # For Ubuntu/Linux
    path_char = "/"
elif os.name == 'nt' or platform.system() == 'Windows':  # For Windows
    path_char = "\\"

In [12]:
# read in jsonl files for all Appliances reviews
data = []
file = '..'+path_char+'..'+path_char+'Appliances.jsonl'
with open(file, 'r') as fp:
    for line in fp:
        data.append(json.loads(line.strip()))

In [13]:
# read in jsonl files for meta_Appliances for all products
meta_data = []
file = '..'+path_char+'..'+path_char+'meta_Appliances.jsonl'
with open(file, 'r') as fp:
    for line in fp:
        meta_data.append(json.loads(line.strip()))

In [14]:
# look at size of datasets
print("data length: ", len(data))
print("meta data lenth: ", len(meta_data))

data length:  2128605
meta data lenth:  94327


In [15]:
# define function to split data into chunks
def split_list(data, chunk_size):
    # Shuffle the data randomly
    random.shuffle(data)
    
    # Split the list into chunks of specified size
    return [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

# spit data into smaller chunks
smaller_lists_appliances = split_list(data, 200000)
smaller_lists_meta = split_list(meta_data, 30000)


In [17]:
# save smaller appliance files
for i, chunk in enumerate(smaller_lists_appliances):
    # Define the filename for each chunk (e.g., "list_1.txt", "list_2.txt", etc.)
    filename = 'data'+path_char+f'Appliances_{i+1}.txt'
    with open(filename, 'w') as file:
        # Write each item from the list to a new line in the file
        file.write('\n'.join(map(str, chunk)))

In [18]:
# save smaller meta files 
for i, chunk in enumerate(smaller_lists_meta):
    # Define the filename for each chunk (e.g., "list_1.txt", "list_2.txt", etc.)
    filename = 'data'+path_char+f'meta_{i+1}.txt'
    with open(filename, 'w') as file:
        # Write each item from the list to a new line in the file
        file.write('\n'.join(map(str, chunk)))

In [None]:
# # read in smaller Appliance and meta files
# with open('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliances_1.txt', 'r') as file:
#     appliances = file.read().splitlines()  
# with open('..'+path_char+'Project Main'+path_char+'data'+'path_char'+path_char+'meta_4.txt', 'r') as file:
#     meta = file.read().splitlines()  

# # put Appliance and meta data into data frames
# parsed_data = [ast.literal_eval(item) for item in appliances]
# df_appliance = pd.DataFrame(parsed_data)

# parsed_data = [ast.literal_eval(item) for item in meta]
# df_meta = pd.DataFrame(parsed_data)


In [19]:
# read in all files and create a dataframe, then save to parquet file
df_appliances = []
df_metas = []

#combine all appliance files
for i in range(1, 11 + 1):
    print(f"Appliance file: {i}")
    filename = '..'+path_char+'Project Main'+path_char+'data'+path_char+f'Appliances_{i}.txt'
    with open(filename, 'r') as file:
        appliances = file.read().splitlines()  

    parsed_data = [ast.literal_eval(item) for item in appliances]
    df_appliance = pd.DataFrame(parsed_data)

    df_appliances.append(df_appliance)

# combine all meta files
for i in range(1, 4 + 1):
    print(f"meta file: {i}")
    filename = '..'+path_char+'Project Main'+path_char+'data'+path_char+f'meta_{i}.txt'
    with open(filename, 'r') as file:
        meta = file.read().splitlines()  

    parsed_data = [ast.literal_eval(item) for item in meta]
    df_meta = pd.DataFrame(parsed_data)

    df_metas.append(df_meta)

combined_df_appliance = pd.concat(df_appliances, ignore_index=True)
combined_df_meta = pd.concat(df_metas, ignore_index=True)

#write large data frames to parquet files
combined_df_appliance.to_parquet('..'+path_char+'..'+path_char+'Appliance_file_large.parquet',compression='Brotli', engine='pyarrow')
combined_df_meta.to_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', compression='Brotli',engine='pyarrow')



Appliance file: 1
Appliance file: 2
Appliance file: 3
Appliance file: 4
Appliance file: 5
Appliance file: 6
Appliance file: 7
Appliance file: 8
Appliance file: 9
Appliance file: 10
Appliance file: 11
meta file: 1
meta file: 2
meta file: 3
meta file: 4


In [None]:
# this code can be used to pull the combined large datafiles from google drive.  The file format is parquet.

import gdown  # for downloading from google drive

# pull large Appliance files from google drive
file_id = '17-NNDAjDtI-PhQJ2P8U-O-l9sZhI71rp'
download_url = f'https://drive.google.com/uc?id={file_id}'
gdown.download(download_url, 'downloaded_file.parquet', quiet=False)
Appliance_file_large = pd.read_parquet('downloaded_file.parquet')

# Display the dataframe
Appliance_file_large.head()

# pull large meta file from google drive
file_id = '19F34NIW3QVfzd1g7BuhR6DBiFHbSVRbH'
download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
response = requests.get(download_url)
response.raise_for_status()  
file_content = io.BytesIO(response.content)
meta_file_large = pd.read_parquet(file_content)

# Display the dataframe
meta_file_large.head()

In [20]:
# read in large dataset from parquet to confirm correct
meta_df_read_large = pd.read_parquet('..'+path_char+'..'+path_char+'meta_file_large.parquet', engine='pyarrow')
meta_df_read_large

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Appliances,5304514664 Range Oven Control Board (Replaces ...,5.0,1,[],[],307.95,[{'hi_res': 'https://m.media-amazon.com/images...,[],Frigidaire,[],"{'': None, 'AC Adapter Current': None, 'Access...",B07B3M5KRX,,,
1,Tools & Home Improvement,GE WH01X20826 Washer Shock Damper,4.8,10,[The GE WH01X20826 is a genuine OEM Shock Damp...,[The GE WH01X20826 is a genuine OEM Shock Damp...,26.23,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'Welcome to TOMOON', 'url': 'http...",GE,"[Appliances, Parts & Accessories]","{'': None, 'AC Adapter Current': None, 'Access...",B01M7TMMTL,,,
2,,Refrigerator Ice Maker Assembly for Samsung DA...,3.0,1,[],[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],wshrapplns,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B07PNVNK7X,,,
3,Appliances,"La Cuisine 30"" Wall Mount Stainless Steel Glas...",4.0,4,[5 Layer Stainless Steel Baffle Filters. Dishw...,"[This 30"" wall mount range hood Glass features...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],LA CUISINE,"[Appliances, Ranges, Ovens & Cooktops, Range H...","{'': None, 'AC Adapter Current': None, 'Access...",B01C7I9Z1W,,,
4,Appliances,4 Pack W10311524 Air Filter Replacement For Wh...,4.5,129,"[Part Number: W10311524, Replaces: 2319308, W1...","[Specifications:, Dimension: 3 x 1.8 x 0.5 inc...",14.99,[{'hi_res': 'https://m.media-amazon.com/images...,[],Discount Parts Direct,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B07QLB1CYC,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94322,Tools & Home Improvement,GE Part# WR30X10150 Door Mounted Icemaker (OEM),4.0,22,"[This is an O.E.M. Authorized part, Fits with ...",[This is an O.E.M. Authorized part. Fits with ...,139.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': '241798224 Ice Maker for Electrolux...,General Electric,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B00ZJRD7RW,,,
94323,Tools & Home Improvement,312959 Dryer Belt Replacement for Maytag LDE73...,4.1,4,[UpStart Components Replacement 312959 Dryer B...,[UpStart Components Replacement 312959 Dryer B...,7.99,[{'hi_res': 'https://m.media-amazon.com/images...,[],UpStart Components,"[Appliances, Parts & Accessories]","{'': None, 'AC Adapter Current': None, 'Access...",B07S8724TT,,,
94324,Tools & Home Improvement,Whirlpool W10177965 Surface Element for Range,2.0,1,[Works with the following models: Whirlpool WF...,"[From the Manufacturer, Whirlpool W10177965 Su...",,"[{'hi_res': None, 'large': 'https://m.media-am...",[],Whirlpool,"[Appliances, Parts & Accessories, Range Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B005ARBA3E,,,
94325,Tools & Home Improvement,"Cheliq Egg Holder with Lid, BPA Free and Dishw...",4.0,32,"[Made of food grade PP material, Transparent e...","[Cheliq Egg Holder with Lid, BPA Free and Dish...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],CHELIQ,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B09B2ZJTHH,,,


In [22]:
#randomly select a subset of appliance data that can be used for development.  A larger dataset will be used once model is developed

#choose random subset
subset_appliances = split_list(data, 50000)
subset_appliances_df = pd.DataFrame(subset_appliances[0])

# find unique asin's from subset of data
asin_unique= subset_appliances_df['parent_asin'].unique()
subset_meta_df = meta_df_read_large.loc[meta_df_read_large['parent_asin'].isin(asin_unique)]

# save subset to parquet files
subset_meta_df.to_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'meta_file_subset.parquet', compression='Brotli',engine='pyarrow')
subset_appliances_df.to_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliance_file_subset.parquet', compression='Brotli',engine='pyarrow')
subset_meta_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
14,Tools & Home Improvement,EXCELPURE GXRTDR Inline Water Filter Replaceme...,4.7,146,[❄️EXCEL.QUALITY - NSF Certified Carbon Filter...,[],31.99,[{'hi_res': 'https://m.media-amazon.com/images...,"[{'title': 'WD-INL Installation Instructions',...",EXCELPURE,"[Appliances, Parts & Accessories, Refrigerator...","{'': None, 'AC Adapter Current': None, 'Access...",B07MCH6W31,,,
23,Amazon Home,TrulyAll Disposable Single Serve Pour-Over Cof...,4.2,44,"[Quickly brew your favorite drink anytime, any...",[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],TrulyAll,"[Small Appliance Parts & Accessories, Coffee &...","{'': None, 'AC Adapter Current': None, 'Access...",B01IBXADOK,,,
27,Appliances,"LG AEH36821908 , BLACK",4.5,12,"[Country Of Origin: United States, Model Numbe...",[This is a Genuine replacement LG part. Please...,35.79,[{'hi_res': 'https://m.media-amazon.com/images...,[],LG,"[Appliances, Parts & Accessories]","{'': None, 'AC Adapter Current': None, 'Access...",B00I8VIYBK,,,
33,Tools & Home Improvement,Upgraded 5301EL1001A Dryer Heating Element Ass...,4.0,282,[🛒 PREMIUM METAL MATERIAL -- The 5301EL1001A d...,[],52.77,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': '5301EL1001J Dryer Heating Element'...,Sikawai,"[Appliances, Parts & Accessories, Dryer Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B08C7H18YP,,,
35,Tools & Home Improvement,Supco DE902M Dryer Door Latch Kit Replaces Whi...,3.8,132,[DRYER DOOR LATCH - This premium quality part ...,"[Product Description, Supco DE902M Dryer Door ...",6.98,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Supplying Demand 279570B Product 3...,Supco,"[Appliances, Parts & Accessories, Dryer Parts ...","{'': None, 'AC Adapter Current': None, 'Access...",B00GS9DPJA,,,


# Reviews prep for model
This section is used to prep the data from evaluation in the model

In [23]:
# load libraries
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')

# define characters to remove and stop words
regex = re.compile("[^a-zA-Z ]")
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /home/mike/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/mike/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [24]:
# function to process reviews
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = regex.sub(' ', text) # Substitute everything that is not a letter with an empty string
    words = word_tokenize(text) # tokenize text
    words = [word for word in words if word not in stop_words]  # Remove stopwords
    return words


# read in subset of data files
meta_subset_df = pd.read_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'meta_file_subset.parquet', engine='pyarrow')
Appliance_subset_df = pd.read_parquet('..'+path_char+'Project Main'+path_char+'data'+path_char+'Appliance_file_subset.parquet', engine='pyarrow')


# Apply the preprocessing function to your text column
Appliance_subset_df['text_clean'] = Appliance_subset_df['text'].apply(preprocess_text)

In [25]:
# view raw vs processed reviews
Appliance_subset_df[['text','text_clean']]

Unnamed: 0,text,text_clean
0,Quality Product.,"[quality, product]"
1,Works as expected.,"[works, expected]"
2,These filters are a perfect fit for my Vornado...,"[filters, perfect, fit, vornado, looks, like, ..."
3,This filter was just as good as the original o...,"[filter, good, original, one, came, coffee, ma..."
4,Liked everything about k cups. These are much ...,"[liked, everything, k, cups, much, easier, cle..."
...,...,...
49995,It fits and works perfectly on my fridge with ...,"[fits, works, perfectly, fridge, added, conven..."
49996,I had on of the gold mesh/plastic framed filte...,"[gold, mesh, plastic, framed, filters, years, ..."
49997,Perfect to replace cheap aluminum ones on our ...,"[perfect, replace, cheap, aluminum, ones, euro..."
49998,nice,[nice]


# flatten meta data
This section flattens the meta data.  Categores is a list of all products and needs to be flattened in order to use.

In [27]:
# define a list of products to be used as part of the app
my_list = ['Dishwashers','Freezers','Ranges','Ice Makers','Cooktops','Refrigerators','Beverage Refrigerators','Dryers','Washers']

# remove parts and accessories from data
mask = ~meta_subset_df.apply(lambda col: col.map(lambda x: 'Parts & Accessories' in str(x))).any(axis=1)
filtered_meta_df = meta_subset_df[mask]

# select specific items
mask = filtered_meta_df.apply(lambda col: col.map(lambda x: any(item in str(x) for item in my_list))).any(axis=1)

# create a dataframe of filtered dataset
filtered_meta_df = filtered_meta_df[mask]
filtered_meta_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
111,Appliances,Igloo Premium Self-Cleaning Countertop Ice Mak...,4.4,2410,[Note : Please check the dimension and item we...,[Say hello the Igloo 26-pound countertop ice m...,183.09,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Our Point of View on Igloo Counter...,Igloo,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0BXHCDWM8,,,
116,Appliances,Whirlpool WDT720PADM Built-in Stainless Dishwa...,2.7,8,[],[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],Whirlpool,"[Appliances, Dishwashers, Built-In Dishwashers]","{'': None, 'AC Adapter Current': None, 'Access...",B00NVKRTIW,,,
362,Amazon Home,GE Profile : PHB925SPSS 30 Freestanding Induct...,2.3,11,[],[GE Consumer and Industrial spans the globe as...,,"[{'hi_res': None, 'large': 'https://m.media-am...",[],GE,"[Appliances, Ranges, Ovens & Cooktops, Ranges,...","{'': None, 'AC Adapter Current': None, 'Access...",B002KGKNYU,,,
642,Appliances,"QUNYI Mini Washing Machine, Portable Foldable ...",2.0,27,[],[🧦QUNYI Mini Washing Machine is the best assis...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],QUNYI,[],"{'': None, 'AC Adapter Current': None, 'Access...",B09KB8SMJ6,,,
723,Industrial & Scientific,VIVOHOME Electric Portable Compact Countertop ...,4.4,8025,[LIGHTWEIGHT & PORTABLE - Lightweight feature ...,[],139.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'HONEST Review of VIVOHOME Electric...,VIVOHOME,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0BLC6GRLX,,,


In [28]:
# flatten the categories field into individual columns

# create a df of just categories
df = pd.DataFrame(filtered_meta_df['categories'])
 
# Convert the list in the categories column to strings to work with the values
df['categories'] = df['categories'].apply(lambda x: ', '.join(x))

# Split the lists into unique categories
unique_categories = set([item for sublist in df['categories'].str.split(', ') for item in sublist])

# Create new columns for each unique category
for category in unique_categories:
    df[category] = df['categories'].apply(lambda x: 1 if category in x else 0)

#concat the two, original and flattened
meta_subset_flat_df = pd.concat([filtered_meta_df,df],axis=1)
meta_subset_flat_df.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,...,Beverage Refrigerators,Washers,Dryers,Built-In Dishwashers,Washers & Dryers,Appliances,Portable Washers,Stacked Washer & Dryer Units,Freezers & Ice Makers,Wall Ovens
111,Appliances,Igloo Premium Self-Cleaning Countertop Ice Mak...,4.4,2410,[Note : Please check the dimension and item we...,[Say hello the Igloo 26-pound countertop ice m...,183.09,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'Our Point of View on Igloo Counter...,Igloo,...,0,0,0,0,0,1,0,0,1,0
116,Appliances,Whirlpool WDT720PADM Built-in Stainless Dishwa...,2.7,8,[],[],,[{'hi_res': 'https://m.media-amazon.com/images...,[],Whirlpool,...,0,0,0,1,0,1,0,0,0,0
362,Amazon Home,GE Profile : PHB925SPSS 30 Freestanding Induct...,2.3,11,[],[GE Consumer and Industrial spans the globe as...,,"[{'hi_res': None, 'large': 'https://m.media-am...",[],GE,...,0,0,0,0,0,1,0,0,0,0
642,Appliances,"QUNYI Mini Washing Machine, Portable Foldable ...",2.0,27,[],[🧦QUNYI Mini Washing Machine is the best assis...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],QUNYI,...,0,0,0,0,0,0,0,0,0,0
723,Industrial & Scientific,VIVOHOME Electric Portable Compact Countertop ...,4.4,8025,[LIGHTWEIGHT & PORTABLE - Lightweight feature ...,[],139.99,[{'hi_res': 'https://m.media-amazon.com/images...,[{'title': 'HONEST Review of VIVOHOME Electric...,VIVOHOME,...,0,0,0,0,0,1,0,0,1,0


In [29]:
# look at columns in flattened meta dataframe
meta_subset_flat_df.columns

Index(['main_category', 'title', 'average_rating', 'rating_number', 'features',
       'description', 'price', 'images', 'videos', 'store', 'categories',
       'details', 'parent_asin', 'bought_together', 'subtitle', 'author',
       'categories', '', 'All-in-One Combination Washers & Dryers',
       'Dishwashers', 'Slide-In Ranges', 'Upright Freezers', 'Kegerators',
       'Freezers', 'Countertop Dishwashers', 'Ranges', 'Single Wall Ovens',
       'Laundry Appliances', 'Portable Dryers', 'Range Hoods',
       'Double Wall Ovens', 'Cooktops', 'Ice Makers', 'Ovens & Cooktops',
       'Combination Microwave & Wall Ovens', 'Freestanding Ranges',
       'Portable Dishwashers', 'Chest Freezers', 'Refrigerators',
       'Beverage Refrigerators', 'Washers', 'Dryers', 'Built-In Dishwashers',
       'Washers & Dryers', 'Appliances', 'Portable Washers',
       'Stacked Washer & Dryer Units', 'Freezers & Ice Makers', 'Wall Ovens'],
      dtype='object')

In [34]:
# function to find top 3 and bottom 3 reviews
def find_top_bottom(Appliance_df,meta_flat_df,product_type):
    # find meta data that match product type
    selected_product = meta_flat_df[(meta_flat_df[product_type]==1)]['parent_asin'].unique()
    # find reviews that match product type
    reviews_with_match_product = Appliance_df[Appliance_df['parent_asin'].isin(selected_product)]
    # calculate the mean rating for each parent_asin or product type
    product_ratings_means = reviews_with_match_product.groupby('parent_asin')['rating'].mean().sort_values(ascending=False).reset_index()
    # find top 3 and bottom 3
    top3 = product_ratings_means[:].head(3)['parent_asin']
    bottom3 = product_ratings_means[:].tail(3)['parent_asin']
    # return top 3 and bottom 3 products
    return meta_subset_df[meta_subset_df['parent_asin'].isin(top3)], meta_subset_df[meta_subset_df['parent_asin'].isin(bottom3)]


top3,bottom3 = find_top_bottom(Appliance_subset_df,meta_subset_flat_df,product_type='Chest Freezers')

In [31]:
# view top 3
top3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
3724,Appliances,Haier HF50CM23NW 5.0 cu. ft. Capacity Chest Fr...,4.4,237,"[Holds up to 175lbs of frozen food, Adjustable...",[Storage for up to 175 lbs. Storage space to a...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],Haier,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0198ZXHEI,,,
6699,Appliances,Commercial Top Chest Freezer - KITMA 9.6 Cu. F...,4.3,21,[KITMA CHEST FREEZER],[Kitma 9.6 Cu.Ft Chest Freezer],,[{'hi_res': 'https://m.media-amazon.com/images...,[],KITMA,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B07GLKMY7G,,,
80093,Cell Phones & Accessories,Headphone Adapter,4.5,27,[Soft 2HB graphite core gives you a superb smo...,"[Miukada half pencil, has been pre-sharpened t...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],Miukada,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B07GWWQK2M,,,


In [32]:
# view bottom 3
bottom3

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
55056,Tools & Home Improvement,"ZUYIYI Mini Chest Freezer Cover, Outdoor Deep ...",4.7,10,[✅【HIGH QUALITY】Made of 420D Oxford Fabric wit...,[],20.99,[{'hi_res': 'https://m.media-amazon.com/images...,[],ZUYIYI,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B0BCKGRW6Y,,,
68576,Appliances,"COSTWAY 105 Quart Car Travel Freezer, Portable...",3.2,7,[🐟【Large Capacity & Perfect Portability: 】With...,"[Description:, This portable vehicle refrigera...",,[{'hi_res': 'https://m.media-amazon.com/images...,[],COSTWAY,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B07VPDJQ5Y,,,
87504,,Magic Chef MCCF35WBX 3.5 Cubic Feet Chest Freezer,3.3,8,"[3.5 cubic feet capacity, Adjustable thermosta...",[3.5 Cubic Feet Capacity; Adjustable Thermosta...,,[{'hi_res': 'https://m.media-amazon.com/images...,[],Magic Chef,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'': None, 'AC Adapter Current': None, 'Access...",B007ZT2XU4,,,


# Run dataset against model
This section runs all the reviews agains the newly trained roberta model