In [2]:
#Load and Preview Data
import csv
from pathlib import Path

import pandas as pd

# Load the Electronics dataset
file_path = Path("../data/amazon_reviews/amazon_reviews_us_Electronics_v1_00.tsv")

# Read a limited number of rows (adjust as needed).
# The dataset's published format description says the files are tab-separated
# with no quote or escape characters, so quote handling is switched off.
# With pandas' default quoting, a field that opens with '"' is parsed as a
# quoted string and can swallow the tabs/newlines that follow it, merging or
# corrupting records (which on_bad_lines='skip' would then hide).
df = pd.read_csv(
    file_path,
    sep='\t',
    nrows=100000,
    quoting=csv.QUOTE_NONE,
    on_bad_lines='skip',  # kept as a best-effort guard against malformed rows
    low_memory=False,     # parse in one pass so column dtypes are inferred consistently
)

print(f"✅ Loaded {len(df)} rows")
df.head()


✅ Loaded 100000 rows


Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,41409413,R2MTG1GCZLR2DK,B00428R89M,112201306,yoomall 5M Antenna WIFI RP-SMA Female to Male ...,Electronics,5,0,0,N,Y,Five Stars,As described.,2015-08-31
1,US,49668221,R2HBOEM8LE9928,B000068O48,734576678,"Hosa GPM-103 3.5mm TRS to 1/4"" TRS Adaptor",Electronics,5,0,0,N,Y,It works as advertising.,It works as advertising.,2015-08-31
2,US,12338275,R1P4RW1R9FDPEE,B000GGKOG8,614448099,Channel Master Titan 2 Antenna Preamplifier,Electronics,5,1,1,N,Y,Five Stars,Works pissa,2015-08-31
3,US,38487968,R1EBPM82ENI67M,B000NU4OTA,72265257,LIMTECH Wall charger + USB Hotsync & Charging ...,Electronics,1,0,0,N,Y,One Star,Did not work at all.,2015-08-31
4,US,23732619,R372S58V6D11AT,B00JOQIO6S,308169188,Skullcandy Air Raid Portable Bluetooth Speaker,Electronics,5,1,1,N,Y,Overall pleased with the item,Works well. Bass is somewhat lacking but is pr...,2015-08-31


In [3]:
# Select relevant columns
columns_to_keep = [
    'product_id',
    'product_title',
    'product_category',
    'star_rating',
    'review_headline',
    'review_body',
    'review_date',
]

# In one pass: keep the chosen columns, drop rows with a missing value in any
# of them, then drop rows whose review text is empty once whitespace is stripped.
df = (
    df[columns_to_keep]
    .dropna()
    .loc[lambda reviews: reviews['review_body'].str.strip() != '']
)

df.head()


Unnamed: 0,product_id,product_title,product_category,star_rating,review_headline,review_body,review_date
0,B00428R89M,yoomall 5M Antenna WIFI RP-SMA Female to Male ...,Electronics,5,Five Stars,As described.,2015-08-31
1,B000068O48,"Hosa GPM-103 3.5mm TRS to 1/4"" TRS Adaptor",Electronics,5,It works as advertising.,It works as advertising.,2015-08-31
2,B000GGKOG8,Channel Master Titan 2 Antenna Preamplifier,Electronics,5,Five Stars,Works pissa,2015-08-31
3,B000NU4OTA,LIMTECH Wall charger + USB Hotsync & Charging ...,Electronics,1,One Star,Did not work at all.,2015-08-31
4,B00JOQIO6S,Skullcandy Air Raid Portable Bluetooth Speaker,Electronics,5,Overall pleased with the item,Works well. Bass is somewhat lacking but is pr...,2015-08-31


In [4]:
#Map Star Rating to Sentiment
def map_rating_to_sentiment(star):
    """Translate a star rating into a coarse sentiment label.

    Parameters
    ----------
    star : int or float
        Star rating of a single review.

    Returns
    -------
    str or None
        'positive' for ratings of 4 and above, 'neutral' for exactly 3,
        'negative' for anything lower, and None when the rating is missing
        (None, NaN or pd.NA).
    """
    # A missing rating carries no sentiment. Without this guard NaN fails both
    # comparisons below and is silently labelled 'negative', while None and
    # pd.NA raise a TypeError.
    if pd.isna(star):
        return None
    if star >= 4:
        return 'positive'
    elif star == 3:
        return 'neutral'
    else:
        return 'negative'

# Label every review by passing its star rating through the mapper.
star_ratings = df['star_rating']
df['sentiment'] = star_ratings.apply(map_rating_to_sentiment)

# Preview the ratings next to the labels derived from them.
df[['star_rating', 'sentiment']].head()


Unnamed: 0,star_rating,sentiment
0,5,positive
1,5,positive
2,5,positive
3,1,negative
4,5,positive


In [5]:
# Save the cleaned dataset for the sentiment and basket analyses
output_path = Path("../data/amazon_reviews/amazon_cleaned_electronics.csv")

# index=False keeps the DataFrame index out of the written CSV.
df.to_csv(path_or_buf=output_path, index=False)
print(f"✅ Cleaned data saved to {output_path}")


✅ Cleaned data saved to ../data/amazon_reviews/amazon_cleaned_electronics.csv
