# Packages 

In [6]:
import pandas as pd # pandas package
import numpy as np # numpy package
from pathlib import Path # path package
import re #regex package

# Load the dataset

In [7]:
# Read the merged reviews CSV (path is relative to this notebook) into `df`
df = pd.read_csv(
    filepath_or_buffer=Path(r"../Data/Merged Data/merged_data.csv"),
)

# Display five randomly chosen rows as a quick sanity check
df.sample(n=5)

Unnamed: 0,review_title,review_text,review_star,date,author,page,product
15716,If you like to watch content from your IPhone ...,I have owned IPhones for years. I wish I had p...,5.0 out of 5 stars,"Reviewed in the United States on January 17, 2019",NY Consumer,72,Apple TV
9441,Will never regret it buying this.,really good product easy to use but i like the...,4.0 out of 5 stars,"Reviewed in the United States on January 14, 2020",Yessenia Luna Valdez,445,Google ChromeCast
9923,Great option for non smart TVs,It works great 99% of the time there are only ...,4.0 out of 5 stars,"Reviewed in the United States on March 1, 2019",DJDre,493,Google ChromeCast
15289,Fantastic!,Always amazing products from Apple. Just FYI t...,5.0 out of 5 stars,"Reviewed in the United States on February 25, ...",Christopher Vaughn,29,Apple TV
11401,ROKU DO BE THE BEST,"The best purchase I’ve made, its easy to use a...",5.0 out of 5 stars,"Reviewed in the United States on May 21, 2022",Anna,141,Roku Remote


# Pre-cleaning 

1. Fix `review_star` so that it holds the numeric rating (e.g. `5.0`) instead of text such as `5.0 out of 5 stars`.
2. Fix `date` by parsing the review date out of the text and storing it as an `MM.DD.YYYY` string.

## Cleaning `review_star`

In [8]:
## Extract the leading numeric rating (e.g. "5.0" from "5.0 out of 5 stars")
## and convert it to float type.
# The vectorised `.str.extract` replaces a per-row `re.findall(...)[0]`, which
# raised TypeError on missing values and IndexError on any string that did not
# start with a rating. Casting to str first also lets this cell be re-run
# after the column has already been converted to float.
# Values without a leading number (including missing ones) become NaN.
df['review_star'] = (
    df['review_star']
    .astype(str)
    .str.extract(r'^([0-9]+(?:\.[0-9]+)?)', expand=False)
    .astype(float)
)

# sample only 5 rows
df['review_star'].sample(5)


13371    5.0
4902     5.0
7963     5.0
3920     5.0
10023    5.0
Name: review_star, dtype: float64

## Cleaning `date`

In [9]:
# Parse the review date out of strings such as
# "Reviewed in the United States on January 17, 2019" and store it as an
# 'MM.DD.YYYY' string. Note that `.dt.strftime` returns text, so the column
# ends up with dtype object rather than datetime64.
#
# The greedy prefix strip removes everything up to the last standalone word
# "on". An "on " that merely ends an earlier word (e.g. "... Federation on
# March 1, 2019") is therefore not mistaken for the separator. Missing values
# pass through as NaN instead of raising inside a per-row lambda, while text
# that is not a parseable date still makes `pd.to_datetime` raise.
df['date'] = (
    pd.to_datetime(df['date'].str.replace(r'^.*\bon\s+', '', regex=True))
    .dt.strftime('%m.%d.%Y')
)

# sample only 5 rows
df['date'].sample(5)

6218     05.22.2019
4055     01.10.2022
12405    07.27.2021
16872    10.01.2019
7556     12.16.2019
Name: date, dtype: object

## Re-examine data now

In [10]:
# Show ten random rows to check how `review_star` and `date` look after cleaning
df.sample(n=10)

Unnamed: 0,review_title,review_text,review_star,date,author,page,product
1760,Bought 2,They work good,5.0,01.22.2022,Kswis76,177,Amazon Fire TV Stick
4648,Nothing,It’s nice. I love it,5.0,11.17.2021,Amazon Customer,465,Amazon Fire TV Stick
9915,<U+0001F9D0><U+0001F9D0><U+0001F9D0> where's m...,I need a remote control for it I try mouse not...,3.0,03.10.2021,Alain B.,492,Google ChromeCast
3407,It's hooked up on 55 inch TV LG nino cell 2021...,I can't take a picture,5.0,03.13.2022,Ronnie cleeton,341,Amazon Fire TV Stick
2045,Vast improvement,The voice recognition with Alexa is a big plus...,5.0,12.09.2021,Van C. Seagraves,205,Amazon Fire TV Stick
12871,Meets my need exactly!,This Roku stick package had everything needed ...,5.0,12.05.2019,RAM,288,Roku Remote
16859,Intuitive & effective,The best of all the devices I’ve tested.,5.0,07.08.2019,Jeff Frederes,186,Apple TV
4296,Works great,I love the direct Netflix button,5.0,03.20.2022,Caitlyn Mckeen,430,Amazon Fire TV Stick
6248,Ok,Does need a little better installation instruc...,5.0,11.24.2019,Duque',125,Google ChromeCast
16329,Easy to use,Love this product took a moment to set up beca...,5.0,02.14.2020,kelseylee webb,133,Apple TV
