### Imports

Installiert die benötigten Pakete mit `pip install ...`.

In [1]:
import warnings
warnings.filterwarnings('ignore')

#General Data/Plotting
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from tqdm.auto import tqdm 
import random
import re

# Add more later if necessary

In [2]:
# Load the review dataset; lines=True reads the file as one JSON object per line.
df = pd.read_json(
    path_or_buf='../Datasets/Cell_Phones_and_Accessories_5.json',
    lines=True,
)

In [3]:
# Rename columns to friendlier names.
df = df.rename(columns={"reviewText": "Review", "overall": "Rating", "summary": "Title"})

# Drop columns that are not needed below (no inplace=True: plain reassignment
# keeps the data lineage explicit).
df = df.drop(columns=['reviewerID', 'asin', 'reviewerName', 'unixReviewTime', 'reviewTime'])

# Split the two-element 'helpful' entry into two new columns.
# NOTE(review): presumably [helpful votes, total votes] -- confirm against the dataset description.
df['helpful_0'] = df['helpful'].apply(lambda votes: votes[0])
df['helpful_1'] = df['helpful'].apply(lambda votes: votes[1])

# Keep only reviews whose 'helpful_1' value is at least 2.
# The explicit .copy() makes the filtered frame independent, so the column
# assignments below do not write into a slice of the unfiltered frame
# (that would raise SettingWithCopyWarning, which the global warnings filter hides).
df = df[df['helpful_1'] >= 2].copy()

# Helpfulness ratio, plus the same value as a whole-number percentage (rounded down).
df['helpful_ratio'] = df['helpful_0'] / df['helpful_1']
# Integer arithmetic avoids float truncation errors such as int(0.29 * 100) == 28.
# 'helpful_1' is >= 2 here, so the division cannot hit zero.
df['ratio_percent'] = (df['helpful_0'] * 100) // df['helpful_1']

# Number of whitespace-separated tokens in each review (vectorized; missing
# reviews yield NaN instead of raising).
df['review_len'] = df['Review'].str.split().str.len()

In [4]:
def convert_label(df):
    """Return the binary helpfulness label for one record.

    Despite its name, `df` is a single record indexable by 'ratio_percent'
    (this notebook passes rows via ``DataFrame.apply(..., axis=1)``).

    Returns:
        0 (negative) when ``ratio_percent`` is 70 or lower, otherwise 1 (positive).
    """
    return 0 if df['ratio_percent'] <= 70 else 1

In [5]:
# Label every row with convert_label (0 = not helpful, 1 = helpful) and preview the result.
df['IsHelpful'] = df.apply(func=convert_label, axis='columns')
df.head()

Unnamed: 0,helpful,Review,Rating,Title,helpful_0,helpful_1,helpful_ratio,ratio_percent,review_len,IsHelpful
3,"[4, 4]",Item arrived in great time and was in perfect ...,4,Cute!,4,4,1.0,100,51,1
4,"[2, 3]","awesome! stays on, and looks great. can be use...",5,leopard home button sticker for iphone 4s,2,3,0.666667,66,23,0
5,"[1, 2]",These make using the home button easy. My daug...,3,Cute,1,2,0.5,50,23,0
7,"[1, 2]",it worked for the first week then it only char...,1,not a good Idea,1,2,0.5,50,20,0
8,"[2, 3]","Good case, solid build. Protects phone all aro...",5,Solid Case,2,3,0.666667,66,44,0


df['Capslock_Count'] = df['Review'].str.findall(r'\b[A-Z]+\b')

In [6]:

# Count the words in each review that consist only of capital letters A-Z,
# skipping the standalone pronoun "I".
# NOTE(review): other one-letter capitals (e.g. the article "A") are still counted,
# and the lowercase "i" in the lookahead has no effect because [A-Z]+ cannot match it.
df['Capslock_Count'] = (
    df['Review']
    .str.findall(pat=r'\b(?![Ii]\b)[A-Z]+\b')
    .str.len()
)

In [7]:
# Preview the first 20 rows, including the new Capslock_Count column.
df.head(n=20)

Unnamed: 0,helpful,Review,Rating,Title,helpful_0,helpful_1,helpful_ratio,ratio_percent,review_len,IsHelpful,Capslock_Count
3,"[4, 4]",Item arrived in great time and was in perfect ...,4,Cute!,4,4,1.0,100,51,1,1
4,"[2, 3]","awesome! stays on, and looks great. can be use...",5,leopard home button sticker for iphone 4s,2,3,0.666667,66,23,0,0
5,"[1, 2]",These make using the home button easy. My daug...,3,Cute,1,2,0.5,50,23,0,0
7,"[1, 2]",it worked for the first week then it only char...,1,not a good Idea,1,2,0.5,50,20,0,0
8,"[2, 3]","Good case, solid build. Protects phone all aro...",5,Solid Case,2,3,0.666667,66,44,0,0
11,"[2, 3]",This is the first battery case I have had for ...,5,A Winner,2,3,0.666667,66,71,0,0
12,"[3, 3]",Performs exactly as advertised . It's very st...,5,Absolutely love the case !!,3,3,1.0,100,47,1,3
13,"[12, 16]","Unlike Most of the Rechargeable Battery cases,...",5,Best Rechargeable Battery Case on the Market!!...,12,16,0.75,75,239,1,5
16,"[2, 3]",It works great. Doesn't heat up like crazy lik...,5,SUPER DUPER QUALITY!,2,3,0.666667,66,46,0,0
18,"[2, 3]",I have tested this against the griffin dual ou...,5,High power as promised!,2,3,0.666667,66,102,0,0


In [8]:
# Share of all-caps words per review; a 0/0 division (empty review) yields NaN,
# which is replaced by 0.
df['Capslock_Ratio'] = df['Capslock_Count'].div(df['review_len']).fillna(0)

# Same share as a whole-number percentage (astype(int) truncates toward zero).
df['Capslock_Percent'] = df['Capslock_Ratio'].mul(100).astype(int)

df.head(n=50)

Unnamed: 0,helpful,Review,Rating,Title,helpful_0,helpful_1,helpful_ratio,ratio_percent,review_len,IsHelpful,Capslock_Count,Capslock_Ratio,Capslock_Percent
3,"[4, 4]",Item arrived in great time and was in perfect ...,4,Cute!,4,4,1.0,100,51,1,1,0.019608,1
4,"[2, 3]","awesome! stays on, and looks great. can be use...",5,leopard home button sticker for iphone 4s,2,3,0.666667,66,23,0,0,0.0,0
5,"[1, 2]",These make using the home button easy. My daug...,3,Cute,1,2,0.5,50,23,0,0,0.0,0
7,"[1, 2]",it worked for the first week then it only char...,1,not a good Idea,1,2,0.5,50,20,0,0,0.0,0
8,"[2, 3]","Good case, solid build. Protects phone all aro...",5,Solid Case,2,3,0.666667,66,44,0,0,0.0,0
11,"[2, 3]",This is the first battery case I have had for ...,5,A Winner,2,3,0.666667,66,71,0,0,0.0,0
12,"[3, 3]",Performs exactly as advertised . It's very st...,5,Absolutely love the case !!,3,3,1.0,100,47,1,3,0.06383,6
13,"[12, 16]","Unlike Most of the Rechargeable Battery cases,...",5,Best Rechargeable Battery Case on the Market!!...,12,16,0.75,75,239,1,5,0.020921,2
16,"[2, 3]",It works great. Doesn't heat up like crazy lik...,5,SUPER DUPER QUALITY!,2,3,0.666667,66,46,0,0,0.0,0
18,"[2, 3]",I have tested this against the griffin dual ou...,5,High power as promised!,2,3,0.666667,66,102,0,0,0.0,0
