In [17]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import math
import numpy as np 
import matplotlib.pyplot as plt
import warnings
import plotly.offline as py
from bubbly.bubbly import bubbleplot
import seaborn as sns

from textblob import TextBlob

In [18]:
data = pd.read_csv('amazon_alexa.tsv', delimiter='\t')
data

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1


In [19]:
print(data['verified_reviews'].dtypes)

object


In [20]:
# lets calculate the length of the review
# data['length'] = data['verified_reviews'].apply(len)
data['length'] = data['verified_reviews'].apply(lambda x: len(str(x)) if isinstance(x, str) else 0)
data
# data.info()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,length
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,13
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,9
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,195
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,172
4,5,31-Jul-18,Charcoal Fabric,Music,1,5
...,...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1,50
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1,135
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1,441
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1,380


#### Polarity
##### It is the expresion that determines the sentimental aspect of an opinion. in Textual data, the result of sentiment analysis can be dtermind ofr each entity in the sentece, document or sentence. The sentiment polarity can be dtermined as positive, negative and neutral

In [21]:
# let's calculate the polarity of the review
def get_polarity(text):
    textblob = TextBlob(str(text.encode('utf-8')))
    pol = textblob.sentiment.polarity
    return pol


# Let's apply the polarity
data['polarity'] = data['verified_reviews'].apply(lambda x: get_polarity(str(x)) if isinstance(x, str) else 0)
data

Unnamed: 0,rating,date,variation,verified_reviews,feedback,length,polarity
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,13,0.625000
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,9,0.875000
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,195,-0.100000
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,172,0.350000
4,5,31-Jul-18,Charcoal Fabric,Music,1,5,0.000000
...,...,...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1,50,1.000000
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1,135,0.333333
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1,441,0.237662
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1,380,0.316667


#### Text Subjectivity

##### In natural language, subjectivity refers to expression of opinion, evaluations, feelings, and speculations and thus incorporates sentiment. Subjective text is further calssified with sentiment or polarity

In [22]:
# let's calculate the polarity of the review
def get_subjectivity(text):
    textblob = TextBlob(str(text.encode('utf-8')))
    subj = textblob.sentiment.subjectivity
    return subj


# Let's apply the polarity
data['subjectivity'] = data['verified_reviews'].apply(lambda x: get_subjectivity(str(x)) if isinstance(x, str) else 0)
data

Unnamed: 0,rating,date,variation,verified_reviews,feedback,length,polarity,subjectivity
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,13,0.625000,0.600000
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,9,0.875000,0.800000
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,195,-0.100000,0.512500
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,172,0.350000,0.450000
4,5,31-Jul-18,Charcoal Fabric,Music,1,5,0.000000,0.000000
...,...,...,...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1,50,1.000000,1.000000
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1,135,0.333333,0.452381
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1,441,0.237662,0.533766
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1,380,0.316667,0.632099


In [23]:
# let's summarize the newly created features
data[['length','polarity','subjectivity']].describe()

Unnamed: 0,length,polarity,subjectivity
count,3150.0,3150.0,3150.0
mean,132.048254,0.349792,0.528922
std,182.100859,0.303362,0.256324
min,0.0,-1.0,0.0
25%,30.0,0.123852,0.419196
50%,74.0,0.35,0.585
75%,165.0,0.533333,0.695486
max,2851.0,1.0,1.0
