# Sentiment analysis of the shoes dataset 

In [1]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd

In [2]:
# Load the shoes dataset; the relative path resolves against the notebook's working directory.
shoes_csv_path = 'datasets/shoes_data.csv'
shoes_data = pd.read_csv(shoes_csv_path)

In [3]:
# Preview the first five rows of the raw data.
shoes_data.head(n=5)

Unnamed: 0,title_token,title_tokens_merged,subreddit
0,"['ordered', 'a', 'pair', 'of', 'these', 'my', ...",ordered a pair of these my first 327 s im in love,Newbalance
1,"['started', 'grey', 'day', 'getting', 'a', 'li...",started grey day getting a lil hooked,Newbalance
2,"['started', 'grey', 'day', 'getting', 'a', 'li...",started grey day getting a lil hooked,Newbalance
3,"['started', 'on', 'grey', 'day', 'getting', 'a...",started on grey day getting a lil hooked,Newbalance
4,"['when', 'will', 'these', 'drop', 'on', 'the',...",when will these drop on the nb website i have ...,Newbalance


In [4]:
# Keep only the merged title text and its subreddit label.
# The explicit .copy() makes this an independent DataFrame rather than a
# selection derived from shoes_data, so modifying it later does not raise
# pandas' SettingWithCopyWarning and can never touch the original frame.
shoe_sentiments = shoes_data[['title_tokens_merged', 'subreddit']].copy()

In [5]:
# Preview the first five rows of the two selected columns.
shoe_sentiments.head(n=5)

Unnamed: 0,title_tokens_merged,subreddit
0,ordered a pair of these my first 327 s im in love,Newbalance
1,started grey day getting a lil hooked,Newbalance
2,started grey day getting a lil hooked,Newbalance
3,started on grey day getting a lil hooked,Newbalance
4,when will these drop on the nb website i have ...,Newbalance


In [6]:
# Count missing values per column before cleaning.
(
    shoe_sentiments
    .isna()
    .sum()
)

title_tokens_merged    0
subreddit              0
dtype: int64

In [7]:
# Drop rows with missing values by rebinding the name to the result instead of
# using inplace=True. shoe_sentiments was derived from shoes_data by column
# selection, and mutating such a frame in place triggers pandas'
# SettingWithCopyWarning. Rebinding also keeps this cell safe to re-run.
shoe_sentiments = shoe_sentiments.dropna()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shoe_sentiments.dropna(inplace=True)


In [8]:
# Re-count missing values per column to confirm none remain after the drop.
(
    shoe_sentiments
    .isna()
    .sum()
)

title_tokens_merged    0
subreddit              0
dtype: int64

### Instantiate the Sentiment Intensity Analyzer 

In [9]:
# Create the VADER analyzer used to score every title below.
# NOTE(review): VADER needs NLTK's `vader_lexicon` resource; this notebook never
# downloads it, so it is presumably already installed in the environment — confirm.
sia = SentimentIntensityAnalyzer()

In [10]:
# Score each merged title; every call yields one dict of scores, kept in row order.
sia_dicts = list(map(sia.polarity_scores, shoe_sentiments['title_tokens_merged']))

In [11]:
# Turn the list of score dicts into a frame: one row per title, one column per score key.
sia_df = pd.DataFrame(data=sia_dicts)

In [12]:
# Renumber the rows 0..n-1 (discarding the old index) so they line up with
# sia_df, which was built from a plain list and therefore has a default index.
# Without this, any rows removed by dropna would misalign the column-wise concat.
titles = shoe_sentiments.reset_index(drop=True)

### Create dataset of sentiment scores 

In [13]:
# Place the titles/subreddits and their sentiment scores side by side (aligned on index),
# then display the combined frame.
shoe_sentiments_df = pd.concat(objs=[titles, sia_df], axis='columns')
shoe_sentiments_df

Unnamed: 0,title_tokens_merged,subreddit,neg,neu,pos,compound
0,ordered a pair of these my first 327 s im in love,Newbalance,0.000,0.682,0.318,0.6369
1,started grey day getting a lil hooked,Newbalance,0.000,0.806,0.194,0.0516
2,started grey day getting a lil hooked,Newbalance,0.000,0.806,0.194,0.0516
3,started on grey day getting a lil hooked,Newbalance,0.000,0.833,0.167,0.0516
4,when will these drop on the nb website i have ...,Newbalance,0.049,0.785,0.166,0.5789
...,...,...,...,...,...,...
1988,what are those found them in my basement and k...,Nike,0.000,0.833,0.167,0.2975
1989,new air force 1s stained,Nike,0.000,1.000,0.000,0.0000
1990,did nike discontinue these medium olive boots ...,Nike,0.000,1.000,0.000,0.0000
1991,should i wear them or you think the price will...,Nike,0.000,1.000,0.000,0.0000


### Sentiment analysis scores sorted by negative scores, highest to lowest 

In [14]:
# Show the 15 titles with the highest 'neg' score.
(
    shoe_sentiments_df
    .sort_values(by='neg', ascending=False)
    .head(15)
)

Unnamed: 0,title_tokens_merged,subreddit,neg,neu,pos,compound
936,bummer,Newbalance,1.0,0.0,0.0,-0.3818
1674,pain,Nike,1.0,0.0,0.0,-0.5106
1860,banned,Nike,1.0,0.0,0.0,-0.4588
1665,i have a problem,Nike,0.73,0.27,0.0,-0.4019
524,lazy sunday,Newbalance,0.714,0.286,0.0,-0.3612
1337,faulty product,Nike,0.697,0.303,0.0,-0.3182
1220,faulty product,Nike,0.697,0.303,0.0,-0.3182
767,nb for lower back discomfort,Newbalance,0.625,0.375,0.0,-0.6124
1065,real or fake,Nike,0.608,0.392,0.0,-0.4767
1064,real or fake,Nike,0.608,0.392,0.0,-0.4767


### Sentiment analysis scores sorted by compound scores, highest to lowest 

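##### The compound score is VADER's single normalized score for a text, ranging from -1 (most negative) to +1 (most positive), so it gives a better overall picture of a title's sentiment than the separate neg/neu/pos proportions.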

In [15]:
# The closer the compound score is to +1, the more positive the text.
# Show the 15 titles with the highest compound score.
(
    shoe_sentiments_df
    .sort_values(by='compound', ascending=False)
    .head(15)
)

Unnamed: 0,title_tokens_merged,subreddit,neg,neu,pos,compound
383,thanks to everyone who responded yesterday was...,Newbalance,0.0,0.755,0.245,0.9231
607,first pair of 992s usually rock 990s size 14 t...,Newbalance,0.0,0.774,0.226,0.9169
608,this is my first pair usually rock 990s size 1...,Newbalance,0.0,0.777,0.223,0.9169
132,it s probably done before but what is the best...,Newbalance,0.0,0.612,0.388,0.9081
1641,hello i m mornica i m really sweet and polite ...,Nike,0.0,0.79,0.21,0.8932
1100,newest pickup haven t bought anything other th...,Nike,0.0,0.814,0.186,0.8883
1183,i m stuck between these 3 shoes i need help if...,Nike,0.062,0.556,0.383,0.886
1406,could someone help me find these exact shoes o...,Nike,0.0,0.769,0.231,0.8858
1229,hi guys bought these back in 2013 please can s...,Nike,0.0,0.703,0.297,0.875
1034,these are being sold second hand at a great pr...,Nike,0.0,0.6,0.4,0.875


### Sentiment analysis scores sorted by compound scores, lowest to highest 

In [16]:
# Show the 15 titles with the lowest compound score.
(
    shoe_sentiments_df
    .sort_values(by='compound', ascending=True)
    .head(15)
)

Unnamed: 0,title_tokens_merged,subreddit,neg,neu,pos,compound
1947,found these they look fake asf but i ll just a...,Nike,0.318,0.682,0.0,-0.8522
1681,i check this stupid app every day and never on...,Nike,0.378,0.622,0.0,-0.836
1490,got these from the outlets left jumpman lookin...,Nike,0.336,0.664,0.0,-0.7635
724,my disappointment is immeasurable and my day i...,Newbalance,0.478,0.522,0.0,-0.7506
1072,they are bought used i want to know if they ar...,Nike,0.355,0.582,0.063,-0.7202
555,are these m996 beaters that i bought authentic...,Newbalance,0.305,0.695,0.0,-0.6924
1232,nike s playbook under fire after poor results,Nike,0.524,0.476,0.0,-0.6705
1544,the shoes were muddy so i put random cleaners ...,Nike,0.228,0.684,0.088,-0.6486
1169,i bought the wrong size and am wondering where...,Nike,0.215,0.785,0.0,-0.6369
740,ml2002rc marblehead re up in australia was str...,Newbalance,0.184,0.816,0.0,-0.634


### Sentiment analysis scores sorted by positive scores, highest to lowest 

In [17]:
# Show the 15 titles with the highest 'pos' score.
(
    shoe_sentiments_df
    .sort_values(by='pos', ascending=False)
    .head(15)
)

Unnamed: 0,title_tokens_merged,subreddit,neg,neu,pos,compound
402,fit,Newbalance,0.0,0.0,1.0,0.3612
1455,help,Nike,0.0,0.0,1.0,0.4019
282,stunning,Newbalance,0.0,0.0,1.0,0.3818
569,id help please,Newbalance,0.0,0.167,0.833,0.6124
787,feeling cute today,Newbalance,0.0,0.182,0.818,0.5423
1278,love nike,Nike,0.0,0.192,0.808,0.6369
1027,i m in love,Nike,0.0,0.192,0.808,0.6369
226,sunday best,Newbalance,0.0,0.192,0.808,0.6369
518,fully protected,Newbalance,0.0,0.238,0.762,0.4927
41,good morning,Newbalance,0.0,0.256,0.744,0.4404
