In [20]:
import pandas as pd
import re
from textblob import TextBlob

In [2]:
data = pd.read_csv('data/twitter-airline-sentiment/Tweets.csv')
print(data.shape)
data.head()

(14640, 15)


Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,5.70306e+17,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2/24/2015 11:35,,Eastern Time (US & Canada)
1,5.70301e+17,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2/24/2015 11:15,,Pacific Time (US & Canada)
2,5.70301e+17,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2/24/2015 11:15,Lets Play,Central Time (US & Canada)
3,5.70301e+17,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2/24/2015 11:15,,Pacific Time (US & Canada)
4,5.70301e+17,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2/24/2015 11:14,,Pacific Time (US & Canada)


In [8]:
tweets = pd.DataFrame(data['text'])
tweets.head(10)

Unnamed: 0,text
0,@VirginAmerica What @dhepburn said.
1,@VirginAmerica plus you've added commercials t...
2,@VirginAmerica I didn't today... Must mean I n...
3,@VirginAmerica it's really aggressive to blast...
4,@VirginAmerica and it's a really big bad thing...
5,@VirginAmerica seriously would pay $30 a fligh...
6,"@VirginAmerica yes, nearly every time I fly VX..."
7,@VirginAmerica Really missed a prime opportuni...
8,"@virginamerica Well, I didn't…but NOW I DO! :-D"
9,"@VirginAmerica it was amazing, and arrived an ..."


In [9]:
tweets['At'] = tweets['text'].str.extract(r'^(@\S+)')
tweets.head(10)

Unnamed: 0,text,At
0,@VirginAmerica What @dhepburn said.,@VirginAmerica
1,@VirginAmerica plus you've added commercials t...,@VirginAmerica
2,@VirginAmerica I didn't today... Must mean I n...,@VirginAmerica
3,@VirginAmerica it's really aggressive to blast...,@VirginAmerica
4,@VirginAmerica and it's a really big bad thing...,@VirginAmerica
5,@VirginAmerica seriously would pay $30 a fligh...,@VirginAmerica
6,"@VirginAmerica yes, nearly every time I fly VX...",@VirginAmerica
7,@VirginAmerica Really missed a prime opportuni...,@VirginAmerica
8,"@virginamerica Well, I didn't…but NOW I DO! :-D",@virginamerica
9,"@VirginAmerica it was amazing, and arrived an ...",@VirginAmerica


In [13]:
# for removing handles
def remove_handles(tweet):
    return re.sub(r'@\S+', '', tweet)

In [14]:
tweets['text'] = tweets['text'].apply(remove_handles)

In [16]:
tweets.head(10)

Unnamed: 0,text,At
0,What said.,@VirginAmerica
1,plus you've added commercials to the experien...,@VirginAmerica
2,I didn't today... Must mean I need to take an...,@VirginAmerica
3,"it's really aggressive to blast obnoxious ""en...",@VirginAmerica
4,and it's a really big bad thing about it,@VirginAmerica
5,seriously would pay $30 a flight for seats th...,@VirginAmerica
6,"yes, nearly every time I fly VX this “ear wor...",@VirginAmerica
7,Really missed a prime opportunity for Men Wit...,@VirginAmerica
8,"Well, I didn't…but NOW I DO! :-D",@virginamerica
9,"it was amazing, and arrived an hour early. Yo...",@VirginAmerica


In [18]:
def get_sentiment(dataframe, column):
    '''
    PARAMETERS - 
        dataframe - pandas DataFrame object
        column - column in dataframe which contains the text to which sentiment is applied 
    
    RETURNS - 
        pandas dataframe, polarity Subjectivity for each text
    
    '''
    text_column = dataframe[column]
    textblob_sentiment = text_column.apply(TextBlob)
    
    sentiment_values = [ {'Polarity': v.sentiment.polarity,
                         'Subjectivity': v.sentiment.subjectivity}
                   for v in textblob_sentiment.values]
    return pd.DataFrame(sentiment_values)

In [21]:
sentiment_df = get_sentiment(tweets,'text')

In [22]:
sentiment_df.head()

Unnamed: 0,Polarity,Subjectivity
0,0.0,0.0
1,0.0,0.0
2,-0.390625,0.6875
3,0.00625,0.35
4,-0.35,0.383333


In [24]:
#concat the two dataframes
tweets = pd.concat([tweets, sentiment_df], axis=1)

In [25]:
tweets.head()

Unnamed: 0,text,At,Polarity,Subjectivity
0,What said.,@VirginAmerica,0.0,0.0
1,plus you've added commercials to the experien...,@VirginAmerica,0.0,0.0
2,I didn't today... Must mean I need to take an...,@VirginAmerica,-0.390625,0.6875
3,"it's really aggressive to blast obnoxious ""en...",@VirginAmerica,0.00625,0.35
4,and it's a really big bad thing about it,@VirginAmerica,-0.35,0.383333


In [32]:
postive_tweets = tweets[tweets['Polarity'] > 0.5]
negative_tweets = tweets[tweets['Polarity'] < -0.5]
neutral_tweets = tweets[(tweets['Polarity'] < 0.1) & (tweets['Polarity']> -0.1)]

## Positive tweets

In [33]:
print('Number of postive tweets - ',len(postive_tweets))
postive_tweets.head(10)

Number of postive tweets -  1047


Unnamed: 0,text,At,Polarity,Subjectivity
8,"Well, I didn't…but NOW I DO! :-D",@virginamerica,1.0,1.0
19,you know what would be amazingly awesome? BOS...,@VirginAmerica,0.6,0.966667
22,I love the hipster innovation. You are a feel...,@VirginAmerica,0.6,0.6
34,this is great news! America could start flig...,@VirginAmerica,1.0,0.75
35,Nice RT Vibe with the moodlight from takeoff ...,,0.6,1.0
36,Moodlighting is the only way to fly! Best exp...,@VirginAmerica,0.5875,0.7125
42,plz help me win my bid upgrade for my flight ...,@VirginAmerica,1.0,0.4
43,I have an unused ticket but moved to a new ci...,@VirginAmerica,0.578788,0.751515
45,I'm #elevategold for a good reason: you rock!!,@VirginAmerica,1.0,0.6
51,Julie Andrews all the way though was very ...,@VirginAmerica,1.0,1.0


## Negative tweets

In [34]:
print('Number of negative tweets - ',len(negative_tweets))
negative_tweets.head(10)

Number of negative tweets -  599


Unnamed: 0,text,At,Polarity,Subjectivity
33,"awaiting my return phone call, just would pre...",@VirginAmerica,-0.75,1.0
84,it was a disappointing experience which will ...,@VirginAmerica,-0.6,0.7
114,come back to #PHL already. We need you to tak...,@VirginAmerica,-0.533333,0.666667
131,us too! Terrible airline! Just gave us a hot...,@VirginAmerica,-0.75,1.0
181,too bad you say it takes 10 to 14 days via YO...,@VirginAmerica,-0.875,0.666667
187,for all my flight stuff wrong and did nothin...,@VirginAmerica,-0.75,0.95
354,I am deeply disappointed that your birthday p...,@VirginAmerica,-0.625,0.625
367,on VX399 from JFK to LA - dirty plane - not u...,@VirginAmerica,-0.6,0.8
411,all crap channels which is why I pay to watch...,@VirginAmerica,-0.8,0.8
446,"Never had a bad experience before, but this o...",@VirginAmerica,-0.7,0.666667


## Neutral tweets 


In [35]:
print('Number of neutral tweets are - ',len(neutral_tweets))
neutral_tweets.head(10)

Number of neutral tweets are -  6730


Unnamed: 0,text,At,Polarity,Subjectivity
0,What said.,@VirginAmerica,0.0,0.0
1,plus you've added commercials to the experien...,@VirginAmerica,0.0,0.0
3,"it's really aggressive to blast obnoxious ""en...",@VirginAmerica,0.00625,0.35
10,did you know that suicide is the second leadi...,@VirginAmerica,0.0,0.0
15,SFO-PDX schedule is still MIA.,@VirginAmerica,0.0,0.0
17,I flew from NYC to SFO last week and couldn'...,@VirginAmerica,0.047619,0.290079
18,I ❤️ flying ☺️👍,,0.0,0.0
23,will you be making BOS&gt;LAS non stop perman...,@VirginAmerica,0.0,0.0
25,status match program. I applied and it's bee...,@VirginAmerica,0.0,0.0
26,What happened 2 ur vegan food options?! At le...,@VirginAmerica,-0.075,0.33125
