In [1]:
import re
import string
from nltk.stem.wordnet import WordNetLemmatizer 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import seaborn as sns

In [10]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ammar\AppData\Roaming\nltk_data...


True

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
tweets_df = pd.read_csv(r'./tweets.csv')
tweets_df = tweets_df['Tweet'].astype('str')
tweets_df.head(10)

0    I have to say, Apple has by far the best custo...
1    iOS 7 is so fricking smooth & beautiful!! #Tha...
2                                        LOVE U @APPLE
3    Thank you @apple, loving my new iPhone 5S!!!!!...
4    .@apple has the best customer service. In and ...
5    @apple ear pods are AMAZING! Best sound from i...
6    Omg the iPhone 5S is so cool it can read your ...
7              the iPhone 5c is so beautiful <3 @Apple
8    #AttributeOwnership is exactly why @apple will...
9    Just checked out the specs on the new iOS 7......
Name: Tweet, dtype: object

In [4]:
new_stop_words = [
    'some', 'like', 'think', 'wow', 'one', 'http', 'web', 'really',
    'see', 'watch', 'apple', 'know', 'show', 'think', 'click', 'go', 'to', 'great',
    'very', 'good', 'many', 'more', 'people', 'made', 'technology', 'tech',
    'iphone', 'ipad', 'new', 'latest', 'phone', 'itunes', 'brand', 'ipod', 'iphones',
    'io', 'get', 'buy', 'purchase', 'make', 'im', "iam", 'dont', 'cant', 'promoipodplayerpromo',
    'ipodplayerpromo', 'player', 'itune']

def pre_process_text(df, custom_stopwords):
    translate_table = dict((ord(char), None) for char in string.punctuation)
    stop_words = set(stopwords.words('english'))
    stop_words = stop_words.union(custom_stopwords)

    corpus = []
    for i, line in df.iteritems():
        line = line.lower()
        line = re.sub(r"\d+", "", line)
        line = line.translate(translate_table)
        line = line.split()

        # Lemmatizers reduces each word to its root/canonical form
        lm = WordNetLemmatizer()
        line = [lm.lemmatize(word) for word in line if not word in stop_words]
        line = " ".join(line)
        corpus.append(line)
    return corpus

In [16]:
def get_sentiment_score(sentence_series):
    df_dict = {}
    for i, response in sentence_series.iteritems():
        sent_analyzer = SentimentIntensityAnalyzer()
        scores = sent_analyzer.polarity_scores(response)
        df_dict[i] = scores
    df = pd.DataFrame.from_dict(df_dict)
    return df.T

In [6]:
tweets_series = pd.Series(pre_process_text(tweets_df, new_stop_words))
tweets_series

0       say far best customer care service ever receiv...
1                 io fricking smooth beautiful thanxapple
2                                                  love u
3                     thank loving pictwittercomxmhjcupcb
4                               best customer service min
                              ...                        
1176                                                freak
1177            freaking picture tl annoyed freak twitter
1178                                   freaking cow freak
1179                             hate working going freak
1180                   agounalakis thats nasty nasty brat
Length: 1181, dtype: object

In [18]:
df = get_sentiment_score(tweets_series)

        neg    neu    pos  compound
0     0.000  0.486  0.514    0.8126
1     0.000  0.489  0.511    0.6344
2     0.000  0.000  1.000    0.6369
3     0.000  0.135  0.865    0.7506
4     0.000  0.417  0.583    0.6369
...     ...    ...    ...       ...
1176  1.000  0.000  0.000   -0.4404
1177  0.735  0.265  0.000   -0.8074
1178  0.851  0.149  0.000   -0.6908
1179  0.767  0.233  0.000   -0.7650
1180  0.706  0.294  0.000   -0.8020

[1181 rows x 4 columns]
