# Calculating the polarity of tweets

#### Importing all the required packages

In [1]:
import csv
import re
import sys
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

import numpy as np
import pandas as pd

#### First Function:  Tokenizing the tweets
i.e. lowercasing the tweet, replacing every run of non-word characters (such as `$ # @ ! & %`) with a space, and splitting the result into words

#### Second Function: Calculating the polarity of the words by reference to the AFINN dictionary

AFINN is a list of English words rated for valence with an integer
between minus five (negative) and plus five (positive). The words have
been manually labeled by Finn Årup Nielsen in 2009-2011. The file
is tab-separated. There are two versions:

AFINN-111: Newest version with 2477 words and phrases.

AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
are 1480 lines, as some words are listed twice. The word list is not
entirely in alphabetical order.

In [69]:
def tokenize(text):
    """Split a piece of text into lowercase word tokens.

    The text is lowercased, every run of non-word characters (anything the
    regex class ``\\W`` matches, e.g. ``$ # @ ! & %`` and whitespace) is
    replaced by a single space, and the result is split on whitespace.

    Args:
        text: The string to tokenize, e.g. a single tweet.

    Returns:
        A list of lowercase tokens; an empty list when ``text`` contains no
        word characters.
    """
    # Raw string: in a normal literal '\W' is an invalid escape sequence,
    # which recent Python versions warn about.
    return re.sub(r'\W+', ' ', text.lower()).split()

def afinn_sentiment(terms, afinn):
    """Sum the lexicon scores of the given terms.

    Args:
        terms: Iterable of tokens to score.
        afinn: Mapping from token to its numeric valence.

    Returns:
        The total, as a float, of ``afinn[term]`` over every term that is
        present in ``afinn``; terms missing from the mapping contribute
        nothing. Returns ``0.0`` when no term matches.
    """
    score = 0.0
    # Only terms known to the lexicon are looked up; repeated terms count
    # once per occurrence.
    for weight in (afinn[term] for term in terms if term in afinn):
        score += weight
    return score

#### Downloading the AFINN file from the URL below

In [None]:
# Download the AFINN archive and build `afinn`, a dict mapping each word
# (str) to its integer valence.
AFINN_URL = 'http://www2.compute.dtu.dk/~faan/data/AFINN.zip'
AFINN_MEMBER = 'AFINN/AFINN-111.txt'

afinn = dict()

# Context managers close the HTTP response, the archive and the member file
# even if reading or parsing fails part-way through.
with urlopen(AFINN_URL) as response:
    # ZipFile needs a seekable file object, so buffer the download in memory.
    archive_bytes = BytesIO(response.read())

with ZipFile(archive_bytes) as archive, archive.open(AFINN_MEMBER) as afinn_file:
    for line in afinn_file:
        # Lines arrive as bytes. Splitting on any whitespace yields exactly
        # two fields only for single-word entries, so multi-word phrases
        # are skipped by the length check below.
        parts = line.strip().split()
        if len(parts) == 2:
            # Decode the key here so the dict is built with str keys in a
            # single pass instead of being rebuilt afterwards.
            afinn[parts[0].decode('utf-8')] = int(parts[1])
 

### Sentiment Analyser:
Calculating the polarity of each word in the tweet and accordingly categorizing the tweet as positive, negative, or neutral

In [71]:
def sentiment_analyzer(tweet_s):
    """Score each tweet against the AFINN lexicon and label its polarity.

    Each tweet is tokenized with ``tokenize`` and scored with
    ``afinn_sentiment`` using the module-level ``afinn`` dictionary.

    Args:
        tweet_s: Iterable of tweet strings.

    Returns:
        A tuple ``(afinn_total, polarity)`` of two NumPy arrays with one
        entry per tweet, in input order: the summed score of the tweet, and
        the label ``"positive"`` (score > 0), ``"negative"`` (score < 0) or
        ``"neutral"`` (score == 0).
    """
    afinn_total = []
    polarity = []

    for tweet in tweet_s:
        total = afinn_sentiment(tokenize(tweet), afinn)
        afinn_total.append(total)
        if total > 0:
            polarity.append("positive")
        elif total < 0:
            polarity.append("negative")
        else:
            polarity.append("neutral")

    # Both lists are flat, so the resulting arrays are 1-D and need no
    # transpose.
    return np.array(afinn_total), np.array(polarity)

#### Adding two new columns to the already-created CSV of tweets
The new columns hold each tweet's sentiment label and its polarity weight

In [79]:
# Load the tweet CSV, then discard the "Sentiment" column that ships with it.
df19 = pd.read_csv("Sentiment_Analysis_of_67_Tweets_About_AAPL.csv")
df19 = df19.drop(columns=["Sentiment"])

In [80]:
# Score every tweet in the "Tweet" column; the analyzer returns the numeric
# totals first and the polarity labels second.
weights, sentiments = sentiment_analyzer(df19.Tweet)
# NOTE(review): "Senitments" looks like a typo for "Sentiments". It becomes a
# column header in the saved CSV, so confirm downstream readers before renaming.
df19["Senitments"] = sentiments
df19["Weights"] = weights

### Converting to new csv 

In [87]:
# Write the annotated tweets to disk; index=False keeps the DataFrame's row
# index out of the file.
df19.to_csv("AppleTweets19.csv",index=False)

### Loading the above created csv

In [88]:
# Read AppleTweets19.csv back into a separate DataFrame.
df = pd.read_csv("AppleTweets19.csv")

In [89]:
# Bare expression: the notebook renders the DataFrame as the cell's output.
df

Unnamed: 0,Time,Tweet,Senitments,Weights
0,2018-04-19 20:13:52,@fitbit &amp; @apple -- two horse race -- $FI...,positive,1.0
1,2018-04-19 20:13:30,$AAPL falls -2.83% today on worries that #iPho...,negative,-3.0
2,2018-04-19 20:13:05,I'd be all over buying $AAPL right here at $17...,positive,2.0
3,2018-04-19 20:12:40,Check out r/tradingsystems on Reddit for info ...,neutral,0.0
4,2018-04-19 20:12:22,Join @RobinhoodApp and we'll both get a stock ...,positive,4.0
5,2018-04-19 20:12:11,Join @RobinhoodApp and we'll both get a stock ...,positive,4.0
6,2018-04-19 20:11:58,$SVSA .0619?! 25 MIL FLOAT! INSIDERS OWN 76%! ...,neutral,0.0
7,2018-04-19 20:08:44,When $AAPL net income was the same as $AMZN is...,neutral,0.0
8,2018-04-19 20:08:37,Not a word from @cnbc @CNN @MSNBC on Trump bei...,neutral,0.0
9,2018-04-19 20:08:28,another stellar day.. Staunch Train keeps roll...,negative,-1.0
