In [75]:
import pandas as pd
import numpy as np
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import datetime

import statsmodels.api as sm

import matplotlib.pyplot as plt
import mpl_finance

from pandas_datareader import data as pdr
import fix_yahoo_finance

import pickle


In [2]:
dt_tweets = pd.read_csv('data/trump_tweets_16.txt')

In [3]:
dt_tweets = dt_tweets.set_index('created_at')

In [4]:
pd.set_option('display.max_colwidth',-1)  

# Cleaning

In [5]:
dt_tweets.head(2)

Unnamed: 0_level_0,source,text,retweet_count,favorite_count,is_retweet,id_str
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
01-24-2019 16:16:03,Twitter for iPhone,Nancy just said she “just doesn’t understand why?” Very simply without a Wall it all doesn’t work. Our Country has a chance to greatly reduce Crime Human Trafficking Gangs and Drugs. Should have been done for decades. We will not Cave!,24896,104385,False,1088470495312400384
01-24-2019 13:37:59,Twitter for iPhone,Without a Wall there cannot be safety and security at the Border or for the U.S.A. BUILD THE WALL AND CRIME WILL FALL!,21673,95221,False,1088430717611245571


In [6]:
def clean_tweet(tweet): 
        ''' 
        Utility function to clean tweet text by removing links, special characters 
        using simple regex statements. 
        '''
        return ' '.join(re.sub("([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split()) 

In [7]:
dt_tweets['text'] = dt_tweets['text'].apply(lambda x: clean_tweet(x))

In [8]:
dt_tweets16 = dt_tweets.drop(['source', 'id_str', 'retweet_count', 'favorite_count', 'is_retweet'], axis=1)

In [9]:
dt_tweets16.head(5)

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
01-24-2019 16:16:03,Nancy just said she just doesn t understand why Very simply without a Wall it all doesn t work Our Country has a chance to greatly reduce Crime Human Trafficking Gangs and Drugs Should have been done for decades We will not Cave
01-24-2019 13:37:59,Without a Wall there cannot be safety and security at the Border or for the U S A BUILD THE WALL AND CRIME WILL FALL
01-24-2019 13:34:26,back home where they belong no more Rockets or M s being fired over Japan or anywhere else and most importantly no Nuclear Testing This is more than has ever been accomplished with North Korea and the Fake News knows it I expect another good meeting soon much potential
01-24-2019 13:21:59,The Fake News Media loves saying so little happened at my first summit with Kim Jong Un Wrong After 40 years of doing nothing with North Korea but being taken to the cleaners amp with a major war ready to start in a short 15 months relationships built hostages amp remains
01-24-2019 12:48:32,So interesting that bad lawyer Michael Cohen who sadly will not be testifying before Congress is using the lawyer of Crooked Hillary Clinton to represent him Gee how did that happen Remember July 4th weekend when Crooked went before FBI amp wasn t sworn in no tape nothing


In [10]:
#Change date object to datetime
dt_tweets16.index = pd.to_datetime(dt_tweets16.index)

In [11]:
dt_tweets16['text'] = dt_tweets16['text'].apply(lambda x: clean_tweet(x))

In [12]:
dt_tweets16['text'] = dt_tweets16['text'].apply(lambda x: x.lower())

In [13]:
dt_tweets16.head(10)

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
2019-01-24 16:16:03,nancy just said she just doesn t understand why very simply without a wall it all doesn t work our country has a chance to greatly reduce crime human trafficking gangs and drugs should have been done for decades we will not cave
2019-01-24 13:37:59,without a wall there cannot be safety and security at the border or for the u s a build the wall and crime will fall
2019-01-24 13:34:26,back home where they belong no more rockets or m s being fired over japan or anywhere else and most importantly no nuclear testing this is more than has ever been accomplished with north korea and the fake news knows it i expect another good meeting soon much potential
2019-01-24 13:21:59,the fake news media loves saying so little happened at my first summit with kim jong un wrong after 40 years of doing nothing with north korea but being taken to the cleaners amp with a major war ready to start in a short 15 months relationships built hostages amp remains
2019-01-24 12:48:32,so interesting that bad lawyer michael cohen who sadly will not be testifying before congress is using the lawyer of crooked hillary clinton to represent him gee how did that happen remember july 4th weekend when crooked went before fbi amp wasn t sworn in no tape nothing
2019-01-24 11:56:31,the economy is doing great more people working in u s a today than at any time in our history media barely covers foxandfriends
2019-01-24 11:51:52,this is everything fdr dreamed about the new deal to put america back to work think of lbj he gave people food stamps amp welfare donald trump s giving them a job he s got a lot of good things to talk about news stories do not accurately cover him should correct dougwead
2019-01-24 11:35:48,a great new book just out game of thorns by doug wead presidential historian and best selling author the book covers the campaign of 2016 and what could be more exciting than that
2019-01-24 04:18:30,alternative venue for the sotu address because there is no venue that can compete with the history tradition and importance of the house chamber i look forward to giving a great state of the union address in the near future
2019-01-24 04:12:07,as the shutdown was going on nancy pelosi asked me to give the state of the union address i agreed she then changed her mind because of the shutdown suggesting a later date this is her prerogative i will do the address when the shutdown is over i am not looking for an


## Companies

In [19]:
companies = ['Ford', 'general motors','United Technologies', 'Rexnord', 'Boeing', 'Softbank','Glenfiddich' 'Lockheed Martin', 'General Motors', 'Toyota', 'Fiat', 'Chrysler', 'Walmart', 'Fiat', 'Chrysler', 'Nordstrom', 'Corning', 'Pfizer', 'Harley Davidson', 'Intel', 'Exxon Mobil', 'Aetna', 'Bayer', 'Carrier', 'SoftBank', 'Comcast', 'Amazon', 'Facebook', 'American Airlines', 'CBS', 'Wells Fargo', 'Disney', 'JP Morgan', 'Banning', 'CNN', 'NBC', 'sony','general dynamics','nfl','nascar','google','holiday inn','nike', 'usatoday','wsj','merck','vanity fair','time magazine','univision', 'goldman sachs','rolling stone','procter gamble','nypost','politico' ' nymag', 'microsoft','koch', 'MSNBC', 'GM', 'Macys', 'Twitter', 'NY Post', 'Fox News', 'Buzzfeed', 'NY Times', 'New York Times', 'Tmobile', 'Washington Post', 'amtrak', 'AOL', 'Verizon', 'Amazon', 'forbes', 'espn','coca cola']

In [20]:
#company_tweet = dt_tweets16[dt_tweets16["text"].str.contains()]

TypeError: contains() missing 1 required positional argument: 'pat'

In [16]:
#company_tweet = company_tweet.append(dt_tweets16[dt_tweets16["text"].str.contains('nfl')])

NameError: name 'company_tweet' is not defined

In [17]:
#company_tweet.info()

NameError: name 'company_tweet' is not defined

In [21]:
#company_tweet.head();

NameError: name 'company_tweet' is not defined

In [21]:
#save dataframe as pickle file

with open('company_tweets.pickle', 'wb') as to_write:
    pickle.dump(company_tweet, to_write)

NameError: name 'company_tweet' is not defined

Company Tweets

In [22]:
tweets_co = pd.read_pickle('data/company_tweets.pickle')

In [23]:
tweets_co.index = pd.to_datetime(tweets_co.index)

In [24]:
tweets_co = tweets_co.sort_index()

In [25]:
tweets_co.head(20)

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
2016-01-01 21:29:56,sprinklermanus cnn realdonaldtrump they re spending millions but you re still going to win go donald trump
2016-01-03 18:13:29,tinahillstrom1 foxnews katrinapierson i love piersmorgan amp realdonaldtrump real men w an opinion exciting interesting controversial 01 03 2016 19 49 18 571 2152 false 683736913631653888 twitter for android ruthmarcus of the washingtonpost was terrible today on face the nation no focus poor level of concentration but correct on hillary lying 01 03 2016 19 03 56 984 3733 false 683725499391164416 twitter for android chucktodd said today on meetthepress that attacking bill to get to hillary has never worked before wrong attacked him in 08 amp won
2016-01-05 03:36:53,iloveidevices edwinro47796972 happyjack225 foxnews krauthammer minimizing dependency on china is crucial only trump talks about that 01 05 2016 03 39 11 1049 3225 false 684217554861199360 twitter for android salriccobono realdonaldtrump troyconway donald get big business back and make america great again for 2016
2016-01-05 03:47:14,lilredfrmkokomo realdonaldtrump my facebook groups are all voting trump 4000 people great
2016-01-06 03:50:46,compresphyllis seanhannity foxnews i am so glad u are for trump your program is my favorite the don is by far the best
2016-01-06 21:09:42,i will be on wolfblitzer for a cnnsitroom interview today please join us 5pm et
2016-01-07 11:15:54,macys was one of the worst performing stocks on the s amp p last year plunging 46 very disloyal company another win for trump boycott
2016-01-07 16:14:38,sentedcruz ted free legal advice on how to pre empt the dems on citizen issue go to court now amp seek declaratory judgment you will win
2016-01-07 16:14:38,sentedcruz ted free legal advice on how to pre empt the dems on citizen issue go to court now amp seek declaratory judgment you will win
2016-01-08 23:12:35,foxnews poll thank you makeamericagreatagain trump2016


In [319]:
#create empty column
#tweets_co['company'] = tweets_co.apply(lambda _: '', axis=1)

In [27]:
tweets_co.head()

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
2016-01-01 21:29:56,sprinklermanus cnn realdonaldtrump they re spending millions but you re still going to win go donald trump
2016-01-03 18:13:29,tinahillstrom1 foxnews katrinapierson i love piersmorgan amp realdonaldtrump real men w an opinion exciting interesting controversial 01 03 2016 19 49 18 571 2152 false 683736913631653888 twitter for android ruthmarcus of the washingtonpost was terrible today on face the nation no focus poor level of concentration but correct on hillary lying 01 03 2016 19 03 56 984 3733 false 683725499391164416 twitter for android chucktodd said today on meetthepress that attacking bill to get to hillary has never worked before wrong attacked him in 08 amp won
2016-01-05 03:36:53,iloveidevices edwinro47796972 happyjack225 foxnews krauthammer minimizing dependency on china is crucial only trump talks about that 01 05 2016 03 39 11 1049 3225 false 684217554861199360 twitter for android salriccobono realdonaldtrump troyconway donald get big business back and make america great again for 2016
2016-01-05 03:47:14,lilredfrmkokomo realdonaldtrump my facebook groups are all voting trump 4000 people great
2016-01-06 03:50:46,compresphyllis seanhannity foxnews i am so glad u are for trump your program is my favorite the don is by far the best


In [323]:
#company list
companies = ['ford', 'general motors','united technologies', 'rexnord', 'boeing', 'softbank','glenfiddich', 'lockheed martin', 'general motors', 'toyota', 'fiat', 'chrysler', 'walmart', 'fiat', 'chrysler', 'nordstrom', 'corning', 'pfizer', 'harley davidson', 'intel', 'exxon mobil', 'aetna', 'bayer', 'carrier', 'softbank', 'comcast', 'amazon', 'facebook', 'american airlines', 'cbs', 'wells fargo', 'disney', 'jp morgan', 'banning', 'cnn', 'nbc', 'sony','general dynamics','nfl','nascar','google','holiday inn','nike', 'usatoday','wsj','merck','vanity fair','time magazine','univision', 'goldman sachs','rolling stone','procter gamble','nypost','politico' ' nymag', 'microsoft','koch', 'msnbc', 'gm', 'macys', 'twitter', 'ny post', 'fox news', 'buzzfeed', 'ny times', 'new york times', 'tmobile', 'washington post', 'amtrak', 'aol', 'verizon', 'amazon', 'forbes', 'espn','coca cola']

In [325]:
#df['company'] = None

for c in companies:
    tweets_co.loc[tweets_co.text.str.contains(c), 'company'] = c

In [341]:
analyzer = SentimentIntensityAnalyzer()
sentiment = tweets_co['text'].apply(lambda x: analyzer.polarity_scores(x))

In [342]:
tweets_co=pd.concat([tweets_co,sentiment.apply(pd.Series)],1)

In [343]:
tweets_co.head()

Unnamed: 0_level_0,text,company,neg,neu,pos,compound
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-01 21:29:56,sprinklermanus cnn realdonaldtrump they re spending millions but you re still going to win go donald trump,cnn,0.0,0.755,0.245,0.7351
2016-01-03 18:13:29,tinahillstrom1 foxnews katrinapierson i love piersmorgan amp realdonaldtrump real men w an opinion exciting interesting controversial 01 03 2016 19 49 18 571 2152 false 683736913631653888 twitter for android ruthmarcus of the washingtonpost was terrible today on face the nation no focus poor level of concentration but correct on hillary lying 01 03 2016 19 03 56 984 3733 false 683725499391164416 twitter for android chucktodd said today on meetthepress that attacking bill to get to hillary has never worked before wrong attacked him in 08 amp won,twitter,0.183,0.678,0.139,-0.5816
2016-01-05 03:36:53,iloveidevices edwinro47796972 happyjack225 foxnews krauthammer minimizing dependency on china is crucial only trump talks about that 01 05 2016 03 39 11 1049 3225 false 684217554861199360 twitter for android salriccobono realdonaldtrump troyconway donald get big business back and make america great again for 2016,twitter,0.0,0.913,0.087,0.6249


In [344]:
#save dataframe as pickle file

with open('company_tweets_sentiment.pickle', 'wb') as to_write:
    pickle.dump(tweets_co, to_write)

### Make Company Dataframe

In [337]:
co_data = pd.DataFrame({'company':companies})

In [340]:
co_data;

### Amazon

In [28]:
amazon_tweets = pd.read_csv('data/amazon_tweets.txt')

In [29]:
#amazon_tweets['text'] = amazon_tweets['text'].apply(lambda x: clean_tweet(x))

In [30]:
amazon_tweets = amazon_tweets.drop(['source', 'id_str', 'retweet_count', 'favorite_count', 'is_retweet'], axis=1)

In [31]:
amazon_tweets.index = amazon_tweets['created_at']

In [33]:
amazon_tweets.index = amazon_tweets.index

In [47]:
amazon_tweets = amazon_tweets.drop(['created_at'], axis=1)

In [48]:
amazon_tweets.head()

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
01-04-2015 03:46:43,@MIclimber: @realDonaldTrump look at alum @THEGaryBusey making you proud with @AmazonFireTV #CelebApprentice Great.
04-25-2015 13:48:54,Congratulations to @DanaPerino on your book going to number one on Amazon. Great book - Great job!
12-07-2015 15:08:20,The @washingtonpost which loses a fortune is owned by @JeffBezos for purposes of keeping taxes down at his no profit company @amazon.
12-07-2015 15:18:25,The @washingtonpost loses money (a deduction) and gives owner @JeffBezos power to screw public on low taxation of @Amazon! Big tax shelter
12-07-2015 15:22:48,If @amazon ever had to pay fair taxes its stock would crash and it would crumble like a paper bag. The @washingtonpost scam is saving it!


In [49]:
#save dataframe as pickle file

with open('amazon_tweets.pickle', 'wb') as to_write:
    pickle.dump(amazon_tweets, to_write)

In [61]:
amazon = pd.read_pickle('amazon_tweets.pickle')

In [64]:
amazon

Unnamed: 0_level_0,text
created_at,Unnamed: 1_level_1
01-04-2015 03:46:43,@MIclimber: @realDonaldTrump look at alum @THEGaryBusey making you proud with @AmazonFireTV #CelebApprentice Great.
04-25-2015 13:48:54,Congratulations to @DanaPerino on your book going to number one on Amazon. Great book - Great job!
12-07-2015 15:08:20,The @washingtonpost which loses a fortune is owned by @JeffBezos for purposes of keeping taxes down at his no profit company @amazon.
12-07-2015 15:18:25,The @washingtonpost loses money (a deduction) and gives owner @JeffBezos power to screw public on low taxation of @Amazon! Big tax shelter
12-07-2015 15:22:48,If @amazon ever had to pay fair taxes its stock would crash and it would crumble like a paper bag. The @washingtonpost scam is saving it!
12-23-2015 14:55:24,The @washingtonpost which is the lobbyist (power) for not imposing taxes on #Amazon today did a nasty cartoon attacking @tedcruz kids. Bad
06-28-2017 13:06:14,The #AmazonWashingtonPost sometimes referred to as the guardian of Amazon not paying internet taxes (which they should) is FAKE NEWS!
07-22-2017 10:33:01,A new INTELLIGENCE LEAK from the Amazon Washington Postthis time against A.G. Jeff Sessions.These illegal leaks like Comey's must stop!
07-23-2017 23:57:36,It's hard to read the Failing New York Times or the Amazon Washington Post because every story/opinion even if should be positive is bad!
07-25-2017 02:23:18,The Amazon Washington Post fabricated the facts on my ending massive dangerous and wasteful payments to Syrian rebels fighting Assad.....


## Calculating Beta

In [41]:
company_df = pd.read_csv('data/companies_close.csv')

In [42]:
company_df.head(1)

Unnamed: 0,Date,SPX,Amazon
0,1/2/15,2058.199951,308.519989


In [43]:
# Convert date column to datetime
company_df['Date'] = pd.to_datetime(company_df['Date'])

In [44]:
#Change close to percent change
company_df["SPX"] = company_df["SPX"].pct_change(1) 
company_df["Amazon"] = company_df["Amazon"].pct_change(1) 

In [46]:
company_df = company_df.dropna(axis=0)

In [47]:
company_df.head()

Unnamed: 0,Date,SPX,Amazon
1,2015-01-05,-0.018278,-0.020517
2,2015-01-06,-0.008893,-0.022833
3,2015-01-07,0.01163,0.0106
4,2015-01-08,0.017888,0.006836
5,2015-01-09,-0.008404,-0.011749


In [49]:
#OLS in Statsmodel
# split dependent and independent variable
X = company_df['SPX']
y = company_df['Amazon']

# Add a constant to the independent value
X1 = sm.add_constant(X)

# make regression model 
model = sm.OLS(y, X1)

# fit model and print results
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Amazon   R-squared:                       0.251
Model:                            OLS   Adj. R-squared:                  0.250
Method:                 Least Squares   F-statistic:                     172.2
Date:                Wed, 30 Jan 2019   Prob (F-statistic):           3.91e-34
Time:                        14:38:07   Log-Likelihood:                 1365.8
No. Observations:                 516   AIC:                            -2728.
Df Residuals:                     514   BIC:                            -2719.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0018      0.001      2.384      0.0

In [None]:
# Amazon Beta = 1.11 (13.123)

In [None]:
events = [pd.to_datetime('12-07-2015'), pd.to_datetime('1990-01-15')]
max_delta = pd.Timedelta(2, unit='d')