# Analysis of Financial Markets based on President Trump's Tweets

## Data Preprocessing

### Importing Data

In [1]:
!pip install -q kaggle

In [None]:
# Upload kaggle.json file containing your API key.
# The trailing semicolon suppresses the cell's output so the value returned by
# files.upload() (the uploaded file contents, i.e. the API key) is not echoed
# into the saved notebook.
from google.colab import files
files.upload();

In [None]:
!mkdir ~/.kaggle 

In [4]:
# Copy the uploaded kaggle.json from the working directory into ~/.kaggle/.
!cp kaggle.json ~/.kaggle/

In [5]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
# Check to see if the API is working correctly 
# ! kaggle datasets list

In [None]:
# Download the austinreese/trump-tweets dataset with the Kaggle CLI.
! kaggle datasets download -d austinreese/trump-tweets

In [None]:
! pip install yfinance

### Data Cleaning

In [9]:
import pandas as pd
import numpy as np
from zipfile import ZipFile

#### Twitter Data

In [41]:
# Read the tweets CSV directly out of the downloaded archive. The context
# managers close both the archive and the member file handle once the frame
# has been loaded (the original left them open).
with ZipFile("/content/trump-tweets.zip") as data:
    with data.open('trumptweets.csv') as tweets_csv:
        trump_tweets = pd.read_csv(tweets_csv)
trump_tweets.head()

Unnamed: 0,id,link,content,date,retweets,favorites,mentions,hashtags,geo
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,2009-05-04 20:54:25,500,868,,,
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,2009-05-05 03:00:10,33,273,,,
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,2009-05-08 15:38:08,12,18,,,
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post: Celebrity Apprentice Finale and...,2009-05-08 22:40:15,11,24,,,
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,"""My persona will never be that of a wallflower...",2009-05-12 16:07:28,1399,1965,,,


In [42]:
# (rows, columns) of the tweets frame as loaded.
trump_tweets.shape

(41122, 9)

The dataframe has 41,122 rows and 9 columns. Three of these columns (mentions, hashtags, and geo) currently show NaN for the first five rows.

In [43]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
trump_tweets.describe()

Unnamed: 0,id,retweets,favorites,geo
count,41122.0,41122.0,41122.0,0.0
mean,6.088909e+17,5455.590657,22356.899105,
std,3.027946e+17,10130.076661,41501.859711,
min,1698309000.0,0.0,0.0,
25%,3.549428e+17,25.0,28.0,
50%,5.609149e+17,291.0,247.0,
75%,7.941218e+17,8778.0,32970.75,
max,1.219077e+18,309892.0,857678.0,


In [44]:
# Remove the columns that showed NaN in the preview of the first rows.
# errors='ignore' keeps the cell re-runnable: running it again after the
# columns are already gone is a no-op instead of a KeyError.
trump_tweets = trump_tweets.drop(columns=['mentions', 'hashtags', 'geo'], errors='ignore')
trump_tweets

Unnamed: 0,id,link,content,date,retweets,favorites
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,2009-05-04 20:54:25,500,868
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,2009-05-05 03:00:10,33,273
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,2009-05-08 15:38:08,12,18
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post: Celebrity Apprentice Finale and...,2009-05-08 22:40:15,11,24
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,"""My persona will never be that of a wallflower...",2009-05-12 16:07:28,1399,1965
...,...,...,...,...,...,...
41117,1218962544372670467,https://twitter.com/realDonaldTrump/status/121...,I have never seen the Republican Party as Stro...,2020-01-19 19:24:52,32620,213817
41118,1219004689716412416,https://twitter.com/realDonaldTrump/status/121...,Now Mini Mike Bloomberg is critical of Jack Wi...,2020-01-19 22:12:20,36239,149571
41119,1219053709428248576,https://twitter.com/realDonaldTrump/status/121...,I was thrilled to be back in the Great State o...,2020-01-20 01:27:07,16588,66944
41120,1219066007731310593,https://twitter.com/realDonaldTrump/status/121...,"“In the House, the President got less due proc...",2020-01-20 02:16:00,20599,81921


In [45]:
# Inspect the dtype of each remaining column.
trump_tweets.dtypes

id            int64
link         object
content      object
date         object
retweets      int64
favorites     int64
dtype: object

In [46]:
# The date column is an object (strings such as '2009-05-04 20:54:25') when it
# should be datetime. The format string must include the dashes between year,
# month and day so that it matches the data.
trump_tweets['date'] = pd.to_datetime(trump_tweets['date'], format='%Y-%m-%d %H:%M:%S')

In [47]:
# Split the date column into 2 separate columns: time of day and calendar date.
# The vectorised .dt accessors replace the per-row Python lambdas.
tweet_timestamps = trump_tweets['date']
trump_tweets['Time'] = tweet_timestamps.dt.time
trump_tweets['Date'] = tweet_timestamps.dt.date
# The combined timestamp column is no longer needed once it has been split.
trump_tweets = trump_tweets.drop(labels='date', axis='columns')

In [48]:
# Preview the frame with the new Time and Date columns.
trump_tweets.head()

Unnamed: 0,id,link,content,retweets,favorites,Time,Date
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,500,868,20:54:25,2009-05-04
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,33,273,03:00:10,2009-05-05
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,12,18,15:38:08,2009-05-08
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post: Celebrity Apprentice Finale and...,11,24,22:40:15,2009-05-08
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,"""My persona will never be that of a wallflower...",1399,1965,16:07:28,2009-05-12


In [49]:
# Tweet text before punctuation is stripped.
trump_tweets['content']

0        Be sure to tune in and watch Donald Trump on L...
1        Donald Trump will be appearing on The View tom...
2        Donald Trump reads Top Ten Financial Tips on L...
3        New Blog Post: Celebrity Apprentice Finale and...
4        "My persona will never be that of a wallflower...
                               ...                        
41117    I have never seen the Republican Party as Stro...
41118    Now Mini Mike Bloomberg is critical of Jack Wi...
41119    I was thrilled to be back in the Great State o...
41120    “In the House, the President got less due proc...
41121    A great show! Check it out tonight at 9pm. @ F...
Name: content, Length: 41122, dtype: object

In [50]:
# Remove punctuation from content column: delete every character that is
# neither a word character nor whitespace.
# regex=True is passed explicitly because pandas 2.0 changed the default to
# literal matching, which would leave the text untouched. The raw string keeps
# \w and \s from being read as string escapes.
trump_tweets['content'] = trump_tweets['content'].str.replace(r'[^\w\s]', '', regex=True)
trump_tweets['content']

0        Be sure to tune in and watch Donald Trump on L...
1        Donald Trump will be appearing on The View tom...
2        Donald Trump reads Top Ten Financial Tips on L...
3        New Blog Post Celebrity Apprentice Finale and ...
4        My persona will never be that of a wallflower ...
                               ...                        
41117    I have never seen the Republican Party as Stro...
41118    Now Mini Mike Bloomberg is critical of Jack Wi...
41119    I was thrilled to be back in the Great State o...
41120    In the House the President got less due proces...
41121    A great show Check it out tonight at 9pm  FoxN...
Name: content, Length: 41122, dtype: object

In [51]:
# Preview the frame after punctuation has been removed from content.
trump_tweets.head()

Unnamed: 0,id,link,content,retweets,favorites,Time,Date
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,500,868,20:54:25,2009-05-04
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,33,273,03:00:10,2009-05-05
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,12,18,15:38:08,2009-05-08
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post Celebrity Apprentice Finale and ...,11,24,22:40:15,2009-05-08
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,My persona will never be that of a wallflower ...,1399,1965,16:07:28,2009-05-12


Keep only the tweets posted on or after November 8th, 2016; earlier rows are not needed for the analysis.

In [60]:
# Keep tweets from 2016-11-08 onward. The Date column holds plain
# datetime.date objects (it was built with .date()), so the cutoff is converted
# to a date as well: ordering a date against a pandas Timestamp raises
# TypeError on pandas 2.x.
cutoff_date = pd.to_datetime('2016-11-08').date()
dates = trump_tweets[trump_tweets['Date'] >= cutoff_date]
dates

Unnamed: 0,id,link,content,retweets,favorites,Time,Date
30889,795770006306861057,https://twitter.com/realDonaldTrump/status/795...,LIVE on Periscope Join me for a few minutes i...,8765,27429,00:28:48,2016-11-08
30890,795779987152523264,https://twitter.com/realDonaldTrump/status/795...,Thank you Pennsylvania Going to New Hampshire ...,7743,25771,01:08:28,2016-11-08
30891,795781945607278592,https://twitter.com/realDonaldTrump/status/795...,Today in Florida I pledged to stand with the p...,12110,32433,01:16:15,2016-11-08
30892,795782371895349250,https://twitter.com/realDonaldTrump/status/795...,Big news to share in New Hampshire tonight Pol...,11281,38360,01:17:57,2016-11-08
30893,795834203430645760,https://twitter.com/realDonaldTrump/status/795...,Unbelievable evening in New Hampshire THANK Y...,7796,28222,04:43:54,2016-11-08
...,...,...,...,...,...,...,...
41117,1218962544372670467,https://twitter.com/realDonaldTrump/status/121...,I have never seen the Republican Party as Stro...,32620,213817,19:24:52,2020-01-19
41118,1219004689716412416,https://twitter.com/realDonaldTrump/status/121...,Now Mini Mike Bloomberg is critical of Jack Wi...,36239,149571,22:12:20,2020-01-19
41119,1219053709428248576,https://twitter.com/realDonaldTrump/status/121...,I was thrilled to be back in the Great State o...,16588,66944,01:27:07,2020-01-20
41120,1219066007731310593,https://twitter.com/realDonaldTrump/status/121...,In the House the President got less due proces...,20599,81921,02:16:00,2020-01-20


#### Stock Market Data

The Yahoo Finance API is utilized for market data from November 8th 2016 through present day.

In [25]:
import yfinance as yf

# Fetch NDAQ price history starting 2016-11-08 and keep a CSV copy on disk.
market_data = yf.download(tickers='NDAQ', start='2016-11-08')
market_data.to_csv(path_or_buf='market_data.csv')

[*********************100%***********************]  1 of 1 completed


In [26]:
# Reload the CSV saved by the download cell. That cell wrote to the relative
# path 'market_data.csv', so read it back the same way instead of assuming the
# working directory is /content.
stock_data = pd.read_csv('market_data.csv')
stock_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-11-08,65.440002,65.57,64.190002,65.379997,60.622059,666500
1,2016-11-09,65.209999,67.139999,64.690002,66.93,62.059265,1034000
2,2016-11-10,67.489998,68.290001,66.889999,67.099998,62.2169,1211200
3,2016-11-11,66.980003,67.32,65.870003,66.440002,61.604923,889400
4,2016-11-14,66.620003,66.660004,64.57,64.730003,60.019367,1413300


In [27]:
# (rows, columns) of the stock price frame.
stock_data.shape

(995, 7)

In [28]:
# Summary statistics for the price and volume columns.
stock_data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,995.0,995.0,995.0,995.0,995.0,995.0
mean,90.579156,91.406101,89.718563,90.595789,87.689579,860614.2
std,17.378123,17.616862,17.088426,17.363765,18.661687,380385.4
min,63.700001,64.32,63.360001,63.549999,58.92524,217900.0
25%,76.129997,76.764999,75.599998,76.119999,72.189796,598950.0
50%,88.849998,89.82,88.099998,88.800003,86.053398,773800.0
75%,100.34,101.489998,99.200001,100.389999,98.653645,1017500.0
max,137.809998,137.940002,135.0,137.279999,136.759308,2973800.0


In [29]:
# Limit every floating-point column to two decimal places.
stock_data = stock_data.round(2)
stock_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-11-08,65.44,65.57,64.19,65.38,60.62,666500
1,2016-11-09,65.21,67.14,64.69,66.93,62.06,1034000
2,2016-11-10,67.49,68.29,66.89,67.10,62.22,1211200
3,2016-11-11,66.98,67.32,65.87,66.44,61.60,889400
4,2016-11-14,66.62,66.66,64.57,64.73,60.02,1413300
...,...,...,...,...,...,...,...
990,2020-10-15,126.50,128.44,126.15,128.41,128.41,410100
991,2020-10-16,128.66,129.90,127.88,128.87,128.87,727500
992,2020-10-19,129.71,130.10,127.39,127.48,127.48,886800
993,2020-10-20,127.51,131.08,127.51,128.99,128.99,884400


There is not much cleaning that needs to take place for the stock market data. 

## Exploratory Analysis

In [30]:
import seaborn as sns 
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline 
# Configure seaborn's plot styling, with color_codes enabled.
sns.set(color_codes=True)

### Null Values

Now that the data is cleaned up, let's perform some exploratory analysis on the two data sets.

In [31]:
# Count missing values per column in the tweets frame.
trump_tweets.isnull().sum()

id               0
link             0
content          0
retweets         0
favorites        0
Time         41122
Date             0
dtype: int64

In [32]:
# Count missing values per column in the stock price frame.
stock_data.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

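The stock data has no missing values. In the tweet data every column is complete except `Time`, which is reported as null for all 41,122 rows in the output above. That output comes from an earlier run (cell In [31] was executed before the cleaning cells In [41]–[60]), so this check should be repeated after a fresh Restart & Run All before concluding that the tweet data has no missing values.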