In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import tweepy
import time
import seaborn as sns

# Initialize Sentiment Anal

# Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Twitter API Keys
from twitterapi import consumer_key, consumer_secret, access_token, access_token_secret

# Alpha Vantage API Key
from AV_apikey import apikey

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser(), wait_on_rate_limit=True)


In [2]:
# Do a .json dump to see data
tweet = api.search("DJIA")
# Utilize JSON dumps
print(json.dumps(tweet, sort_keys=True, indent=4, separators=(',', ': ')))

{
    "search_metadata": {
        "completed_in": 0.046,
        "count": 15,
        "max_id": 982617061313560577,
        "max_id_str": "982617061313560577",
        "next_results": "?max_id=982609125531152383&q=DJIA&include_entities=1",
        "query": "DJIA",
        "refresh_url": "?since_id=982617061313560577&q=DJIA&include_entities=1",
        "since_id": 0,
        "since_id_str": "0"
    },
    "statuses": [
        {
            "contributors": null,
            "coordinates": null,
            "created_at": "Sat Apr 07 13:52:18 +0000 2018",
            "entities": {
                "hashtags": [],
                "symbols": [
                    {
                        "indices": [
                            15,
                            20
                        ],
                        "text": "DJIA"
                    }
                ],
                "urls": [],
                "user_mentions": [
                    {
                        "id": 86537404,

In [3]:
#pulled from whatever source we're using, set as lists for now, but can be changed
pull_dates = ["2018-04-06", "2018-04-05", "2018-04-04","2018-04-03","2018-04-02", 
              "2018-03-29", "2018-03-28", "2018-03-27", "2018-03-26", 
              "2018-03-23", "2018-03-22", "2018-03-21", "2018-03-20", "2018-03-19", 
              "2018-03-16", "2018-03-15", "2018-03-14", "2018-03-13", "2018-03-12",
              "2018-03-09", "2018-03-08", "2018-03-07", "2018-03-06", "2018-03-05",
              "2018-03-02", "2018-03-01",]

#pulled from whatever source we're using, set as lists for now, but can be changed
index_name = "Stock Market"

#for dataframe

sentiments = []

prev_date = "2018-04-07"
#loop through dates
for date in pull_dates:
    # Counter
    counter = 1
    
    # Get all tweets from home feed
    public_tweets = api.search(index_name, count = 10, since=date, until=prev_date)
    prev_date = date

    # Loop through all tweets 
    for tweet in public_tweets["statuses"]:

        # Print Tweets
        print("Tweet %s: %s" % (counter, tweet["text"]))
        
        # Run Vader Analysis on each tweet
        compound = analyzer.polarity_scores(tweet["text"])["compound"]
        # The following are not needed but I included them for reference
        pos = analyzer.polarity_scores(tweet["text"])["pos"]
        neu = analyzer.polarity_scores(tweet["text"])["neu"]
        neg = analyzer.polarity_scores(tweet["text"])["neg"]
        tweets_ago = counter
            
        date = time.strftime('%m-%d-%Y', time.strptime(tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))
        
        # Add sentiments for each tweet into an array if the language is English
        if tweet["lang"] == "en":
            sentiments.append({"Index": index_name,
                               "Tweet":tweet["text"],
                               "Date": date,
                               "Language": tweet["lang"],
                                "Compound": compound,
                               "Positive": pos,
                               "Negative": neu,
                               "Neutral": neg,
                               "Tweets Ago": counter})
            
        # Add to counter 
        counter = counter + 1

Tweet 1: RT @tcsmithbiz: Job growth starting to stagnate

Stock market 20 quarter rally ends

Deficit skyrocketing

Trade war with China hurting ave…
Tweet 2: @LewTheSpook @up_trumped @djrothkopf @realDonaldTrump Yep.  Unless you own stock and can sell it before he releases… https://t.co/Uu6BxZMcUD
Tweet 3: RT @RepSwalwell: Stock market is tumbling. You shouldn’t lose money in your  retirement investments because a childish @realDonaldTrump sta…
Tweet 4: RT @gr8tjude: China tariffs will sting a bit but in the long run it will be good!  https://t.co/o600WFak9Y
Tweet 5: @realDonaldTrump @bernieandsid @77WABCradio Mr. President u better know what u r doing as your trade war has cost m… https://t.co/zUQST850gm
Tweet 6: @realDonaldTrump America is a disaster. Thanks for causing the stock market to tank. You're causing all of us to lo… https://t.co/friw5ZbU7g
Tweet 7: RT @RepSwalwell: Stock market is tumbling. You shouldn’t lose money in your  retirement investments because a childish @realD

In [4]:
# Convert sentiments to DataFrame
sentiments_pd = pd.DataFrame.from_dict(sentiments)
sentiments_pd = sentiments_pd[["Index", "Date", "Language", "Tweets Ago", "Compound", "Positive", "Neutral", "Negative", "Tweet"]]
sentiments_pd.to_csv("SM_data.csv")
print(sentiments_pd.shape)
sentiments_pd.tail()

(63, 9)


Unnamed: 0,Index,Date,Language,Tweets Ago,Compound,Positive,Neutral,Negative,Tweet
58,Stock Market,03-28-2018,en,6,0.0,0.0,0.0,1.0,RT @toddmillionaire: ‘Todd Capital Millionaire...
59,Stock Market,03-28-2018,en,7,0.4096,0.139,0.0,0.861,@WhiteHouse More selective Glee Club reporting...
60,Stock Market,03-28-2018,en,8,0.4019,0.137,0.0,0.863,@wesley_jordan @Amerikabeau2 @realDonaldTrump ...
61,Stock Market,03-28-2018,en,9,-0.5994,0.0,0.197,0.803,"RT @BrexitBin: Actually, we didn""t.\nBetween l..."
62,Stock Market,03-28-2018,en,10,-0.4215,0.0,0.123,0.877,RT @MarsBars6116: Stock market experts have pr...


In [5]:
# Get the mean sentiment for each date
# Only compound sentiment is needed but I wanted to see all sentiment averages
mean_values = pd.pivot_table(sentiments_pd, index = ["Date"],
                                   values = ("Compound", "Positive", "Neutral", "Negative"), aggfunc=np.mean)

# Flattenning the header to clean up the dataframe for mean values
mean_sentiment = pd.DataFrame(mean_values.to_records())
mean_sentiment = mean_sentiment[["Date", "Compound", "Positive", "Neutral", "Negative"]]
mean_sentiment["Index"] = sentiments_pd["Index"]
mean_sentiment = mean_sentiment[["Date", "Index", "Compound", "Positive", "Neutral", "Negative"]]
mean_sentiment.to_csv("SM_sentiment_by_day.csv")

mean_sentiment

Unnamed: 0,Date,Index,Compound,Positive,Neutral,Negative
0,03-28-2018,Stock Market,-0.009022,0.069222,0.075,0.855778
1,04-01-2018,Stock Market,-0.148,0.0,0.052,0.948
2,04-02-2018,Stock Market,-0.01114,0.1075,0.1072,0.7852
3,04-03-2018,Stock Market,-0.29115,0.0361,0.1319,0.832
4,04-04-2018,Stock Market,-0.44671,0.0705,0.1895,0.74
5,04-05-2018,Stock Market,0.10767,0.1075,0.0666,0.8258
6,04-06-2018,Stock Market,-0.21593,0.0872,0.1601,0.7527


In [6]:
# https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=demo&datatype=csv
    
# Save config information.
url = "https://www.alphavantage.co/query?"

# Build partial query URL
query_url = f"{url}function=TIME_SERIES_DAILY&symbol={index_name}&apikey={apikey}&datatype=csv"
query_url

'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=Stock Market&apikey=72CL7KDAJ55OYYA1&datatype=csv'