In [8]:
import tweepy
import configparser as cp
import os
from datetime import datetime
import nltk
import re
from collections import Counter
import numpy as np

from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# download nltk dependencies
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')

# initialize a sentiment analyzer
sid = SentimentIntensityAnalyzer()

# stop words for the word-counts
stops = stopwords.words('english')
stops.append('https')

# the number of most frequently mentioned tags
num_tags_scatter = 5

# initalize a dictionary to store the number of tweets for each game
scatter_dict = {}
sentiment_dict = {}


# Config Parser for Twitter API authentification
config = cp.ConfigParser()
config.read('./config.ini')

# Twitter API credentials
consumer_key = config.get('AUTH', 'consumer_key')
consumer_secret = config.get('AUTH', 'consumer_secret')

access_key = config.get('AUTH', 'access_key')
access_secret = config.get('AUTH', 'access_secret')

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)


def get_tweets_by_query(stock, debug=False):
    date_until = datetime.today().strftime('%Y-%m-%d')

    # get tweets
    tweets = []
    query_tag = '#'+str(stock)

    if debug:
        json_limit = api.rate_limit_status()
        print("Calls restants : " + str(json_limit['resources']['search']['/search/tweets']['remaining']) + '\n')

    for tweet in tweepy.Cursor(api.search, q=query_tag, lang="en", count=100).items():
        # create array of tweet information: created at, username, text
        tweets.append([tweet.created_at, tweet.user.screen_name, tweet.text])

    if debug:
        json_limit = api.rate_limit_status()
        print("Calls restants : " + str(json_limit['resources']['search']['/search/tweets']['remaining']) + '\n')

    df = pd.DataFrame(tweets, columns=['Date', 'Author', 'Text'])
    df.Date = df.Date.dt.strftime('%Y-%m-%d')

    if debug:
        print(df.head())

    return df


def count_words(series):
    # merge the text from all the tweets into one document
    document = ' '.join([row for row in series])

    # lowercasing, tokenization, and keep only alphabetical tokens
    tokens = [word for word in word_tokenize(document.lower()) if word.isalpha()]

    # filtering out tokens that are not all alphabetical
    tokens = [word for word in re.findall(r'[A-Za-z]+', ' '.join(tokens))]

    # remove all stopwords
    no_stop = [word for word in tokens if word not in stops]

    return Counter(no_stop)


def preprocess_nltk(row):
    # lowercasing, tokenization, and keep only alphabetical tokens
    tokens = [word for word in word_tokenize(row.lower()) if word.isalpha()]

    # filtering out tokens that are not all alphabetical
    tokens = [word for word in re.findall(r'[A-Za-z]+', ' '.join(tokens))]

    # remove all stopwords
    no_stop = [word for word in tokens if word not in stops]

    return ' '.join(no_stop)

[nltk_data] Downloading package punkt to /home/zozi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/zozi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/zozi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/zozi/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [9]:
import plotly.graph_objects as go
import pandas as pd

In [10]:
twitter_data = {}
for stock in ['AAPL', 'TSLA']:
    twitter_data[stock] = get_tweets_by_query(stock)

In [11]:
twitter_data['AAPL']

Unnamed: 0,Date,Author,Text
0,2020-09-23,AquilaMarkets,#Aquiladashboard US tech led the #Sp500 rally ...
1,2020-09-23,dailytradingapp,#AAPL #AMZN Not All FAANG Stocks Are Going To ...
2,2020-09-23,ForexTrends365,RT @cryptoexp38560E: #NZDCAD Target Hit! 90 PI...
3,2020-09-23,cryptoexp38560E,#NZDCAD Target Hit! 90 PIPS Profit 🔥\n\nGet Da...
4,2020-09-23,UnsichtbarHand,@yieldbusters @carlosotermin Communism approve...
...,...,...,...
1016,2020-09-14,xaelbot,RT @concept10: Constant note taking is the key...
1017,2020-09-14,ThomasClone_US,RT @concept10: Constant note taking is the key...
1018,2020-09-14,learn__together,RT @concept10: Constant note taking is the key...
1019,2020-09-14,nodeQuotesBot,RT @concept10: Constant note taking is the key...


In [98]:
cnt = count_words(df.Text)

In [99]:
df.Date = pd.to_datetime(df.Date)

In [100]:
df

Unnamed: 0,Date,Author,Text
0,2020-09-22 10:34:53,LearningMindse1,#AAPL chart https://t.co/4akHkwsuqH via https:...
1,2020-09-22 09:55:19,Xavier_Trading,Stock Market Charts | The Market Broke a Key L...
2,2020-09-22 09:07:32,cryptoexp32562F,#EURCAD 275 PIPS Profit🔥✅⭐️\n\nFor free signal...
3,2020-09-22 08:47:10,SangamAgarwalFX,Apple stock after support at fib - #AAPL char...
4,2020-09-22 08:37:20,cryptoexp32562E,#EURCAD 275 PIPS Profit🔥✅⭐️\n\nFor Forex signa...
...,...,...,...
887,2020-09-13 17:10:06,TrendSpider,$AAPL #AAPL A little extended shown by the mon...
888,2020-09-13 15:40:00,es_tradingview,#AAPL - AAPL - TradingView - https://t.co/70Sx...
889,2020-09-13 15:35:22,Jin_Yani,RT @Gino89910812: #AAPL remains a buy! https:/...
890,2020-09-13 15:32:22,firstpenney,RT @OCMillionaire: $BRTXQ big post for those w...


In [106]:
df['Hours'] = df.Date.dt.hour
df['Days'] = df.Date.dt.day

In [107]:
df

Unnamed: 0,Date,Author,Text,Hours,Days
0,2020-09-22 10:34:53,LearningMindse1,#AAPL chart https://t.co/4akHkwsuqH via https:...,10,22
1,2020-09-22 09:55:19,Xavier_Trading,Stock Market Charts | The Market Broke a Key L...,9,22
2,2020-09-22 09:07:32,cryptoexp32562F,#EURCAD 275 PIPS Profit🔥✅⭐️\n\nFor free signal...,9,22
3,2020-09-22 08:47:10,SangamAgarwalFX,Apple stock after support at fib - #AAPL char...,8,22
4,2020-09-22 08:37:20,cryptoexp32562E,#EURCAD 275 PIPS Profit🔥✅⭐️\n\nFor Forex signa...,8,22
...,...,...,...,...,...
887,2020-09-13 17:10:06,TrendSpider,$AAPL #AAPL A little extended shown by the mon...,17,13
888,2020-09-13 15:40:00,es_tradingview,#AAPL - AAPL - TradingView - https://t.co/70Sx...,15,13
889,2020-09-13 15:35:22,Jin_Yani,RT @Gino89910812: #AAPL remains a buy! https:/...,15,13
890,2020-09-13 15:32:22,firstpenney,RT @OCMillionaire: $BRTXQ big post for those w...,15,13


In [109]:
df.groupby('Days').count()

Unnamed: 0_level_0,Date,Author,Text,Hours
Days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
13,21,21,21,21
14,78,78,78,78
15,193,193,193,193
16,120,120,120,120
17,128,128,128,128
18,110,110,110,110
19,61,61,61,61
20,80,80,80,80
21,77,77,77,77
22,24,24,24,24


In [119]:
fig = go.Figure()

for stock in ['AAPL', 'TSLA', 'GOOG']:
    li, _ = get_tweets_by_query(stock, 100)
    x, y, _ = organize_data(li)
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            name=stock
    ))

fig.show()

Calls restants : 180

Calls restants : 170

Calls restants : 170

Calls restants : 150

Calls restants : 150

Calls restants : 147



In [116]:
# colors for plots
chart_colors = [
    '#664DFF',
    '#893BFF',
    '#3CC5E8',
    '#2C93E8',
    '#0BEBDD',
    '#0073FF',
    '#00BDFF',
    '#A5E82C',
    '#FFBD42',
    '#FFCA30'
]
# global color setting
app_color = {
    "graph_bg": "rgb(221, 236, 255)",
    "graph_line": "rgb(8, 70, 151)",
    "graph_font":"rgb(2, 29, 65)"
}

In [128]:
fig_2 = go.Figure()

visible = [False] * len(['AAPL', 'TSLA', 'MSFT'])
visible[0] = True

for i, stock in enumerate(['AAPL', 'TSLA', 'MSFT']):
    li, _ = get_tweets_by_query(stock, 100, debug=True)
    x, y, cnt = organize_data(li)
    top_n = cnt.most_common(10)[::-1]

    # get the x and y values
    X = [cnt for word, cnt in top_n]
    Y = [word for word, cnt in top_n]

    # plot the bar chart
    fig_2.add_trace(go.Bar(
        x=X, y=Y,
        name='Word Counts',
        orientation='h',
        marker=dict(color=chart_colors[::-1]),
        visible=visible[i]
    ))

# specify the layout
fig_2.update_layout(
        xaxis={
            'type': 'log',
            'autorange': True,
            'title': 'Number of Words'
        },
        height=300,
        plot_bgcolor=app_color["graph_bg"],
        paper_bgcolor=app_color["graph_bg"],
        font={"color": app_color["graph_font"]},
        autosize=True,
        margin=go.layout.Margin(
            l=100,
            r=25,
            b=75,
            t=25,
            pad=4
        ),
    )

buttons = []

for i, stock in enumerate(['AAPL', 'TSLA', 'MSFT']):
    false_true = [False] * len(['AAPL', 'TSLA', 'MSFT'])
    false_true[i] = True
    buttons.append(
        dict(label = stock,
                method = 'update',
                args = [{'visible': false_true}])
    )


fig_2.update_layout(
    updatemenus=[dict(
        x=1.1,
        y=0.8,
        active=0,
        type='buttons',
        direction='down',
        buttons=buttons
        )
    ])

fig_2.show()

Calls restants : 173

Calls restants : 163

Calls restants : 163

Calls restants : 143

Calls restants : 143

Calls restants : 136



In [188]:
# query tweets from the database
li, _ = get_tweets_by_query('MSFT', 100, True)

df = pd.DataFrame(li, columns=['Date', 'Author', 'Text'])
df.Date = df.Date.dt.strftime('%Y-%m-%d')
# preprocess the text column
df['Text'] = df.Text.apply(preprocess_nltk)

Calls restants : 158

Calls restants : 151



In [189]:
len(df)

543

In [190]:
avg_sentiments = {}

for date in df.Date.unique():
    sub_df = df[df.Date == date]

    sentiments = []
    for row in sub_df['Text']:
        sentiments.append(sid.polarity_scores(row)['compound'])

    avg_sentiments[date] = [np.mean(sentiments), np.std(sentiments)]

In [191]:
# plot the scatter plot
fig_3 = go.Figure(go.Scatter(
    x=[time for time, score in avg_sentiments.items()],
    y=[score[0] for time, score in avg_sentiments.items()],
    error_y={
        "type": "data",
        "array": [score[1]/30 for time, score in avg_sentiments.items()],
        "thickness": 1.5,
        "width": 1,
        "color": "#000",
    },
    name='AAPL',
    mode='markers',
    opacity=0.7,
    marker=dict(color=chart_colors[4], size=10)
))

# specify the layout
fig_3.update_layout(
        xaxis={
            'automargin': False,
            'title': 'Current Time (GMT)',
            'nticks': len(df.Date.unique()),
        },
        yaxis={
            'autorange': True,
            'title': 'Sentiment Score'
        },
        height=400,
        plot_bgcolor=app_color["graph_bg"],
        paper_bgcolor=app_color["graph_bg"],
        font={"color": app_color["graph_font"]},
        autosize=False,
        legend={
            'orientation': 'v',
            # 'xanchor': 'right',
            # 'yanchor': 'middle',
            # 'x': 0.5,
            # 'y': 1.025
        },
        margin=go.layout.Margin(
            l=75,
            r=25,
            b=70,
            t=25,
            pad=4
        ),
    )

fig_3.show()