In [1]:
%matplotlib inline

In [2]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import seaborn as sns
import time
import tweepy
from datetime import datetime
 
# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Twitter API Keys
from tw_config import (consumer_key,
                       consumer_secret,
                       access_token,
                       access_token_secret)

In [3]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [4]:
# Load S&P500 Company List
df_sp500 = pd.read_csv('Resources/SP500_Company_List.csv')

df_sp500.head()

Unnamed: 0,Ticker,Security,GICS Sector,GICS Sub Industry,Location,Date Added,CIK
0,MMM,3M Company,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,"North Chicago, Illinois",3/31/1964,1800
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,"North Chicago, Illinois",12/31/2012,1551152
3,ABMD,ABIOMED Inc,Health Care,Health Care Equipment,"Danvers, Massachusetts",5/31/2018,815094
4,ACN,Accenture plc,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",7/6/2011,1467373


In [36]:
# Find Twitter handles for S&P500 Companies - RUN IF "SP500_twtr_ids.csv" DOES NOT ALREADY EXIST. 
twtr_ids = []

# Search for Twitter handles by Company Name
for index, row in df_sp500.iterrows(): 
    try:
        user_id = ''
        screen_name = ''
        name = ''
        
        time.sleep(1)                                                  # Keep API Calls under limit
        public_users = api.search_users(row.Security)
        re_name = re.search('[\w]+', row.Security)
        loc = re.search('[\w]+$', row.Location)
        if len(public_users) > 0:
            for user in public_users:
                if (re_name.group(0).lower() in user['name'].lower()):
                    user_id = user['id']
                    screen_name = user['screen_name']
                    name = user['name']
                    break
                
                elif (row.Security.lower() in user['name'].lower()):
                    user_id = user['id']
                    screen_name = user['screen_name']
                    name = user['name']
                
        twtr_ids.append([row.Ticker, row.Security, loc.group(0).lower(), user_id, screen_name, name])
        #print(f'Company Match: [{row.Ticker},{row.Security},{loc.group(0).lower()},{user_id},{screen_name},{name}]')
        
    except Exception as e:
        print(e)

# Search for Twitter handles with different company name search
for i in range(len(twtr_ids)):
    try:
        if twtr_ids[i][3] == '':                                     # Check if user_id is blank
            time.sleep(1)                                                # Keep API Calls under limit
            re_name = re.search('[\w]+ [\w]+', twtr_ids[i][1])
            if re_name != None:
                public_users = api.search_users(re_name.group(0).lower())
            
                if len(public_users) > 0:
                    for user in public_users:
                        if (re_name.group(0).lower() in user['name'].lower()):
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_ids[i] = [twtr_ids[i][0],twtr_ids[i][1],twtr_ids[i][2],user_id,screen_name,name]
                            #print(f'2nd Company Search: {twtr_ids[i]}')
                            break
                           
                        elif (twtr_ids[i][1].lower() in user['name'].lower()):
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_ids[i] = [twtr_ids[i][0],twtr_ids[i][1],twtr_ids[i][2],user_id,screen_name,name]
                            #print(f'2nd Company Search: {twtr_ids[i]}')
                            
    except Exception as e:
        print(e)        
        
# Search for Twitter handles by ticker symbol
for i in range(len(twtr_ids)):
    try:
        if twtr_ids[i][3] == '':                                     # Check if user_id is blank
            time.sleep(1)                                                # Keep API Calls under limit
            public_users = api.search_users(f'${twtr_ids[i][0]}')
            
            if len(public_users) > 0:
                for user in public_users:
                    if re.search('[\w]+', twtr_ids[i][1]) != None:
                        if re.search('[\w]+', twtr_ids[i][1]).group(0).lower() in user['name'].lower():
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_ids[i] = [twtr_ids[i][0],twtr_ids[i][1],twtr_ids[i][2],user_id,screen_name,name]
                            #print(f'Ticker Match: {twtr_ids[i]}')
                            break
                        
                    elif re.search('[\w]+ [\w]+', twtr_ids[i][1]) != None:
                        if re.search('[\w]+ [\w]+', twtr_ids[i][1]).group(0).lower() in user['name'].lower():
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_ids[i] = [twtr_ids[i][0],twtr_ids[i][1],twtr_ids[i][2],user_id,screen_name,name]
                            #print(f'Ticker Match: {twtr_ids[i]}')
                            break
                        
                    elif (twtr_ids[i][0] == user['name']) and (user['followers_count'] > 10000):
                        user_id = user['id']
                        screen_name = user['screen_name']
                        name = user['name']
                        twtr_ids[i] = [twtr_ids[i][0],twtr_ids[i][1],twtr_ids[i][2],user_id,screen_name,name]
                        #print(f'Ticker Match: {twtr_ids[i]}')
                      
    except Exception as e:
        print(e)

df_twtr_ids = pd.DataFrame(twtr_ids, columns=['ticker','security','location','user_id','screen_name','twtr_name'])
df_twtr_ids = df_twtr_ids.drop(columns=['location'])
df_twtr_ids.to_csv('Resources/SP500_twtr_ids.csv', index=False)

'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'


In [165]:
df_sp500 = df_sp500[450:]

In [166]:
# Get Twitter Sentiments for S&P500 Companies
twtr_sentiments = []

for index, row in df_sp500.iterrows(): 
    try:
        # Create search name to use in Twitter search
        re_name = re.search('[\w]+ [\w]+', row.Security)
        
        # Verify that search name is not null 
        if re_name == None: 
            re_name = re.search('[\w]+', row.Security)
            
        # Create Twitter search term
        target_term = f'{re_name.group(0)} ${row.Ticker}'
        
        # Perform search of most recent 100 tweets
        time.sleep(5)
        public_tweets = api.search(target_term, count=100)
        
        for tweet in public_tweets['statuses']:
            twt_date = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
            twt_fdate = twt_date.strftime('%Y%m%d')
            twt_yr = twt_date.strftime('%Y')
            twt_md = twt_date.strftime('%m%d')
            
            twt_text = tweet['text']
            vdr_results = analyzer.polarity_scores(twt_text)
            compound = vdr_results['compound']
            neg = vdr_results['neg']
            neu = vdr_results['neu']
            pos = vdr_results['pos']
            
            # Create row for Twitter sentiment list
            twtr_sentiments.append([row.Ticker, row.Security, twt_fdate, twt_yr, twt_md, compound, neg, neu, pos, twt_text])
    
    except Exception as e:
        print(e)

#Create dataframe for Twitter sentiments
data_columns = ('ticker','company','date','year','monthday','compound_score','neg_score','neu_score','pos_score','tweet')

df_twtr_sentiments = pd.DataFrame(twtr_sentiments, columns=data_columns)

qtr_bins = [0,331,630,930,1231]
qtr_labels = ['Q1', 'Q2', 'Q3', 'Q4']

twt_qtr = pd.cut(pd.to_numeric(df_twtr_sentiments.monthday, errors='coerce'), qtr_bins, labels=qtr_labels)

quarter = []

for x in range(len(twt_qtr)):
    quarter.append(str(df_twtr_sentiments.year[x]) + str(twt_qtr[x]))

df_twtr_sentiments['quarter'] = quarter

df_twtr_sentiments = df_twtr_sentiments.drop(columns=['monthday'])

df_twtr_sentiments.to_csv('Resources/SP500_sentiments10.csv', index=False)
    

In [168]:
df_data = pd.read_csv('Resources/SP500_sentiments1.csv')
for i in range(2,11):
    df_data = df_data.merge(pd.read_csv(f'Resources/SP500_sentiments{i}.csv'),how='outer')
    
df_data.to_csv('Resources/SP500_sentiments.csv')

In [6]:
public_tweets = api.search('$MMM debt')
public_tweets

{'search_metadata': {'completed_in': 0.023,
  'count': 15,
  'max_id': 1013522033202696192,
  'max_id_str': '1013522033202696192',
  'query': '%24MMM+debt',
  'refresh_url': '?since_id=1013522033202696192&q=%24MMM%20debt&include_entities=1',
  'since_id': 0,
  'since_id_str': '0'},
 'statuses': [{'contributors': None,
   'coordinates': None,
   'created_at': 'Sun Jul 01 20:37:37 +0000 2018',
   'entities': {'hashtags': [],
    'symbols': [],
    'urls': [{'display_url': 'twitter.com/i/web/status/1…',
      'expanded_url': 'https://twitter.com/i/web/status/1013522033202696192',
      'indices': [123, 146],
      'url': 'https://t.co/C79JF5eeiZ'}],
    'user_mentions': [{'id': 16397147,
      'id_str': '16397147',
      'indices': [0, 14],
      'name': 'Liberal Party',
      'screen_name': 'liberal_party'},
     {'id': 3341414159,
      'id_str': '3341414159',
      'indices': [15, 27],
      'name': 'Ginette Petitpas Taylor',
      'screen_name': 'GPTaylorMRD'},
     {'id': 14260960,
 