In [1]:
%matplotlib inline

In [2]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import seaborn as sns
import time
import tweepy
from datetime import datetime
 
# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Twitter API Keys
from tw_config import (consumer_key,
                       consumer_secret,
                       access_token,
                       access_token_secret)

In [3]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [35]:
# Load S&P500 Company List
df_sp500 = pd.read_csv('Resources/SP500_Company_List.csv')

df_sp500.head()

Unnamed: 0,Ticker,Security,GICS Sector,GICS Sub Industry,Location,Date Added,CIK
0,MMM,3M Company,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,"North Chicago, Illinois",3/31/1964,1800
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,"North Chicago, Illinois",12/31/2012,1551152
3,ABMD,ABIOMED Inc,Health Care,Health Care Equipment,"Danvers, Massachusetts",5/31/2018,815094
4,ACN,Accenture plc,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",7/6/2011,1467373


In [36]:
# Find Twitter handles for S&P500 Companies - RUN IF SP500_twtr_handles.csv DOES NOT ALREADY EXIST. 
twtr_handles = []

# Search for Twitter handles by Company Name
for index, row in df_sp500.iterrows(): 
    try:
        user_id = ''
        screen_name = ''
        name = ''
        
        time.sleep(1)                                                  # Keep API Calls under limit
        public_users = api.search_users(row.Security)
        re_name = re.search('[\w]+', row.Security)
        loc = re.search('[\w]+$', row.Location)
        if len(public_users) > 0:
            for user in public_users:
                if (re_name.group(0).lower() in user['name'].lower()):
                    user_id = user['id']
                    screen_name = user['screen_name']
                    name = user['name']
                    break
                
                elif (row.Security.lower() in user['name'].lower()):
                    user_id = user['id']
                    screen_name = user['screen_name']
                    name = user['name']
                
        twtr_handles.append([row.Ticker, row.Security, loc.group(0).lower(), user_id, screen_name, name])
        #print(f'Company Match: [{row.Ticker},{row.Security},{loc.group(0).lower()},{user_id},{screen_name},{name}]')
        
    except Exception as e:
        print(e)

# Search for Twitter handles with different company name search
for i in range(len(twtr_handles)):
    try:
        if twtr_handles[i][3] == '':                                     # Check if user_id is blank
            time.sleep(1)                                                # Keep API Calls under limit
            re_name = re.search('[\w]+ [\w]+', twtr_handles[i][1])
            if re_name != None:
                public_users = api.search_users(re_name.group(0).lower())
            
                if len(public_users) > 0:
                    for user in public_users:
                        if (re_name.group(0).lower() in user['name'].lower()):
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_handles[i] = [twtr_handles[i][0],twtr_handles[i][1],twtr_handles[i][2],user_id,screen_name,name]
                            #print(f'2nd Company Search: {twtr_handles[i]}')
                            break
                           
                        elif (twtr_handles[i][1].lower() in user['name'].lower()):
                            user_id = user['id']
                            screen_name = user['screen_name']
                            name = user['name']
                            twtr_handles[i] = [twtr_handles[i][0],twtr_handles[i][1],twtr_handles[i][2],user_id,screen_name,name]
                            #print(f'2nd Company Search: {twtr_handles[i]}')
                            
    except Exception as e:
        print(e)        
        
# Search for Twitter handles by ticker symbol
for i in range(len(twtr_handles)):
    try:
        if twtr_handles[i][3] == '':                                     # Check if user_id is blank
            time.sleep(1)                                                # Keep API Calls under limit
            public_users = api.search_users(f'${twtr_handles[i][0]}')
            
            if len(public_users) > 0:
                for user in public_users:
                    if re.search('[\w]+', twtr_handles[i][1]).group(0).lower() in user['name'].lower():
                        user_id = user['id']
                        screen_name = user['screen_name']
                        name = user['name']
                        twtr_handles[i] = [twtr_handles[i][0],twtr_handles[i][1],twtr_handles[i][2],user_id,screen_name,name]
                        #print(f'Ticker Match: {twtr_handles[i]}')
                        break
                        
                    elif re.search('[\w]+ [\w]+', twtr_handles[i][1]).group(0).lower() in user['name'].lower():
                        user_id = user['id']
                        screen_name = user['screen_name']
                        name = user['name']
                        twtr_handles[i] = [twtr_handles[i][0],twtr_handles[i][1],twtr_handles[i][2],user_id,screen_name,name]
                        #print(f'Ticker Match: {twtr_handles[i]}')
                        break
                        
                    elif (twtr_handles[i][0] == user['name']) and (user['followers_count'] > 10000):
                        user_id = user['id']
                        screen_name = user['screen_name']
                        name = user['name']
                        twtr_handles[i] = [twtr_handles[i][0],twtr_handles[i][1],twtr_handles[i][2],user_id,screen_name,name]
                        #print(f'Ticker Match: {twtr_handles[i]}')
                      
    except Exception as e:
        print(e)

df_twtr_handles = pd.DataFrame(twtr_handles, columns=['ticker','security','location','user_id','screen_name','twtr_name'])
df_twtr_handles = df_twtr_handles.drop(columns=['location'])
df_twtr_handles.to_csv('Resources/SP500_twtr_handles.csv', index=False)

'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'
'NoneType' object has no attribute 'group'


In [34]:
public_users = api.search_users('$AIG')
public_users[0]


{'contributors_enabled': False,
 'created_at': 'Wed Mar 24 22:10:39 +0000 2010',
 'default_profile': False,
 'default_profile_image': False,
 'description': "Global insurance company serving clients in 80+ countries. We’re committed to preparing our clients for what's next. For service, tweet us! 👋",
 'entities': {'description': {'urls': []},
  'url': {'urls': [{'display_url': 'aig.com',
     'expanded_url': 'http://www.aig.com',
     'indices': [0, 22],
     'url': 'http://t.co/1Q1cbaVvyn'}]}},
 'favourites_count': 456,
 'follow_request_sent': False,
 'followers_count': 37357,
 'following': False,
 'friends_count': 251,
 'geo_enabled': False,
 'has_extended_profile': False,
 'id': 126118683,
 'id_str': '126118683',
 'is_translation_enabled': False,
 'is_translator': False,
 'lang': 'en',
 'listed_count': 567,
 'location': 'Worldwide',
 'name': 'AIG',
 'notifications': False,
 'profile_background_color': 'EDF3F5',
 'profile_background_image_url': 'http://abs.twimg.com/images/themes/the

In [None]:
re_name = re.search('[\w]+ [\w]+', 'American')
print(re_name)

In [None]:
nl = int(np.around(len('test')*.66))
regex = '[\D\d]{%s}' %(nl)
regex