# Twitter scraping using Tweepy

To install: `pip install tweepy`

In [8]:
import tweepy
from configparser import ConfigParser
import pandas as pd
import os.path as path

## Load credentials

In [3]:
# Parse the local credentials file; the Twitter API keys are read from this
# parser when the authentication handler is built.
cp = ConfigParser()
# read() returns the list of files it managed to parse and silently skips any
# file it cannot open, so an empty list here means nothing was loaded.
cp.read('../.secret/credentials.ini')

['../.secret/credentials.ini']

## Get companies' Twitter ids

Load the Twitter handles for companies.

In [4]:
# Relative path of the directory holding the input data files.
DATA_DIR = "../data/"

In [12]:
# The CSV is read without a header row, so the column names are assigned explicitly.
companies_twitter_handles_uk = pd.read_csv(
    path.join(DATA_DIR, 'companies_twitter_handles_uk.csv'),
    header=None,
)
companies_twitter_handles_uk.columns = ['twitter_handle', 'ticker', 'sector']

# Add one extra handle on top of those loaded from the CSV; its ticker and
# sector are left as NaN. pd.concat is used because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
companies_twitter_handles_uk = pd.concat(
    [
        companies_twitter_handles_uk,
        pd.DataFrame([{'twitter_handle': 'UKGamesWorkshop'}]),
    ],
    ignore_index=True,
)

# Plain Python list of handles, used for the id lookups.
companies_twitter_handles_uk_list = companies_twitter_handles_uk['twitter_handle'].tolist()

In [10]:
# Preview the first rows to check that the handles loaded as expected.
companies_twitter_handles_uk.head()

Unnamed: 0,twitter_handle,ticker,sector
0,3i,III,FinancialServices
1,3iInfrastructure,3IN,
2,888Holdings,888,
3,AA,AA.,
4,AberforthSmallerCompaniesTrust,ASL,


Create a Tweepy authentication handler.

In [17]:
# All four OAuth values live in the same section of the credentials file.
twitter_credentials = cp['emas_twitter_credentials']

# Consumer key/secret identify the application...
auth = tweepy.OAuthHandler(
    twitter_credentials['consumer_key'],
    twitter_credentials['consumer_secret'],
)
# ...and the access token/secret identify the account acting through it.
auth.set_access_token(
    twitter_credentials['access_token'],
    twitter_credentials['access_token_secret'],
)

Instantiate a Tweepy API object (a wrapper for the Twitter API).

In [19]:
# Pass the handler positionally: Tweepy 3.x names this parameter `auth_handler`
# while Tweepy 4.x names it `auth`, so the keyword form only works on one of them.
api = tweepy.API(auth)

Get users' (companies') ids given the handles.

In [23]:
# Sanity check: look up a single company by handle and show its numeric id.
# `screen_name` is passed by keyword because Tweepy 4.x no longer accepts
# positional arguments for get_user; the keyword form also works on 3.x.
api.get_user(screen_name=companies_twitter_handles_uk_list[0]).id

18563305

In [25]:
# Maps Twitter handle -> numeric user id; filled in by get_users_ids.
twitter_ids_dict = {}

In [26]:
def get_users_ids(api, twitter_handles_list, twitter_ids_dict, n_users=None):
    """
    Look up the Twitter user id of each handle and store it in `twitter_ids_dict`.

    Handles whose lookup fails (e.g. the account does not exist) are reported
    on stdout and skipped, so one bad handle does not abort the whole run.

    PARAMS
    ------
        api: Tweepy API object.
        twitter_handles_list: list of Twitter handles (screen names)
        twitter_ids_dict: dictionary that is updated in place with the new
            {handle: id} key-value pairs
        n_users: maximum number of users of which to fetch the ids, taken from
            the start of the list. Default: None (i.e. every handle in
            `twitter_handles_list`). Values larger than the list are capped at
            its length; 0 or a negative value fetches nothing.

    RETURNS
    -------
        None. Results are written to `twitter_ids_dict`.
    """
    n_available = len(twitter_handles_list)
    if n_users is None:
        n_users = n_available
    else:
        # Clamp so that an oversized request cannot index past the end of the
        # list and a negative one cannot produce a nonsensical count.
        n_users = max(0, min(n_users, n_available))

    print(f"Fetching {n_users} ids")

    for twitter_handle in twitter_handles_list[:n_users]:
        print(f"Fetching id: @{twitter_handle}")

        try:
            # Keyword form works on both Tweepy 3.x and 4.x (4.x rejects
            # positional arguments to get_user).
            twitter_ids_dict[twitter_handle] = api.get_user(screen_name=twitter_handle).id
        except Exception as e:
            # Best effort: report the failure and carry on with the next handle.
            print(e)

In [27]:
# Trial run limited to the first 10 handles; results accumulate in twitter_ids_dict.
get_users_ids(api, companies_twitter_handles_uk_list, twitter_ids_dict, n_users=10)

Fetching 10 ids
Fetching id: @3i
Fetching id: @3iInfrastructure
[{'code': 50, 'message': 'User not found.'}]
Fetching id: @888Holdings
Fetching id: @AA
Fetching id: @AberforthSmallerCompaniesTrust
[{'code': 50, 'message': 'User not found.'}]
Fetching id: @AdmiralGroup
Fetching id: @Aggreko
Fetching id: @AllianceTrust
Fetching id: @Amigo
Fetching id: @AngloAmerican


In [29]:
# Spot-check one of the fetched ids.
twitter_ids_dict['3i']

18563305