# Pulling data from Twitter with twint

In [None]:
# Run this block if you need to install the relevant modules

!pip install twint==2.1.20

!pip install nest_asyncio==1.4.0 
# ^^ Need to install this for twint event loop to work in a jupyter notebook setting

Import nest_asyncio if needed for current environment

In [1]:
def enable_nested_event_loop():
    '''
    Patch the running event loop so that twint can be used within a jupyter notebook.

    Without this patch, twint searches generate runtime errors in a notebook
    because an event loop is already running.

    Returns
    -------
    bool
        True if nest_asyncio was applied, False if the step was skipped.
    '''
    try:
        # get_ipython only exists when running under IPython / Jupyter
        ip = get_ipython()
    except NameError:
        print('Not working in an iPython environment, skipping this step.')
        return False

    if not ip.has_trait('kernel'):
        # IPython shell without a kernel attached (presumably a terminal
        # session); nothing is patched in that case.
        return False

    try:
        import nest_asyncio
    except ImportError:
        # Keep the cell best-effort, but say why twint is likely to fail later.
        print('nest_asyncio is not installed; twint searches may fail with event loop errors.')
        return False

    nest_asyncio.apply()
    return True


nest_asyncio_applied = enable_nested_event_loop()

Import other modules and define some key variables:

In [2]:
import os
import pandas as pd

import twint
# Report the loaded twint release so results can be tied to a version
print(f'twint: {twint.__version__}')

# Location for data to be saved, relative to the notebook location
data_path = '../data/raw/'

twint: 2.1.20


## Extracting lists of followers

In [3]:
def get_followers(username, fp, full=False, suppress=True):
    '''
    Scrape the followers of a specific user and store them in a database file.

    The file is written inside ``fp`` and is named
    ``followers_<username>.db`` when ``full`` is true, or
    ``followers_names_<username>.db`` otherwise.

    Parameters
    ----------
    username : str
        The twitter handle of the user whose followers to pull.
    fp : str
        Filepath to directory where data should be stored.  A trailing
        path separator is optional; the directory is created if missing.
    full : bool
        If true, scrapes all user info; only name if false.
    suppress : bool
        If true, suppress the printed output of the scraping.

    Returns
    -------
    None
        The scraped data is written to the database file; nothing is returned.
    '''
    file_name = ('followers_' if full else 'followers_names_') + username + '.db'
    # os.path.join inserts the separator only when ``fp`` lacks one, so both
    # '../data/raw' and '../data/raw/' resolve to the same file.
    full_path = os.path.join(fp, file_name)

    # Make sure the target directory exists before twint opens the database
    # (an empty ``fp`` means the current directory, which needs no creation).
    if fp:
        os.makedirs(fp, exist_ok=True)

    c = twint.Config()
    c.Username = username
    c.Hide_output = suppress
    c.User_full = full
    c.Database = full_path

    twint.run.Followers(c)

    print('Follower data saved to "'+full_path+'"')

In [6]:
# Save into the shared raw-data directory configured in `data_path`
get_followers('RoyalReporter', data_path, suppress=True)

[+] Inserting into Database: ../data/raw/followers_names_RoyalReporter.db


CRITICAL:root:twint.get:User:'NoneType' object is not subscriptable


KeyboardInterrupt: 

In [20]:
# NOTE(review): `followers` is not assigned anywhere in the visible notebook, and
# get_followers() has no return statement, so this cell fails (see the TypeError
# in its output). Presumably the follower list has to be loaded first - TODO confirm.
print(len(followers))
print(followers[:5])

TypeError: object of type 'NoneType' has no len()

In [6]:
def get_following(username, fp, full=False, suppress=True):
    '''
    Scrape the users followed by a specific user and store them in a database file.

    The file is written inside ``fp`` and is named
    ``following_<username>.db`` when ``full`` is true, or
    ``following_names_<username>.db`` otherwise.

    Parameters
    ----------
    username : str
        The twitter handle of the user whose follows to pull.
    fp : str
        Filepath to directory where data should be stored.  A trailing
        path separator is optional; the directory is created if missing.
    full : bool
        If true, scrapes all user info; only name if false.
    suppress : bool
        If true, suppress the printed output of the scraping.

    Returns
    -------
    None
        The scraped data is written to the database file; nothing is returned.

    TODO: Abstract RAM storage of output
    '''
    file_name = ('following_' if full else 'following_names_') + username + '.db'
    # os.path.join inserts the separator only when ``fp`` lacks one, so both
    # '../data/raw' and '../data/raw/' resolve to the same file.
    full_path = os.path.join(fp, file_name)

    # Make sure the target directory exists before twint opens the database
    # (an empty ``fp`` means the current directory, which needs no creation).
    if fp:
        os.makedirs(fp, exist_ok=True)

    c = twint.Config()
    c.Username = username
    c.Hide_output = suppress
    c.User_full = full
    c.Database = full_path

    twint.run.Following(c)

    print('Follow data saved to "'+full_path+'"')

In [7]:
# Save into the shared raw-data directory configured in `data_path`
get_following('bobthephysicist', data_path, suppress=True)

[+] Inserting into Database: ../data/raw/following_names_bobthephysicist.db


CRITICAL:root:twint.get:User:'NoneType' object is not subscriptable
CRITICAL:root:twint.feed:Follow:IndexError
CRITICAL:root:twint.feed:Follow:IndexError


Follow data saved to "../data/raw/following_names_bobthephysicist.db"
