## Pulling Twitter Data
The following code pulls all the followers of the selected Twitter accounts and collects these fields for each follower:

- screen_name
- name
- id
- location
- followers_count
- friends_count
- description

In [16]:
import csv
import datetime
import time

import tweepy

from My_API_Keys import api_key, api_key_secret, access_token, access_token_secret

In [17]:
# Build the OAuth handler from the app's key pair, then attach the user's access token
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

# Shared client used by every cell below; wait_on_rate_limit=True asks Tweepy to
# pause and resume on its own when a rate limit is hit instead of raising
api = tweepy.API(auth, wait_on_rate_limit=True)

In [18]:
# Handles of the accounts whose followers we want to collect
team_handles = ['ThornsFC', 'ORLPride']

# Each page of follower IDs holds up to 5,000 records; the estimate below
# assumes roughly one page can be requested per minute.
ids_per_request = 5000

# Report the follower count and a rough collection time for each handle
for screen_name in team_handles:

    # Look the account up by handle. Passing screen_name as a keyword makes sure
    # the value is never interpreted as a numeric user id.
    user = api.get_user(screen_name=screen_name)
    followers_count = user.followers_count

    # Total minutes needed, split into whole hours plus the leftover minutes so
    # the two numbers printed below add up to a single duration.
    total_minutes = followers_count / ids_per_request
    hours, minutes = divmod(total_minutes, 60)

    print(f'''
    @{screen_name} has {followers_count} followers. 
    That will take roughly {hours:.0f} hours and {minutes:.2f} minutes
    ''')


    @ThornsFC has 110206 followers. 
    That will take roughly 0 hours and 22.04 minutes
    

    @ORLPride has 122068 followers. 
    That will take roughly 0 hours and 24.41 minutes
    


In [19]:
# One empty list per team handle; each list is filled with that team's follower IDs below
id_dict = {'ThornsFC' : [],
           'ORLPride' : []}

# Timestamp taken right before the first request so the total duration can be reported
start_time = datetime.datetime.now()

# Iterating over a dictionary walks its keys, i.e. the team handles
for handle in id_dict:

    # followers_ids is paginated (5,000 IDs per page) and both teams have far more
    # followers than that, so a Cursor is used to walk every page.
    # The rate-limit options make the request wait (about 15 minutes / 900 seconds)
    # and print a notice whenever the limit is reached, instead of failing.
    follower_pages = tweepy.Cursor(
        api.followers_ids,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
        compression=True,
        screen_name=handle,
    ).pages()

    for page in follower_pages:
        # Every page arrives as a list of IDs, so it is concatenated onto the
        # running list (in place) rather than appended as a single element
        id_dict[handle] += page


# Report how long collecting all the follower IDs took
end_time = datetime.datetime.now()
elapsed_time = end_time - start_time
print(elapsed_time)

Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 894
Rate limit reached. Sleeping for: 894


0:45:18.037227


In [20]:
# The lookup endpoint is fed at most 100 IDs per request
lookup_batch_size = 100
# Seconds to pause after a rate-limit error before retrying the same batch
rate_limit_sleep = 900

# One list of full user objects per handle we collected follower IDs for.
# Built from id_dict so the two dictionaries can never disagree on their keys.
users_dict = {handle: [] for handle in id_dict}

for handle in id_dict:
    follower_ids = id_dict[handle]
    followers = []

    # Stepping through the list with range() yields exact batch boundaries, so no
    # request is ever made with an empty slice (e.g. when there are 0 IDs or the
    # count is an exact multiple of the batch size).
    for start in range(0, len(follower_ids), lookup_batch_size):
        batch = follower_ids[start:start + lookup_batch_size]

        # Keep trying the same batch until it either succeeds or fails for a
        # reason other than rate limiting.
        while True:
            try:
                followers_temp = api.lookup_users(batch)
            except tweepy.RateLimitError:
                # Rate limited: wait out the window, then retry this batch
                print('sleeping, 900 seconds')
                time.sleep(rate_limit_sleep)
                continue
            except tweepy.TweepError as err:
                # Any other API error: report it and move on without this batch,
                # so a stale result from the previous batch is never re-added.
                print(f'skipping IDs {start}-{start + len(batch) - 1} for @{handle}: {err}')
                followers_temp = []
            break

        followers.extend(followers_temp)

    users_dict[handle].extend(followers)

In [None]:
headers = ['screen_name', 'name', 'location', 'followers_count', 'friends_count', 'description']


def _tsv_field(value):
    """Render one value as a single tab-separated field.

    Tabs, carriage returns and newlines are replaced with spaces so that a value
    (typically a free-text description) cannot spill into another column or row.
    """
    return str(value).replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')


# Write one tab-separated file per team from the user objects already fetched into
# users_dict, instead of issuing one extra API request per follower.
for team, team_followers in users_dict.items():

    # Descriptions with emoji or non-Roman letters can cause trouble, so the
    # file is explicitly encoded as utf-8.
    with open(f'{team}_followers.txt', 'w', encoding='utf-8') as out_file:
        out_file.write('\t'.join(headers) + '\n')

        for user in team_followers:
            try:
                outline = [user.screen_name, user.name, user.location,
                           user.followers_count, user.friends_count, user.description]
            except AttributeError as err:
                # Best effort: skip a record that lacks an expected field rather
                # than aborting the whole export, but say so.
                print(f'skipping a follower of @{team}: {err}')
                continue

            # Every field is sanitised (not just the description) before joining
            out_file.write('\t'.join(_tsv_field(item) for item in outline) + '\n')

In [None]:
# Spot-check: show the location of whichever account `user` was last bound to by an earlier cell
print(user.location)