# Pulling Twitter User Info using the API

In [2]:
import os
import time
import json
import pandas as pd

import tweepy

In [3]:
api_keys_fp = '../twitter_credentials.json'

In [4]:
def load_api(keys_file):
    '''
    
    '''
    # Load twitter credentials
    with open(keys_file, 'r') as file:
        creds = json.load(file)

    # Use credentials to set up API access authorisation
    auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
    auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
    api = tweepy.API(auth, wait_on_rate_limit=True)
    
    return api

In [5]:
tw_api = load_api(api_keys_fp)

In [8]:
type(tw_api)

tweepy.api.API

## Extracting information from get_user API request

In [6]:
user = tw_api.get_user('kimknilsson')

In [7]:
type(user)

tweepy.models.User

After requesting a user profile from the API as with the example line of code above, we want to pull out all the attributes and store them in a suitable format.

Can view the attributes using one of the two following functions, which are equivalent (`vars()` return the `__dict__` of the object).  The `list()` operator allows us to more easily interact with the object attributes.

In [74]:
user.__dict__.keys()

dict_keys(['_api', '_json', 'id', 'id_str', 'name', 'screen_name', 'location', 'profile_location', 'description', 'url', 'entities', 'protected', 'followers_count', 'friends_count', 'listed_count', 'created_at', 'favourites_count', 'utc_offset', 'time_zone', 'geo_enabled', 'verified', 'statuses_count', 'lang', 'status', 'contributors_enabled', 'is_translator', 'is_translation_enabled', 'profile_background_color', 'profile_background_image_url', 'profile_background_image_url_https', 'profile_background_tile', 'profile_image_url', 'profile_image_url_https', 'profile_banner_url', 'profile_link_color', 'profile_sidebar_border_color', 'profile_sidebar_fill_color', 'profile_text_color', 'profile_use_background_image', 'has_extended_profile', 'default_profile', 'default_profile_image', 'following', 'follow_request_sent', 'notifications', 'translator_type', 'suspended', 'needs_phone_verification'])

In [16]:
list(vars(user).keys())

['_api',
 '_json',
 'id',
 'id_str',
 'name',
 'screen_name',
 'location',
 'profile_location',
 'description',
 'url',
 'entities',
 'protected',
 'followers_count',
 'friends_count',
 'listed_count',
 'created_at',
 'favourites_count',
 'utc_offset',
 'time_zone',
 'geo_enabled',
 'verified',
 'statuses_count',
 'lang',
 'status',
 'contributors_enabled',
 'is_translator',
 'is_translation_enabled',
 'profile_background_color',
 'profile_background_image_url',
 'profile_background_image_url_https',
 'profile_background_tile',
 'profile_image_url',
 'profile_image_url_https',
 'profile_banner_url',
 'profile_link_color',
 'profile_sidebar_border_color',
 'profile_sidebar_fill_color',
 'profile_text_color',
 'profile_use_background_image',
 'has_extended_profile',
 'default_profile',
 'default_profile_image',
 'following',
 'follow_request_sent',
 'notifications',
 'translator_type']

A deeper inspection of the attributes (not shown, but can be accessed by running `vars(user)`) indicates that the first two keys `_api` and `_json` are not fields we want/need to retain.  
An example of how we can call a specific attribute is shown below:

In [18]:
vars(user)['entities']

{'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7',
    'expanded_url': 'http://www.pivigo.com',
    'display_url': 'pivigo.com',
    'indices': [0, 22]}]},
 'description': {'urls': []}}

Using this method of calling attributes, we should be able to quite easily use the keys to automatically build a Pandas Dataframe from the results.  

In [85]:
df = pd.DataFrame.from_dict({key: vars(user)[key] for key in list(vars(user).keys())[2:]}) # By using list comprehension, exclude the api and json keys from the dataframe

In [86]:
df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,{'urls': []},False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
url,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,"{'urls': [{'url': 'http://t.co/m8Q3QgBqn7', 'e...",False,...,DAECF4,663B12,False,False,False,False,True,False,False,none


Having figured out how to create the dataframe, we now want to cleanly add to it.  Let's look at a different account.

In [84]:
other = tw_api.get_user('jlesliedata')

In [87]:
other_df = pd.DataFrame.from_dict({key: vars(other)[key] for key in list(vars(other).keys())[2:]})
other_df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,735580694508933121,735580694508933121,Jonathan Leslie,jlesliedata,"London, England",,"Data scientist, #rstats enthusiast, mentor, bi...",https://t.co/pSWzkEKMS0,{'urls': []},False,...,DDEEF6,333333,True,False,True,False,True,False,False,none
url,735580694508933121,735580694508933121,Jonathan Leslie,jlesliedata,"London, England",,"Data scientist, #rstats enthusiast, mentor, bi...",https://t.co/pSWzkEKMS0,"{'urls': [{'url': 'https://t.co/pSWzkEKMS0', '...",False,...,DDEEF6,333333,True,False,True,False,True,False,False,none


Can see from the dataframe that there is a different set of fields compared to those retrieved before.  We are going to need a way to dynamically update the table.  

In [88]:
joined = df.iloc[[0]].append(other_df.iloc[[0]])

In [89]:
joined

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,{'urls': []},False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
description,735580694508933121,735580694508933121,Jonathan Leslie,jlesliedata,"London, England",,"Data scientist, #rstats enthusiast, mentor, bi...",https://t.co/pSWzkEKMS0,{'urls': []},False,...,DDEEF6,333333,True,False,True,False,True,False,False,none


In [36]:
list(vars(other).keys())

['_api',
 '_json',
 'id',
 'id_str',
 'name',
 'screen_name',
 'location',
 'profile_location',
 'description',
 'url',
 'entities',
 'protected',
 'followers_count',
 'friends_count',
 'listed_count',
 'created_at',
 'favourites_count',
 'utc_offset',
 'time_zone',
 'geo_enabled',
 'verified',
 'statuses_count',
 'lang',
 'status',
 'contributors_enabled',
 'is_translator',
 'is_translation_enabled',
 'profile_background_color',
 'profile_background_image_url',
 'profile_background_image_url_https',
 'profile_background_tile',
 'profile_image_url',
 'profile_image_url_https',
 'profile_banner_url',
 'profile_link_color',
 'profile_sidebar_border_color',
 'profile_sidebar_fill_color',
 'profile_text_color',
 'profile_use_background_image',
 'has_extended_profile',
 'default_profile',
 'default_profile_image',
 'following',
 'follow_request_sent',
 'notifications',
 'translator_type']

In [65]:
other_df.iloc[[0]]

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,16736682,16736682,FindingAda,FindingAda,"London, England",,"Join us on 13 Oct 2020 for Ada Lovelace Day, c...",https://t.co/gQJ4523M9w,{'urls': []},False,...,DDEEF6,333333,True,True,True,False,True,False,False,none


In [14]:
test_dict = {'user_id': 1553045132}
test_user = tw_api.get_user(**test_dict)

In [15]:
test_user

User(_api=<tweepy.api.API object at 0x000001AF214D7208>, _json={'id': 1553045132, 'id_str': '1553045132', 'name': 'Kim Nilsson', 'screen_name': 'kimknilsson', 'location': 'London, UK', 'profile_location': None, 'description': 'Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.', 'url': 'http://t.co/m8Q3QgBqn7', 'entities': {'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7', 'expanded_url': 'http://www.pivigo.com', 'display_url': 'pivigo.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1170, 'friends_count': 606, 'listed_count': 131, 'created_at': 'Fri Jun 28 12:59:32 +0000 2013', 'favourites_count': 936, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 3021, 'lang': None, 'status': {'created_at': 'Tue Jul 28 14:13:11 +0000 2020', 'id': 1288115288035098627, 'id_str': '1288115288035098627', 'text': 'Great initiative for female tech founde

In [50]:
test_dict = {key: vars(user)[key] for key in list(vars(user).keys())}

In [51]:
for key in ['_api', '_json']:
    test_dict.pop(key)

In [52]:
test_dict

{'id': 1553045132,
 'id_str': '1553045132',
 'name': 'Kim Nilsson',
 'screen_name': 'kimknilsson',
 'location': 'London, UK',
 'profile_location': None,
 'description': 'Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.',
 'url': 'http://t.co/m8Q3QgBqn7',
 'entities': {'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7',
     'expanded_url': 'http://www.pivigo.com',
     'display_url': 'pivigo.com',
     'indices': [0, 22]}]},
  'description': {'urls': []}},
 'protected': False,
 'followers_count': 1170,
 'friends_count': 606,
 'listed_count': 131,
 'created_at': datetime.datetime(2013, 6, 28, 12, 59, 32),
 'favourites_count': 936,
 'utc_offset': None,
 'time_zone': None,
 'geo_enabled': False,
 'verified': False,
 'statuses_count': 3021,
 'lang': None,
 'status': Status(_api=<tweepy.api.API object at 0x000001AF214D7208>, _json={'created_at': 'Tue Jul 28 14:13:11 +0000 2020', 'id': 1288115288035098627, 'id_str': '1288115288035098627'

In [57]:
test_df = pd.DataFrame(test_dict, index=[0])

In [58]:
test_df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
1,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,,False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
