# Pulling Twitter User Info using the API

In [1]:
import os
import time
import json
import pandas as pd

import tweepy

In [2]:
api_keys_fp = '../twitter_credentials.json'

In [3]:
def load_api(keys_file):
    '''
    
    '''
    # Load twitter credentials
    with open(keys_file, 'r') as file:
        creds = json.load(file)

    # Use credentials to set up API access authorisation
    auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
    auth.set_access_token(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
    api = tweepy.API(auth, wait_on_rate_limit=True)
    
    return api

In [4]:
tw_api = load_api(api_keys_fp)

In [5]:
type(tw_api)

tweepy.api.API

## Extracting information from get_user API request

In [8]:
user = tw_api.get_user(1553045132)

In [7]:
type(user)

tweepy.models.User

After requesting a user profile from the API as with the example line of code above, we want to pull out all the attributes and store them in a suitable format.

Can view the attributes using one of the two following functions, which are equivalent (`vars()` return the `__dict__` of the object).  The `list()` operator allows us to more easily interact with the object attributes.

In [74]:
user.__dict__.keys()

dict_keys(['_api', '_json', 'id', 'id_str', 'name', 'screen_name', 'location', 'profile_location', 'description', 'url', 'entities', 'protected', 'followers_count', 'friends_count', 'listed_count', 'created_at', 'favourites_count', 'utc_offset', 'time_zone', 'geo_enabled', 'verified', 'statuses_count', 'lang', 'status', 'contributors_enabled', 'is_translator', 'is_translation_enabled', 'profile_background_color', 'profile_background_image_url', 'profile_background_image_url_https', 'profile_background_tile', 'profile_image_url', 'profile_image_url_https', 'profile_banner_url', 'profile_link_color', 'profile_sidebar_border_color', 'profile_sidebar_fill_color', 'profile_text_color', 'profile_use_background_image', 'has_extended_profile', 'default_profile', 'default_profile_image', 'following', 'follow_request_sent', 'notifications', 'translator_type', 'suspended', 'needs_phone_verification'])

In [9]:
vars(user)

{'_api': <tweepy.api.API at 0x1e9488907b8>,
 '_json': {'id': 1553045132,
  'id_str': '1553045132',
  'name': 'Kim Nilsson',
  'screen_name': 'kimknilsson',
  'location': 'London, UK',
  'profile_location': None,
  'description': 'Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.',
  'url': 'http://t.co/m8Q3QgBqn7',
  'entities': {'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7',
      'expanded_url': 'http://www.pivigo.com',
      'display_url': 'pivigo.com',
      'indices': [0, 22]}]},
   'description': {'urls': []}},
  'protected': False,
  'followers_count': 1170,
  'friends_count': 606,
  'listed_count': 131,
  'created_at': 'Fri Jun 28 12:59:32 +0000 2013',
  'favourites_count': 936,
  'utc_offset': None,
  'time_zone': None,
  'geo_enabled': False,
  'verified': False,
  'statuses_count': 3021,
  'lang': None,
  'status': {'created_at': 'Tue Jul 28 14:13:11 +0000 2020',
   'id': 1288115288035098627,
   'id_str': '1288115288

A deeper inspection of the attributes (not shown, but can be accessed by running `vars(user)`) indicates that the first two keys `_api` and `_json` are not fields we want/need to retain.  
An example of how we can call a specific attribute is shown below:

In [18]:
vars(user)['entities']

{'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7',
    'expanded_url': 'http://www.pivigo.com',
    'display_url': 'pivigo.com',
    'indices': [0, 22]}]},
 'description': {'urls': []}}

Using this method of calling attributes, we should be able to quite easily use the keys to automatically build a Pandas Dataframe from the results.  

In [15]:
user_dict = vars(user).copy()
for key in ['_api', '_json', 'entities']:
    user_dict.pop(key)
    
#df = pd.DataFrame(user_dict, index=[0])
df = pd.DataFrame.from_dict({key: vars(user)[key] for key in list(vars(user).keys())[2:]}) # By using list comprehension, exclude the api and json keys from the dataframe

In [16]:
df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,{'urls': []},False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
url,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,"{'urls': [{'url': 'http://t.co/m8Q3QgBqn7', 'e...",False,...,DAECF4,663B12,False,False,False,False,True,False,False,none


In [18]:
vars(user_dict['status'])

{'_api': <tweepy.api.API at 0x1556ea6ff60>,
 '_json': {'created_at': 'Tue Jul 28 14:13:11 +0000 2020',
  'id': 1288115288035098627,
  'id_str': '1288115288035098627',
  'text': 'Great initiative for female tech founders! https://t.co/kjj1usXrUt',
  'truncated': False,
  'entities': {'hashtags': [],
   'symbols': [],
   'user_mentions': [],
   'urls': [{'url': 'https://t.co/kjj1usXrUt',
     'expanded_url': 'https://twitter.com/SVC2UK/status/1287727618394128385',
     'display_url': 'twitter.com/SVC2UK/status/…',
     'indices': [43, 66]}]},
  'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_reply_to_user_id': None,
  'in_reply_to_user_id_str': None,
  'in_reply_to_screen_name': None,
  'geo': None,
  'coordinates': None,
  'place': None,
  'contributors': None,
  'is_quote_status': True,
  'quoted_status_id': 1287727618394128385,
  'quoted_status_id_str': '128772

Having figured out how to create the dataframe, we now want to cleanly add to it.  Let's look at a different account.

In [15]:
other = tw_api.get_user('jlesliedata')

In [16]:
other_dict = vars(other)
for key in ['_api', '_json', 'entities']:
    other_dict.pop(key)
    
other_df = pd.DataFrame(other_dict, index=[0])

#other_df = pd.DataFrame.from_dict({key: vars(other)[key] for key in list(vars(other).keys())[2:]})
other_df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,protected,followers_count,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
0,735580694508933121,735580694508933121,Jonathan Leslie,jlesliedata,"London, England",,"Data scientist, #rstats enthusiast, mentor, bi...",https://t.co/pSWzkEKMS0,False,212,...,DDEEF6,333333,True,False,True,False,True,False,False,none


Can see from the dataframe that there is a different set of fields compared to those retrieved before.  We are going to need a way to dynamically update the table.  

In [17]:
joined = df.append(other_df)

In [18]:
joined

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,protected,followers_count,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
0,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,False,1170,...,DAECF4,663B12,False,False,False,False,True,False,False,none
0,735580694508933121,735580694508933121,Jonathan Leslie,jlesliedata,"London, England",,"Data scientist, #rstats enthusiast, mentor, bi...",https://t.co/pSWzkEKMS0,False,212,...,DDEEF6,333333,True,False,True,False,True,False,False,none


In [36]:
list(vars(other).keys())

['_api',
 '_json',
 'id',
 'id_str',
 'name',
 'screen_name',
 'location',
 'profile_location',
 'description',
 'url',
 'entities',
 'protected',
 'followers_count',
 'friends_count',
 'listed_count',
 'created_at',
 'favourites_count',
 'utc_offset',
 'time_zone',
 'geo_enabled',
 'verified',
 'statuses_count',
 'lang',
 'status',
 'contributors_enabled',
 'is_translator',
 'is_translation_enabled',
 'profile_background_color',
 'profile_background_image_url',
 'profile_background_image_url_https',
 'profile_background_tile',
 'profile_image_url',
 'profile_image_url_https',
 'profile_banner_url',
 'profile_link_color',
 'profile_sidebar_border_color',
 'profile_sidebar_fill_color',
 'profile_text_color',
 'profile_use_background_image',
 'has_extended_profile',
 'default_profile',
 'default_profile_image',
 'following',
 'follow_request_sent',
 'notifications',
 'translator_type']

In [65]:
other_df.iloc[[0]]

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
description,16736682,16736682,FindingAda,FindingAda,"London, England",,"Join us on 13 Oct 2020 for Ada Lovelace Day, c...",https://t.co/gQJ4523M9w,{'urls': []},False,...,DDEEF6,333333,True,True,True,False,True,False,False,none


In [21]:
test_dict = {'user_id': 1553045132}
test_user = tw_api.get_user(**test_dict)

In [15]:
test_user

User(_api=<tweepy.api.API object at 0x000001AF214D7208>, _json={'id': 1553045132, 'id_str': '1553045132', 'name': 'Kim Nilsson', 'screen_name': 'kimknilsson', 'location': 'London, UK', 'profile_location': None, 'description': 'Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.', 'url': 'http://t.co/m8Q3QgBqn7', 'entities': {'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7', 'expanded_url': 'http://www.pivigo.com', 'display_url': 'pivigo.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1170, 'friends_count': 606, 'listed_count': 131, 'created_at': 'Fri Jun 28 12:59:32 +0000 2013', 'favourites_count': 936, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 3021, 'lang': None, 'status': {'created_at': 'Tue Jul 28 14:13:11 +0000 2020', 'id': 1288115288035098627, 'id_str': '1288115288035098627', 'text': 'Great initiative for female tech founde

In [39]:
test_dict = {key: vars(user)[key] for key in list(vars(user).keys())}

In [40]:
for key in ['_api', '_json']:
    test_dict.pop(key)

KeyError: '_api'

In [38]:
test_dict

{'user_id': 1553045132}

In [36]:
test_df = pd.DataFrame(test_dict)

ValueError: If using all scalar values, you must pass an index

In [58]:
test_df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
1,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,,False,...,DAECF4,663B12,False,False,False,False,True,False,False,none


In [22]:
user_json = vars(test_user)['_json']

In [35]:
user_json

{'id': 1553045132,
 'id_str': '1553045132',
 'name': 'Kim Nilsson',
 'screen_name': 'kimknilsson',
 'location': 'London, UK',
 'profile_location': None,
 'description': 'Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.',
 'url': 'http://t.co/m8Q3QgBqn7',
 'entities': {'url': {'urls': [{'url': 'http://t.co/m8Q3QgBqn7',
     'expanded_url': 'http://www.pivigo.com',
     'display_url': 'pivigo.com',
     'indices': [0, 22]}]},
  'description': {'urls': []}},
 'protected': False,
 'followers_count': 1170,
 'friends_count': 606,
 'listed_count': 131,
 'created_at': 'Fri Jun 28 12:59:32 +0000 2013',
 'favourites_count': 936,
 'utc_offset': None,
 'time_zone': None,
 'geo_enabled': False,
 'verified': False,
 'statuses_count': 3021,
 'lang': None,
 'status': {'created_at': 'Tue Jul 28 14:13:11 +0000 2020',
  'id': 1288115288035098627,
  'id_str': '1288115288035098627',
  'text': 'Great initiative for female tech founders! https://t.co/kj

In [25]:
json_test = json.dumps(user_json)

In [26]:
json_test

'{"id": 1553045132, "id_str": "1553045132", "name": "Kim Nilsson", "screen_name": "kimknilsson", "location": "London, UK", "profile_location": null, "description": "Ex-astronomer turned start-up CEO of @Pivigo. Lead organiser of @S2DS. Passionate about all things data.", "url": "http://t.co/m8Q3QgBqn7", "entities": {"url": {"urls": [{"url": "http://t.co/m8Q3QgBqn7", "expanded_url": "http://www.pivigo.com", "display_url": "pivigo.com", "indices": [0, 22]}]}, "description": {"urls": []}}, "protected": false, "followers_count": 1170, "friends_count": 606, "listed_count": 131, "created_at": "Fri Jun 28 12:59:32 +0000 2013", "favourites_count": 936, "utc_offset": null, "time_zone": null, "geo_enabled": false, "verified": false, "statuses_count": 3021, "lang": null, "status": {"created_at": "Tue Jul 28 14:13:11 +0000 2020", "id": 1288115288035098627, "id_str": "1288115288035098627", "text": "Great initiative for female tech founders! https://t.co/kjj1usXrUt", "truncated": false, "entities": 

In [27]:
type(json_test)

str

In [32]:
json_df = pd.DataFrame(user_json)

In [33]:
json_df.head()

Unnamed: 0,id,id_str,name,screen_name,location,profile_location,description,url,entities,protected,...,profile_sidebar_fill_color,profile_text_color,profile_use_background_image,has_extended_profile,default_profile,default_profile_image,following,follow_request_sent,notifications,translator_type
url,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,"{'urls': [{'url': 'http://t.co/m8Q3QgBqn7', 'e...",False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
description,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,{'urls': []},False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
created_at,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,,False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
id,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,,False,...,DAECF4,663B12,False,False,False,False,True,False,False,none
id_str,1553045132,1553045132,Kim Nilsson,kimknilsson,"London, UK",,Ex-astronomer turned start-up CEO of @Pivigo. ...,http://t.co/m8Q3QgBqn7,,False,...,DAECF4,663B12,False,False,False,False,True,False,False,none


In [34]:
json_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, url to lang
Data columns (total 44 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   id                                  27 non-null     int64 
 1   id_str                              27 non-null     object
 2   name                                27 non-null     object
 3   screen_name                         27 non-null     object
 4   location                            27 non-null     object
 5   profile_location                    0 non-null      object
 6   description                         27 non-null     object
 7   url                                 27 non-null     object
 8   entities                            2 non-null      object
 9   protected                           27 non-null     bool  
 10  followers_count                     27 non-null     int64 
 11  friends_count                       27 non-null     int64 
 1

In [19]:
dict([1,2,3,4,5,10])

TypeError: cannot convert dictionary update sequence element #0 to a sequence

In [20]:
test = [1,2,3,4,5]
test_keys = [5,6,7,8,9]
test_dict = dict.fromkeys(test, test_keys)
test_dict

{1: [5, 6, 7, 8, 9],
 2: [5, 6, 7, 8, 9],
 3: [5, 6, 7, 8, 9],
 4: [5, 6, 7, 8, 9],
 5: [5, 6, 7, 8, 9]}

In [21]:
me = tw_api.get_user('bobthephysicist')

In [22]:
me.statuses_count

291