# REST URL ONLY: Scrape Twitter for US Governors in 2022 Mid-Terms

# Setup

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import requests
import json
from pprint import pprint

In [3]:
import tweepy
import gc #collecting garbage
from wordcloud import WordCloud,STOPWORDS, ImageColorGenerator #visualisation

sns.set_theme(context='notebook', style='whitegrid', palette='vlag', font='serif')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Get Data

In [4]:
govs_df = pd.read_csv('twitter_2022midterm_us_governors.csv', index_col=[0])
govs_df.head()
govs_df.info()

Unnamed: 0_level_0,Republican,Democrat
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Wisconsin:,https://twitter.com/michelsforgov,https://twitter.com/GovEvers
Orgeon:,https://twitter.com/ChristineDrazan,https://twitter.com/TinaKotek
Nevada:,https://twitter.com/JoeLombardoNV,https://twitter.com/GovSisolak
Arizona:,https://twitter.com/KariLake,https://twitter.com/katiehobbs
Kansas:,https://twitter.com/DerekSchmidtKS,https://twitter.com/LauraKellyKS


<class 'pandas.core.frame.DataFrame'>
Index: 35 entries, Wisconsin: to Idaho:
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Republican  35 non-null     object
 1   Democrat    35 non-null     object
dtypes: object(2)
memory usage: 840.0+ bytes


In [5]:
# Clean imported DataFrame

# Lowercase Columns
govs_df.rename(columns={'Republican':'republican', 'Democrat':'democrat'}, inplace=True)


# Move State Index to separate colum for joining
state_orig_ls = govs_df.index.to_list()
# state_orig_ls[:5]

state_clean_ls = [x.lower().strip(':') for x in state_orig_ls]
# state_clean_ls[:5]

# Insert state col in first position
govs_df.insert(loc=0, column='state', value=state_clean_ls)
# govs_df['state'] = state_clean_ls

# Drop the State Index
govs_df.reset_index(drop=True, inplace=True)

govs_df.head()


Unnamed: 0,state,republican,democrat
0,wisconsin,https://twitter.com/michelsforgov,https://twitter.com/GovEvers
1,orgeon,https://twitter.com/ChristineDrazan,https://twitter.com/TinaKotek
2,nevada,https://twitter.com/JoeLombardoNV,https://twitter.com/GovSisolak
3,arizona,https://twitter.com/KariLake,https://twitter.com/katiehobbs
4,kansas,https://twitter.com/DerekSchmidtKS,https://twitter.com/LauraKellyKS


In [6]:
# Split into Two (1) Dem and (2) Rep tables

govs_d_df = govs_df[['state','democrat']].copy(deep=True)
# Extract username from url
govs_d_df['screen_name'] = govs_df["democrat"].apply(lambda x: str(x).split('/')[-1])
govs_d_df.head()

govs_r_df = govs_df[['state','republican']].copy(deep=True) 
# Extract username from url
govs_r_df['screen_name'] = govs_df["republican"].apply(lambda x: str(x).split('/')[-1])
govs_r_df.head()

Unnamed: 0,state,democrat,screen_name
0,wisconsin,https://twitter.com/GovEvers,GovEvers
1,orgeon,https://twitter.com/TinaKotek,TinaKotek
2,nevada,https://twitter.com/GovSisolak,GovSisolak
3,arizona,https://twitter.com/katiehobbs,katiehobbs
4,kansas,https://twitter.com/LauraKellyKS,LauraKellyKS


Unnamed: 0,state,republican,screen_name
0,wisconsin,https://twitter.com/michelsforgov,michelsforgov
1,orgeon,https://twitter.com/ChristineDrazan,ChristineDrazan
2,nevada,https://twitter.com/JoeLombardoNV,JoeLombardoNV
3,arizona,https://twitter.com/KariLake,KariLake
4,kansas,https://twitter.com/DerekSchmidtKS,DerekSchmidtKS


# Get OAuth Tokens

In [7]:
import config_twitter

api_key = config_twitter.API_KEY #Your API/Consumer key 
api_secret = config_twitter.API_KEY_SECRET #Your API/Consumer Secret Key
bearer_token = config_twitter.BEARER_TOKEN # Your API/Bearer Token
access_token = config_twitter.ACCESS_TOKEN    #Your Access token key
access_token_secret = config_twitter.ACCESS_TOKEN_SECRET #Your Access token Secret key

# REST URI Endpoints (v2.0 API)

Regular 

* https://github.com/twitterdev/Twitter-API-v2-sample-code

Academic

* https://github.com/twitterdev/getting-started-with-the-twitter-api-v2-for-academic-research/blob/main/modules/3-deciding-which-endpoints-to-use.md

In [8]:
%whos DataFrame

Variable    Type         Data/Info
----------------------------------
govs_d_df   DataFrame                 state       <...>dtforgov      heidtforgov
govs_df     DataFrame                 state       <...>twitter.com/heidtforgov  
govs_r_df   DataFrame                 state       <...>alABundy       RealABundy


In [9]:
# ONLY RUN if you skipped API v1/v2 search and merge above

govs_d_user_df = govs_d_df.copy()
govs_r_user_df = govs_r_df.copy()

In [10]:
govs_d_user_df.head()
govs_d_user_df.info()

Unnamed: 0,state,democrat,screen_name
0,wisconsin,https://twitter.com/GovEvers,GovEvers
1,orgeon,https://twitter.com/TinaKotek,TinaKotek
2,nevada,https://twitter.com/GovSisolak,GovSisolak
3,arizona,https://twitter.com/katiehobbs,katiehobbs
4,kansas,https://twitter.com/LauraKellyKS,LauraKellyKS


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   state        35 non-null     object
 1   democrat     35 non-null     object
 2   screen_name  35 non-null     object
dtypes: object(3)
memory usage: 968.0+ bytes


## Common Functions

In [11]:
def bearer_oauth(r):
    """
    Method required by bearer token authentication.
    """

    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "v2UserLookupPython"
    return r

In [12]:
def connect_to_endpoint_wo_params(url):
    response = requests.request("GET", url, auth=bearer_oauth,)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )
    return response.json()

In [13]:
def connect_to_endpoint_with_params(url, tweet_fields):
    response = requests.request(
        "GET", url, auth=bearer_oauth, params=tweet_fields)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )
    return response.json()

In [14]:
def get_followers(user_id):
    url = "https://api.twitter.com/2/users/{}/following".format(user_id)
    params = {"user.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    return json_response
    
# Test
user_id = 234896532
json_response = get_followers(user_id)
print(json.dumps(json_response, indent=4, sort_keys=True))

my_followers_df = pd.DataFrame(json_response['data'])
my_followers_df.head()
my_followers_df.info()
    

200
{
    "data": [
        {
            "created_at": "2021-05-04T19:31:07.000Z",
            "id": "1389663738945495040",
            "name": "Dr. Jill Underly, WI State Superintendent",
            "username": "DrJillUnderly"
        },
        {
            "created_at": "2021-01-06T17:02:07.000Z",
            "id": "1346864195304792065",
            "name": "Senator Brad Pfaff",
            "username": "SenBradPfaff"
        },
        {
            "created_at": "2009-05-27T19:35:11.000Z",
            "id": "42953875",
            "name": "Western Tech College",
            "username": "WesternTC"
        },
        {
            "created_at": "2008-07-21T18:48:42.000Z",
            "id": "15518969",
            "name": "Ducks Unlimited",
            "username": "DucksUnlimited"
        },
        {
            "created_at": "2018-05-01T14:15:25.000Z",
            "id": "991320189634478080",
            "name": "Forward Madison FC",
            "username": "ForwardMSNFC"
       

Unnamed: 0,name,created_at,id,username
0,"Dr. Jill Underly, WI State Superintendent",2021-05-04T19:31:07.000Z,1389663738945495040,DrJillUnderly
1,Senator Brad Pfaff,2021-01-06T17:02:07.000Z,1346864195304792065,SenBradPfaff
2,Western Tech College,2009-05-27T19:35:11.000Z,42953875,WesternTC
3,Ducks Unlimited,2008-07-21T18:48:42.000Z,15518969,DucksUnlimited
4,Forward Madison FC,2018-05-01T14:15:25.000Z,991320189634478080,ForwardMSNFC


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        100 non-null    object
 1   created_at  100 non-null    object
 2   id          100 non-null    object
 3   username    100 non-null    object
dtypes: object(4)
memory usage: 3.2+ KB


In [15]:
def get_user_by_username(username, param_str):

    url = f'https://api.twitter.com/2/users/by/username/{username}?user.fields={param_str}'
    # url = f"https://api.twitter.com/2/users/by?user.fields={params}"
    params = {"user.fields": param_str}
    json_response = connect_to_endpoint_with_params(url, params)
    return json_response
    
# Test

username = 'KariLake'
params = 'created_at,description' # ,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld'

user_id = 234896532
json_response = get_user_by_username(username, params)
print(json.dumps(json_response, indent=4, sort_keys=True))

400


Exception: Request returned an error: 400 {"errors":[{"parameters":{"user.fields":["created_at,description","created_at,description"]},"message":"Duplicate parameters are not allowed: the `user.fields` query parameter"}],"title":"Invalid Request","detail":"One or more parameters to your request was invalid.","type":"https://api.twitter.com/2/problems/invalid-request"}

In [17]:
def get_user_by_username(username, param_str):

    url = f'https://api.twitter.com/2/users/by/username/{username}?user.fields={param_str}'
    # url = f"https://api.twitter.com/2/users/by?user.fields={params}"
    params = {"user.fields": param_str}
    json_response = connect_to_endpoint_wo_params(url) # , params)
    return json_response
    
# Test

username = 'KariLake'
params = 'created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld'

# user_id = 234896532
json_response = get_user_by_username(username, params)
print(json.dumps(json_response, indent=4, sort_keys=True))

my_followers_df = pd.DataFrame(pd.json_normalize(json_response['data']))
my_followers_df.head()
my_followers_df.info()



url = 'https://api.twitter.com/2/users/by?user.fields='
url


200
{
    "data": {
        "created_at": "2013-01-12T07:47:51.000Z",
        "description": "Official Twitter Account for Trump-Endorsed Candidate for Arizona Governor. Text KARI to 70789. Find me on TRUTH Social at karilake & FB at TheKariLake.",
        "entities": {
            "url": {
                "urls": [
                    {
                        "display_url": "KariLake.com",
                        "end": 23,
                        "expanded_url": "http://KariLake.com",
                        "start": 0,
                        "url": "https://t.co/EmzgORJKeQ"
                    }
                ]
            }
        },
        "id": "1082197856",
        "location": "Iowa & Arizona",
        "name": "Kari Lake",
        "pinned_tweet_id": "1575893306487103498",
        "profile_image_url": "https://pbs.twimg.com/profile_images/1560073130822885378/55oLUgB0_normal.jpg",
        "protected": false,
        "public_metrics": {
            "followers_count": 707747,


Unnamed: 0,username,name,created_at,description,location,url,verified,pinned_tweet_id,id,profile_image_url,protected,public_metrics.followers_count,public_metrics.following_count,public_metrics.tweet_count,public_metrics.listed_count,entities.url.urls
0,KariLake,Kari Lake,2013-01-12T07:47:51.000Z,Official Twitter Account for Trump-Endorsed Ca...,Iowa & Arizona,https://t.co/EmzgORJKeQ,True,1575893306487103498,1082197856,https://pbs.twimg.com/profile_images/156007313...,False,707747,973,24399,1342,"[{'start': 0, 'end': 23, 'url': 'https://t.co/..."


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   username                        1 non-null      object
 1   name                            1 non-null      object
 2   created_at                      1 non-null      object
 3   description                     1 non-null      object
 4   location                        1 non-null      object
 5   url                             1 non-null      object
 6   verified                        1 non-null      bool  
 7   pinned_tweet_id                 1 non-null      object
 8   id                              1 non-null      object
 9   profile_image_url               1 non-null      object
 10  protected                       1 non-null      bool  
 11  public_metrics.followers_count  1 non-null      int64 
 12  public_metrics.following_count  1 non-null      int64 

'https://api.twitter.com/2/users/by?user.fields='

### User Info

In [18]:
# Define DataFrame columns for all the User data elements we want to collect

govs_d_user_df = pd.DataFrame(columns=['screen_name',
                                    'id',
                                    'name',
                                    'location',
                                    'description',
                                    'url',
                                    'created_at',
                                    'verified',
                                    'protected',
                                    # 'followers',
                                    'followers_count',
                                    # 'followers_id',
                                    # 'following',
                                    'follow_request_sent',
                                    # 'follow',
                                    # 'friends',
                                    'friends_count',
                                    'favorites_count',
                                    'listed_count',
                                    # 'status',
                                    # 'statuses_count',
                                    ])

govs_r_user_df = pd.DataFrame(columns=['screen_name',
                                    'id',
                                    'name',
                                    'location',
                                    'description',
                                    'url',
                                    'created_at',
                                    'verified',
                                    'protected',
                                    # 'followers',
                                    'followers_count',
                                    # 'followers_id',
                                    # 'following',
                                    'follow_request_sent',
                                    # 'follow',
                                    # 'friends',
                                    'friends_count',
                                    'favorites_count',
                                    'listed_count',
                                    # 'status',
                                    # 'statuses_count',
                                    ])

In [19]:
govs_d_df.head()

Unnamed: 0,state,democrat,screen_name
0,wisconsin,https://twitter.com/GovEvers,GovEvers
1,orgeon,https://twitter.com/TinaKotek,TinaKotek
2,nevada,https://twitter.com/GovSisolak,GovSisolak
3,arizona,https://twitter.com/katiehobbs,katiehobbs
4,kansas,https://twitter.com/LauraKellyKS,LauraKellyKS


In [None]:
username = 'KariLake'

# user_id = 234896532
json_response = get_user_by_username(username, params)
print(json.dumps(json_response, indent=4, sort_keys=True))



In [32]:
# Get User data for each Dem Governor

govs_d_user_df = pd.DataFrame()

params = 'created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld'

i = 0
for i,agov_username in enumerate(govs_d_df.screen_name.to_list()):
    print(f'Processing #{i}: {agov_username}')
    try:
        json_response = get_user_by_username(agov_username, params)
        # print(json.dumps(json_response, indent=4, sort_keys=True))

        my_userdata_df = pd.DataFrame(pd.json_normalize(json_response['data']))
        
        print(f'    Appending row: {my_userdata_df.head()}\n\n')
        
        if i == 0:
            govs_d_user_df = my_userdata_df.copy(deep=True)
        else:
            govs_d_user_df = pd.concat([govs_d_user_df, my_userdata_df], ignore_index=True)                

        
    except:
        print(f'    ERROR: Could not retrieve Tweepy User Object')
    

    i += 1



Processing #0: GovEvers
200
    Appending row:    username                 name                created_at  \
0  GovEvers  Governor Tony Evers  2011-01-06T21:05:10.000Z   

                                         description            location  \
0  Husband to Kathy, lifelong educator, Euchre an...  Madison, Wisconsin   

                       url  verified      pinned_tweet_id         id  \
0  https://t.co/JyH2JWfeyt      True  1590413394724913152  234896532   

                                   profile_image_url  protected  \
0  https://pbs.twimg.com/profile_images/136168111...      False   

   public_metrics.followers_count  public_metrics.following_count  \
0                          106281                             245   

   public_metrics.tweet_count  public_metrics.listed_count  \
0                        7963                          906   

                                   entities.url.urls  
0  [{'start': 0, 'end': 23, 'url': 'https://t.co/...  


Processing #1: Tina

In [33]:
govs_d_user_df.head()

Unnamed: 0,username,name,created_at,description,location,url,verified,pinned_tweet_id,id,profile_image_url,protected,public_metrics.followers_count,public_metrics.following_count,public_metrics.tweet_count,public_metrics.listed_count,entities.url.urls,entities.description.hashtags,entities.description.mentions,entities.description.urls
0,GovEvers,Governor Tony Evers,2011-01-06T21:05:10.000Z,"Husband to Kathy, lifelong educator, Euchre an...","Madison, Wisconsin",https://t.co/JyH2JWfeyt,True,1590413394724913152,234896532,https://pbs.twimg.com/profile_images/136168111...,False,106281,245,7963,906,"[{'start': 0, 'end': 23, 'url': 'https://t.co/...",,,
1,TinaKotek,Tina Kotek,2014-09-30T18:28:02.000Z,Progressive Democrat. Candidate for Oregon Gov...,Oregon,https://t.co/U0kpZIDyUZ,True,1590555133624348672,2835931010,https://pbs.twimg.com/profile_images/150087938...,False,26350,338,1633,304,"[{'start': 0, 'end': 23, 'url': 'https://t.co/...",,,
2,GovSisolak,Governor Sisolak,2019-01-02T22:07:36.000Z,Governor of Nevada. Proud Dad and Husband. Sta...,"Nevada, USA",https://t.co/9iq0pinVZs,True,1575878545829568513,1080586433088258048,https://pbs.twimg.com/profile_images/139958353...,False,100717,123,12646,603,"[{'start': 0, 'end': 23, 'url': 'https://t.co/...","[{'start': 98, 'end': 114, 'tag': '3MillionRea...",,
3,katiehobbs,Katie Hobbs,2009-03-26T01:55:21.000Z,"Social worker, mom, wife, Arizona’s Secretary ...","Phoenix, AZ",https://t.co/otdUvzR8kj,True,1592343416981303296,26659619,https://pbs.twimg.com/profile_images/152449855...,False,166335,1584,10025,929,"[{'start': 0, 'end': 23, 'url': 'https://t.co/...",,,
4,LauraKellyKS,Laura Kelly,2011-08-30T13:39:23.000Z,"48th Governor of Kansas, Wife, Mother, Champio...","Topeka, KS",https://t.co/ErZXA4bB3F,True,1590388380399198208,364864453,https://pbs.twimg.com/profile_images/154147570...,False,55536,554,2781,391,"[{'start': 0, 'end': 23, 'url': 'https://t.co/...",,,


In [34]:
# 

# Specify the usernames that you want to lookup below
# You can enter up to 100 comma-separated values.
usernames = "usernames=TwitterDev,TwitterAPI"
user_fields = "user.fields=description,created_at"
# User fields are adjustable, options include:
# created_at, description, entities, id, location, name,
# pinned_tweet_id, profile_image_url, protected,
# public_metrics, url, username, verified, and withheld
url = "https://api.twitter.com/2/users/by?{}&{}".format(usernames, user_fields)
    
json_response = connect_to_endpoint_wo_params(url)
print(json.dumps(json_response, indent=4, sort_keys=True))

200
{
    "data": [
        {
            "created_at": "2013-12-14T04:35:55.000Z",
            "description": "The voice of the #TwitterDev team and your official source for updates, news, and events, related to the #TwitterAPI.",
            "id": "2244994945",
            "name": "Twitter Dev",
            "username": "TwitterDev"
        },
        {
            "created_at": "2007-05-23T06:01:13.000Z",
            "description": "Tweets about changes and service issues. Follow @TwitterDev\u00a0for more.",
            "id": "6253282",
            "name": "Twitter API",
            "username": "TwitterAPI"
        }
    ]
}


### User Followers

In [43]:
%whos dict

Variable        Type    Data/Info
---------------------------------
json_response   dict    n=2
params          dict    n=1


In [68]:
# Test
user_id = 2244994945

url = "https://api.twitter.com/2/users/{}/followers".format(user_id)
params = {"user.fields": "created_at"}
json_response = connect_to_endpoint_with_params(url, params)

followers_df = pd.json_normalize(json_response['data'])
followers_ct = followers_df.shape[0]
followers_df.head()
followers_df.info()

200


In [72]:
def get_followers(user_id):
    url = "https://api.twitter.com/2/users/{}/followers".format(user_id)
    params = {"user.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    
    followers_df = pd.json_normalize(json_response['data'])
    followers_ct = followers_df.shape[0]

    try:
        page_next_token = json_response['meta']['next_token']
    except:
        pass
    
    if not (page_next_token is None):
        
        while not (page_next_token is None):
            try:
                params = {"user.fields": "created_at", "pagination_token": page_next_token}
                json_response = connect_to_endpoint_with_params(url, params)
                my_followers_df = pd.json_normalize(json_response['data'])
                
                followers_df = pd.concat([followers_df, my_followers_df], ignore_index=True)
                followers_ct += len(json_response['data'])
                print(f'Current users_ct: {followers_ct}')
                
                try:
                    page_next_token = json_response['meta']['next_token']
                except:
                    pass
                      
            except:
                page_next_token = None
    
    print(f'Final users_ct: {followers_df}')
    
    return followers_df

In [73]:
govs_d_followers_df = get_followers(2244994945)
govs_d_followers_df.head()
govs_d_followers_df.info()

200
200
Current users_ct: 200
200
Current users_ct: 300
200
Current users_ct: 400
200
Current users_ct: 500
200
Current users_ct: 600
200
Current users_ct: 700
200
Current users_ct: 800
200
Current users_ct: 900
200
Current users_ct: 1000
200
Current users_ct: 1100
200
Current users_ct: 1200
200
Current users_ct: 1300
200
Current users_ct: 1400
429
Final users_ct:                     created_at         username                   id  \
0     2009-09-11T15:44:27.000Z    scmaisonneuve             73412991   
1     2021-04-13T16:58:24.000Z      ademarlocke  1382015262686375946   
2     2022-11-16T07:56:35.000Z         dot_ts22  1592788479406063616   
3     2021-02-27T11:36:00.000Z       s_fahimian  1365626689259151360   
4     2015-10-02T16:40:24.000Z    FaithnhMaster           3760878493   
...                        ...              ...                  ...   
1395  2021-04-05T11:50:56.000Z       leejubong3  1379038812609802241   
1396  2022-10-25T00:35:37.000Z   jdbantay101722  15847045

Unnamed: 0,created_at,username,id,name
0,2009-09-11T15:44:27.000Z,scmaisonneuve,73412991,S.C.Maisonneuve
1,2021-04-13T16:58:24.000Z,ademarlocke,1382015262686375946,ademar locke ∘⁺✧◞₊⋅✱
2,2022-11-16T07:56:35.000Z,dot_ts22,1592788479406063616,Dorothy Melquiades
3,2021-02-27T11:36:00.000Z,s_fahimian,1365626689259151360,Sajad
4,2015-10-02T16:40:24.000Z,FaithnhMaster,3760878493,faithnh


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1400 entries, 0 to 1399
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   created_at  1400 non-null   object
 1   username    1400 non-null   object
 2   id          1400 non-null   object
 3   name        1400 non-null   object
dtypes: object(4)
memory usage: 43.9+ KB


In [74]:
# Check not just repeating every 100

govs_d_followers_df.iloc[0:10]
govs_d_followers_df.iloc[95:105]

Unnamed: 0,created_at,username,id,name
0,2009-09-11T15:44:27.000Z,scmaisonneuve,73412991,S.C.Maisonneuve
1,2021-04-13T16:58:24.000Z,ademarlocke,1382015262686375946,ademar locke ∘⁺✧◞₊⋅✱
2,2022-11-16T07:56:35.000Z,dot_ts22,1592788479406063616,Dorothy Melquiades
3,2021-02-27T11:36:00.000Z,s_fahimian,1365626689259151360,Sajad
4,2015-10-02T16:40:24.000Z,FaithnhMaster,3760878493,faithnh
5,2011-04-13T02:35:04.000Z,maulikbavishi,281326951,Maulik Bavishi
6,2022-11-16T04:07:22.000Z,JenniferHilme16,1592730926210985985,Jennifer Hilmes
7,2022-11-16T03:38:03.000Z,TrinaHa91731712,1592723515853004801,Trina Harris
8,2022-01-06T18:48:50.000Z,m_kroener,1479162499584905217,Lori M Kroener
9,2022-08-18T20:24:06.000Z,X0r1337h4,1560361864038580225,1337h4X0r


Unnamed: 0,created_at,username,id,name
95,2018-11-26T17:39:02.000Z,mpm_pedro,1067110492462870528,pedro.mpm
96,2015-02-07T10:41:59.000Z,alpa1903,3023016851,İbrahim Halil Alpa
97,2020-05-14T09:20:00.000Z,wmlsmithuk,1260862205941231616,William Smith
98,2018-12-23T17:06:57.000Z,jsm_valdivia,1076886890295504897,Juan Valdivia
99,2012-02-03T09:58:23.000Z,PurTahan,481919846,🚩 PurTahan | پورطاهان 🚩
100,2021-11-22T04:38:31.000Z,nvtanalyst,1462641566720294912,Nvt Analyst.lens
101,2022-11-12T13:39:21.000Z,rishiba14362633,1591424171397242885,rishi baghel
102,2020-12-19T06:54:08.000Z,Shado_Enzo,1340188568518537216,Enzo | Shado
103,2022-09-04T18:12:24.000Z,Blueorgreen567,1566489114295373824,RoyalPurple3
104,2013-03-03T00:26:58.000Z,RhondaHMaylett,1237113206,RhondaHailesMaylett


In [75]:
govs_d_followers_df.tail()

Unnamed: 0,created_at,username,id,name
1395,2021-04-05T11:50:56.000Z,leejubong3,1379038812609802241,leejubong
1396,2022-10-25T00:35:37.000Z,jdbantay101722,1584704590880309248,Jeanette D. Bantay
1397,2022-11-02T08:25:08.000Z,EmpressBlueDia1,1587722332214370304,Duppins
1398,2022-09-25T08:34:14.000Z,AbramNFT1,1573953885323993088,Abram NFT
1399,2010-09-22T07:39:25.000Z,one_jons,193609716,Jono


In [81]:
# Save

filename_out = f'twitter_follower_{user_id}.csv'
govs_d_followers_df.to_csv(filename_out, index=False)

### User Following

In [76]:
def get_following(user_id):
    url = "https://api.twitter.com/2/users/{}/following".format(user_id)
    params = {"user.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    
    following_df = pd.json_normalize(json_response['data'])
    following_ct = following_df.shape[0]

    try:
        page_next_token = json_response['meta']['next_token']
    except:
        pass
    
    if not (page_next_token is None):
        
        while not (page_next_token is None):
            try:
                params = {"user.fields": "created_at", "pagination_token": page_next_token}
                json_response = connect_to_endpoint_with_params(url, params)
                my_following_df = pd.json_normalize(json_response['data'])
                
                following_df = pd.concat([following_df, my_following_df], ignore_index=True)
                following_ct += len(json_response['data'])
                print(f'Current users_ct: {following_ct}')
                
                try:
                    page_next_token = json_response['meta']['next_token']
                except:
                    pass
                      
            except:
                page_next_token = None
    
    print(f'Final users_ct: {following_ct}')
    
    return following_df

In [77]:
user_id = 2244994945
govs_d_following_df = get_following(user_id)
govs_d_following_df.head()
govs_d_following_df.info()

200
200
Current users_ct: 200
200
Current users_ct: 300
200
Current users_ct: 400
200
Current users_ct: 500
200
Current users_ct: 600
200
Current users_ct: 700
200
Current users_ct: 800
200
Current users_ct: 900
200
Current users_ct: 1000
200
Current users_ct: 1100
200
Current users_ct: 1200
200
Current users_ct: 1300
200
Current users_ct: 1400
200
Current users_ct: 1500
429
Final users_ct: 1500


Unnamed: 0,created_at,id,username,name,withheld.country_codes
0,2009-06-02T20:12:29.000Z,44196397,elonmusk,Elon Musk,
1,2012-07-11T16:54:40.000Z,633050374,2006legend,Aba Micah,
2,2008-08-08T02:16:23.000Z,15772978,jessicagarson,Jessica Garson,
3,2016-01-23T19:14:25.000Z,4839528448,bitcoincoreorg,Bitcoin Core Project,
4,2009-03-18T04:43:46.000Z,25029451,Punkaboo,blue checkmark (real),


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   created_at              1500 non-null   object
 1   id                      1500 non-null   object
 2   username                1500 non-null   object
 3   name                    1500 non-null   object
 4   withheld.country_codes  1 non-null      object
dtypes: object(5)
memory usage: 58.7+ KB


In [78]:
# Check not just repeating every 100

govs_d_following_df.iloc[0:10]
govs_d_following_df.iloc[95:105]

Unnamed: 0,created_at,id,username,name,withheld.country_codes
0,2009-06-02T20:12:29.000Z,44196397,elonmusk,Elon Musk,
1,2012-07-11T16:54:40.000Z,633050374,2006legend,Aba Micah,
2,2008-08-08T02:16:23.000Z,15772978,jessicagarson,Jessica Garson,
3,2016-01-23T19:14:25.000Z,4839528448,bitcoincoreorg,Bitcoin Core Project,
4,2009-03-18T04:43:46.000Z,25029451,Punkaboo,blue checkmark (real),
5,2018-06-20T17:13:38.000Z,1009484430250053632,coraF256,cora,
6,2010-01-25T05:56:22.000Z,108209516,ashevat,Amir Shevat,
7,2013-01-05T06:56:49.000Z,1062359582,TheSherylKlein,Sheryl Klein Lavi🏴󠁧󠁢󠁳󠁣󠁴󠁿,
8,2011-01-27T15:10:03.000Z,243665363,mongeliliana,Liliana Aidé Monge,
9,2012-01-10T04:01:22.000Z,459860328,JulieMendoza206,julie✨,


Unnamed: 0,created_at,id,username,name,withheld.country_codes
95,2012-09-22T12:30:57.000Z,839668676,DarylOwenJ,オーウェン,
96,2012-06-25T11:52:19.000Z,618029513,edisdev,Hatice Edis 📟,
97,2013-10-04T15:46:40.000Z,1934669329,ikirker,Ian Kirker,
98,2010-09-25T01:35:35.000Z,194811144,jolaurin24,Jonathan Laurin,
99,2009-05-06T22:21:23.000Z,38292395,alfongj,Alfonso 🍀🪐,
100,2006-09-02T01:59:11.000Z,5177,serdar,Serdar Kiliç,
101,2013-01-03T13:52:19.000Z,1057825998,chmanw,Chima,
102,2006-11-20T00:04:50.000Z,13148,inky,liam,
103,2008-07-12T02:02:26.000Z,15399427,jony_silva,Jonathan,
104,2012-03-23T18:32:49.000Z,534682665,simon_jthompson,Simon,


In [79]:
govs_d_following_df.tail()

Unnamed: 0,created_at,id,username,name,withheld.country_codes
1495,2009-06-15T20:33:22.000Z,47436444,jrmontag,Josh Montague,
1496,2010-04-07T22:41:40.000Z,130649891,TwitterLive,Twitter Live,
1497,2010-04-27T19:00:07.000Z,137780739,diana_clarke,diana,
1498,2013-09-04T04:34:33.000Z,1727585126,pmcgee,Patrick McGee,
1499,2011-09-21T04:12:04.000Z,377170853,joep,Joep R.,


In [80]:
# Save

filename_out = f'twitter_following_{user_id}.csv'
govs_d_following_df.to_csv(filename_out, index=False)

### User Timeline

In [69]:
def get_user_timeline(id, params):
    url = "https://api.twitter.com/2/users/{}/tweets".format(id)
    # params = {"tweet.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    return json_response


# Tweet fields are adjustable.
# Options include:
# attachments, author_id, context_annotations,
# conversation_id, created_at, entities, geo, id,
# in_reply_to_user_id, lang, non_public_metrics, organic_metrics,
# possibly_sensitive, promoted_metrics, public_metrics, referenced_tweets,
# source, text, and withheld
my_params = {"tweet.fields": "created_at"}

# Test
user_id = 2244994945

json_response = get_user_timeline(user_id, my_params)
print(json.dumps(json_response, indent=4, sort_keys=True))

my_timeline_df = pd.DataFrame(json_response['data'])
my_timeline_df.head()
my_timeline_df.info()

200
{
    "data": [
        {
            "created_at": "2022-11-02T23:15:29.000Z",
            "edit_history_tweet_ids": [
                "1587946527955329024"
            ],
            "id": "1587946527955329024",
            "text": "As always, we\u2019re just a Tweet away, so feel free to reach out with any questions. We\u2019re grateful for your partnership to #BuildWhatsNext"
        },
        {
            "created_at": "2022-11-02T23:15:29.000Z",
            "edit_history_tweet_ids": [
                "1587946526617264128"
            ],
            "id": "1587946526617264128",
            "text": "We\u2019ll still celebrate the soon-to-be-announced winners of our Chirp Developer Challenge - stay tuned for more details!"
        },
        {
            "created_at": "2022-11-02T23:15:28.000Z",
            "edit_history_tweet_ids": [
                "1587946525245816832"
            ],
            "id": "1587946525245816832",
            "text": "We\u2019re currently hard at

Unnamed: 0,text,edit_history_tweet_ids,created_at,id
0,"As always, we’re just a Tweet away, so feel fr...",[1587946527955329024],2022-11-02T23:15:29.000Z,1587946527955329024
1,We’ll still celebrate the soon-to-be-announced...,[1587946526617264128],2022-11-02T23:15:29.000Z,1587946526617264128
2,We’re currently hard at work to make Twitter b...,[1587946525245816832],2022-11-02T23:15:28.000Z,1587946525245816832
3,💡 #TipTuesday: Ever wondered how to get the v...,[1587519847281397767],2022-11-01T19:00:00.000Z,1587519847281397767
4,✍️Fill in the blank ⬇️\n\nI start my morning o...,[1587066866824085505],2022-10-31T13:00:01.000Z,1587066866824085505


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   text                    10 non-null     object
 1   edit_history_tweet_ids  10 non-null     object
 2   created_at              10 non-null     object
 3   id                      10 non-null     object
dtypes: object(4)
memory usage: 448.0+ bytes


### Liked Tweets

In [80]:
def get_liked_tweets(id, params):
    url = "https://api.twitter.com/2/users/{}/liked_tweets".format(id)
    # params = {"tweet.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    return json_response


# Tweet fields are adjustable.
# Options include:
# attachments, author_id, context_annotations,
# conversation_id, created_at, entities, geo, id,
# in_reply_to_user_id, lang, non_public_metrics, organic_metrics,
# possibly_sensitive, promoted_metrics, public_metrics, referenced_tweets,
# source, text, and withheld
tweet_fields = "tweet.fields=lang,author_id"
# Be sure to replace your-user-id with your own user ID or one of an authenticating user
# You can find a user ID by using the user lookup endpoint
id = "your-user-id"
# You can adjust ids to include a single Tweets.
# Or you can add to up to 100 comma-separated IDs
url = "https://api.twitter.com/2/users/{}/liked_tweets".format(id)

# Test
user_id = 2244994945

json_response = get_liked_tweets(user_id, tweet_fields)
print(json.dumps(json_response, indent=4, sort_keys=True))

my_liked_tweets_df = pd.DataFrame(json_response['data'])
my_liked_tweets_df.head()
my_liked_tweets_df.info() 

200
{
    "data": [
        {
            "author_id": "1136175005060878337",
            "edit_history_tweet_ids": [
                "1581308268122877953"
            ],
            "id": "1581308268122877953",
            "lang": "en",
            "text": "@TwitterDev Tips and tricks would be cool!!"
        },
        {
            "author_id": "109628138",
            "edit_history_tweet_ids": [
                "1575158698225258500"
            ],
            "id": "1575158698225258500",
            "lang": "en",
            "text": "On a call with @TwitterDev team looking at the upcoming platform functionality \u2014 some exiting functionality in the works. \n\nAlso, I \u2764\ufe0f the approach twitter is taking to feature development. Good job, bird ppl"
        },
        {
            "author_id": "1120050519182016513",
            "edit_history_tweet_ids": [
                "1575117901647986691"
            ],
            "id": "1575117901647986691",
            "lang": "en",


Unnamed: 0,text,id,lang,edit_history_tweet_ids,author_id
0,@TwitterDev Tips and tricks would be cool!!,1581308268122877953,en,[1581308268122877953],1136175005060878337
1,On a call with @TwitterDev team looking at the...,1575158698225258500,en,[1575158698225258500],109628138
2,Big thanks @TwitterDev for these wonderful hig...,1575117901647986691,en,[1575117901647986691],1120050519182016513
3,Congratulations to @triketora and the @blockpa...,1572733815965253632,en,[1572733815965253632],108209516
4,"🎉 Thank you so much for the kind gesture, @Twi...",1571949434010992640,en,[1571949434010992640],1264433760


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   text                    97 non-null     object
 1   id                      97 non-null     object
 2   lang                    97 non-null     object
 3   edit_history_tweet_ids  97 non-null     object
 4   author_id               97 non-null     object
dtypes: object(5)
memory usage: 3.9+ KB


### Liking Users

In [74]:
json_response = get_liking_users(user_id, user_fields)
pprint(json_response)

200
{'meta': {'next_token': '7140dibdnow9c7btw420jlmkll17j0clkx1bl4w5gi6nr',
          'result_count': 0}}


In [81]:
def get_liking_users(id, params):
    url = "https://api.twitter.com/2/tweets/{}/liking_users".format(id)
    # params = {"tweet.fields": "created_at"}
    json_response = connect_to_endpoint_with_params(url, params)
    return json_response


# User fields are adjustable, options include:
# created_at, description, entities, id, location, name,
# pinned_tweet_id, profile_image_url, protected,
# public_metrics, url, username, verified, and withheld
user_fields = "user.fields=created_at,description"
# You can replace the ID given with the Tweet ID you wish to like.
# You can find an ID by using the Tweet lookup endpoint
id = "1590693373056122880"
# You can adjust ids to include a single Tweets.
# Or you can add to up to 100 comma-separated IDs

# Test
user_id = "1590693373056122880"

json_response = get_liking_users(user_id, user_fields)
print(json.dumps(json_response, indent=4, sort_keys=True))

my_liking_users_df = pd.DataFrame(json_response['data'])
my_liking_users_df.head()
my_liking_users_df.info() 

200
{
    "data": [
        {
            "created_at": "2011-10-15T22:25:54.000Z",
            "description": "",
            "id": "391659128",
            "name": "Dimitri Schmid",
            "username": "dimitrischmid"
        },
        {
            "created_at": "2015-03-31T15:54:56.000Z",
            "description": "Programming Officer at ONGC!!!",
            "id": "3120779995",
            "name": "ABHISHEK BHARDWAJ",
            "username": "abhishek21795"
        },
        {
            "created_at": "2020-04-29T19:27:35.000Z",
            "description": "",
            "id": "1255579413934030848",
            "name": "devesh_deepak",
            "username": "deveshdeepak9"
        },
        {
            "created_at": "2018-11-29T20:52:12.000Z",
            "description": "I enable insights using data and math. Builder of the contact index. AI consultant for German federal government. Views and titles (Dr. habil.) my own.",
            "id": "1068246270752538630",
     

Unnamed: 0,id,name,description,username,created_at
0,391659128,Dimitri Schmid,,dimitrischmid,2011-10-15T22:25:54.000Z
1,3120779995,ABHISHEK BHARDWAJ,Programming Officer at ONGC!!!,abhishek21795,2015-03-31T15:54:56.000Z
2,1255579413934030848,devesh_deepak,,deveshdeepak9,2020-04-29T19:27:35.000Z
3,1068246270752538630,Sten Rüdiger,I enable insights using data and math. Builder...,StenRuediger,2018-11-29T20:52:12.000Z
4,3319164733,Ifty Mohammad Rezwan,"Data Monger,\nAll opinions are my own.",imr165,2015-08-18T20:34:13.000Z


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           93 non-null     object
 1   name         93 non-null     object
 2   description  93 non-null     object
 3   username     93 non-null     object
 4   created_at   93 non-null     object
dtypes: object(5)
memory usage: 3.8+ KB


# END OF NOTEBOOK

## Appendix: Set Union

In [814]:
# Find common elements in 2 data collections (here lists)
# 
aoc_ls = [1,2,3,4,5,6,7,8,9,10]
mgt_ls = [8,9,10,11,12,13,14,15,16,17,18]

common_ls = list(set(aoc_ls).intersection(set(mgt_ls)))

[8, 9, 10]

In [None]:
for auser in common_ls:
    user (user_fields=['created_at','public_metrics'])

## Appendix: Join csv

In [100]:
aoc_followers_df = pd.read_csv('aoc_following_ids.csv', index_col=[0])
aoc_followers_df.head()
aoc_followers_df.info()

Unnamed: 0,id,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
,13479,,,,,
,21619519,,,,,
,1391845341641773067,,,,,
,1051837159639007233,,,,,
,136550204,,,,,


<class 'pandas.core.frame.DataFrame'>
Float64Index: 3939 entries, nan to nan
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          3939 non-null   int64  
 1   Unnamed: 2  3 non-null      object 
 2   Unnamed: 3  0 non-null      float64
 3   Unnamed: 4  0 non-null      float64
 4   Unnamed: 5  0 non-null      float64
 5   Unnamed: 6  0 non-null      float64
dtypes: float64(4), int64(1), object(1)
memory usage: 215.4+ KB


In [101]:
mtg_followers_df = pd.read_csv('mtg_following_ids.csv', index_col=[0])
mtg_followers_df.head()
mtg_followers_df.info()

Unnamed: 0,id,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
,935942321756966912,,,,,
,902200087,,,,,
,214924095,,,,,
,1565070900243873792,,,,,
,44196397,,,,,


<class 'pandas.core.frame.DataFrame'>
Float64Index: 275 entries, nan to nan
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          275 non-null    int64  
 1   Unnamed: 2  0 non-null      float64
 2   Unnamed: 3  0 non-null      float64
 3   Unnamed: 4  0 non-null      float64
 4   Unnamed: 5  0 non-null      float64
 5   Unnamed: 6  0 non-null      float64
dtypes: float64(5), int64(1)
memory usage: 15.0 KB


In [103]:
aoc_followers_ls = aoc_followers_df.id.to_list()
len(aoc_followers_ls)
aoc_followers_ls[:5]


3939

[13479, 21619519, 1391845341641773067, 1051837159639007233, 136550204]

In [104]:
mtg_followers_ls = mtg_followers_df.id.to_list()
len(mtg_followers_ls)
mtg_followers_ls[:5]

275

[935942321756966912, 902200087, 214924095, 1565070900243873792, 44196397]

In [106]:
aoc_mtg_followers_ls = list(set(aoc_followers_ls).intersection(set(mtg_followers_ls)))
len(aoc_mtg_followers_ls)
aoc_mtg_followers_ls

8

[2916305152, 18208354, 975200486, 17685258, 12, 963480595, 15207668, 353890966]

## Appendix: Junk

In [None]:
# Twitter Standard API v1.1
# Get Tweet Timelines
# https://developer.twitter.com/en/docs/twitter-api/v1/tweets/timelines/api-reference/get-statuses-user_timeline

api.user_timeline('GovEvers')

In [38]:
govs_tweets_df = pd.DataFrame()

for i, agov_username in enumerate(govs_username_ls[:3]):
    print(f'Processing #{i}: {agov_username}')
    get_user_tweets(x.Twitter_username, x.Political_party),axis=1)
gc.collect()
    # govs_tweets_df.append(agov_username)
    # govs_df.apply(lambda x:get_user_tweets(x.Twitter_username, x.Political_party),axis=1)
    df.apply(lambda x:get_user_tweets(x.Twitter_username, x.Political_party),axis=1)
gc.collect()
    gc.collect()

Processing #0: GovEvers


26

Processing #1: TinaKotek


0

Processing #2: GovSisolak


0