In [1]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml
import tweepy
import pickle
import plotnine
from plotnine import *

## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_colwidth', None)

## function to load credentials yaml
def load_creds(path: str):
    with open(path, 'r') as stream:
        try:
            creds = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
    return(creds)

# Setup: authenticate to Twitter API

In [2]:
## read creds file
creds = load_creds("../my_cred_JH.yaml")

## use bearer token to initialize a tweepy client (connection with twitter API)
client = tweepy.Client(bearer_token= creds['twitter_api']['bearer_token'])

## print type
print(type(client))

<class 'tweepy.client.Client'>


# Activity

1. Choose a public user (e.g., a politician; celebrity) and pull 100 tweets from their timeline and metadata about those tweets. When pulling metadata, make sure to get the conversation_id and count of replies (latter is in public_metrics)
2. Choose one of their tweets to focus on that got a lot of replies and get the conversation_id of that tweet
3. Paste the conversation id of that tweet into a query using this documentation for query building: https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query#examples
4. Similar to example 1.1 in the example code, use the `search_recent_tweets` method to pull tweets that are in response to the focal tweet from step 2
5. Place them in a dataframe and do some text analysis of the results (eg sentiment; tokenizing and top words)


In [3]:
## focal user: MayorBowser

### step 1: get numeric id
mmb_id = client.get_user(username= "MayorBowser").data['id']

### step 2: pull max 200 tweets
tweet_attr = ['id',
              'created_at', 'author_id', 
             'text', 'lang', 'geo',
             'conversation_id', 'public_metrics']
mmb_t_resp = client.get_users_tweets(id = mmb_id,
                max_results = 100, 
                tweet_fields = tweet_attr)

print(type(mmb_t_resp))


<class 'tweepy.client.Response'>


In [4]:
### step 3: clean up; this time, i want to get all the diff engagement metrics
### so i'm modifying the function
### function to iterate over attributes
def pull_attr_flat(one_tweet, which_attr):
    all_attr = [one_tweet[attr]   
                for attr in which_attr]
    return(all_attr)

def pull_attr_nested(one_tweet, attr = 'public_metrics'):
    all_attr_vals = [value[1] for value in
                    one_tweet[attr].items()]
    return all_attr_vals

In [5]:
tweet_att_1 = pd.DataFrame([pull_attr_flat(one_tweet,
                [att for att in tweet_attr if att != "public_metrics"]) 
                for one_tweet in mmb_t_resp.data],
                columns = [att for att in tweet_attr if att != "public_metrics"])

tweet_att_2 = pd.DataFrame([pull_attr_nested(one_tweet) 
                for one_tweet in mmb_t_resp.data],
                columns = [key[0] for key in mmb_t_resp.data[0]['public_metrics'].items()])


In [6]:
## combine
mmb_tweet_att = pd.concat([tweet_att_1, tweet_att_2], axis = 1)
mmb_tweet_att.sort_values(by = 'reply_count', ascending = False).head()


## use query function to pull some replies
### step 1: get conversation id
focal_convo = mmb_tweet_att.conversation_id[mmb_tweet_att.reply_count == np.max(mmb_tweet_att.reply_count)].iloc[0]

### step 2: write query to pull tweets w/ that convo id
query = "conversation_id:{}".format(focal_convo)



Unnamed: 0,id,created_at,author_id,text,lang,geo,conversation_id,retweet_count,reply_count,like_count,quote_count
35,1493266895461654530,2022-02-14 16:52:26+00:00,976542720,"On March 1, masks will continue to be required at a number of locations, including schools. https://t.co/c8Fwfy15sk",en,,1493262671050776577,45,314,47,117
34,1493267494315999237,2022-02-14 16:54:48+00:00,976542720,"Beginning February 15, 2022, indoor venues will no longer be required to verify that patrons are vaccinated.\n\nBusinesses may choose to keep vaccination requirements in place. https://t.co/iUzgEHfhsa",en,,1493262671050776577,120,265,172,287
61,1492158197477658624,2022-02-11 15:26:51+00:00,976542720,"LIVE: Mayor Bowser Celebrates the Grand Opening of the First Starbucks Drive-Thru Location in Washington, DC https://t.co/nCfSU7gpUt",en,,1492158197477658624,40,130,131,202
36,1493265542962106375,2022-02-14 16:47:03+00:00,976542720,"The District’s indoor mask requirements will be dialed back on March 1, 2022. https://t.co/JZWpo2J5Hg",en,,1493262671050776577,72,73,94,57
32,1493270218499665923,2022-02-14 17:05:38+00:00,976542720,It’s critical that all DC residents get their booster shot. \n\nUnvaccinated individuals are 97x more likely to die from COVID-19 compared to those who are boosted. https://t.co/itVui1R4Eb,en,,1493262671050776577,6,35,8,10


In [7]:

### step 3: search based on query
tweets_mask = client.search_recent_tweets(query = query, max_results = 100,
                                    tweet_fields = ['created_at',
                                                    'author_id',
                                                   'geo',
                                                   'lang', 
                                                   'public_metrics'],
                                    user_fields = ['description',
                                                  'location',
                                                  'verified',
                                                  'public_metrics'],
                                    expansions = 'author_id')

tweets_mask_df = pd.DataFrame([pull_attr_flat(one_tweet,
                [att for att in tweet_attr if att != "public_metrics"]) 
                for one_tweet in tweets_mask.data],
                columns = [att for att in tweet_attr if att != "public_metrics"])
tweets_mask_df.head()

Unnamed: 0,id,created_at,author_id,text,lang,geo,conversation_id
0,1494389160131665921,2022-02-17 19:11:54+00:00,798173612134043648,"@AnitaKPatelMD @MayorBowser You are aware, as an MD, that the vaccinations do not slow (at all) the spread of the disease, right?\n\nJust double checking as I'm amazed that you're an educated doctor who is apparently tweeting about some kind irrational fear that makes no sense.",en,,
1,1494383745310380037,2022-02-17 18:50:23+00:00,111664809,@AnitaKPatelMD @MayorBowser Kids aren’t in danger from covid. Weird that a doctor wouldn’t know that by now.,en,,
2,1494381330511155204,2022-02-17 18:40:48+00:00,1462905406078930945,"@AnitaKPatelMD @MayorBowser I feel sorry for your patients. You steal their money and pump them with false information. Follow the science, doc.",en,,
3,1494378319403442176,2022-02-17 18:28:50+00:00,1275834716009172992,@AnitaKPatelMD @MayorBowser You really need to read the actual data. Healthy and unvaccinated kids are at practically no risk from covid. https://t.co/PLBdwV26pI,en,,
4,1494375809490071554,2022-02-17 18:18:51+00:00,1408854285232840704,@AnitaKPatelMD @MayorBowser Vaccine passports don’t work when vaccines don’t stop transmission.,en,,
