In [2]:
# retrieve botometer scores for any unprocessed twitter usernames

In [3]:
# load and optionally install required packages
if (!require('pacman')) install.packages('pacman')
pacman::p_load(
    dotenv,
    dplyr,
    httr,
    jsonlite,
    readr,
    RJSONIO,
    tictoc,
    xml2
)

Loading required package: pacman



In [4]:
# set necessary constants
load_dot_env()
TODAY <- format(Sys.Date(), "%Y-%m-%d")
MAX_CALLS_PER_DAY <- 17000
BATCH_CALLS_LIMIT <- 1000
BOTOMETER_API_KEY <- Sys.getenv("BOTOMETER_API_KEY")
TWITTER_APP_CONSUMER_KEY <- Sys.getenv("TWITTER_APP_CONSUMER_KEY")
TWITTER_APP_CONSUMER_SECRET <- Sys.getenv("TWITTER_APP_CONSUMER_SECRET")
TWITTER_APP_ACCESS_TOKEN <- Sys.getenv("TWITTER_APP_ACCESS_TOKEN")
TWITTER_APP_ACCESS_SECRET <- Sys.getenv("TWITTER_APP_ACCESS_SECRET")

In [5]:
# load necessary files

# get memoized twitter botometer scores
twitter_usernames_botometer_path = './data/generated/twitter/usernames_botometer.csv'
if(!file.exists(twitter_usernames_botometer_path)){
    # create temporary tibble
    tibble(
        username = character(),
        score_cap_english = numeric(),
        score_cap_universal = numeric(),
        score_raw_english = numeric(),
        score_raw_universal = numeric(),
        date_checked = character()
    ) %>%
    write_csv(twitter_usernames_botometer_path)
}

# get remaining usernames we need to check
twitter_usernames <- read_csv(twitter_usernames_botometer_path, col_types = cols(
        username = col_character(),
        score_cap_english = col_number(),
        score_cap_universal = col_number(),
        score_raw_english = col_number(),
        score_raw_universal = col_number(),
        date_checked = col_character()
    ))
unchecked_twitter_usernames_count <- twitter_usernames %>% filter(is.na(date_checked)) %>% nrow()
if(unchecked_twitter_usernames_count == 0){
    cat('no usernames left to check')
    quit()
}

no usernames left to check

In [6]:
# load call log
log_path = './data/generated/twitter/botometer_log.csv'
if(!file.exists(log_path)){
    # create temporary tibble
    tibble(
        date = character(),
        calls_made = numeric()
    ) %>%
    write_csv(log_path)
}

call_log = read_csv(log_path, col_types = cols(
    date = col_character(),
    calls_made = col_number()
))

In [7]:
# generate how many calls we can make today
todays_log <- call_log %>% filter(date == TODAY)
if(nrow(todays_log) == 0){
    calls_made_today <- 0
}else{
    calls_made_today <- todays_log[1,]$calls_made
}
calls_left_today <- MAX_CALLS_PER_DAY - calls_made_today

In [8]:
# prepare variables for botcheck to run
myapp <- oauth_app('twitter', key=TWITTER_APP_CONSUMER_KEY, secret=TWITTER_APP_CONSUMER_SECRET)
sig <- sign_oauth1.0(myapp, token=TWITTER_APP_ACCESS_TOKEN, token_secret=TWITTER_APP_ACCESS_SECRET)
rapidapi_key <- BOTOMETER_API_KEY

In [9]:
botcheck <- function(user, rapidapi_key){
  # rapidapi key is the access token to botometer API in the rapidAPI platform
  
  users_url <- "https://api.twitter.com/1.1/users/show.json?screen_name="
  statuses_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name="
  search_url <- "https://api.twitter.com/1.1/search/tweets.json?q=%40"
  rapidapi_url <- "https://botometer-pro.p.rapidapi.com/4/check_account"                                                # this line should be updated whenever the API updates
  opts = "&count=200"
  
  userdata = GET(paste0(users_url, user, opts), config = sig)
  tweets = GET(paste0(statuses_url, user, opts), config = sig)
  mentions = GET(paste0(search_url, user, opts), config = sig)
  
  body = list(timeline = content(tweets, type = "application/json"), 
              mentions = content(mentions, type = "application/json"), 
              user = content(userdata, type = "application/json"))
  body_json = RJSONIO::toJSON(body, auto_unbox = T, pretty = T)
  result = POST(rapidapi_url, 
                encode = "json", add_headers(`X-RapidAPI-Key` = rapidapi_key), 
                body = body_json)
  return(content(result, as = 'text', encoding = 'UTF-8'))
}

In [10]:
get_next_botometer_score <- function(){
    twitter_usernames = twitter_usernames %>% arrange(!is.na(date_checked), date_checked)
    next_username <- twitter_usernames[1,]$username
    response <- botcheck(next_username, rapidapi_key)
    scores <- response %>% fromJSON()
    if(!is.null(scores[['error']])){
        twitter_usernames[1,] = list(
            username = next_username,
            score_cap_english = -999,
            score_cap_universal = -999,
            scores_raw_english = -999,
            scores_raw_universal = -999,
            date_checked = TODAY
        )        
    }else{
        twitter_usernames[1,] = list(
            username = next_username,
            score_cap_english = scores$cap[['english']],
            score_cap_universal = scores$cap[['universal']],
            score_raw_english = scores$raw_scores$english[['overall']],
            score_raw_universal = scores$raw_scores$universal[['overall']],
            date_checked = TODAY
        )
    }
    return(twitter_usernames)
}

In [11]:
log_call <- function(){
    todays_log <- call_log %>% filter(date == TODAY)
    if(todays_log %>% nrow() == 0){
        call_log[1,] = list(
            date = TODAY,
            calls_made = 1
        )
    }else{
        calls_made <- call_log[which(call_log$date == TODAY),]$calls_made
        call_log[which(call_log$date == TODAY),]$calls_made <- calls_made + 1
    }
    return(call_log)
}

In [12]:
for(val in c(1:BATCH_CALLS_LIMIT)){
    if(calls_left_today > 0){
        twitter_usernames <- get_next_botometer_score()
        write_csv(twitter_usernames, twitter_usernames_botometer_path)
        call_log <- log_call()
        write_csv(call_log, log_path)
        calls_left_today <- calls_left_today - 1
        Sys.sleep(5);
    }else{
        print('no calls left today')
    }
}

ERROR: Error in curl::curl_fetch_memory(url, handle = handle): Operation was aborted by an application callback
