# 2. Rehydrate tweets from the BTW17 collection

**In this step, each user's tweets are rehydrated to see which tweets still exist. The resulting dataframes also specify the error code if a tweet could not be retrieved. Each dataframe is stored in a directory.**

## Libraries

In [None]:
library(tidyverse)
library(academictwitteR)

# disable scientific notation for long integers
options(scipen = 999)

## Parameters

In [None]:
# store list of userids (numeric type!) here
userlist <- list(36327895, 25816024, 21788339, 136266976, 999)

# it is recommended to store the twitter bearer token in a file
# or, you can also type it directly, but make sure to keep it private (e.g. do not push it to GitHub)
# twitter_token <- "Bearer AAAA..."
# only the first line of the file is read and surrounding whitespace is dropped,
# so a trailing blank line or a missing final newline cannot end up in the token
twitter_token <- trimws(readLines("~/.tw_br/.br", n = 1L, warn = FALSE))

# directory in which the dataframes from MongoDB are stored
# only change this if the default was changed in step 1
mongo_folder <- "data/MongoDB_dataframes/"

## Functions

### get_tweetids()
- This function takes a userid and extracts all the tweet ids from its associated MongoDB dataframe

In [None]:
#' Read the tweet ids stored for one user.
#'
#' Loads the file `<mongo_folder><userid>.rds` and returns the values of its
#' `_id` column converted to character.
#'
#' @param userid User id; it is used as the file name (without ".rds").
#' @param mongo_folder Directory that holds the per-user dataframes. The path
#'   may be given with or without a trailing slash.
#' @return A character vector with the tweet ids of the user.
get_tweetids <- function(userid, mongo_folder){

  # accept folder paths with or without a trailing separator; an empty string
  # (current working directory) is left untouched
  if(nzchar(mongo_folder) && !grepl("[/\\\\]$", mongo_folder)){
    mongo_folder <- paste0(mongo_folder, "/")
  }

  df <- readRDS(file = paste0(mongo_folder, userid, ".rds"))

  # fail with a clear message instead of returning an empty result
  if(!"_id" %in% names(df)){
    stop("No `_id` column found in the dataframe of user ", userid,
         call. = FALSE)
  }

  as.character(df[["_id"]])
}

### rehydrate()
- This function takes the userlist and the directory of MongoDB dataframes and rehydrates each user's tweets
- If a destination folder is specified (relative path!), the dataframes get stored there. Otherwise a default folder called "data/MongoDB_rehydrated_dataframes/" is created

In [None]:
#' Rehydrate the tweets of every user in `userlist`.
#'
#' For each user id the tweet ids are read from `<mongo_folder><userid>.rds`
#' (via `get_tweetids()`), passed to `hydrate_tweets()` from academictwitteR
#' and the result is saved as `<destination_folder><userid>.rds`.
#' Users without a dataframe in `mongo_folder` are skipped with a message.
#'
#' @param userlist List (or vector) of user ids.
#' @param mongo_folder Directory that holds one `<userid>.rds` per user.
#'   The path may be given with or without a trailing slash.
#' @param destination_folder Directory in which the rehydrated dataframes are
#'   stored; it is created if it does not exist. `NULL` (the default) uses
#'   "data/MongoDB_rehydrated_dataframes/".
#' @param bearer_token Twitter API bearer token handed to `hydrate_tweets()`.
#'   Defaults to the global `twitter_token`, which the function previously
#'   read implicitly.
#' @return The string "SUCCESS" once all users have been processed.
rehydrate <- function(userlist, mongo_folder, destination_folder = NULL,
                      bearer_token = twitter_token){

  # paths are built with paste0(folder, userid, ".rds"), so a folder must end
  # with a separator; add one if the caller left it out
  with_trailing_slash <- function(path){
    if(nzchar(path) && !grepl("[/\\\\]$", path)){
      path <- paste0(path, "/")
    }
    path
  }

  # no destination specified -> fall back to the default directory
  if(is.null(destination_folder)){
    destination_folder <- "data/MongoDB_rehydrated_dataframes/"
  }

  mongo_folder <- with_trailing_slash(mongo_folder)
  destination_folder <- with_trailing_slash(destination_folder)

  if(!dir.exists(destination_folder)){
    dir.create(destination_folder, recursive = TRUE)
  }

  # iterate through userlist
  for(userid in userlist){

    source_file <- paste0(mongo_folder, userid, ".rds")

    # if file doesn't exist, report it and skip to next user
    if(!file.exists(source_file)){
      message("No dataframe found for user ", userid,
              " (", source_file, "), skipping")
      next
    }

    # call hydrate_tweets from academictwitteR; errors = TRUE is requested so
    # that tweets which could not be retrieved are part of the result
    res <- hydrate_tweets(get_tweetids(userid, mongo_folder),
                          bearer_token = bearer_token,
                          bind_tweets = TRUE,
                          errors = TRUE,
                          verbose = FALSE)

    # and save the file
    saveRDS(res, file = paste0(destination_folder, userid, ".rds"))
  }

  return("SUCCESS")
}

## Run

In [None]:
# rehydrate the tweets of every user in `userlist`; one .rds file per user is
# written to the destination folder
rehydrate(
  userlist,
  mongo_folder,
  destination_folder = "data/MongoDB_rehydrated_dataframes/"
)