# Import Packages and Mount Drive 

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the data and
# checkpoint paths defined later in this notebook all live under that mount point.
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import glob, json, zipfile, os, csv
import pandas as pd
import copy as cp
from tqdm.notebook import tqdm as tq 

## Initialize Paths

In [None]:
# NOTE(review): the Drive folder name contains a trailing space ('UCLA REU 2021 KG ');
# presumably this matches the real folder name -- confirm before "fixing" it.

# Folder globbed for the raw '*.zip' tweet archives.
DATA_PATH = '/content/drive/MyDrive/UCLA REU 2021 KG /TweetData'
# Text file with one COVID-19 keyword per line (read by import_keywords).
KEYWORD_PATH = '/content/drive/MyDrive/UCLA REU 2021 KG /Preprocessing/COVID_keywords.txt'
# Output folder for the per-batch 'connected_tweets_*.json' exports.
BATCH_PATH = '/content/drive/MyDrive/UCLA REU 2021 KG /results/2020Mar11-2021Apr16/batched_data'
# Folder holding the 'visited_*' and 'out_of_batch_*' checkpoint CSV files.
CHECKPOINT_PATH = '/content/drive/MyDrive/UCLA REU 2021 KG /results/2020Mar11-2021Apr16/checkpoints'

## Initialize Checkpoint I/O

In [None]:

def write_json(filename, df, path):
  """Export a DataFrame to `path/filename` as a JSON array of records.

  The file is overwritten on every call. It was previously opened in append
  mode, so exporting the same batch twice wrote two JSON arrays back to back
  and left a file that no JSON parser (including read_json) could load.

  Parameters:
    filename -- name of the output file
    df -- pandas DataFrame to export
    path -- directory the file is written to
  """
  # Serialise before touching the file so a failed export does not leave a
  # truncated file behind. The loads/dumps round-trip keeps the on-disk
  # formatting produced by the standard json module.
  records = json.loads(df.to_json(orient = 'records', double_precision = 15))
  with open(path + '/' + filename, 'w') as f:
    f.write(json.dumps(records))

def read_json(filename, path):
  """Load a JSON records file from `path/filename` into a pandas DataFrame.

  Parameters:
    filename -- name of the JSON file
    path -- directory containing the file

  Returns:
    DataFrame built from the records; default date parsing is turned off and
    floats are decoded with the higher-precision parser.
  """
  target = path + '/' + filename
  with open(target, 'r') as handle:
    frame = pd.read_json(
        handle,
        orient='records',
        keep_default_dates=False,
        precise_float=True,
    )
  return pd.DataFrame(frame)

def write_dict(filename, dct, path):
  """Write a dictionary to `path/filename` as a two-column CSV (key, value).

  Values are stringified by the csv module, so list values are stored as
  their Python repr (e.g. "['1', '2']").

  Parameters:
    filename -- name of the CSV file (overwritten if it exists)
    dct -- dictionary to save
    path -- directory the file is written to
  """
  # newline='' is what the csv module requires for files it writes; without
  # it, platforms that translate line endings get a blank row after each record.
  with open(path + '/' + filename, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(dct.items())

def read_dict(filename, path):
  """Read a two-column CSV checkpoint written by write_dict back into a dict.

  Files whose name starts with 'visited' map each key to the raw string value.
  Every other file is treated as key -> list: the stored list repr is split
  back into a list of strings.

  Parameters:
    filename -- name of the CSV file
    path -- directory containing the file

  Returns:
    dict -- key -> str for 'visited' files, key -> list of str otherwise
  """
  def parse_list(text):
    # write_dict stores lists via str(list), which quotes items with single
    # quotes. Strip both quote styles so ids round-trip without stray quotes,
    # and map the repr of an empty list back to [] rather than [''].
    inner = text.strip('][')
    if not inner:
      return []
    return [item.strip().strip('\'"') for item in inner.split(', ')]

  keep_raw = filename.startswith('visited')
  d = {}
  with open(path + '/' + filename, newline='') as f:
    for row in csv.reader(f):
      if not row:
        # Blank rows appear when the file was written without newline=''.
        continue
      k, v = row
      d[k] = v if keep_raw else parse_list(v)

  return d
"""
Imports list of COVID keywords from TweetsCOV19 Knowledge Base found at 
https://arxiv.org/pdf/2006.14492.pdf

Parameters:   
  file -- .txt file from directory
"""
def import_keywords(file):
  """Read the keyword file and return one keyword per line.

  All spaces and the trailing newline are removed from every line, so a
  multi-word entry collapses into a single token. Empty lines are kept as
  empty strings.

  Parameters:
    file -- path to the .txt keyword file

  Returns:
    list of keyword strings, in file order
  """
  with open(file, 'r') as handle:
    return [raw.replace(' ', '').replace('\n', '') for raw in handle]

# Keyword list consulted by has_COVID_keyword for every tweet.
KEYWORDS = import_keywords(KEYWORD_PATH)
# Tweet id -> DataFrame row index for every tweet kept so far (filled by find_connections).
VISITED = {}
# Origin tweet id -> list of ids of replies whose origin was kept in an earlier batch.
OUT_OF_BATCH_REPLIES = {}
# Origin tweet id -> list of ids of quotes whose origin was kept in an earlier batch.
OUT_OF_BATCH_QUOTES = {}

In [None]:
# Resume from the checkpoint written after the batch ending 2020-08-08.
# Each dictionary is loaded from the file of the same name; the replies and
# quotes files were previously swapped, which mixed the two link types.
VISITED = read_dict('visited_20200808.csv', CHECKPOINT_PATH)
OUT_OF_BATCH_REPLIES = read_dict('out_of_batch_replies_20200808.csv', CHECKPOINT_PATH)
OUT_OF_BATCH_QUOTES = read_dict('out_of_batch_quotes_20200808.csv', CHECKPOINT_PATH)

# Read in files
The code below reads in raw Twitter data and outputs it as a list of dictionaries. This list is later turned into a pandas dataframe for preprocessing. 

To extract data from .zip files as .json files, uncomment the top section and input path directory.

Runtime: 57s for 3 days -- O(n)



In [None]:
def read_files(files):
    """Extract a batch of zipped tweet archives and load every tweet.

    Each archive is extracted into the current working directory. The
    extracted '<archive name>.txt' file is renamed to '<archive name>.json'
    and then parsed as one JSON document per line.

    Parameters:
      files -- list of paths to .zip archives

    Returns:
      list of parsed tweet objects, in archive order and then line order
    """
    extract_dir = os.getcwd()
    json_files = []
    print("-----------Extracting Data from Zip Files-----------")
    pbar0 = tq(total=len(files), position = 0, leave = True)
    for file in files:
      with zipfile.ZipFile(file, 'r') as zip_ref:
          zip_ref.extractall(extract_dir)
      # Derive the member name from the archive's base name rather than from a
      # fixed-width slice, so archive names of any length resolve correctly.
      stem = os.path.splitext(os.path.basename(file))[0]
      txt_file = os.path.join(extract_dir, stem + '.txt')
      json_file = os.path.join(extract_dir, stem + '.json')
      # os.replace overwrites a leftover .json from an earlier run on every platform.
      os.replace(txt_file, json_file)
      json_files.append(json_file)
      pbar0.update(1)
    pbar0.close()

    tweets_data = []
    print("------------Reading Data from JSON files------------")
    pbar1 = tq(total=len(json_files), position = 0, leave = True)
    for path in json_files:
      with open(path, 'r') as json_file:
          # Stream the file line by line instead of loading it whole.
          for line in json_file:
              if not line.strip():
                  # Blank lines carry no tweet and would make json.loads raise.
                  continue
              tweets_data.append(json.loads(line))
      pbar1.update(1)
    pbar1.close()
    return tweets_data

# Flatten Tweets




In [None]:
def _flatten_tweet(tweet_obj):
  """Flatten one raw tweet object into a single-level dictionary."""
  cur_tweet = {}

  # User info (left out entirely when the payload has no 'user' object).
  if 'user' in tweet_obj:
    user = tweet_obj['user']
    cur_tweet['user-screen_name'] = user['screen_name']
    cur_tweet['user-location'] = user['location']
    cur_tweet['user-id'] = user['id_str']
    cur_tweet['followers-count'] = user['followers_count']
    cur_tweet['verified'] = user['verified']
    cur_tweet['favorites-count'] = user['favourites_count']
    cur_tweet['status-count'] = user['statuses_count']
    # Number of accounts this user follows.
    cur_tweet['following-count'] = user['friends_count']

  # Tweet info; missing fields default to None.
  cur_tweet['tweet-id'] = tweet_obj.get('id_str')
  cur_tweet['timestamp'] = tweet_obj.get('created_at')
  cur_tweet['lang'] = tweet_obj.get('lang')

  # A tweet is a reply only when the field holds an id. Testing for the key
  # alone marks tweets that carry the key with a null value as replies.
  reply_to = tweet_obj.get('in_reply_to_status_id_str')
  cur_tweet['in-reply-to'] = reply_to
  cur_tweet['is_reply'] = reply_to is not None

  cur_tweet['text'] = tweet_obj.get('text', "")

  # Always emit both list columns so downstream code can rely on them even
  # when a payload has no 'entities' object.
  entities = tweet_obj.get('entities') or {}
  cur_tweet['hashtags'] = [tag['text'] for tag in entities.get('hashtags') or []]
  cur_tweet['mentions'] = [men['screen_name'] for men in entities.get('user_mentions') or []]

  # Prefer the untruncated text when the payload carries an extended tweet.
  if 'extended_tweet' in tweet_obj:
    cur_tweet['text'] = tweet_obj['extended_tweet']['full_text']

  if 'quoted_status' in tweet_obj:
    quoted_status = tweet_obj['quoted_status']
    # Fall back to the id inside the quoted object when the top-level id field is absent.
    cur_tweet['quoted'] = tweet_obj.get('quoted_status_id_str') or quoted_status.get('id_str')
    cur_tweet['is_quote'] = True
    if 'extended_tweet' in quoted_status:
      # NOTE(review): this replaces the quoting tweet's own text with the
      # quoted tweet's full text -- kept as-is; confirm this is intended.
      cur_tweet['text'] = quoted_status['extended_tweet']['full_text']
  else:
    cur_tweet['quoted'] = None
    cur_tweet['is_quote'] = False

  # Already-flattened keys take precedence when present on the input object.
  if 'retweeted_status-extended_tweet-full_text' in tweet_obj:
    cur_tweet['text'] = tweet_obj['retweeted_status-extended_tweet-full_text']
  elif 'retweeted_status-text' in tweet_obj:
    cur_tweet['text'] = tweet_obj['retweeted_status-text']
  elif 'extended_tweet-full_text' in tweet_obj:
    cur_tweet['text'] = tweet_obj['extended_tweet-full_text']

  return cur_tweet


def flatten_tweets(tweets):
  """Flatten raw tweet dictionaries so the relevant JSON fields sit in a
  top-level dictionary.

  Parameters:
    tweets -- list of raw tweet dictionaries

  Returns:
    list of flat dictionaries, one per input tweet and in the same order, with
    user fields (when available), tweet id, timestamp, language, reply and
    quote information, text, hashtags and mentions
  """
  tweets_list = []
  print("-----------------Flattening Tweets------------------")
  pbar2 = tq(total=len(tweets), position = 0, leave = True)
  for tweet_obj in tweets:
    tweets_list.append(_flatten_tweet(tweet_obj))
    pbar2.update(1)
  pbar2.close()
  return tweets_list


# Find Connected Tweets

First, the list of COVID-19 related keywords is imported. The dataset is then iterated through to find connected tweets, storing reply and quote information for each origin tweet. Replies and quotes whose origin tweet was kept in an earlier batch, and is therefore missing from the current one, are tracked separately. 

Runtime: 2:45 for ~300000 tweets (1.15 GB) -- O(n^2)

In [None]:

"""
Imports list of COVID keywords from TweetsCOV19 Knowledge Base found at 
https://arxiv.org/pdf/2006.14492.pdf

Parameters:   
  file -- .txt file from directory
"""
def import_keywords(file):
  """Return the keywords stored in `file`, one per line, with every space
  and newline character removed.

  NOTE: this cell repeats the definition of import_keywords given in the
  checkpoint I/O cell earlier in the notebook.

  Parameters:
    file -- path to the .txt keyword file

  Returns:
    list of keyword strings, in file order (empty lines become '')
  """
  strip_table = str.maketrans('', '', ' \n')
  keywords = []
  with open(file, 'r') as source:
    for entry in source:
      keywords.append(entry.translate(strip_table))
  return keywords

"""
Helper function that checks whether a given tweet contains a COVID-19 related 
keyword. 

Parameters: 
  text -- string of tweet text
  hashtags -- list of hashtags from tweet

Returns: 
  bool -- True if tweet contains COVID keyword, False otherwise
"""

def has_COVID_keyword(text, hashtags):
  """Check whether a tweet mentions a COVID-19 keyword.

  Parameters:
    text -- tweet text; anything that is not a string is ignored
    hashtags -- list of hashtag strings; anything that is not a list is ignored

  Returns:
    bool -- True if a whitespace-separated word of the text or one of the
            hashtags, lower-cased, appears in KEYWORDS; False otherwise
  """
  if isinstance(text, str):
    for word in text.split():
      if word.lower() in KEYWORDS:
        # Previously `return True[do[]]`, which is a syntax error.
        return True
  # Missing values can arrive here as None or NaN, hence the explicit list check.
  if isinstance(hashtags, list):
    for tag in hashtags:
      if tag.lower() in KEYWORDS:
        return True
  return False


""" 
Given a batch of tweets, finds tweets that are connected by replies or quotes
on Twitter. This is done by iterating through a dataframe of tweets. A tweet is
kept if it contains a COVID-19 keyword, or if it replies to or quotes a tweet
that was already kept. When the origin tweet is part of the current batch, the
reply or quote is stored on the origin's row. When the origin was kept in an
earlier batch, the link is recorded in OUT_OF_BATCH_REPLIES or
OUT_OF_BATCH_QUOTES instead.

Parameters: 
  tweets -- pandas DataFrame of flattened tweets (modified in place)

Returns:
  tweets -- the same DataFrame with replies, quotes, replies_count and quote_count columns
  cur_visited -- dict mapping the id of every tweet kept from this batch to its row index

"""
def find_connections(tweets):
  """Mark the tweets to keep from one batch and link replies/quotes to origins.

  A tweet is kept when it contains a COVID-19 keyword, or when the tweet it
  replies to (checked first) or quotes is already in VISITED. Kept tweets are
  recorded in the global VISITED and in the per-batch `cur_visited`.

  Parameters:
    tweets -- pandas DataFrame of flattened tweets; four columns are added to
              it in place (replies_count, quote_count, replies, quotes)

  Returns:
    tweets -- the same DataFrame, with origin rows updated
    cur_visited -- dict of tweet id -> row index for tweets kept in this batch
  """
  print("---------------Finding Connections----------------")
  progress = tq(total=len(tweets), position = 0, leave = True)
  cur_visited = {}
  tweets['replies_count'] = 0
  tweets['quote_count'] = 0
  tweets['replies'] = tweets.apply(lambda x: [], axis=1)
  tweets['quotes'] = tweets.apply(lambda x: [], axis=1)

  def remember(tweet_id, row_label):
    # Record the tweet both globally and for the current batch.
    VISITED[tweet_id] = row_label
    cur_visited[tweet_id] = row_label

  def attach(origin_id, child_id, backlog, list_col, count_col):
    # Store the link on the origin's row when the origin belongs to this
    # batch; otherwise queue it in the matching out-of-batch dictionary.
    origin_row = VISITED[origin_id]
    if origin_id in cur_visited:
      tweets.at[origin_row, list_col] = [*tweets.at[origin_row, list_col], child_id]
      tweets.at[origin_row, count_col] = tweets.at[origin_row, count_col] + 1
    elif origin_id in backlog:
      backlog[origin_id].append(child_id)
    else:
      backlog[origin_id] = [child_id]

  for index, tweet in tweets.iterrows():
    # Keyword tweets are always kept.
    if has_COVID_keyword(tweet['text'], tweet['hashtags']):
      remember(tweet['tweet-id'], index)

    if tweet['is_reply'] or tweet['is_quote']:
      reply_id = tweet['in-reply-to']
      quote_id = tweet['quoted']

      # The reply link takes priority; the quote link is only considered
      # when the replied-to tweet has not been kept.
      if reply_id in VISITED:
        remember(tweet['tweet-id'], index)
        attach(reply_id, tweet['tweet-id'], OUT_OF_BATCH_REPLIES, 'replies', 'replies_count')
      elif quote_id in VISITED:
        remember(tweet['tweet-id'], index)
        attach(quote_id, tweet['tweet-id'], OUT_OF_BATCH_QUOTES, 'quotes', 'quote_count')

    progress.update(1)
  progress.close()
  return tweets, cur_visited


# Main

In [None]:

# Archives are processed in fixed-size batches, starting part-way through the
# sorted archive list (earlier archives are covered by the loaded checkpoint).
BATCH_SIZE = 12     # number of archives per batch
START_INDEX = 150   # index of the first archive to process

files = sorted(glob.iglob(DATA_PATH + '/*.zip'))

# NOTE(review): the batch count subtracts 12 whole batches while START_INDEX
# skips 150 archives (12.5 batches); the formula is kept unchanged -- confirm
# it matches the intended date range.
total9 = len(files)//BATCH_SIZE - 12
print("----------------------------------------------------")
print("                   Total Progress                   ")
print("----------------------------------------------------")
pbar9 = tq(total=total9, position = 0, leave = True)
print("----------------------------------------------------")
print()
for i in range(total9):
  batch_start = START_INDEX + BATCH_SIZE*i
  cur_batch = files[batch_start:batch_start + BATCH_SIZE]
  if not cur_batch:
    # Ran past the last archive; indexing cur_batch[0] below would raise.
    break
  # The date is taken from the 8 characters before the '.zip' extension.
  start_date = cur_batch[0][-12:-4]
  end_date = cur_batch[-1][-12:-4]
  print("----------------------------------------------------")
  print("Batch number: " + str(i+1) +"/" + str(total9) + " -- Date Range: " + start_date + " - " + end_date)
  print("----------------------------------------------------")
  # Read in twitter data
  tweets_data = read_files(cur_batch)
  # Flatten data
  flattened_tweets = flatten_tweets(tweets_data)
  del tweets_data  # release the raw tweets before building the DataFrame
  # Transform to pandas dataframe
  flattened_df = pd.DataFrame(flattened_tweets)
  del flattened_tweets
  # Find connections
  res_df, filtered = find_connections(flattened_df)
  del flattened_df
  # Keep only the tweets that were marked as visited in this batch
  connected_df = res_df[res_df['tweet-id'].isin(filtered)]
  del res_df

  num_tweets = int(connected_df.shape[0])
  num_users = len(pd.unique(connected_df['user-id']))

  write_name = 'connected_tweets_' + start_date + '-' + end_date + '.json'

  # Export Data
  write_json(write_name, connected_df, BATCH_PATH)
  # Save the checkpoint so a later run can resume after this batch
  checkpoint_name = 'visited_' + end_date + '.csv'
  out_batch_replies_name = 'out_of_batch_replies_' + end_date + '.csv'
  out_batch_quotes_name = 'out_of_batch_quotes_' + end_date + '.csv'
  write_dict(checkpoint_name, VISITED, CHECKPOINT_PATH)
  write_dict(out_batch_replies_name, OUT_OF_BATCH_REPLIES, CHECKPOINT_PATH)
  write_dict(out_batch_quotes_name, OUT_OF_BATCH_QUOTES, CHECKPOINT_PATH)
  del connected_df
  print("Successfully exported  " + str(num_tweets) + " tweets from " + str(num_users) + ' unique users.')

  pbar9.update(1)

pbar9.close()




----------------------------------------------------
                   Total Progress                   
----------------------------------------------------


HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))

----------------------------------------------------

----------------------------------------------------
Batch number: 1/26 -- Date Range: 20200809 - 20200820
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=843016.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=843016.0), HTML(value='')))


Successfully exported  12702 tweets from 6188 unique users.
----------------------------------------------------
Batch number: 2/26 -- Date Range: 20200821 - 20200901
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=816645.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=816645.0), HTML(value='')))


Successfully exported  10635 tweets from 5347 unique users.
----------------------------------------------------
Batch number: 3/26 -- Date Range: 20200902 - 20200913
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=815041.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=815041.0), HTML(value='')))


Successfully exported  11385 tweets from 5532 unique users.
----------------------------------------------------
Batch number: 4/26 -- Date Range: 20200914 - 20200925
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=832124.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=832124.0), HTML(value='')))


Successfully exported  9505 tweets from 4875 unique users.
----------------------------------------------------
Batch number: 5/26 -- Date Range: 20200926 - 20201007
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=889633.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=889633.0), HTML(value='')))


Successfully exported  18323 tweets from 7516 unique users.
----------------------------------------------------
Batch number: 6/26 -- Date Range: 20201008 - 20201019
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=813409.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=813409.0), HTML(value='')))


Successfully exported  10045 tweets from 4778 unique users.
----------------------------------------------------
Batch number: 7/26 -- Date Range: 20201020 - 20201031
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=835969.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=835969.0), HTML(value='')))


Successfully exported  11910 tweets from 5561 unique users.
----------------------------------------------------
Batch number: 8/26 -- Date Range: 20201101 - 20201112
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=946694.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=946694.0), HTML(value='')))


Successfully exported  11124 tweets from 5442 unique users.
----------------------------------------------------
Batch number: 9/26 -- Date Range: 20201113 - 20201124
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=786127.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=786127.0), HTML(value='')))


Successfully exported  14324 tweets from 6489 unique users.
----------------------------------------------------
Batch number: 10/26 -- Date Range: 20201125 - 20201206
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=695497.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=695497.0), HTML(value='')))


Successfully exported  11956 tweets from 5680 unique users.
----------------------------------------------------
Batch number: 11/26 -- Date Range: 20201207 - 20201218
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=695967.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=695967.0), HTML(value='')))


Successfully exported  14662 tweets from 6606 unique users.
----------------------------------------------------
Batch number: 12/26 -- Date Range: 20201219 - 20201230
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=625589.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=625589.0), HTML(value='')))


Successfully exported  11902 tweets from 5546 unique users.
----------------------------------------------------
Batch number: 13/26 -- Date Range: 20201231 - 20210111
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=704205.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=704205.0), HTML(value='')))


Successfully exported  10262 tweets from 5181 unique users.
----------------------------------------------------
Batch number: 14/26 -- Date Range: 20210112 - 20210123
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=644387.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=644387.0), HTML(value='')))


Successfully exported  9909 tweets from 4888 unique users.
----------------------------------------------------
Batch number: 15/26 -- Date Range: 20210124 - 20210204
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=579799.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=579799.0), HTML(value='')))


Successfully exported  8474 tweets from 4324 unique users.
----------------------------------------------------
Batch number: 16/26 -- Date Range: 20210205 - 20210216
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=555323.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=555323.0), HTML(value='')))


Successfully exported  6455 tweets from 3508 unique users.
----------------------------------------------------
Batch number: 17/26 -- Date Range: 20210217 - 20210228
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=523257.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=523257.0), HTML(value='')))


Successfully exported  6175 tweets from 3285 unique users.
----------------------------------------------------
Batch number: 18/26 -- Date Range: 20210301 - 20210312
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=528234.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=528234.0), HTML(value='')))


Successfully exported  8328 tweets from 4071 unique users.
----------------------------------------------------
Batch number: 19/26 -- Date Range: 20210313 - 20210324
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=511638.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=511638.0), HTML(value='')))


Successfully exported  6833 tweets from 3560 unique users.
----------------------------------------------------
Batch number: 20/26 -- Date Range: 20210325 - 20210405
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=509962.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=509962.0), HTML(value='')))


Successfully exported  6655 tweets from 3542 unique users.
----------------------------------------------------
Batch number: 21/26 -- Date Range: 20210406 - 20210417
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=513581.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=513581.0), HTML(value='')))


Successfully exported  6610 tweets from 3677 unique users.
----------------------------------------------------
Batch number: 22/26 -- Date Range: 20210418 - 20210429
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=512290.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=512290.0), HTML(value='')))


Successfully exported  5824 tweets from 3227 unique users.
----------------------------------------------------
Batch number: 23/26 -- Date Range: 20210430 - 20210511
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=497216.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=497216.0), HTML(value='')))


Successfully exported  5060 tweets from 2804 unique users.
----------------------------------------------------
Batch number: 24/26 -- Date Range: 20210512 - 20210523
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=491547.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=491547.0), HTML(value='')))


Successfully exported  5624 tweets from 3215 unique users.
----------------------------------------------------
Batch number: 25/26 -- Date Range: 20210524 - 20210604
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=499013.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=499013.0), HTML(value='')))


Successfully exported  3839 tweets from 2214 unique users.
----------------------------------------------------
Batch number: 26/26 -- Date Range: 20210605 - 20210616
----------------------------------------------------
-----------Extracting Data from Zip Files-----------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


------------Reading Data from JSON files------------


HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


-----------------Flattening Tweets------------------


HBox(children=(FloatProgress(value=0.0, max=480330.0), HTML(value='')))


---------------Finding Connections----------------


HBox(children=(FloatProgress(value=0.0, max=480330.0), HTML(value='')))


Successfully exported  4142 tweets from 2549 unique users.
