### Load libraries and local environment variables

In [1]:
import pandas as pd
import tweepy
import os
import json

consumer_key = os.getenv('TWEET_CONSUMER_KEY')
consumer_secret = os.getenv('TWEET_CONSUMER_SECRET')
hoaxy_file_source_location = './Datasets/Covid_AND_FLU_hoaxy_visualization_2020_05_15.csv'
hoaxy_file_target_location = './Datasets/Covid_AND_FLU_hoaxy_visualization_2020_05_15_extend.csv'

### Extract user_id and user_screen_name from both from_user and to_user

In [4]:
df_visual = pd.read_csv(hoaxy_file_source_location,encoding= 'unicode_escape')

df_from_user_id = df_visual[['from_user_id', 'from_user_screen_name']]
df_from_user_id.drop_duplicates(subset ="from_user_screen_name", keep = "first", inplace = True)
df_from_user_id.columns = ['user_id', 'user_screen_name']

df_to_user_id = df_visual[['to_user_id', 'to_user_screen_name']]
df_to_user_id.drop_duplicates(subset ="to_user_screen_name", keep = "first", inplace = True)
df_to_user_id.columns = ['user_id', 'user_screen_name']

dfs = [df_from_user_id, df_to_user_id]
df_users = pd.concat(dfs, axis=0, ignore_index=True)

df_users.drop_duplicates(subset ="user_screen_name", keep = "first", inplace = True)

df_users = df_users.sort_values(by='user_id', ascending=True).reset_index().drop(columns=['index'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### Initialise twitter API and lookup corresponding users' information

In [6]:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

users_info = []
for i in range(0, len(df_users.index), 100):
    users_screen_names_str = ",".join(df_users.iloc[i:(i+100)]['user_screen_name'].tolist())
    users_response = api.lookup_users(screen_names=[users_screen_names_str])
    for j in range(len(users_response)):
        user_info = users_response[j]._json
        users_info.append(user_info)

### Extract useful information from users' information

In [8]:
users_pd = pd.DataFrame(users_info)
columns_oi = ['id_str', 'screen_name', 'location', 'created_at', 'followers_count', 'friends_count', 'listed_count']
users_pd_oi = users_pd[columns_oi]

### Join back to Hoaxy data

In [9]:
df_merge1 = df_visual.merge(users_pd_oi.add_prefix("from_user_"), how='left', on='from_user_screen_name')
df_merge = df_merge1.merge(users_pd_oi.add_prefix("to_user_"), how='left', on='to_user_screen_name')

### Tidy up and export to csv

In [10]:
df_merge = df_merge[sorted(df_merge.columns)]
df_merge.to_csv(hoaxy_file_target_location, index = False, header=True)