In [1]:
import pystache, plotly, json, random, sys, yaml, glob, os
import pandas as pd
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from datetime import datetime


# Create Interactive Diffusion Graph for Retweets of the CDC that Trump also retweeted

1. Reads yaml configuration file from `/home/jupyter/data/www/covid19-static-pages/configs/cdc-trump.special`
2. Queries BigQuery for retweets, including extra user information.
3. Processes the retweets and produces simplified JSON output.
4. Reads the simplified JSON into Plotly and writes JSON configurations for Plotly graphs and HTML files.

In [2]:
# Build Google credentials from a service-account key file on local disk.
# NOTE(review): the absolute path ties this notebook to one machine — consider
# moving it to a config cell or reading it from an environment variable.
credentials = service_account.Credentials.from_service_account_file(
    '/home/jupyter/covid-19-data/.credentials/google-connector.json')
# Google Cloud project the BigQuery client is bound to.
project_id = 'crypto-eon-164220'
# Shared BigQuery client; get_trump_retweeter_stream() uses it to run its query.
client = bigquery.Client(credentials=credentials, project=project_id)

In [3]:
# Read in our other datasets

In [4]:
# Load two CSV files from the notebook's working directory.
# NOTE(review): neither frame is referenced in the cells visible in this part
# of the notebook — confirm they are still needed.
complete_dataframe = pd.read_csv('./cdc_retweeted_by_trump.csv')
for_jim = pd.read_csv('./retweeted_times_with_dates.csv')

In [5]:
# Reference times (UTC, note the trailing "Z") of the three retweet clusters
# that are queried one by one further down.
cluster_times = [
    pd.Timestamp(stamp)
    for stamp in (
        "2020-04-25T12:12:10Z",
        "2020-04-25T13:01:56Z",
        "2020-05-25T02:29:01Z",
    )
]

# Part III: Who was retweeting Trump's stream during these times?

In [6]:
def get_trump_retweeter_stream(time, hours_before=1, hours_after=2):
    """Fetch retweets whose original status was posted in a window around `time`.

    Runs a query against `crypto-eon-164220.tweets.donald_trump` through the
    notebook-level BigQuery ``client`` and keeps rows whose
    ``retweeted_status.created_at`` lies strictly between
    ``time - hours_before`` and ``time + hours_after``.  Rows are grouped by
    tweet ``id`` and every other column is reduced with ``min()``, so each
    tweet id appears once in the result.

    Parameters
    ----------
    time : pandas.Timestamp, or any value ``pd.Timestamp`` accepts
        Centre of the window.  Timezone-aware values are converted to UTC
        before their date parts are put into the SQL, because
        ``TIMESTAMP(DATETIME(...))`` reads those parts as UTC.  Naive values
        are used as-is, i.e. treated as already being UTC.
    hours_before : int or float, default 1
        Hours before `time` at which the window opens.
    hours_after : int or float, default 2
        Hours after `time` at which the window closes.

    Returns
    -------
    pandas.DataFrame
        One row per tweet id with the columns selected in the query:
        id, retweeted_id, original_tweet_time, created_at, username,
        user_join_date, followers_count, user_id, text.
    """
    time = pd.Timestamp(time)
    if time.tzinfo is not None:
        # Without this, a non-UTC timestamp would contribute local wall-clock
        # parts that BigQuery then interprets as UTC, shifting the window.
        time = time.tz_convert('UTC')

    a = (time - pd.Timedelta(hours=hours_before)).to_pydatetime()
    b = (time + pd.Timedelta(hours=hours_after)).to_pydatetime()

    # Only integer date parts are interpolated, so the string formatting below
    # cannot inject SQL.  The screen_name filter is intentionally left
    # commented out, as in the original query.
    query = """

SELECT id,
       min(retweeted_status.id_str) AS retweeted_id,
       min(retweeted_status.created_at) AS original_tweet_time,
       min(created_at) AS created_at, 
       min(user.screen_name) as username, 
       min(user.created_at) AS user_join_date, 
       min(user.followers_count) AS followers_count, 
       min(user.id_str) AS user_id, 
       min(text) AS text

FROM `crypto-eon-164220.tweets.donald_trump` 

WHERE PARSE_TIMESTAMP('%a %b %d %T %z %Y', retweeted_status.created_at) > TIMESTAMP(DATETIME({}, {}, {}, {}, {}, {}))
  AND PARSE_TIMESTAMP('%a %b %d %T %z %Y', retweeted_status.created_at) < TIMESTAMP(DATETIME({}, {}, {}, {}, {}, {}))
  -- AND retweeted_status.user.screen_name = 'realDonaldTrump' 
 
GROUP BY id
ORDER BY id
""".format(a.year, a.month, a.day, a.hour, a.minute, a.second, b.year, b.month, b.day, b.hour, b.minute, b.second)

    query_job = client.query(query)

    return query_job.result().to_dataframe()

In [7]:
# Query the retweet stream around the first cluster time.
first_group = get_trump_retweeter_stream(cluster_times[0])

In [8]:
# Preview the first two rows of the first group.
first_group.head(2)

Unnamed: 0,id,retweeted_id,original_tweet_time,created_at,username,user_join_date,followers_count,user_id,text
0,1254021092093693952,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:01 +0000 2020,Unfollow_Donald,Thu Jan 26 00:11:09 +0000 2017,244,824409302769303552,RT @realDonaldTrump: Thank you to William Perr...
1,1254021092236288002,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:01 +0000 2020,idontdonald,Fri Jan 27 04:35:34 +0000 2017,45,824838233762697216,RT @realDonaldTrump: Thank you to William Perr...


In [10]:
# Per original tweet: number of distinct retweet ids and distinct retweeting
# usernames, plus one text (the lexicographically smallest) as a label.
# 'min' is passed by name because handing pandas the builtin `min` is
# deprecated and emits a FutureWarning on recent pandas versions.
first_group.groupby('retweeted_id').aggregate({'id': 'nunique', 'username': 'nunique', 'text': 'min'})

Unnamed: 0_level_0,id,username,text
retweeted_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1254021070379769856,9108,9082,RT @realDonaldTrump: Thank you to William Perr...
1254022414058631170,16033,15971,RT @realDonaldTrump: .@TripleH is a total winner!
1254028869859971072,14185,14162,RT @realDonaldTrump: Thank you to the people o...
1254032243770736642,8878,8855,"RT @realDonaldTrump: Many testing sites are, a..."
1254050117767880706,21825,21774,RT @realDonaldTrump: Just spoke to Prime Minis...


In [11]:
# Query the retweet stream around the second cluster time and preview it.
# NOTE(review): cluster_times[1] is under an hour after cluster_times[0], so
# this window overlaps the first group's window (the previews start with the
# same rows).
second_group = get_trump_retweeter_stream(cluster_times[1])
second_group.head()

Unnamed: 0,id,retweeted_id,original_tweet_time,created_at,username,user_join_date,followers_count,user_id,text
0,1254021092093693952,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:01 +0000 2020,Unfollow_Donald,Thu Jan 26 00:11:09 +0000 2017,244,824409302769303552,RT @realDonaldTrump: Thank you to William Perr...
1,1254021092236288002,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:01 +0000 2020,idontdonald,Fri Jan 27 04:35:34 +0000 2017,45,824838233762697216,RT @realDonaldTrump: Thank you to William Perr...
2,1254021093033009153,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:01 +0000 2020,trump2ja,Tue Jan 17 10:41:30 +0000 2017,3486,821306443512967168,RT @realDonaldTrump: Thank you to William Perr...
3,1254021096032124928,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:02 +0000 2020,la_cates,Sun Nov 08 17:44:11 +0000 2009,15947,88467528,RT @realDonaldTrump: Thank you to William Perr...
4,1254021102243905538,1254021070379769856,Sat Apr 25 12:14:56 +0000 2020,Sat Apr 25 12:15:04 +0000 2020,Mike422422,Fri Feb 07 21:19:39 +0000 2020,1276,1225891864877899776,RT @realDonaldTrump: Thank you to William Perr...


In [12]:
# Query the retweet stream around the third cluster time and preview it.
third_group = get_trump_retweeter_stream(cluster_times[2])
third_group.head()

Unnamed: 0,id,retweeted_id,original_tweet_time,created_at,username,user_join_date,followers_count,user_id,text
0,1264734523020935168,1264734501357436928,Mon May 25 01:46:17 +0000 2020,Mon May 25 01:46:22 +0000 2020,Unfollow_Donald,Thu Jan 26 00:11:09 +0000 2017,248,824409302769303552,RT @realDonaldTrump: Sleepy Joe’s representati...
1,1264734523213991936,1264734501357436928,Mon May 25 01:46:17 +0000 2020,Mon May 25 01:46:22 +0000 2020,idontdonald,Fri Jan 27 04:35:34 +0000 2017,45,824838233762697216,RT @realDonaldTrump: Sleepy Joe’s representati...
2,1264734523222343683,1264734501357436928,Mon May 25 01:46:17 +0000 2020,Mon May 25 01:46:22 +0000 2020,important_trump,Mon Jan 08 18:02:15 +0000 2018,36,950427444053528578,RT @realDonaldTrump: Sleepy Joe’s representati...
3,1264734523608248323,1264734501961433088,Mon May 25 01:46:17 +0000 2020,Mon May 25 01:46:22 +0000 2020,Unfollow_Donald,Thu Jan 26 00:11:09 +0000 2017,248,824409302769303552,"RT @realDonaldTrump: ...vacationing, relaxing ..."
4,1264734527903203328,1264734501961433088,Mon May 25 01:46:17 +0000 2020,Mon May 25 01:46:23 +0000 2020,DJTMentionsBot,Tue Jul 18 02:23:19 +0000 2017,240,887135664319451137,"RT @realDonaldTrump: ...vacationing, relaxing ..."


In [24]:
def prepare_list_and_retweet_times_for_all_trump_retweeters(df, output_dir='./retweets_of_trump/'):
    """Write one CSV of retweets per original tweet found in `df`.

    `df` is grouped by ``retweeted_id``.  Each group is sorted by tweet ``id``,
    indexed by it, given the output column names below, and written to
    ``<output_dir>/<retweeted_id>-<original_tweet_time with spaces as "_">.csv``.
    Every file name is printed as it is written.  An existing file with the
    same name is overwritten.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``id`` column followed by exactly these eight columns, in
        this order (they are renamed by position): retweeted_id,
        original_tweet_time, created_at, username, user_join_date,
        followers_count, user_id, text.
    output_dir : str, default './retweets_of_trump/'
        Directory that receives the CSV files; created if it does not exist.

    Returns
    -------
    None
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for retweeted_id, retweets in df.groupby('retweeted_id'):

        retweets = retweets.sort_values(by='id').set_index('id')

        # Positional rename: relies on the column order documented above.
        retweets.columns = ['original_id','original_tweet_time','retweet_time','user','user_join_date','user_followers_count','user_id','text']

        filename = str(retweeted_id) + "-" + str(retweets.original_tweet_time.values[0].replace(" ","_"))+".csv"
        print(filename)

        # Hand pandas the path instead of an open() handle that was never
        # closed; pandas then opens, writes and closes the file itself, and
        # the explicit UTF-8 keeps non-ASCII tweet text independent of the
        # machine's locale encoding.
        retweets.to_csv(os.path.join(output_dir, filename), encoding='utf-8')
        
# Write one CSV per original tweet for the first cluster's retweets.
prepare_list_and_retweet_times_for_all_trump_retweeters(first_group)

1254021070379769856-Sat_Apr_25_12:14:56_+0000_2020.csv
1254022414058631170-Sat_Apr_25_12:20:16_+0000_2020.csv
1254028869859971072-Sat_Apr_25_12:45:55_+0000_2020.csv
1254032243770736642-Sat_Apr_25_12:59:20_+0000_2020.csv
1254050117767880706-Sat_Apr_25_14:10:21_+0000_2020.csv


In [25]:
# Export every cluster's retweets, one CSV per original tweet.
# NOTE(review): file names depend only on the original tweet's id and time, so
# a tweet that appears in more than one group is written to the same path and
# the later call overwrites the earlier file. first_group is also exported by
# the cell that defines the function — confirm the repeat is intended.
prepare_list_and_retweet_times_for_all_trump_retweeters(first_group)
prepare_list_and_retweet_times_for_all_trump_retweeters(second_group)
prepare_list_and_retweet_times_for_all_trump_retweeters(third_group)

1254021070379769856-Sat_Apr_25_12:14:56_+0000_2020.csv
1254022414058631170-Sat_Apr_25_12:20:16_+0000_2020.csv
1254028869859971072-Sat_Apr_25_12:45:55_+0000_2020.csv
1254032243770736642-Sat_Apr_25_12:59:20_+0000_2020.csv
1254050117767880706-Sat_Apr_25_14:10:21_+0000_2020.csv
1254021070379769856-Sat_Apr_25_12:14:56_+0000_2020.csv
1254022414058631170-Sat_Apr_25_12:20:16_+0000_2020.csv
1254028869859971072-Sat_Apr_25_12:45:55_+0000_2020.csv
1254032243770736642-Sat_Apr_25_12:59:20_+0000_2020.csv
1254050117767880706-Sat_Apr_25_14:10:21_+0000_2020.csv
1254060573655998466-Sat_Apr_25_14:51:54_+0000_2020.csv
1254061979607711745-Sat_Apr_25_14:57:29_+0000_2020.csv
1264734501357436928-Mon_May_25_01:46:17_+0000_2020.csv
1264734501961433088-Mon_May_25_01:46:17_+0000_2020.csv
1264737293249794048-Mon_May_25_01:57:23_+0000_2020.csv
1264741308440158213-Mon_May_25_02:13:20_+0000_2020.csv
1264742006007439364-Mon_May_25_02:16:06_+0000_2020.csv
1264748330594287622-Mon_May_25_02:41:14_+0000_2020.csv
1264754622

Unnamed: 0_level_0,id,created_at,screen_name,user_join_date,followers_count,id_str,text
retweeted_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1254021070379769856,9675,9675,9675,9675,9675,9675,9675
1254022414058631170,16871,16871,16871,16871,16871,16871,16871
1254028869859971072,14938,14938,14938,14938,14938,14938,14938
1254032243770736642,9515,9515,9515,9515,9515,9515,9515
