In [1]:
import pystache, plotly, json, random, sys, yaml, glob, os
import pandas as pd
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from datetime import datetime

# Creates Interactive Diffusion Graphs

1. Reads YAML configuration files from `/home/jupyter/data/www/covid19-static-pages/configs`
2. Queries BigQuery for retweets.
3. Processes the results and writes a simplified JSON file of retweets.
4. Reads the simplified JSON into Plotly and writes the JSON configurations for the Plotly graphs and the HTML files.

In [2]:
# Authenticate with a Google service-account key file and build the BigQuery
# client; `client` is used as a module-level global by the query helpers below.
# NOTE(review): the key path and project id are hard-coded absolute values;
# consider reading them from environment variables so the notebook is portable.
credentials = service_account.Credentials.from_service_account_file(
    '/home/jupyter/covid-19-data/.credentials/google-connector.json')
project_id = 'crypto-eon-164220'
client = bigquery.Client(credentials=credentials, project=project_id)

## 1. Get top retweets from BigQuery

In [3]:
def get_top_N_retweeted_tweets(table, N=25):
    """Return every retweet of the N most-retweeted tweets in a BigQuery table.

    The inner query groups rows by ``retweeted_status.id``, counts distinct
    retweet ids, and keeps the N largest groups. The outer query joins those
    originals back to the table and emits one row per retweet id, ordered by
    the original tweet id.

    Args:
        table: Table name inside the ``crypto-eon-164220.tweets`` dataset.
        N: How many original tweets to keep.

    Returns:
        A pandas DataFrame with the columns retweet_id, orig_id, orig_text,
        source, times_retweeted, orig_author, orig_followers_count,
        orig_posted, retweeter, retweeter_followers_count, retweet_timestamp.
    """
    sys.stderr.write("Querying for top {} retweets in {}...".format(N, table))

    # NOTE: table and N are interpolated straight into the SQL text, so they
    # must come from trusted configuration.
    sql_template = """
SELECT
  id AS retweet_id,
  min(original_tweet_id) AS orig_id,
  min(tweet_text) AS orig_text,
  min(source) AS source,
  min(times_retweeted_) AS times_retweeted,
  min(original_author) AS orig_author,
  min(original_followers) as orig_followers_count,
  min(original_posted) AS orig_posted,
  min(user.screen_name) AS retweeter,
  min(user.followers_count) AS retweeter_followers_count,
  min(PARSE_TIMESTAMP('%a %b %d %T %z %Y', created_at)) AS retweet_timestamp
FROM
  `crypto-eon-164220.tweets.{TABLE}` tweets,
  (
  SELECT
    MIN(retweeted_status.id) AS original_tweet_id,
    MIN(retweeted_status.text) AS tweet_text,
    MIN(retweeted_status.user.screen_name) AS original_author,
    MIN(retweeted_status.user.followers_count) AS original_followers,
    MIN(PARSE_TIMESTAMP('%a %b %d %T %z %Y', retweeted_status.created_at)) AS original_posted,
    COUNT(DISTINCT(id)) AS times_retweeted_
  FROM
    `crypto-eon-164220.tweets.{TABLE}`
  WHERE
    retweeted_status IS NOT NULL
    AND retweeted_status.id >= (SELECT MIN(id) FROM `crypto-eon-164220.tweets.{TABLE}`)
  GROUP BY
    retweeted_status.id
  ORDER BY times_retweeted_ DESC
  LIMIT {N}
  ) topRetweets
WHERE
  topRetweets.original_tweet_id = tweets.retweeted_status.id
GROUP by id
order by orig_id
"""
    job = client.query(sql_template.format(TABLE=table, N=N))

    sys.stderr.write("done; creating dataframe\n")
    retweets_frame = job.result().to_dataframe()
    return retweets_frame

def get_tweets_retweeted_more_than_X_times(table, threshold=2000):
    """Return every retweet of tweets retweeted more than ``threshold`` times.

    The inner query groups rows by ``retweeted_status.id`` and counts distinct
    retweet ids. The outer query joins originals whose count exceeds the
    threshold back to the table and emits one row per retweet id, ordered by
    the original tweet id.

    The result includes the ``source`` column so that it has the same columns
    as ``get_top_N_retweeted_tweets`` and can be passed to
    ``create_dataframe_for_plotly``, which selects ``source``.

    Args:
        table: Table name inside the ``crypto-eon-164220.tweets`` dataset.
        threshold: Minimum (exclusive) number of distinct retweets.

    Returns:
        A pandas DataFrame with the columns retweet_id, orig_id, orig_text,
        source, times_retweeted, orig_author, orig_followers_count,
        orig_posted, retweeter, retweeter_followers_count, retweet_timestamp.
    """
    sys.stderr.write("Querying for tweets retweeted more than {} times in {}...".format(threshold, table))

    # NOTE: table and threshold are interpolated straight into the SQL text,
    # so they must come from trusted configuration.
    query_job = client.query("""
SELECT
  id AS retweet_id,
  min(original_tweet_id) AS orig_id,
  min(tweet_text) AS orig_text,
  min(source) AS source,
  min(times_retweeted_) AS times_retweeted,
  min(original_author) AS orig_author,
  min(original_followers) as orig_followers_count,
  min(original_posted) AS orig_posted,
  min(user.screen_name) AS retweeter,
  min(user.followers_count) AS retweeter_followers_count,
  min(PARSE_TIMESTAMP('%a %b %d %T %z %Y', created_at)) AS retweet_timestamp
FROM
  `crypto-eon-164220.tweets.{TABLE}` tweets,
  (
  SELECT
    MIN(retweeted_status.id) AS original_tweet_id,
    MIN(retweeted_status.text) AS tweet_text,
    MIN(retweeted_status.user.screen_name) AS original_author,
    MIN(retweeted_status.user.followers_count) AS original_followers,
    MIN(PARSE_TIMESTAMP('%a %b %d %T %z %Y', retweeted_status.created_at)) AS original_posted,
    COUNT(DISTINCT(id)) AS times_retweeted_
  FROM
    `crypto-eon-164220.tweets.{TABLE}`
  WHERE
    retweeted_status IS NOT NULL
    AND retweeted_status.id >= (SELECT MIN(id) FROM `crypto-eon-164220.tweets.{TABLE}`)
  GROUP BY
    retweeted_status.id
  ORDER BY times_retweeted_ DESC
  ) topRetweets
WHERE
  topRetweets.original_tweet_id = tweets.retweeted_status.id
  AND topRetweets.times_retweeted_ > {TIMES_RETWEETED_THRESHOLD}
GROUP by id
order by orig_id
""".format(TABLE=table,
           TIMES_RETWEETED_THRESHOLD=threshold) )

    sys.stderr.write("done; creating dataframe\n")
    return query_job.result().to_dataframe()

In [4]:
def create_dataframe_for_plotly(df, fileName):
    """Flatten the retweet query result into per-tweet diffusion records on disk.

    For each original tweet (grouped by ``orig_id``) a small frame is built
    whose first row is the original tweet and whose remaining rows are its
    retweets sorted by ``retweet_id``. Each frame gets a running total of
    ``followers_count``, the tweet text, and ``top_N``, the tweet's rank by
    number of retweet rows in ``df`` (1 = most rows). All frames are combined,
    sorted by ``top_N``, and dumped as a JSON list of records.

    Args:
        df: DataFrame with the columns orig_id, orig_posted, orig_author,
            orig_followers_count, orig_text, retweet_id, retweet_timestamp,
            retweeter, retweeter_followers_count and source.
        fileName: Output path prefix; the file written is
            ``<fileName>_<YYYY-MM-DD>.json`` using today's date.

    Returns:
        None. The result is written to disk. A frame with no rows produces a
        file containing an empty JSON list.
    """
    # Rank lookup built once instead of a list.index() scan per group.
    ranked_ids = df.orig_id.value_counts(ascending=False).keys()
    rank_by_id = {tweet_id: rank for rank, tweet_id in enumerate(ranked_ids, start=1)}

    # Query column -> record field name (order matters for the output columns).
    column_names = {
        'retweet_id': 'id',
        'retweet_timestamp': 'created_at',
        'retweeter': 'username',
        'retweeter_followers_count': 'followers_count',
    }

    frames = []
    for count, (original_id, retweets) in enumerate(df.groupby('orig_id'), start=1):
        topN = rank_by_id[original_id]  # The Top N tweet...
        print("Processing Tweet# {} - TopN [{}] - ID: {})".format(count, topN, original_id))

        sorted_retweets = retweets.sort_values(by='retweet_id')
        first = sorted_retweets.iloc[0]

        # Every retweet row carries the original tweet's details; take them
        # from the first row.
        original_tweet = pd.DataFrame([{
            'id': first.orig_id,
            'created_at': first.orig_posted,
            'username': first.orig_author,
            'followers_count': first.orig_followers_count
        }])

        retweet_rows = (
            sorted_retweets[list(column_names) + ['source']]
            .rename(columns=column_names)
        )

        # Add the first row for the original tweet.
        diffusion = pd.concat([original_tweet, retweet_rows]).reset_index(drop=True)
        diffusion['followers_count_cumsum'] = diffusion.followers_count.cumsum()
        diffusion['text'] = first.orig_text
        diffusion['top_N'] = topN
        diffusion['created_at'] = diffusion.created_at.apply(lambda stamp: stamp.isoformat())

        frames.append(diffusion)

    if frames:
        # One concat for all groups (linear, not quadratic). Groups go in
        # reverse processing order so output stays comparable with files
        # written by earlier runs.
        records = pd.concat(frames[::-1]).sort_values(by='top_N').to_dict('records')
    else:
        records = []

    current_datestamp = datetime.today().strftime('%Y-%m-%d')
    with open(fileName +"_"+current_datestamp+'.json','w') as f:
        json.dump(records, f)

In [5]:
def get_data(config):
    """Query BigQuery for the configured table and write the diffusion JSON.

    Reads ``table``, ``data`` and the optional ``topN`` key from ``config``.
    A missing or falsy ``topN`` falls back to 10.
    """
    # NOTE(review): full_run falls back to 25 when topN is absent, while this
    # query falls back to 10 - confirm the difference is intended.
    requested = config.get("topN")
    top_n = requested if requested else 10

    retweets = get_top_N_retweeted_tweets(config['table'], N=top_n)
    create_dataframe_for_plotly(retweets, config['data'])

## 2. Create Plots

In [6]:
# Qualitative palette for the traces; each tweet's colour is picked as
# COLORS[rank % len(COLORS)].
COLORS = plotly.colors.qualitative.Alphabet

In [7]:
def view_colors():
    """Display the COLORS palette as a row of swatches.

    Drawn with Plotly, which this notebook imports; seaborn is not imported
    here, so a call to ``sns.palplot`` would raise NameError on a fresh kernel.
    Each bar is labelled with its index in COLORS, i.e. the value of
    ``rank % len(COLORS)`` that selects it.
    """
    palette = list(COLORS)
    swatches = go.Figure(go.Bar(
        x=[str(position) for position in range(len(palette))],
        y=[1] * len(palette),
        marker_color=palette,
        hovertext=palette,
    ))
    swatches.update_layout(
        height=150,
        margin=dict(t=10, r=10, l=10, b=30),
        bargap=0,
        yaxis_visible=False,
        showlegend=False,
    )
    swatches.show()

def normalize(values, desired_bounds):
    """Linearly rescale ``values`` into ``desired_bounds``, rounded to 2 decimals.

    Args:
        values: Iterable of numbers.
        desired_bounds: ``(low, high)`` pair; the smallest input maps to
            ``low`` and the largest to ``high``.

    Returns:
        A list with one rescaled value per input. An empty input gives an
        empty list. If all inputs are equal there is no range to scale, so
        every value maps to ``low`` instead of dividing by zero.
    """
    values = list(values)
    if not values:
        return []

    low, high = desired_bounds[0], desired_bounds[1]
    smallest, largest = min(values), max(values)
    spread = largest - smallest

    if spread == 0:
        return [round(float(low), 2) for _ in values]

    # Multiply before dividing so results match the original expression exactly.
    return [round(low + (value - smallest) * (high - low) / spread, 2)
            for value in values]

In [8]:
# Read the latest diffusion data...
def read_dataframe_from_file(CONFIG):
    """Load the most recent diffusion JSON file for a configuration.

    Files are matched with the glob ``CONFIG['data'] + "*.json"`` and the
    lexicographically last path is used. With the ``_YYYY-MM-DD.json`` suffix
    written by ``create_dataframe_for_plotly``, that is the newest file.

    Args:
        CONFIG: Mapping with a ``data`` key holding the file path prefix.

    Returns:
        A DataFrame of the records, with an added ``timestamp`` column parsed
        from ``created_at``.

    Raises:
        FileNotFoundError: If no file matches the prefix.
    """
    candidates = sorted(glob.glob(CONFIG['data']+"*.json"))
    if not candidates:
        raise FileNotFoundError(
            "No diffusion data files match {}*.json".format(CONFIG['data']))

    # Get the latest file
    latest_file = candidates[-1]

    sys.stderr.write("Loading "+latest_file+"...")
    # Context manager so the handle is closed even if parsing fails.
    with open(latest_file, 'r') as source_file:
        to_plot = json.load(source_file)

    df = pd.DataFrame(to_plot)
    df['timestamp'] = df.created_at.apply(pd.Timestamp)
    sys.stderr.write("Read {:,} retweets\n".format(len(to_plot)))

    return df

In [9]:
def calculate_self_retweets(input_df):
    """Discount self-retweets from the cumulative follower totals.

    Works on a deep copy of ``input_df``. For each ``top_N`` value the rows are
    walked in ``id`` order: the first row is taken as the original tweet, and
    any later row with the same ``username`` is recorded as a self-retweet.
    From the first self-retweet onward, ``followers_count_cumsum`` is reduced
    by the current discount, which is the smaller of the retweet's and the
    original's ``followers_count``.

    Args:
        input_df: DataFrame with the columns top_N, id, username,
            followers_count, followers_count_cumsum, text and timestamp.

    Returns:
        ``(df, tweet_data)``: the adjusted copy, and a dict keyed by
        ``str(top_N)`` holding text, user, rank, color, id, time and a
        ``self-rt`` list of ``{'x': iso_time, 'subValue': discount}`` entries.
    """
    sys.stderr.write("Discounting self-retweets: [")
    df = input_df.copy(deep=True)

    tweet_data = {}

    for topN in df.top_N.unique():
        subValue = 0
        ordered = df[df.top_N == topN].sort_values(by='id')

        for position, (idx, row) in enumerate(ordered.iterrows()):
            if position == 0:
                # First row in id order is the original tweet.
                thisUser = row.username
                origFollowerCount = row.followers_count
                origTweetID = str(row.id)
                tweet_data[str(topN)] = {
                    'text' : row.text,
                    'user' : thisUser,
                    'rank' : int(topN),
                    'color': COLORS[topN%len(COLORS)],
                    'id'   : origTweetID,
                    'self-rt' : [],
                    'time' : row.timestamp.isoformat()
                }
            elif row.username == thisUser:
                # NOTE(review): a second self-retweet overwrites subValue
                # rather than adding to it, so only the latest discount is
                # subtracted from later rows - confirm this is intended.
                subValue = min(row.followers_count, origFollowerCount)
                tweet_data[str(topN)]['self-rt'].append(
                    {'x':row.timestamp.isoformat(), 'subValue':subValue}
                )

            # The discount stays in effect for every row after a self-retweet.
            if subValue > 0:
                df.loc[idx,'followers_count_cumsum'] = row.followers_count_cumsum - subValue

        sys.stderr.write("{},".format(topN))

    # No .format() here: the string has no placeholder, and referencing topN
    # raised NameError when the frame had no rows.
    sys.stderr.write("] done\n")

    return (df, tweet_data)

In [10]:
def custom_label(row):
    """Return the hover label for one row: ``"<username>: <followers_count> followers"``."""
    who = row.username
    reach = row.followers_count
    return "{}: {} followers".format(who, reach)

def buildPlotlyGraph(df, topN=25):
    """Build the diffusion figure with one trace per ranked tweet.

    For each rank from 1 to ``topN`` that has rows in ``df``, the rows are
    sorted by ``id`` and added as a ``Scattergl`` trace of ``timestamp``
    against ``followers_count_cumsum``.

    Args:
        df: DataFrame with the columns top_N, id, text, timestamp, username,
            followers_count and followers_count_cumsum. A
            ``globalScaledMarker`` column is added to it in place.
        topN: Highest rank to plot.

    Returns:
        The ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: If the first row of a tweet has no text.
    """
    sys.stderr.write("Plotting top {} [".format(topN))

    # Marker sizes are scaled over the whole frame, not per tweet, so marker
    # sizes are comparable across traces.
    df['globalScaledMarker'] = normalize(list(df.followers_count), (10,100))

    fig = go.Figure()

    for topNidx in range(1,topN+1):

        plot_df = df[df.top_N==topNidx].sort_values(by='id')

        # Ranks with no rows are skipped silently.
        if len(plot_df) == 0:
            continue

        tweetText = plot_df.head(1).text.values[0]

        if pd.isna(tweetText):
            # Must be an exception instance: raising a bare string is itself a
            # TypeError and hides the real message.
            raise ValueError("No Tweet Text on First Entry")

        color = COLORS[topNidx%len(COLORS)]

        fig.add_trace(go.Scattergl(
            name = str(topNidx), #rank
            x    = plot_df.timestamp,
            y    = plot_df.followers_count_cumsum,
            mode = 'markers+lines',
            marker = dict(
                size  = plot_df.globalScaledMarker,
                color = color,
                opacity = 0.5,
                line=dict(
                    color='white',
                    width=0.4
                ),
            ),
            line=dict(
                color=color,
                width=0.75,
            ),
            hovertemplate ='%{x} - %{text}',
            text = list(plot_df.apply(custom_label, axis=1)),
            meta={'u':plot_df.username,
                  'f':plot_df.followers_count },
            showlegend = True
        ))
        sys.stderr.write(".")

    fig.update_layout(
        autosize=False,
        width=1400,
        height=600,
        margin=dict(
            t=1,r=50,l=1,b=1
        ),
        legend=dict(
            x=1,
            y=1,
            traceorder="normal",
            font=dict(
                family="sans-serif",
                size=12,
                color="black"
            ),
        ),
        yaxis_title="Potential Audience Exposure",)

    sys.stderr.write("] Done\n")
    return fig

In [11]:
# # For testing (initialize all below functions first (ah, jupyter)): 
# # c = full_run('/home/jupyter/data/www/covid19-static-pages/configs/cdc.yaml', query=False)

# # Then change the function as much as needed and access with:
# fig = buildPlotlyGraph(c['no_self_retweets'], (c.get('TopN') or 25))
# # fig.show()

# #Or write the JSON
# figJSON = json.loads(plotly.io.to_json(fig))
# figJSON['tweets'] = c['tweets']
# with open(STATIC_PAGES_ROOT +"/docs/"+ c['JSON'],'w') as outFile: 
#     json.dump(figJSON, outFile)
# buildSingleStaticPlotlyPage(c)

In [12]:
def buildSingleStaticPlotlyPage(CONFIG):
    """Render the Plotly page template for one configuration and write the HTML.

    The mustache template ``templates/plotly_js_template.html`` under
    ``STATIC_PAGES_ROOT`` is rendered with ``CONFIG`` as the context, and the
    result is written to ``STATIC_PAGES_ROOT/<CONFIG['output']>``.

    Args:
        CONFIG: Mapping used as the template context; must contain ``output``,
            the destination path relative to ``STATIC_PAGES_ROOT``.
    """
    sys.stderr.write("Writing HTML... ")

    # Read the template inside a context manager so the handle is closed.
    with open(STATIC_PAGES_ROOT + '/templates/plotly_js_template.html') as templateFile:
        main_template = templateFile.read()

    with open(STATIC_PAGES_ROOT + "/" + CONFIG['output'],'w') as outFile:
        outFile.write(pystache.render(main_template, CONFIG))
    sys.stderr.write(" view at: http://epic.tweetsonamap.com/covid19-static-pages/"+CONFIG['output']+"\n")

<br><br><hr><br><br>

# Runtime


In [13]:
# Root of the static-pages checkout: configs are read from <root>/configs,
# the HTML template from <root>/templates, and output is written under <root>.
STATIC_PAGES_ROOT = '/home/jupyter/data/www/covid19-static-pages'

In [14]:
def full_run(yaml_config, query=True, plot=True, write_html=True):
    """Run the pipeline for one YAML configuration file.

    Args:
        yaml_config: Path to the YAML configuration file.
        query: If true, re-query BigQuery and write a fresh diffusion JSON.
        plot: If true, load the latest diffusion JSON, discount self-retweets,
            build the figure, and write the figure JSON (plus the ``tweets``
            metadata) to ``STATIC_PAGES_ROOT/docs/<config['JSON']>``.
        write_html: If true, render the HTML page for the configuration.

    Returns:
        The configuration dict. When ``plot`` is true it also holds ``df``,
        ``no_self_retweets``, ``tweets`` and ``fig``.
    """
    print("Building page for: {}".format(yaml_config))

    # Context manager so the config file handle is closed.
    # NOTE(review): yaml.FullLoader can construct arbitrary Python objects
    # from tagged YAML; consider yaml.safe_load if configs only hold plain
    # mappings and scalars.
    with open(yaml_config,'r') as config_file:
        config = yaml.load(config_file,
                           Loader=yaml.FullLoader)

    if query:
        get_data(config)

    if plot:
        config['df'] = read_dataframe_from_file(config)

        topN = config.get('topN') or 25

        config['no_self_retweets'], config['tweets'] = calculate_self_retweets(config['df'] )

        config['fig'] = buildPlotlyGraph(config['no_self_retweets'], topN=topN)

        figJSON = json.loads(plotly.io.to_json(config['fig']))
        figJSON['tweets'] = config['tweets']

        with open(STATIC_PAGES_ROOT +"/docs/"+ config['JSON'],'w') as outFile:
            json.dump(figJSON, outFile)

    if write_html:
        buildSingleStaticPlotlyPage(config)

    return config

In [18]:
# Do some testing?
# full_run('/home/jupyter/data/www/covid19-static-pages/configs/covid-maps.yaml')

# Single-configuration run; query=True re-queries BigQuery before plotting.
x = full_run('/home/jupyter/data/www/covid19-static-pages/configs/cdc.yaml', query=True, plot=True)


Building page for: /home/jupyter/data/www/covid19-static-pages/configs/cdc.yaml


Querying for top 25 retweets in cdc_userstreams...done; creating dataframe


Processing Tweet# 1 - TopN [24] - ID: 1251617761639489536)
Processing Tweet# 2 - TopN [3] - ID: 1252632573798473731)
Processing Tweet# 3 - TopN [11] - ID: 1252993080174796800)
Processing Tweet# 4 - TopN [8] - ID: 1253085285161803777)
Processing Tweet# 5 - TopN [6] - ID: 1253341609443250176)
Processing Tweet# 6 - TopN [15] - ID: 1253374836317044736)
Processing Tweet# 7 - TopN [2] - ID: 1253422154256756738)
Processing Tweet# 8 - TopN [13] - ID: 1253468948068290560)
Processing Tweet# 9 - TopN [14] - ID: 1253705772569067520)
Processing Tweet# 10 - TopN [1] - ID: 1253742258853199872)
Processing Tweet# 11 - TopN [7] - ID: 1253793094841090056)
Processing Tweet# 12 - TopN [10] - ID: 1253818998942314496)
Processing Tweet# 13 - TopN [18] - ID: 1254129055219298308)
Processing Tweet# 14 - TopN [17] - ID: 1254857606859898881)
Processing Tweet# 15 - TopN [20] - ID: 1255971941019762694)
Processing Tweet# 16 - TopN [5] - ID: 1256309675269615616)
Processing Tweet# 17 - TopN [22] - ID: 12566554511957155

Loading /home/jupyter/data/diffusion/cdc_userstreams_top_25_retweeted_2020-06-09.json...Read 57,572 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/cdc.html


### Load each configuration and do a full run

Loading from `/home/jupyter/data/www/covid19-static-pages/configs/`

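Set `query=True` only when the data actually needs to be refreshed: every run with `query=True` queries BigQuery again, so do this at most weekly to limit cost. Use `query=False` to rebuild the pages from the most recent JSON files on disk.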

In [15]:
# Collect every YAML configuration under the static-pages root.
# NOTE(review): glob.glob returns paths in arbitrary order; wrap the call in
# sorted() if a stable processing order matters.
pages = glob.glob(STATIC_PAGES_ROOT + "/configs/*.yaml")
print("Found {} configurations".format(len(pages)))

Found 8 configurations


In [16]:
# Rebuild every page. query=True re-queries BigQuery for each configuration;
# pass query=False to re-plot from the latest JSON file on disk instead.
for configuration_file in pages:
    x = full_run(configuration_file, query=True, plot=True)

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/cdc-keywords.yaml


Querying for top 25 retweets in cdc...done; creating dataframe


Processing Tweet# 1 - TopN [18] - ID: 1239964479544156161)
Processing Tweet# 2 - TopN [12] - ID: 1240317386059853825)
Processing Tweet# 3 - TopN [5] - ID: 1240380132226928640)
Processing Tweet# 4 - TopN [13] - ID: 1240567553249824769)
Processing Tweet# 5 - TopN [10] - ID: 1240669719595831296)
Processing Tweet# 6 - TopN [20] - ID: 1241183544849969153)
Processing Tweet# 7 - TopN [2] - ID: 1241367245143642113)
Processing Tweet# 8 - TopN [19] - ID: 1241883781293920256)
Processing Tweet# 9 - TopN [17] - ID: 1241910439841234945)
Processing Tweet# 10 - TopN [8] - ID: 1243304246759559174)
Processing Tweet# 11 - TopN [7] - ID: 1243592209418592258)
Processing Tweet# 12 - TopN [22] - ID: 1243915217689612288)
Processing Tweet# 13 - TopN [3] - ID: 1244056534583312384)
Processing Tweet# 14 - TopN [23] - ID: 1244263133516095488)
Processing Tweet# 15 - TopN [9] - ID: 1246128671372558336)
Processing Tweet# 16 - TopN [11] - ID: 1246891081612103685)
Processing Tweet# 17 - TopN [24] - ID: 1252306443497353

Loading /home/jupyter/data/diffusion/cdc_top_25_retweeted_2020-06-16.json...Read 255,589 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/cdc-keyword.html
Querying for top 25 retweets in covid_maps...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/covid-maps.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [11] - ID: 1247525392904118274)
Processing Tweet# 2 - TopN [12] - ID: 1247981587204935681)
Processing Tweet# 3 - TopN [22] - ID: 1248356504924753925)
Processing Tweet# 4 - TopN [25] - ID: 1248812705634373635)
Processing Tweet# 5 - TopN [23] - ID: 1251104709844234240)
Processing Tweet# 6 - TopN [4] - ID: 1251238755819814912)
Processing Tweet# 7 - TopN [13] - ID: 1252058477054308353)
Processing Tweet# 8 - TopN [5] - ID: 1252975821133840384)
Processing Tweet# 9 - TopN [10] - ID: 1253298630884241409)
Processing Tweet# 10 - TopN [21] - ID: 1255077875885187072)
Processing Tweet# 11 - TopN [17] - ID: 1255086086281281538)
Processing Tweet# 12 - TopN [6] - ID: 1255086554579525634)
Processing Tweet# 13 - TopN [20] - ID: 1255128464274763788)
Processing Tweet# 14 - TopN [14] - ID: 1255487754890285056)
Processing Tweet# 15 - TopN [16] - ID: 1256220002245652480)
Processing Tweet# 16 - TopN [19] - ID: 1260571831657996305)
Processing Tweet# 17 - TopN [3] - ID: 12631348155383

Loading /home/jupyter/data/diffusion/covid-maps_top_25_retweeted_2020-06-16.json...Read 112,082 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/covid-maps.html
Querying for top 25 retweets in covid_charts_and_graphics...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/covid-charts.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [19] - ID: 1243606217567744000)
Processing Tweet# 2 - TopN [12] - ID: 1244567505177210880)
Processing Tweet# 3 - TopN [14] - ID: 1245467940390133760)
Processing Tweet# 4 - TopN [5] - ID: 1251105543248363522)
Processing Tweet# 5 - TopN [1] - ID: 1251792857284751360)
Processing Tweet# 6 - TopN [7] - ID: 1251809060497829888)
Processing Tweet# 7 - TopN [8] - ID: 1252998685346148358)
Processing Tweet# 8 - TopN [6] - ID: 1253033795642671107)
Processing Tweet# 9 - TopN [13] - ID: 1253044684789886977)
Processing Tweet# 10 - TopN [15] - ID: 1255236263097126912)
Processing Tweet# 11 - TopN [23] - ID: 1255880290179956736)
Processing Tweet# 12 - TopN [11] - ID: 1257265014618173440)
Processing Tweet# 13 - TopN [18] - ID: 1259598334010036224)
Processing Tweet# 14 - TopN [20] - ID: 1259625130512318464)
Processing Tweet# 15 - TopN [9] - ID: 1260610012130545665)
Processing Tweet# 16 - TopN [22] - ID: 1260976692375564290)
Processing Tweet# 17 - TopN [24] - ID: 1261679137498058

Loading /home/jupyter/data/diffusion/covid-charts_top_25_retweeted_2020-06-16.json...Read 192,443 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/covid-charts.html
Querying for top 25 retweets in cdc_userstreams...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/cdc.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [3] - ID: 1252632573798473731)
Processing Tweet# 2 - TopN [11] - ID: 1252993080174796800)
Processing Tweet# 3 - TopN [8] - ID: 1253085285161803777)
Processing Tweet# 4 - TopN [6] - ID: 1253341609443250176)
Processing Tweet# 5 - TopN [15] - ID: 1253374836317044736)
Processing Tweet# 6 - TopN [2] - ID: 1253422154256756738)
Processing Tweet# 7 - TopN [13] - ID: 1253468948068290560)
Processing Tweet# 8 - TopN [14] - ID: 1253705772569067520)
Processing Tweet# 9 - TopN [1] - ID: 1253742258853199872)
Processing Tweet# 10 - TopN [7] - ID: 1253793094841090056)
Processing Tweet# 11 - TopN [10] - ID: 1253818998942314496)
Processing Tweet# 12 - TopN [18] - ID: 1254129055219298308)
Processing Tweet# 13 - TopN [17] - ID: 1254857606859898881)
Processing Tweet# 14 - TopN [22] - ID: 1255971941019762694)
Processing Tweet# 15 - TopN [5] - ID: 1256309675269615616)
Processing Tweet# 16 - TopN [24] - ID: 1256655451195715585)
Processing Tweet# 17 - TopN [16] - ID: 12610443726211891

Loading /home/jupyter/data/diffusion/cdc_userstreams_top_25_retweeted_2020-06-16.json...Read 58,174 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/cdc.html
Querying for top 10 retweets in donald_trump...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/trump.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [4] - ID: 1259561821289226248)
Processing Tweet# 2 - TopN [8] - ID: 1260735992727769096)
Processing Tweet# 3 - TopN [5] - ID: 1262361817373958145)
Processing Tweet# 4 - TopN [2] - ID: 1266354084036194306)
Processing Tweet# 5 - TopN [1] - ID: 1267129644228247552)
Processing Tweet# 6 - TopN [7] - ID: 1267515702652678144)
Processing Tweet# 7 - TopN [10] - ID: 1267609190140436481)
Processing Tweet# 8 - TopN [3] - ID: 1267992040010285057)
Processing Tweet# 9 - TopN [9] - ID: 1268497685553823745)
Processing Tweet# 10 - TopN [6] - ID: 1268671097010094082)


Loading /home/jupyter/data/diffusion/trump_top_10_retweeted_2020-06-16.json...Read 927,204 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,] done
Plotting top 10 [..........] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/trump.html
Querying for top 25 retweets in us_governors...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/us_governors.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [20] - ID: 1253356421577654273)
Processing Tweet# 2 - TopN [18] - ID: 1253358477415788548)
Processing Tweet# 3 - TopN [4] - ID: 1253729749177970689)
Processing Tweet# 4 - TopN [13] - ID: 1253784682262650881)
Processing Tweet# 5 - TopN [25] - ID: 1256982073195343878)
Processing Tweet# 6 - TopN [11] - ID: 1258404381722017793)
Processing Tweet# 7 - TopN [15] - ID: 1258879674437914624)
Processing Tweet# 8 - TopN [10] - ID: 1260273563275231234)
Processing Tweet# 9 - TopN [5] - ID: 1262257093031202816)
Processing Tweet# 10 - TopN [23] - ID: 1262776230786580480)
Processing Tweet# 11 - TopN [8] - ID: 1263163614971588608)
Processing Tweet# 12 - TopN [16] - ID: 1263190873908621316)
Processing Tweet# 13 - TopN [6] - ID: 1263814853921251333)
Processing Tweet# 14 - TopN [2] - ID: 1264558205683843073)
Processing Tweet# 15 - TopN [17] - ID: 1264924734472491009)
Processing Tweet# 16 - TopN [3] - ID: 1266036686053613574)
Processing Tweet# 17 - TopN [24] - ID: 1266765209391284

Loading /home/jupyter/data/diffusion/us_governors_top_25_retweeted_2020-06-16.json...Read 578,243 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/us-governors.html


Building page for: /home/jupyter/data/www/covid19-static-pages/configs/covid-data.yaml


Querying for top 25 retweets in covid_data_representations...done; creating dataframe


Processing Tweet# 1 - TopN [13] - ID: 1244382336533266432)
Processing Tweet# 2 - TopN [23] - ID: 1245612562730737667)
Processing Tweet# 3 - TopN [16] - ID: 1246629670570901504)
Processing Tweet# 4 - TopN [15] - ID: 1247191480835485698)
Processing Tweet# 5 - TopN [5] - ID: 1247291781978431489)
Processing Tweet# 6 - TopN [20] - ID: 1247917397400526848)
Processing Tweet# 7 - TopN [14] - ID: 1251352745824550912)
Processing Tweet# 8 - TopN [7] - ID: 1252239099198672899)
Processing Tweet# 9 - TopN [11] - ID: 1253455078071173128)
Processing Tweet# 10 - TopN [19] - ID: 1254461123753054209)
Processing Tweet# 11 - TopN [18] - ID: 1257096834218168321)
Processing Tweet# 12 - TopN [24] - ID: 1257437963467862019)
Processing Tweet# 13 - TopN [10] - ID: 1260359025704800264)
Processing Tweet# 14 - TopN [6] - ID: 1262208199152881664)
Processing Tweet# 15 - TopN [17] - ID: 1262569508553752579)
Processing Tweet# 16 - TopN [4] - ID: 1262589109517733888)
Processing Tweet# 17 - TopN [12] - ID: 12634683000691

Loading /home/jupyter/data/diffusion/covid-data_top_25_retweeted_2020-06-16.json...Read 445,289 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/covid-data.html
Querying for top 25 retweets in world_health_organization...

Building page for: /home/jupyter/data/www/covid19-static-pages/configs/who.yaml


done; creating dataframe


Processing Tweet# 1 - TopN [6] - ID: 1250555076156043264)
Processing Tweet# 2 - TopN [8] - ID: 1250938650310914049)
Processing Tweet# 3 - TopN [21] - ID: 1251191291381080064)
Processing Tweet# 4 - TopN [16] - ID: 1251454794260189184)
Processing Tweet# 5 - TopN [4] - ID: 1251571221998653440)
Processing Tweet# 6 - TopN [24] - ID: 1252257397793423360)
Processing Tweet# 7 - TopN [13] - ID: 1252260563343880194)
Processing Tweet# 8 - TopN [10] - ID: 1253034861033656320)
Processing Tweet# 9 - TopN [2] - ID: 1253464496443658249)
Processing Tweet# 10 - TopN [17] - ID: 1253917386371104769)
Processing Tweet# 11 - TopN [1] - ID: 1253995619921821698)
Processing Tweet# 12 - TopN [11] - ID: 1254160937805926405)
Processing Tweet# 13 - TopN [23] - ID: 1254897255292776457)
Processing Tweet# 14 - TopN [25] - ID: 1255609083446657024)
Processing Tweet# 15 - TopN [20] - ID: 1255935908978794497)
Processing Tweet# 16 - TopN [18] - ID: 1257586848198688768)
Processing Tweet# 17 - TopN [15] - ID: 125793794842475

Loading /home/jupyter/data/diffusion/who_top_25_retweeted_2020-06-16.json...Read 65,833 retweets
Discounting self-retweets: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,] done
Plotting top 25 [.........................] Done
Writing HTML...  view at: http://epic.tweetsonamap.com/covid19-static-pages/docs/who.html


<br><br><br><hr><br><br><br>

In [21]:
# Reminder only: the copy to the Google bucket is a manual step and is not run here.
print(
    "Be sure to run the following command to copy the data files to Google buckets:\n\n",
    "gsutil -m cp -r /home/jupyter/data/www/covid19-static-pages/docs/data gs://epic-covid19/diffusion-graphs/",
    sep="\n",
)

Be sure to run the following command to copy the data files to Google buckets:


gsutil -m cp -r /home/jupyter/data/www/covid19-static-pages/docs/data gs://epic-covid19/diffusion-graphs/
