In [1]:
import os
import json
import re
import pandas as pd
import csv
import collections
import numpy as np
from ipyfilechooser import FileChooser
from plotnine import ggplot, geom_point, geom_line, aes, geom_label, coord_cartesian, themes
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
from IPython.display import display
import random

In [2]:
# Create and display a FileChooser widget used to pick the run to load.
# The start directory defaults to the original development location but can be
# overridden with the RTBT_OUTPUT_DIR environment variable, so the notebook is
# not tied to one user's home directory. If the chosen directory does not
# exist on this machine, start browsing from the current working directory.
output_dir = os.environ.get('RTBT_OUTPUT_DIR',
                            '/home/breck/git/codatmo/red_team_blue_team/output')
if not os.path.isdir(output_dir):
    output_dir = os.getcwd()
fc = FileChooser(output_dir)
display(fc)

FileChooser(path='/home/breck/git/codatmo/red_team_blue_team/output', filename='', title='HTML(value='', layou…

In [3]:
#@interact(num_samples=widgets.IntSlider(min=10, max=1000, step=10, value=10),
#          verbose = False)


def load_stan_run(num_samples, verbose):
    """Load the selected run's JSON config and its CSV chains.

    The file currently selected in the module-level ``fc`` chooser is parsed
    as JSON, and every file whose name ends in ``csv`` in the chooser's
    directory is read and stacked into a single DataFrame.

    Parameters
    ----------
    num_samples : int
        Maximum number of rows read from each CSV file.
    verbose : bool
        When true, print each CSV path as it is read and the final shape.

    Returns
    -------
    RunData
        Named tuple ``(config, fit)``: the parsed JSON and the stacked draws.
        ``fit`` carries an ``index`` column numbering the draws consecutively
        across all chains; it is an empty DataFrame when no CSV is found.
    """
    # Use a context manager so the config file handle is always closed.
    with open(fc.selected) as config_file:
        config = json.load(config_file)

    # os.listdir order is arbitrary; sort so chains are stacked reproducibly.
    stan_files = sorted(
        f"{fc.selected_path}/{f}"
        for f in os.listdir(fc.selected_path)
        if f.endswith('csv')
    )

    chains = []
    last_index = 0
    for stan_csv in stan_files:
        if verbose:
            print(stan_csv)
        # Read the file for *this* iteration. Reading stan_files[0] here would
        # make every chain a copy of the first one.
        chain = pd.read_csv(stan_csv, nrows=num_samples, comment='#')
        chain['index'] = range(last_index, last_index + len(chain))
        last_index += len(chain)
        chains.append(chain)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and growing a frame inside the loop is quadratic anyway.
    fit = pd.concat(chains) if chains else pd.DataFrame()
    if verbose:
        print(fit.shape)
    RunData = collections.namedtuple('RunData', 'config fit')
    return RunData(config, fit)
        
# Wrap load_stan_run in an interactive widget. The {'manual': True} option
# asks ipywidgets to run the function only on request rather than on every
# slider change; the loaded run is read later through rd1.result.
rd1 = interactive(
    load_stan_run,
    {'manual': True},
    num_samples=widgets.IntSlider(value=10, min=10, max=1000, step=10),
    verbose=False,
)

display(rd1)


interactive(children=(IntSlider(value=10, description='num_samples', max=1000, min=10, step=10), Checkbox(valu…

In [24]:
def graph_coefs(plot, rd, num_samples):
    """Add one line per sampled draw across the reported coefficients.

    Parameters
    ----------
    plot : plot object supporting ``+`` with a geom layer
        The plot the layer is added to.
    rd : object with ``fit`` and ``config`` attributes
        ``fit`` must contain the reported coefficient columns and ``index``.
    num_samples : int
        Number of rows randomly drawn from ``rd.fit``.

    Returns
    -------
    ``plot`` with a ``geom_line`` layer added (x = variable, y = value,
    one group per draw ``index``).
    """
    fit, _config = rd.fit, rd.config  # config is read but not needed here
    draws = fit.sample(n=num_samples)
    vars_2_report = ['iDay1_est', 'beta', 'gamma', 'deathRate', 'lambda_twitter']
    n_draws = len(draws)

    # Long format: each reported variable contributes one row per sampled draw,
    # variables laid out one after another in vars_2_report order.
    long_variable = [name for name in vars_2_report for _ in range(n_draws)]
    long_value = [v for name in vars_2_report for v in draws[name]]
    long_index = [i for _ in vars_2_report for i in draws['index']]

    df = pd.DataFrame({
        'variable': long_variable,
        'value': long_value,
        'index': long_index,
    })
    # Ordered categorical keeps the x axis in vars_2_report order.
    df['variable'] = pd.Categorical(df['variable'], categories=vars_2_report,
                                    ordered=True)

    coef_lines = geom_line(
        data=df,
        mapping=aes(x='variable', y='value', group='index'),
    )
    return plot + coef_lines

def _pred_time_steps(columns, var):
    """Return the sorted time indices ``t`` for which a ``{var}.{t}`` column exists."""
    pattern = re.compile(rf'{re.escape(var)}\.(\d+)')
    steps = []
    for col in columns:
        match = pattern.fullmatch(str(col))
        if match:
            steps.append(int(match.group(1)))
    return sorted(steps)


def graph_predictions(plot, rd, num_samples):
    """Add predicted-deaths and predicted-tweets trajectories to ``plot``.

    ``num_samples`` rows are drawn at random from ``rd.fit``; for each drawn
    row the ``pred_deaths.<t>`` and ``pred_tweets.<t>`` columns are plotted as
    one line over time.

    The time steps are taken from the columns actually present in the fit
    rather than a fixed 1..291 range, so runs with a different number of days
    are plotted in full instead of failing or being truncated.

    Parameters
    ----------
    plot : plot object supporting ``+`` with a geom layer
        The plot the layers are added to.
    rd : object with a ``fit`` attribute
        ``fit`` is a DataFrame holding the ``pred_*.<t>`` columns.
    num_samples : int
        Number of rows randomly drawn from ``rd.fit``.

    Returns
    -------
    ``plot`` with two ``geom_line`` layers added (deaths first, then tweets).

    Raises
    ------
    KeyError
        If the fit has no ``<t>``-suffixed columns for a reported variable.
    """
    sample = rd.fit.sample(n=num_samples)
    n_draws = len(sample)
    time = []
    draws = []
    values = []
    variables = []
    vars_2_report = ['pred_tweets', 'pred_deaths']
    for var in vars_2_report:
        steps = _pred_time_steps(sample.columns, var)
        if not steps:
            raise KeyError(f"no '{var}.<t>' columns found in fit")
        for t in steps:
            values.extend(sample[f'{var}.{t}'])
            time.extend([t] * n_draws)
            variables.extend([var] * n_draws)
            # Position within the sample identifies the draw, so each draw
            # forms one line per variable.
            draws.extend(range(n_draws))
    df = pd.DataFrame({'variable': variables,
                       'draws': draws,
                       'value': values,
                       'time': time})
    # One layer per variable; deaths are added before tweets.
    for var in ('pred_deaths', 'pred_tweets'):
        plot = plot + geom_line(data=df[df['variable'] == var],
                                mapping=aes(x='time', y='value', group='draws'))
    return plot

    

In [27]:
@interact(run_d = fixed(rd1.result), ylim=widgets.IntSlider(min=1, max=100000000, step=1000000, value=200000),
          add_sim = False,
          add_coefs = True,
          add_preds = True)
def graph(run_d, ylim, add_sim, add_coefs, add_preds):
    """Build and print a plot of one loaded run from the selected layers.

    Parameters
    ----------
    run_d : RunData or None
        The value of ``rd1.result`` captured when this cell was executed;
        ``None`` if the loader had not produced a result at that point.
    ylim : int
        Slider value. NOTE(review): accepted but not applied to the plot;
        presumably intended for ``coord_cartesian(ylim=(0, ylim))`` -- confirm
        before wiring, since it would also rescale the coefficient layer.
    add_sim : bool
        Add the simulation layer via ``graph_sim``.
    add_coefs : bool
        Add the coefficient layer via ``graph_coefs``.
    add_preds : bool
        Add the prediction layers via ``graph_predictions``.
    """
    if run_d is None:
        # rd1.result is captured once, at cell execution, so a later load is
        # not picked up automatically.
        print('No run loaded yet: run the loader widget, then re-run this cell.')
        return
    plot = ggplot()
    if add_sim:
        # Previously a bare `(config, plot)` tuple: a no-op that raised
        # NameError on the undefined `config`. graph_sim must be defined in
        # the kernel before this option is ticked.
        plot = graph_sim(run_d.config, plot)
    if add_coefs:
        plot = graph_coefs(plot=plot, rd=run_d, num_samples=10)
    if add_preds:
        plot = graph_predictions(plot=plot, rd=run_d, num_samples=10)
    print(plot)
           
      #  geom_point(size=0.25) +
      #  geom_line(data=, mapping=(aes(x='day', y='count')), color='red') +
      #  geom_line(data=df_long_brazil_tweets, mapping=(aes(x='day', y='count')), color='blue') +
      # geom_label(data = df_label, mapping=aes(label='label')) +
      
        
       # ))
      #coord_cartesian(ylim=(0, max_y),xlim=(0,n_days + 50))
    #graph_sim/graph_data
    #graph_ode
    #graph_predictions
    

interactive(children=(IntSlider(value=200000, description='ylim', max=100000000, min=1, step=1000000), Checkbo…

In [22]:
# Show the first 30 column names of the loaded fit.
print(rd1.result.fit.columns[:30])

Index(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__',
       'divergent__', 'energy__', 'gamma', 'beta', 'deathRate', 'iDay1_est',
       'lambda_twitter', 'normal_tweets_sd', 'normal_deaths_sd',
       'iDay_est_exp_rate', 'compartmentStartValues.1',
       'compartmentStartValues.2', 'compartmentStartValues.3',
       'compartmentStartValues.4', 'y.1.1', 'y.2.1', 'y.3.1', 'y.4.1', 'y.5.1',
       'y.6.1', 'y.7.1', 'y.8.1', 'y.9.1', 'y.10.1', 'y.11.1'],
      dtype='object')


In [None]:
def graph_sim(config, plot, hide_s=False):
    """Overlay simulated compartment counts and tweets for one run on ``plot``.

    Python port of the R helper that previously occupied this cell (the body
    was R syntax under a Python ``def`` and could not be parsed).

    Parameters
    ----------
    config : mapping
        Run configuration. Assumed -- from the fields the R original read;
        confirm against the actual run JSON -- to hold ``n_days`` plus per-day
        series under ``tweets``, ``s``, ``i``, ``r``, ``d`` and, optionally,
        ``t``.
    plot : plot object supporting ``+`` with a geom layer
        The plot the layers are added to.
    hide_s : bool, default False
        When true, the ``s`` (susceptible) series is left out of the plot.

    Returns
    -------
    ``plot`` with a ``geom_point`` layer (x = day, y = count, coloured by
    series) and, when a label day can be determined, a ``geom_label`` layer
    naming each series.
    """
    if 't' in config:
        compartment_names = ['s', 'i', 'r', 't', 'd']
    else:
        compartment_names = ['s', 'i', 'r', 'd']

    # np.ravel plays the role of R's unlist(): it accepts a scalar, a flat
    # list or a nested list and yields a flat 1-D array.
    n_days = int(np.ravel(config['n_days'])[0])
    columns = {'day': np.arange(1, n_days + 1),
               'tweets': np.ravel(config['tweets'])}
    for name in compartment_names:
        columns[name] = np.ravel(config[name])
    sim_df = pd.DataFrame(columns)

    if hide_s:
        # 's' is always the first entry of compartment_names.
        compartment_names = compartment_names[1:]

    # Labels are placed on the first day where the infected count reaches
    # its own mean.
    i_mean = sim_df['i'].mean()
    gt_mean_days = sim_df.loc[sim_df['i'] >= i_mean, 'day']

    sim_long_df = sim_df.melt(id_vars=['day'],
                              value_vars=['tweets'] + compartment_names,
                              var_name='compartment_sim',
                              value_name='count')

    # x is given explicitly: the base plot is created without a global
    # aesthetic mapping to inherit it from.
    plot = plot + geom_point(data=sim_long_df,
                             mapping=aes(x='day', y='count',
                                         color='compartment_sim'),
                             size=.5)
    if len(gt_mean_days) > 0:
        display_day = gt_mean_days.iloc[0]
        # geom_label stands in for ggrepel's geom_label_repel, which this
        # notebook does not import.
        plot = plot + geom_label(
            data=sim_long_df[sim_long_df['day'] == display_day],
            mapping=aes(x='day', y='count', label='compartment_sim',
                        color='compartment_sim'))
    return plot
