### Country Vx Throughput Analysis
 
**Note:**

* Inputs:
  - covax_supply_chain_analytics.analysis_vx_throughput_data_cleaned
  - covax_supply_chain_analytics.covax_sca_country_dimension
  - covax_supply_chain_analytics.country_dimension.iso_mapping
  - covax_supply_chain_analytics.linksbridge_delivered_supply (currently read in place of covax_supply_chain_analytics.vx_received_supply)
  - covax_supply_chain_analytics.analysis_vx_throughput_supply
  - covax_supply_chain_analytics.owid_covid_data
  - covax_supply_chain_analytics.population_coverage_measures
  - covax_supply_chain_analytics.country_characteristics
* Output:
  - covax_supply_chain_analytics.analysis_vx_throughput_output_daily

* Libraries: 
  - Python

* Built by: Jeremy Cooper
* Current owner: Jeremy Cooper
* Initial Build Date: 04/28/2021
* Latest Build Date: 01/03/2022

### Environment Management

In [0]:
# dbutils.widgets.removeAll()

In [0]:
# # Dataset Name, will be used for the Metastore Table, Folder Name for transformed outputs
# dbutils.widgets.text("Dataset", "dataset_name")

# # Project Name will be used for folder Name for transformed outputs
# dbutils.widgets.text("Project", "project_name")

# # Team name should be consistent with the Blob Storage Container
# dbutils.widgets.text("Partner","partner_name")

# # Source name is used as the folder directly under raw/ and transformed/ in the storage path
# dbutils.widgets.text("Source","data_source")

# dbutils.widgets.text("iso_code", "")

#### Notebook Setup

##### Import any libraries or nested notebooks

In [0]:
import pandas as pd

from delta.tables import *
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql.window import Window

##### Initialize File Paths

In [0]:
# Root of the partner mount and the <source>/<dataset> branch below it,
# both taken from notebook widgets.
storage_root = "".join(["/mnt/", dbutils.widgets.get("Partner"), "/"])
storage_branch = "".join(["/", dbutils.widgets.get("Source"), "/", dbutils.widgets.get("Dataset")])

# Raw and transformed locations share the same branch.
raw_storage_path = "".join([storage_root, "raw", storage_branch])
transformed_storage_path = "".join([storage_root, "transformed", storage_branch])

# The same two locations with the "/dbfs" prefix.
dbfs_raw_storage_path = "".join(["/dbfs", raw_storage_path])
dbfs_transformed_storage_path = "".join(["/dbfs", transformed_storage_path])

for storage_path in (raw_storage_path, transformed_storage_path):
    print(storage_path)

##### Define any functions

In [0]:
# Forward-fill helper: performs the equivalent of a pandas ffill() on a Spark DataFrame.

def fill_forward(df, id_column, key_column, fill_column, new_column):
    """Forward-fill nulls in ``fill_column`` within each ``id_column`` group.

    Rows are ordered by ``key_column`` inside each partition, and every row
    receives the most recent non-null value of ``fill_column`` seen up to and
    including that row. Rows that precede the first non-null value stay null.

    Args:
        df: Spark DataFrame to fill.
        id_column: column (or list of columns) defining the partitions.
        key_column: column (or list of columns) defining the order within a partition.
        fill_column: column whose nulls are filled.
        new_column: name of the output column; pass the same name as
            ``fill_column`` to overwrite it in place.

    Returns:
        A new DataFrame with ``new_column`` added (or replaced).
    """
    # Frame = start of the partition through the current row. The named
    # boundaries are the documented way to express this and do not depend on
    # the `sys` module being in scope.
    running_frame = (
        Window.partitionBy(id_column)
        .orderBy(key_column)
        .rowsBetween(Window.unboundedPreceding, Window.currentRow)
    )

    # ignorenulls=True makes last() skip nulls, i.e. return the last *non-null* value in the frame.
    return df.withColumn(new_column, last(fill_column, ignorenulls=True).over(running_frame))
  

### Get Data

In [0]:
# Country to highlight in the per-country display() calls; read from the "iso_code" notebook widget.
iso_code = dbutils.widgets.get("iso_code")

In [0]:
# ISO lookup: use this to get iso_code from country
iso_mapping = spark.sql(
    "SELECT * FROM country_dimension.iso_mapping"
)

# Country-level information
country_dimension = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.covax_sca_country_dimension"
)

# Supply data (previous source: covax_supply_chain_analytics.vx_received_supply)
received_supply = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.linksbridge_delivered_supply"
)

# Alternate (UTI) supply series
uti_supply = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_supply"
)

# Dose administration data, used for comparison
owid = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.owid_covid_data"
)

# Population numbers, reduced to the two columns kept here
pop = (
    spark.sql("SELECT * FROM covax_supply_chain_analytics.population_coverage_measures")
    .select('iso_code', 'population_total_both')
)

# Primary (cleaned) throughput data
who = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_data_cleaned"
)

# Country characteristics
cc = spark.sql(
    "SELECT * FROM covax_supply_chain_analytics.country_characteristics"
)

### Transformation

In [0]:
# Country attributes carried through the analysis.
# The AMC92-only restriction, .filter(col('is_amc92')==1), is left disabled.
country = country_dimension.select(
    'iso_code',
    'country_name_friendly',
    'sub_region_name',
    'region_name',
    'wb_income_group',
    'is_amc92',
    'affiliation',
    'min_vx_rollout_date',
    'first_covax_arrival_date',
    'first_vx_shipment_received_date',
)

display(country)

iso_code,country_name_friendly,sub_region_name,region_name,wb_income_group,is_amc92,affiliation,min_vx_rollout_date,first_covax_arrival_date,first_vx_shipment_received_date
TCA,Turks & Caicos Islands,Latin America and the Caribbean,Americas,H,0,,2021-01-11,,
UKR,Ukraine,Eastern Europe,Europe,LM,1,,2021-02-24,,2021-02-23 00:00:00
ARE,United Arab Emirates,Western Asia,Asia,H,0,,2020-12-14,,
TON,Tonga,Polynesia,Oceania,UM,1,,2021-04-14,,2021-03-31 00:00:00
TJK,Tajikistan,Central Asia,Asia,LM,1,,2021-04-07,,2021-03-08 00:00:00
UGA,Uganda,Sub-Saharan Africa,Africa,L,1,African Union,2021-03-10,,2021-03-05 00:00:00
TKL,Tokelau,Polynesia,Oceania,UNK,0,,2021-07-20,,
TKM,Turkmenistan,Central Asia,Asia,UM,0,,2021-02-24,,
THA,Thailand,South-eastern Asia,Asia,UM,0,,2021-02-28,,
TUV,Tuvalu,Polynesia,Oceania,UM,1,,2021-04-12,,2021-04-08 00:00:00


In [0]:
# Keep only the OWID cumulative dose count, renamed so it can sit next to our own total_doses.
owid1 = (
    owid
    .select('iso_code', 'date', 'total_vaccinations_int')
    .withColumnRenamed('total_vaccinations_int', 'total_doses_owid')
)

display(owid1)

iso_code,date,total_doses_owid
ARM,2020-02-03,
ARM,2020-02-04,
ARM,2020-02-05,
ARM,2020-02-06,
ARM,2020-02-07,
ARM,2020-02-08,
ARM,2020-02-09,
ARM,2020-02-10,
ARM,2020-02-11,
ARM,2020-02-12,


In [0]:
# Supply side:
#   1. aggregate deliveries to one row per country and received date
#   2. generate the cumulative supply per country, ordered by received date
#   3. drop dates whose aggregated delivery is zero
# The output columns are iso_code, date, doses_received, cumulative_doses_received.
received_supply1 = (
    received_supply
    .filter(col('iso_code').isNotNull())
    .withColumn('received_date', to_date(col('date')))
    .groupBy('iso_code', 'received_date')
    .agg(sum('doses_delivered').alias('doses_received'))
    # Running total from the first delivery up to the current row. After the
    # groupBy above there is exactly one row per (iso_code, received_date), so
    # no further de-duplication per date is needed.
    .withColumn(
        'cumulative_doses_received',
        sum('doses_received').over(
            Window.partitionBy('iso_code')
            .orderBy('received_date')
            .rowsBetween(Window.unboundedPreceding, Window.currentRow)
        ),
    )
    .filter(col('doses_received') != 0)
    # positional rename: received_date becomes date
    .toDF('iso_code', 'date', 'doses_received', 'cumulative_doses_received')
)

# Order by the column name that actually exists after the rename above.
display(received_supply1.orderBy('iso_code', 'date'))
display(received_supply1.filter(col('iso_code') == iso_code))

iso_code,date,doses_received,cumulative_doses_received
ABW,2021-03-01,28343.0,28343.0
ABW,2021-04-01,56209.0,84552.0
ABW,2021-05-01,41018.0,125570.0
ABW,2021-06-01,14412.0,139982.0
ABW,2021-07-01,12378.0,152360.0
ABW,2021-08-01,12285.0,164645.0
ABW,2021-09-01,7152.0,171797.0
ABW,2021-10-01,3550.0,175347.0
ABW,2021-11-01,2461.0,177808.0
ABW,2021-12-01,2328.0,180136.0


iso_code,date,doses_received,cumulative_doses_received
UGA,2021-03-01,964000.0,964000.0
UGA,2021-06-01,175200.0,1139200.0
UGA,2021-07-01,586080.0,1725280.0
UGA,2021-08-01,428080.0,2153360.0
UGA,2021-09-01,3312050.0,5465410.0
UGA,2021-10-01,3447400.0,8912810.0
UGA,2021-11-01,8178280.0,17091090.0
UGA,2021-12-01,16834650.0,33925740.0
UGA,2022-01-01,1684800.0,35610540.0
UGA,2022-02-01,2431660.0,38042200.0


In [0]:
# Alternate supply series (sourced by Marta).
# Step 1: carry the last known cumulative total forward over missing values.
# Step 2: cast/clean, then derive per-date doses as the difference between
#         consecutive cumulative totals (the first row of a country keeps its
#         cumulative total as its dose count).
uti_supply1 = (
    fill_forward(uti_supply, 'iso_code', 'date', 'cumulative_doses_received_uti', 'cumulative_doses_received_uti')
    .withColumn('cumulative_doses_received_uti', col('cumulative_doses_received_uti').cast(DoubleType()))
    .withColumn('date', to_date(col('date')))
    .fillna(0)
    .withColumn(
        'doses_received',
        col('cumulative_doses_received_uti')
        - lag(col('cumulative_doses_received_uti')).over(Window.partitionBy('iso_code').orderBy('date')),
    )
    .withColumn(
        'doses_received',
        when(col('doses_received').isNull(), col('cumulative_doses_received_uti'))
        .otherwise(col('doses_received')),
    )
    .select('iso_code', 'date', 'doses_received', 'cumulative_doses_received_uti')
    .withColumnRenamed('cumulative_doses_received_uti', 'cumulative_doses_received')
)

display(uti_supply1)

iso_code,date,doses_received,cumulative_doses_received
AFG,2021-03-01,968000.0,968000.0
AFG,2021-07-01,34088000.0,35056000.0
AFG,2021-08-01,800.0,35056800.0
AFG,2021-09-01,0.0,35056800.0
AFG,2021-10-01,-29808000.0,5248800.0
AFG,2021-11-01,151200.0,5400000.0
AFG,2021-12-01,3004000.0,8404000.0
AGO,2021-05-01,1509620.0,1509620.0
AGO,2021-06-01,101310.0,1610930.0
AGO,2021-07-01,224765.0,1835695.0


In [0]:
# define supply threshold
# Multiplier applied to cumulative_doses_received to build the 'cumulative_supply_20' cutoff;
# rows whose effective_supply falls below that cutoff are flagged supply_constrained.
# NOTE(review): the derived column name suggests a 20% threshold, but the value here is 0.0 - confirm which is intended.
supply_threshold = 0.0

In [0]:
# Reporting / data-quality flags from the primary dataset, kept aside so they
# can be joined back onto the daily series later.
df_flags = who.select(
    'iso_code',
    'date',
    'is_latest_week_reported',
    'manual_adjustment',
    'is_data_error',
    'to_remove',
)

display(df_flags)

iso_code,date,is_latest_week_reported,manual_adjustment,is_data_error,to_remove
SOM,2021-09-01,1,0,0,0
FRA,2021-07-18,0,1,0,0
ABW,2021-09-17,0,0,0,0
ABW,2021-10-29,0,0,0,0
FRA,2021-04-25,0,1,0,0
SOM,2021-06-02,1,0,0,0
SOM,2021-05-30,1,0,0,0
ABW,2021-02-26,0,0,0,0
ABW,2022-02-18,0,0,0,0
FRA,2022-02-27,0,1,0,0


In [0]:
# This cell builds the daily demand series and joins it to cumulative supply:
#   1. filter out records we want to drop
#   2. attach country attributes; derive days since vx intro and days since the previous report
#      (for a country's first report, days since vx intro is used to smooth things out)
#   3. expand to one row per country per day, from vx rollout date to the max report date
#   4. linearly interpolate the cumulative measures between reported dates
#   5. join cumulative supply (less a 10% wastage assumption) and flag supply-constrained days
w = Window.partitionBy('iso_code').orderBy('date')

# cumulative measures that get interpolated to daily values
measure_columns = ['total_doses', 'at_least_one_dose', 'fully_vaccinated', 'persons_booster_add_dose']

df1 = (
    who
    .filter(col('to_remove') == 0)
    .filter(col('to_remove_1st') == 0)
    .filter(col('to_remove_2nd') == 0)
)

df1 = (
    df1
    .join(country, 'iso_code', how='left')
    .filter(col('country_name_friendly').isNotNull())
    .withColumn('min_date', min('date').over(Window.partitionBy('iso_code')))
    # a rollout date on/after the first report is moved to the day before that first report
    .withColumn(
        'min_vx_rollout_date',
        when(col('min_vx_rollout_date') >= col('min_date'), date_sub(col('min_date'), 1))
        .otherwise(col('min_vx_rollout_date')),
    )
    .withColumn('days_since_vx_intro', datediff(col('date'), col('min_vx_rollout_date')))
    .withColumn('date_prev', lag(col('date')).over(w))
    .withColumn(
        'days_since_prev',
        when(col('date_prev').isNull(), col('days_since_vx_intro'))
        .otherwise(datediff(col('date'), col('date_prev'))),
    )
)

# create date range from vx rollout date to max report date (one row per country per day)
df_daterange = (
    df1
    .withColumn('date_max', max('date').over(Window.partitionBy('iso_code')))
    .withColumn('min_vx_rollout_date', to_date(col('min_vx_rollout_date')))
    .select('iso_code', 'min_vx_rollout_date', 'date_max')
    .drop_duplicates()
    .withColumn('date', explode(expr('sequence(min_vx_rollout_date, date_max, interval 1 day)')))
    .select('iso_code', 'date')
)

# subset to input measures
df2 = df1.select('iso_code', 'date', 'min_vx_rollout_date', *measure_columns)

# join the reported values onto the daily grid
df_inter = (
    df_daterange
    .join(df2, ['iso_code', 'date'], how='left')
    .withColumn('date_start', min('date').over(Window.partitionBy('iso_code')))
)

# set each measure to 0 on the first day of the range (if currently null) so
# interpolation has a starting point
for measure in measure_columns:
    is_missing_start = (col('date') == col('date_start')) & col(measure).isNull()
    df_inter = df_inter.withColumn(measure, when(is_missing_start, 0).otherwise(col(measure)))

# create placeholder columns of correct data type
# NOTE(review): IntegerType tops out at 2,147,483,647 - confirm no country's cumulative
# total can exceed that; if it can, widen to LongType here and in any downstream table schema.
for measure in measure_columns:
    df_inter = df_inter.withColumn(measure + '_int', col(measure).cast(IntegerType()))

# pandas_udf requires a schema to be defined; the UDF returns the same columns it receives
schema = df_inter.schema

# interpolation function
@pandas_udf(schema, functionType=PandasUDFType.GROUPED_MAP)
def interpolate_measures(df):
    """Linearly interpolate the ``*_int`` measure columns for one group, in date order.

    Receives the rows of a single group as a pandas DataFrame (the caller groups
    by iso_code) and returns them sorted by iso_code/date with gaps in the four
    ``*_int`` columns filled forward by linear interpolation.
    """
    # sort a copy instead of mutating the frame handed to the UDF
    ordered = df.sort_values(by=['iso_code', 'date'], ascending=True)
    for int_column in ['total_doses_int', 'at_least_one_dose_int', 'fully_vaccinated_int', 'persons_booster_add_dose_int']:
        ordered[int_column] = ordered[int_column].interpolate(method='linear', limit_direction='forward')
    return ordered

df_inter = df_inter.groupBy('iso_code').apply(interpolate_measures)

df3 = (
    df_inter
    .join(country.select('iso_code', 'country_name_friendly'), 'iso_code', how='left')
    # min_vx_rollout_date arrives via the left join with df2, so it is only
    # populated on dates that matched a row in the reported data
    .withColumn('is_original_reported', when(col('min_vx_rollout_date').isNotNull(), 1).otherwise(0))
    .select('iso_code', 'date', 'country_name_friendly', 'min_vx_rollout_date', 'total_doses_int', 'at_least_one_dose_int', 'fully_vaccinated_int', 'persons_booster_add_dose_int', 'is_original_reported')
    # positional rename: the interpolated *_int columns take over the original measure names
    .toDF('iso_code', 'date', 'country_name_friendly', 'min_vx_rollout_date', 'total_doses', 'at_least_one_dose', 'fully_vaccinated', 'persons_booster_add_dose', 'is_original_reported')
    # spread the rollout date to every daily row of the country
    .withColumn('min_vx_rollout_date', min('min_vx_rollout_date').over(Window.partitionBy('iso_code')))
)

# join demand and supply: full join, fill forward, and then filter back to the
# demand rows (i.e. the equivalent of a "left join" with supply carried forward).
# To use the alternate UTI supply series instead, join uti_supply1 here in place
# of received_supply1.
df4 = df3.join(received_supply1, ['iso_code', 'date'], how='full')
df4 = fill_forward(df4, 'iso_code', 'date', 'cumulative_doses_received', 'cumulative_doses_received')
# take out 10% for wastage assumption
df4 = df4.withColumn('cumulative_doses_received', col('cumulative_doses_received')*.9)
df4 = df4.filter(col('country_name_friendly').isNotNull())

# compare supply with demand
df4 = (
    df4
    # cumulative supply is raised to at least the doses already administered
    .withColumn(
        'cumulative_doses_received',
        when(col('cumulative_doses_received') < col('total_doses'), col('total_doses'))
        .otherwise(col('cumulative_doses_received')),
    )
    # lag over w is the previous row of the country, despite the "_prev_week" name
    .withColumn('total_doses_prev_week', lag(col('total_doses')).over(w))
    .withColumn('effective_supply', col('cumulative_doses_received') - col('total_doses_prev_week'))
    .withColumn('cumulative_supply_20', col('cumulative_doses_received')*supply_threshold)
    # no otherwise(): the flag stays null when effective_supply is null
    .withColumn(
        'supply_constrained',
        when(col('effective_supply') < col('cumulative_supply_20'), 1)
        .when(col('effective_supply') >= col('cumulative_supply_20'), 0),
    )
    .drop('total_doses_prev_week')
)

display(df4)

iso_code,date,country_name_friendly,min_vx_rollout_date,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,is_original_reported,doses_received,cumulative_doses_received,effective_supply,cumulative_supply_20,supply_constrained
ABW,2021-02-17,Aruba,2021-02-17,0,0,0,0,0,,,,,
ABW,2021-02-18,Aruba,2021-02-17,6,6,0,0,0,,,,,
ABW,2021-02-19,Aruba,2021-02-17,12,12,0,0,1,,,,,
ABW,2021-02-20,Aruba,2021-02-17,424,424,0,0,0,,,,,
ABW,2021-02-21,Aruba,2021-02-17,836,836,0,0,0,,,,,
ABW,2021-02-22,Aruba,2021-02-17,1248,1248,0,0,0,,,,,
ABW,2021-02-23,Aruba,2021-02-17,1660,1660,0,0,0,,,,,
ABW,2021-02-24,Aruba,2021-02-17,2072,2072,0,0,0,,,,,
ABW,2021-02-25,Aruba,2021-02-17,2484,2484,0,0,0,,,,,
ABW,2021-02-26,Aruba,2021-02-17,2896,2896,0,0,1,,,,,


In [0]:
# key (suffix -> source measure)
# _td = total_doses
# _1d = at_least_one_dose
# _fv = fully_vaccinated

# define n days: frame offsets are inclusive of the current row,
# so -27..0 spans 28 rows and -55..0 spans 56 rows
days_in_weeks4 = 27
days_in_weeks8 = 55

# window partitions (row-based frames over each country's rows in date order)
w_rolling_avg_4 = Window.partitionBy('iso_code').orderBy('date').rowsBetween(-days_in_weeks4, 0)
w_rolling_avg_8 = Window.partitionBy('iso_code').orderBy('date').rowsBetween(-days_in_weeks8, 0)
# the same 28-row span, shifted back by 7 and by 28 rows respectively
w_rolling_avg_4_lastweek = Window.partitionBy('iso_code').orderBy('date').rowsBetween(-34, -7)
w_rolling_avg_4_lastmonth = Window.partitionBy('iso_code').orderBy('date').rowsBetween(-55, -28)


def add_rate_measures(df, measure, suffix):
    """Append the daily-rate and rolling-average columns for one cumulative measure.

    Columns added, in this order (``<s>`` is ``suffix``):
      daily_rate_<s>                      row-over-row change in ``measure``; falls back to
                                          the measure itself when that difference is null
                                          (e.g. on a country's first row)
      rolling_4_week_avg_<s>              mean of daily_rate over w_rolling_avg_4
      rolling_8_week_avg_<s>              mean of daily_rate over w_rolling_avg_8
      rolling_4_week_avg_<s>_lastweek     mean of daily_rate over w_rolling_avg_4_lastweek
      rolling_4_week_avg_<s>_lastmonth    mean of daily_rate over w_rolling_avg_4_lastmonth
      max_rolling_4_week_avg_<s>          per-country max of the 4-week average
      med_rolling_4_week_avg_<s>          per-country approximate median of the 4-week average

    Args:
        df: Spark DataFrame with 'iso_code', 'date' and ``measure`` columns.
        measure: name of the cumulative column to differentiate.
        suffix: short tag appended to every generated column name.

    Returns:
        A new DataFrame with the seven columns above appended.
    """
    by_country = Window.partitionBy('iso_code')
    daily_rate = 'daily_rate_' + suffix
    rolling_4 = 'rolling_4_week_avg_' + suffix
    rolling_8 = 'rolling_8_week_avg_' + suffix
    change = col(measure) - col('prev_total')

    return (
        df
        # temporary helper column, dropped again at the end
        .withColumn('prev_total', lag(col(measure)).over(by_country.orderBy('date')))
        .withColumn(daily_rate, when(change.isNull(), col(measure)).otherwise(change))
        .withColumn(rolling_4, avg(daily_rate).over(w_rolling_avg_4))
        .withColumn(rolling_8, avg(daily_rate).over(w_rolling_avg_8))
        .withColumn(rolling_4 + '_lastweek', avg(daily_rate).over(w_rolling_avg_4_lastweek))
        .withColumn(rolling_4 + '_lastmonth', avg(daily_rate).over(w_rolling_avg_4_lastmonth))
        .withColumn('max_' + rolling_4, max(rolling_4).over(by_country))
        .withColumn('med_' + rolling_4, expr('percentile_approx({0}, .5)'.format(rolling_4)).over(by_country))
        .drop('prev_total')
    )


# create measure set for total_doses
df5 = add_rate_measures(df4, 'total_doses', 'td')

# create measure set for at_least_one_dose
df6 = add_rate_measures(df5, 'at_least_one_dose', '1d')

# create measure set for fully_vaccinated
df7 = add_rate_measures(df6, 'fully_vaccinated', 'fv')


df8 = df7

display(df8.orderBy('iso_code', 'date'))

iso_code,date,country_name_friendly,min_vx_rollout_date,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,is_original_reported,doses_received,cumulative_doses_received,effective_supply,cumulative_supply_20,supply_constrained,daily_rate_td,rolling_4_week_avg_td,rolling_8_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,daily_rate_1d,rolling_4_week_avg_1d,rolling_8_week_avg_1d,rolling_4_week_avg_1d_lastweek,rolling_4_week_avg_1d_lastmonth,max_rolling_4_week_avg_1d,med_rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,rolling_8_week_avg_fv,rolling_4_week_avg_fv_lastweek,rolling_4_week_avg_fv_lastmonth,max_rolling_4_week_avg_fv,med_rolling_4_week_avg_fv
ABW,2021-02-17,Aruba,2021-02-17,0,0,0,0,0,,,,,,0,0.0,0.0,,,1639.392857142857,264.0,0,0.0,0.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-18,Aruba,2021-02-17,6,6,0,0,0,,,,,,6,3.0,3.0,,,1639.392857142857,264.0,6,3.0,3.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-19,Aruba,2021-02-17,12,12,0,0,1,,,,,,6,4.0,4.0,,,1639.392857142857,264.0,6,4.0,4.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-20,Aruba,2021-02-17,424,424,0,0,0,,,,,,412,106.0,106.0,,,1639.392857142857,264.0,412,106.0,106.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-21,Aruba,2021-02-17,836,836,0,0,0,,,,,,412,167.2,167.2,,,1639.392857142857,264.0,412,167.2,167.2,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-22,Aruba,2021-02-17,1248,1248,0,0,0,,,,,,412,208.0,208.0,,,1639.392857142857,264.0,412,208.0,208.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-23,Aruba,2021-02-17,1660,1660,0,0,0,,,,,,412,237.14285714285717,237.14285714285717,,,1639.392857142857,264.0,412,237.14285714285717,237.14285714285717,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143
ABW,2021-02-24,Aruba,2021-02-17,2072,2072,0,0,0,,,,,,412,259.0,259.0,0.0,,1639.392857142857,264.0,412,259.0,259.0,0.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143
ABW,2021-02-25,Aruba,2021-02-17,2484,2484,0,0,0,,,,,,412,276.0,276.0,3.0,,1639.392857142857,264.0,412,276.0,276.0,3.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143
ABW,2021-02-26,Aruba,2021-02-17,2896,2896,0,0,1,,,,,,412,289.6,289.6,4.0,,1639.392857142857,264.0,412,289.6,289.6,4.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143


In [0]:
# Show the interpolated daily series for the selected country, in date order.
print(iso_code)
display(
    df8
    .where(col('iso_code') == iso_code)
    .sort('date')
)

iso_code,date,country_name_friendly,min_vx_rollout_date,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,is_original_reported,doses_received,cumulative_doses_received,effective_supply,cumulative_supply_20,supply_constrained,daily_rate_td,rolling_4_week_avg_td,rolling_8_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,daily_rate_1d,rolling_4_week_avg_1d,rolling_8_week_avg_1d,rolling_4_week_avg_1d_lastweek,rolling_4_week_avg_1d_lastmonth,max_rolling_4_week_avg_1d,med_rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,rolling_8_week_avg_fv,rolling_4_week_avg_fv_lastweek,rolling_4_week_avg_fv_lastmonth,max_rolling_4_week_avg_fv,med_rolling_4_week_avg_fv
UGA,2021-03-10,Uganda,2021-03-10,0,0,0,0,0,,867600.0,,0.0,,0,0.0,0.0,,,183164.67857142855,27581.964285714286,0,0.0,0.0,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-11,Uganda,2021-03-10,405,405,0,0,0,,867600.0,867600.0,0.0,0.0,405,202.5,202.5,,,183164.67857142855,27581.964285714286,405,202.5,202.5,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-12,Uganda,2021-03-10,810,810,0,0,0,,867600.0,867195.0,0.0,0.0,405,270.0,270.0,,,183164.67857142855,27581.964285714286,405,270.0,270.0,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-13,Uganda,2021-03-10,1215,1215,0,0,1,,867600.0,866790.0,0.0,0.0,405,303.75,303.75,,,183164.67857142855,27581.964285714286,405,303.75,303.75,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-14,Uganda,2021-03-10,3719,1104,0,0,0,,867600.0,866385.0,0.0,0.0,2504,743.8,743.8,,,183164.67857142855,27581.964285714286,-111,220.8,220.8,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-15,Uganda,2021-03-10,6223,994,0,0,0,,867600.0,863881.0,0.0,0.0,2504,1037.1666666666667,1037.1666666666667,,,183164.67857142855,27581.964285714286,-110,165.66666666666666,165.66666666666666,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-16,Uganda,2021-03-10,8727,883,0,0,0,,867600.0,861377.0,0.0,0.0,2504,1246.7142857142858,1246.7142857142858,,,183164.67857142855,27581.964285714286,-111,126.14285714285714,126.14285714285714,,,164794.92857142858,18112.17857142857,0,0.0,0.0,,,311943.3214285714,4508.928571428572
UGA,2021-03-17,Uganda,2021-03-10,11231,773,0,0,0,,867600.0,858873.0,0.0,0.0,2504,1403.875,1403.875,0.0,,183164.67857142855,27581.964285714286,-110,96.625,96.625,0.0,,164794.92857142858,18112.17857142857,0,0.0,0.0,0.0,,311943.3214285714,4508.928571428572
UGA,2021-03-18,Uganda,2021-03-10,13735,662,0,0,0,,867600.0,856369.0,0.0,0.0,2504,1526.111111111111,1526.111111111111,202.5,,183164.67857142855,27581.964285714286,-111,73.55555555555556,73.55555555555556,202.5,,164794.92857142858,18112.17857142857,0,0.0,0.0,0.0,,311943.3214285714,4508.928571428572
UGA,2021-03-19,Uganda,2021-03-10,16240,552,0,0,0,,867600.0,853865.0,0.0,0.0,2505,1624.0,1624.0,270.0,,183164.67857142855,27581.964285714286,-110,55.2,55.2,270.0,,164794.92857142858,18112.17857142857,0,0.0,0.0,0.0,,311943.3214285714,4508.928571428572


In [0]:
# Attach country characteristics (inner join) and the OWID series (left join),
# then express the total-dose rolling rates per 100 population.
df9 = (
    df8
    .join(cc, 'iso_code', how='inner')
    .join(owid1, ['iso_code', 'date'], how='left')
    .withColumn('rolling_4_week_avg_td_per100', (col('rolling_4_week_avg_td')/col('population'))*100)
    .withColumn('rolling_8_week_avg_td_per100', (col('rolling_8_week_avg_td')/col('population'))*100)
    .withColumn('max_rolling_4_week_avg_td_per100', (col('max_rolling_4_week_avg_td')/col('population'))*100)
)

# One representative originally-reported date per country and week (weeks are
# keyed by the Friday on or after the date): the row with the week's highest
# total_doses, and among those the latest date. Also marks each country's most
# recent week and numbers its weeks in order.
df_date_week = (
    df9
    .filter(col('is_original_reported')==1)
    .select('iso_code', 'date', 'total_doses')
    .withColumn('date_week', to_date(next_day(date_add(col('date'), -1), 'Fri')))
    .withColumn('week_max', max(col('total_doses')).over(Window.partitionBy('iso_code', 'date_week')))
    .filter(col('week_max')==col('total_doses'))
    .withColumn('date_max', max(col('date')).over(Window.partitionBy('iso_code', 'date_week')))
    .filter(col('date_max')==col('date'))
    .withColumn('max_date_week', max(col('date_week')).over(Window.partitionBy('iso_code')))
    .withColumn('is_latest', when(col('max_date_week')==col('date_week'), 1).otherwise(0))
    .withColumn('week_num', row_number().over(Window.partitionBy('iso_code').orderBy('date_week')))
    .select('iso_code', 'date', 'date_week', 'max_date_week', 'is_latest', 'week_num')
)

# Sanity check: lists any country/week that still has more than one row.
display(
    df_date_week
    .groupBy('iso_code', 'date_week')
    .agg(count('*'))
    .filter(col('count(1)')>1)
)

# Bring the weekly markers and the latest-week-reported flag onto the daily rows.
df10 = (
    df9
    .join(df_date_week, ['iso_code', 'date'], how='left')
    .join(df_flags.select('iso_code', 'is_latest_week_reported').drop_duplicates(), 'iso_code', how='left')
)

# Flag rows whose total_doses equals the previous row's value for the same country.
df11 = (
    df10
    .withColumn('prev_week_val', lag(col('total_doses')).over(Window.partitionBy('iso_code').orderBy('date')))
    .withColumn('no_change_from_previous', when(col('total_doses')==col('prev_week_val'), 1).otherwise(0))
    .drop('prev_week_val')
)

display(df11.orderBy('iso_code', 'date'))

iso_code,date_week,count(1)


iso_code,date,country_name_friendly,min_vx_rollout_date,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,is_original_reported,doses_received,cumulative_doses_received,effective_supply,cumulative_supply_20,supply_constrained,daily_rate_td,rolling_4_week_avg_td,rolling_8_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,daily_rate_1d,rolling_4_week_avg_1d,rolling_8_week_avg_1d,rolling_4_week_avg_1d_lastweek,rolling_4_week_avg_1d_lastmonth,max_rolling_4_week_avg_1d,med_rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,rolling_8_week_avg_fv,rolling_4_week_avg_fv_lastweek,rolling_4_week_avg_fv_lastmonth,max_rolling_4_week_avg_fv,med_rolling_4_week_avg_fv,entity_name,population,who_region,unicef_region,world_bank_region,world_bank_income_group,covax_status,amc_status,covax_participation_modality,procurement_mechanism,date_accessed,total_doses_owid,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,date_week,max_date_week,is_latest,week_num,is_latest_week_reported,no_change_from_previous
ABW,2021-02-17,Aruba,2021-02-17,0,0,0,0,0,,,,,,0,0.0,0.0,,,1639.392857142857,264.0,0,0.0,0.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.0,0.0,1.5355008683877425,,,,,0,0
ABW,2021-02-18,Aruba,2021-02-17,6,6,0,0,0,,,,,,6,3.0,3.0,,,1639.392857142857,264.0,6,3.0,3.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.0028098832961804,0.0028098832961804,1.5355008683877425,,,,,0,0
ABW,2021-02-19,Aruba,2021-02-17,12,12,0,0,1,,,,,,6,4.0,4.0,,,1639.392857142857,264.0,6,4.0,4.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.0037465110615739,0.0037465110615739,1.5355008683877425,2021-02-19,2022-04-29,0.0,1.0,0,0
ABW,2021-02-20,Aruba,2021-02-17,424,424,0,0,0,,,,,,412,106.0,106.0,,,1639.392857142857,264.0,412,106.0,106.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.0992825431317085,0.0992825431317085,1.5355008683877425,,,,,0,0
ABW,2021-02-21,Aruba,2021-02-17,836,836,0,0,0,,,,,,412,167.2,167.2,,,1639.392857142857,264.0,412,167.2,167.2,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.1566041623737894,0.1566041623737894,1.5355008683877425,,,,,0,0
ABW,2021-02-22,Aruba,2021-02-17,1248,1248,0,0,0,,,,,,412,208.0,208.0,,,1639.392857142857,264.0,412,208.0,208.0,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.1948185752018432,0.1948185752018432,1.5355008683877425,,,,,0,0
ABW,2021-02-23,Aruba,2021-02-17,1660,1660,0,0,0,,,,,,412,237.14285714285717,237.14285714285717,,,1639.392857142857,264.0,412,237.14285714285717,237.14285714285717,,,1266.607142857143,89.57142857142857,0,0.0,0.0,,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.2221145843647389,0.2221145843647389,1.5355008683877425,,,,,0,0
ABW,2021-02-24,Aruba,2021-02-17,2072,2072,0,0,0,,,,,,412,259.0,259.0,0.0,,1639.392857142857,264.0,412,259.0,259.0,0.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.2425865912369106,0.2425865912369106,1.5355008683877425,,,,,0,0
ABW,2021-02-25,Aruba,2021-02-17,2484,2484,0,0,0,,,,,,412,276.0,276.0,3.0,,1639.392857142857,264.0,412,276.0,276.0,3.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.2585092632485997,0.2585092632485997,1.5355008683877425,,,,,0,0
ABW,2021-02-26,Aruba,2021-02-17,2896,2896,0,0,1,,,,,,412,289.6,289.6,4.0,,1639.392857142857,264.0,412,289.6,289.6,4.0,,1266.607142857143,89.57142857142857,0,0.0,0.0,0.0,,1027.321428571429,81.42857142857143,Aruba,106766,Other,Other,LAC,HIC,Non-COVAX,Non-COVAX,,,2021-09-14,,0.271247400857951,0.271247400857951,1.5355008683877425,2021-02-26,2022-04-29,0.0,2.0,0,0


In [0]:
# Final column set for the daily output.
df12 = df11.select(
    # identifiers
    'iso_code', 'entity_name', 'population', 'date', 'is_original_reported',
    # supply
    'cumulative_doses_received', 'effective_supply',
    # cumulative demand measures
    'total_doses_owid', 'total_doses', 'at_least_one_dose', 'fully_vaccinated', 'persons_booster_add_dose',
    # total-dose rates
    'daily_rate_td', 'rolling_4_week_avg_td', 'max_rolling_4_week_avg_td', 'med_rolling_4_week_avg_td',
    'rolling_4_week_avg_td_lastweek', 'rolling_4_week_avg_td_lastmonth', 'rolling_8_week_avg_td',
    'rolling_4_week_avg_td_per100', 'rolling_8_week_avg_td_per100', 'max_rolling_4_week_avg_td_per100',
    # first-dose and fully-vaccinated rates
    'daily_rate_1d', 'rolling_4_week_avg_1d', 'daily_rate_fv', 'rolling_4_week_avg_fv',
    # flags
    'is_latest', 'is_latest_week_reported', 'no_change_from_previous',
)

# Re-attach the access date from the primary data.
# NOTE(review): this assumes one distinct date_accessed per iso_code; more than
# one would duplicate the daily rows - confirm against the source table.
df12 = df12.join(who.select('iso_code', 'date_accessed').drop_duplicates(), 'iso_code', how='left')

display(df12.orderBy('iso_code', 'date').filter(col('iso_code')==iso_code))
display(df12.filter(col('is_latest')==1))
# Column name spelled exactly as selected above, so the lookup does not depend
# on Spark's case-insensitive column resolution.
display(df12.filter(col('is_latest_week_reported')==1))

iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
UGA,Uganda,45741007,2021-03-10,0,867600.0,,338.3333333333333,0,0,0,0,0,0.0,183164.67857142855,27581.964285714286,,,0.0,0.0,0.0,0.4004386667119694,0,0.0,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-11,0,867600.0,867600.0,676.6666666666666,405,405,0,0,405,202.5,183164.67857142855,27581.964285714286,,,202.5,0.0004427099735692308,0.0004427099735692308,0.4004386667119694,405,202.5,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-12,0,867600.0,867195.0,1015.0,810,810,0,0,405,270.0,183164.67857142855,27581.964285714286,,,270.0,0.0005902799647589744,0.0005902799647589744,0.4004386667119694,405,270.0,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-13,1,867600.0,866790.0,1215.0,1215,1215,0,0,405,303.75,183164.67857142855,27581.964285714286,,,303.75,0.0006640649603538462,0.0006640649603538462,0.4004386667119694,405,303.75,0,0.0,0.0,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-14,0,867600.0,866385.0,2028.0,3719,1104,0,0,2504,743.8,183164.67857142855,27581.964285714286,,,743.8,0.0016261119918063,0.0016261119918063,0.4004386667119694,-111,220.8,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-15,0,867600.0,863881.0,2841.0,6223,994,0,0,2504,1037.1666666666667,183164.67857142855,27581.964285714286,,,1037.1666666666667,0.0022674766794414,0.0022674766794414,0.4004386667119694,-110,165.66666666666666,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-16,0,867600.0,861377.0,4684.0,8727,883,0,0,2504,1246.7142857142858,183164.67857142855,27581.964285714286,,,1246.7142857142858,0.0027255943134664,0.0027255943134664,0.4004386667119694,-111,126.14285714285714,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-17,0,867600.0,858873.0,7920.0,11231,773,0,0,2504,1403.875,183164.67857142855,27581.964285714286,0.0,,1403.875,0.0030691825389852,0.0030691825389852,0.4004386667119694,-110,96.625,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-18,0,867600.0,856369.0,13027.0,13735,662,0,0,2504,1526.111111111111,183164.67857142855,27581.964285714286,202.5,,1526.111111111111,0.0033364178254997,0.0033364178254997,0.4004386667119694,-111,73.55555555555556,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-19,0,867600.0,853865.0,17543.0,16240,552,0,0,2505,1624.0,183164.67857142855,27581.964285714286,270.0,,1624.0,0.0035504246769206,0.0035504246769206,0.4004386667119694,-110,55.2,0,0.0,,1,0,2022-05-09


iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
ABW,Aruba,106766.0,2022-04-29,1,170293.0,29.0,170323.0,170293,88370,81923,0,29,34.0,1639.392857142857,264.0,42.96428571428572,48.142857142857146,41.07142857142857,0.0318453440233782,0.0384686403643749,1.5355008683877425,16,16.107142857142858,14,17.892857142857142,1,0,0,2022-05-09
AFG,Afghanistan,38928346.0,2022-05-08,1,11671965.0,5649193.0,,6023425,5320381,4670950,0,653,4004.964285714286,51831.142857142855,11520.107142857143,4873.357142857143,8993.892857142857,6499.428571428572,0.0102880412276295,0.0166958764994242,0.1331449912029215,633,3518.5714285714284,651,3682.285714285714,1,1,0,2022-05-09
ALB,Albania,2877800.0,2022-04-24,1,2837346.0,1425.0,2837346.0,2837346,1311182,1235239,290925,1425,1720.571428571429,13059.42857142857,6314.678571428572,1793.5,2251.1071428571427,1985.8392857142856,0.059787734678276,0.0690054654845467,0.4537990329914717,303,355.3928571428572,400,435.07142857142856,1,0,0,2022-05-09
ARE,United Arab Emirates,9890402.0,2022-05-05,1,24749854.0,10021.0,24699920.53333333,24749854,9991089,9763292,4995473,10021,6287.535714285715,76720.53571428571,51958.92857142857,6732.357142857143,10170.357142857143,8228.94642857143,0.0635720945850908,0.083201334269036,0.7757069501753894,0,0.0,0,1534.357142857143,1,0,0,2022-05-09
ARM,Armenia,2963234.0,2022-04-17,1,2479824.0,325779.0,2154689.0,2154689,1131339,987673,41766,644,1702.0357142857142,15883.42857142857,3836.5,2186.428571428572,3569.3571428571427,2635.6964285714284,0.0574384511748216,0.0889466180723975,0.5360166821597137,147,452.32142857142856,401,1019.8928571428572,1,0,0,2022-05-09
ATG,Antigua and Barbuda,97929.0,2022-04-29,1,224775.0,89170.0,,135605,63952,61815,9838,0,5.785714285714286,1289.4736842105262,241.8571428571429,364.9642857142857,371.8571428571428,188.82142857142856,0.0059080704241994,0.1928146193379168,1.316743440870964,0,1.2142857142857142,0,4.571428571428571,1,0,1,2022-05-09
AUS,Australia,25499884.0,2022-05-01,1,57759823.0,42173.0,57235026.0,57759823,22279275,21528799,13426952,42173,40498.32142857143,282137.5714285714,115068.10714285714,44373.96428571428,71230.10714285714,55864.21428571428,0.1588176692434029,0.2190763467226529,1.1064268818970762,1571,2143.035714285714,5330,8507.32142857143,1,0,0,2022-05-09
AZE,Azerbaijan,10139175.0,2022-04-24,1,13513244.4,18174.400000000373,13627588.0,13499692,5362072,4843392,3294230,4622,7911.607142857143,64913.21428571428,29178.10714285714,9341.57142857143,15137.107142857143,11524.357142857143,0.0780300876832399,0.1136616849285779,0.6402218551875698,445,579.8214285714286,281,702.1071428571429,1,0,0,2022-05-09
BFA,Burkina Faso,20903273.0,2022-05-01,1,5526585.0,2597596.0,2928989.0,2928989,2434140,1539151,0,0,18102.5,27639.285714285717,3936.785714285714,19042.35714285714,3288.1071428571427,10695.30357142857,0.0866012705283043,0.0511656886049786,0.1322246794283637,594,10089.107142857143,530,11081.57142857143,1,1,1,2022-05-09
BGD,Bangladesh,164689383.0,2022-04-30,1,488507668.2,230876440.2,,257793811,128568620,116425878,12799313,162583,316767.53571428574,1811898.142857143,444306.1428571429,715956.9642857143,1179345.392857143,748056.4642857143,0.1923424145163539,0.4542226406214141,1.1001912265693188,10412,25093.42857142857,52480,173990.0357142857,1,0,0,2022-05-09


iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
AFG,Afghanistan,38928346,2021-02-22,0,450000.0,,0.0,0,0,0,0,0,0.0,51831.142857142855,11520.107142857143,,,0.0,0.0,0.0,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-23,0,450000.0,450000.0,1366.6666666666667,2396,0,0,0,2396,1198.0,51831.142857142855,11520.107142857143,,,1198.0,0.0030774490136313,0.0030774490136313,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-24,0,450000.0,447604.0,2733.333333333333,4792,0,0,0,2396,1597.3333333333333,51831.142857142855,11520.107142857143,,,1597.3333333333333,0.0041032653515084,0.0041032653515084,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-25,0,450000.0,445208.0,4100.0,7188,0,0,0,2396,1797.0,51831.142857142855,11520.107142857143,,,1797.0,0.004616173520447,0.004616173520447,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-26,0,450000.0,442812.0,5466.666666666667,9584,0,0,0,2396,1916.8,51831.142857142855,11520.107142857143,,,1916.8,0.0049239184218101,0.0049239184218101,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-27,0,450000.0,440416.0,6833.333333333334,11980,0,0,0,2396,1996.6666666666667,51831.142857142855,11520.107142857143,,,1996.6666666666667,0.0051290816893855,0.0051290816893855,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-02-28,0,450000.0,438020.0,8200.0,14376,0,0,0,2396,2053.714285714286,51831.142857142855,11520.107142857143,,,2053.714285714286,0.0052756268805108,0.0052756268805108,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-03-01,0,871200.0,856824.0,11062.5,16773,0,0,0,2397,2096.625,51831.142857142855,11520.107142857143,0.0,,2096.625,0.0053858568766317,0.0053858568766317,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-03-02,0,871200.0,854427.0,13925.0,19169,0,0,0,2396,2129.8888888888887,51831.142857142855,11520.107142857143,1198.0,,2129.8888888888887,0.0054713058933685,0.0054713058933685,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09
AFG,Afghanistan,38928346,2021-03-03,0,871200.0,852031.0,16787.5,21565,0,0,0,2396,2156.5,51831.142857142855,11520.107142857143,1597.3333333333333,,2156.5,0.0055396651067579,0.0055396651067579,0.1331449912029215,0,0.0,0,0.0,,1,0,2022-05-09


### Save to Azure Storage / Register in Databricks metastore

In [0]:
# Destination of the Delta output: the transformed storage path plus a ".delta" suffix.
delta_path = f"{transformed_storage_path}.delta"

# Optional manual reset of the destination directory (left disabled):
# dbutils.fs.rm(delta_path, True)

# Replace whatever is currently stored at delta_path with the contents of df12.
df12.write.save(delta_path, format="delta", mode="overwrite")

In [0]:
# Echo the Delta output path so it can be compared with the LOCATION used below.
print(f"{transformed_storage_path}.delta")

In [0]:
%sql

-- Remove any existing metastore entry for the output table so it can be re-registered below.
DROP TABLE IF EXISTS covax_supply_chain_analytics.analysis_vx_throughput_output_daily;

-- Register the table on top of the Delta files at the given storage location.
-- NOTE(review): this LOCATION is hard-coded, while the write cell derives its path from
-- the Partner/Source/Dataset widgets -- confirm both resolve to the same directory.
CREATE TABLE covax_supply_chain_analytics.analysis_vx_throughput_output_daily
USING DELTA
LOCATION '/mnt/covax-supply-chain-analytics/transformed/who/analysis_vx_throughput_output_daily.delta'

In [0]:
# Read the registered table back through the metastore to confirm it is queryable.
display(
  spark.sql("SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_output_daily")
)

iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
ABW,Aruba,106766,2021-02-17,0,,,,0,0,0,0,0,0.0,1639.392857142857,264.0,,,0.0,0.0,0.0,1.5355008683877425,0,0.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-18,0,,,,6,6,0,0,6,3.0,1639.392857142857,264.0,,,3.0,0.0028098832961804,0.0028098832961804,1.5355008683877425,6,3.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-19,1,,,,12,12,0,0,6,4.0,1639.392857142857,264.0,,,4.0,0.0037465110615739,0.0037465110615739,1.5355008683877425,6,4.0,0,0.0,0.0,0,0,2022-05-09
ABW,Aruba,106766,2021-02-20,0,,,,424,424,0,0,412,106.0,1639.392857142857,264.0,,,106.0,0.0992825431317085,0.0992825431317085,1.5355008683877425,412,106.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-21,0,,,,836,836,0,0,412,167.2,1639.392857142857,264.0,,,167.2,0.1566041623737894,0.1566041623737894,1.5355008683877425,412,167.2,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-22,0,,,,1248,1248,0,0,412,208.0,1639.392857142857,264.0,,,208.0,0.1948185752018432,0.1948185752018432,1.5355008683877425,412,208.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-23,0,,,,1660,1660,0,0,412,237.14285714285717,1639.392857142857,264.0,,,237.14285714285717,0.2221145843647389,0.2221145843647389,1.5355008683877425,412,237.14285714285717,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-24,0,,,,2072,2072,0,0,412,259.0,1639.392857142857,264.0,0.0,,259.0,0.2425865912369106,0.2425865912369106,1.5355008683877425,412,259.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-25,0,,,,2484,2484,0,0,412,276.0,1639.392857142857,264.0,3.0,,276.0,0.2585092632485997,0.2585092632485997,1.5355008683877425,412,276.0,0,0.0,,0,0,2022-05-09
ABW,Aruba,106766,2021-02-26,1,,,,2896,2896,0,0,412,289.6,1639.392857142857,264.0,4.0,,289.6,0.271247400857951,0.271247400857951,1.5355008683877425,412,289.6,0,0.0,0.0,0,0,2022-05-09


In [0]:
# Date-ordered series from the registered table for the country held in `iso_code`.
display(
  spark.sql("SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_output_daily")
    .orderBy('iso_code', 'date')
    .where(col('iso_code') == iso_code)
)

iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
UGA,Uganda,45741007,2021-03-10,0,867600.0,,338.3333333333333,0,0,0,0,0,0.0,183164.67857142855,27581.964285714286,,,0.0,0.0,0.0,0.4004386667119694,0,0.0,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-11,0,867600.0,867600.0,676.6666666666666,405,405,0,0,405,202.5,183164.67857142855,27581.964285714286,,,202.5,0.0004427099735692308,0.0004427099735692308,0.4004386667119694,405,202.5,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-12,0,867600.0,867195.0,1015.0,810,810,0,0,405,270.0,183164.67857142855,27581.964285714286,,,270.0,0.0005902799647589744,0.0005902799647589744,0.4004386667119694,405,270.0,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-13,1,867600.0,866790.0,1215.0,1215,1215,0,0,405,303.75,183164.67857142855,27581.964285714286,,,303.75,0.0006640649603538462,0.0006640649603538462,0.4004386667119694,405,303.75,0,0.0,0.0,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-14,0,867600.0,866385.0,2028.0,3719,1104,0,0,2504,743.8,183164.67857142855,27581.964285714286,,,743.8,0.0016261119918063,0.0016261119918063,0.4004386667119694,-111,220.8,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-15,0,867600.0,863881.0,2841.0,6223,994,0,0,2504,1037.1666666666667,183164.67857142855,27581.964285714286,,,1037.1666666666667,0.0022674766794414,0.0022674766794414,0.4004386667119694,-110,165.66666666666666,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-16,0,867600.0,861377.0,4684.0,8727,883,0,0,2504,1246.7142857142858,183164.67857142855,27581.964285714286,,,1246.7142857142858,0.0027255943134664,0.0027255943134664,0.4004386667119694,-111,126.14285714285714,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-17,0,867600.0,858873.0,7920.0,11231,773,0,0,2504,1403.875,183164.67857142855,27581.964285714286,0.0,,1403.875,0.0030691825389852,0.0030691825389852,0.4004386667119694,-110,96.625,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-18,0,867600.0,856369.0,13027.0,13735,662,0,0,2504,1526.111111111111,183164.67857142855,27581.964285714286,202.5,,1526.111111111111,0.0033364178254997,0.0033364178254997,0.4004386667119694,-111,73.55555555555556,0,0.0,,1,0,2022-05-09
UGA,Uganda,45741007,2021-03-19,0,867600.0,853865.0,17543.0,16240,552,0,0,2505,1624.0,183164.67857142855,27581.964285714286,270.0,,1624.0,0.0035504246769206,0.0035504246769206,0.4004386667119694,-110,55.2,0,0.0,,1,0,2022-05-09


In [0]:
# Current 4-week rolling average next to its maximum, by date, for the country in `iso_code`.
display(
  spark.sql("SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_output_daily")
    .select('iso_code', 'date', 'max_rolling_4_week_avg_td', 'rolling_4_week_avg_td')
    .orderBy('iso_code', 'date')
    .where(col('iso_code') == iso_code)
)

iso_code,date,max_rolling_4_week_avg_td,rolling_4_week_avg_td
UGA,2021-03-10,183164.67857142855,0.0
UGA,2021-03-11,183164.67857142855,202.5
UGA,2021-03-12,183164.67857142855,270.0
UGA,2021-03-13,183164.67857142855,303.75
UGA,2021-03-14,183164.67857142855,743.8
UGA,2021-03-15,183164.67857142855,1037.1666666666667
UGA,2021-03-16,183164.67857142855,1246.7142857142858
UGA,2021-03-17,183164.67857142855,1403.875
UGA,2021-03-18,183164.67857142855,1526.111111111111
UGA,2021-03-19,183164.67857142855,1624.0


##### Query Delta Log

In [0]:
# List the Delta transaction history recorded for the output path.
display(spark.sql("DESCRIBE HISTORY delta. `/mnt/covax-supply-chain-analytics/transformed/who/analysis_vx_throughput_output_daily.delta`"))

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata
25,2022-05-09T20:50:48.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,24,WriteSerializable,False,"Map(numFiles -> 2, numOutputRows -> 92208, numOutputBytes -> 7023761)",
24,2022-05-03T17:33:32.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,23,WriteSerializable,False,"Map(numFiles -> 9, numOutputRows -> 91157, numOutputBytes -> 6966712)",
23,2022-04-26T03:22:15.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,22,WriteSerializable,False,"Map(numFiles -> 2, numOutputRows -> 88256, numOutputBytes -> 6733361)",
22,2022-04-18T22:53:23.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,21,WriteSerializable,False,"Map(numFiles -> 2, numOutputRows -> 87343, numOutputBytes -> 6659995)",
21,2022-04-11T22:19:55.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,20,WriteSerializable,False,"Map(numFiles -> 2, numOutputRows -> 85893, numOutputBytes -> 6547053)",
20,2022-04-11T22:17:51.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,19,WriteSerializable,False,"Map(numFiles -> 2, numOutputRows -> 85893, numOutputBytes -> 6547053)",
19,2022-04-04T20:00:58.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,18,WriteSerializable,False,"Map(numFiles -> 2, numOutputBytes -> 6340416, numOutputRows -> 83633)",
18,2022-03-28T22:45:02.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,17,WriteSerializable,False,"Map(numFiles -> 2, numOutputBytes -> 6326584, numOutputRows -> 83498)",
17,2022-03-21T18:22:05.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,16,WriteSerializable,False,"Map(numFiles -> 7, numOutputBytes -> 6152874, numOutputRows -> 81264)",
16,2022-03-15T01:32:56.000+0000,6136552160696939,jeremy.cooper@gatesfoundation.org,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(4454943111335582),1112-212424-shuwbub0,15,WriteSerializable,False,"Map(numFiles -> 1, numOutputBytes -> 6001362, numOutputRows -> 78850)",


### Appendix

In [0]:
# Rows of the registered table whose `is_latest` flag equals 1.
display(
  spark.sql("SELECT * FROM covax_supply_chain_analytics.analysis_vx_throughput_output_daily")
    .where(col('is_latest') == 1)
)

iso_code,entity_name,population,date,is_original_reported,cumulative_doses_received,effective_supply,total_doses_owid,total_doses,at_least_one_dose,fully_vaccinated,persons_booster_add_dose,daily_rate_td,rolling_4_week_avg_td,max_rolling_4_week_avg_td,med_rolling_4_week_avg_td,rolling_4_week_avg_td_lastweek,rolling_4_week_avg_td_lastmonth,rolling_8_week_avg_td,rolling_4_week_avg_td_per100,rolling_8_week_avg_td_per100,max_rolling_4_week_avg_td_per100,daily_rate_1d,rolling_4_week_avg_1d,daily_rate_fv,rolling_4_week_avg_fv,is_latest,is_latest_week_reported,no_change_from_previous,date_accessed
AGO,Angola,32866272.0,2022-05-08,1,24790212.0,6893586.0,,17896626,12059919,6327907,369899,0,7321.928571428572,113251.14285714286,22237.10714285714,9762.57142857143,21879.0,14600.464285714286,0.0222779406542627,0.0444238527744013,0.3445816515397391,0,16720.14285714286,0,6772.607142857143,1,1,1,2022-05-09
AIA,Anguilla,15002.0,2022-04-29,1,23126.0,0.0,23315.0,23126,10622,9874,2630,0,14.714285714285714,236.9642857142857,32.357142857142854,14.714285714285714,12.035714285714286,13.375,0.0980821604738415,0.0891547793627516,1.579551297922182,0,1.7857142857142858,0,8.928571428571429,1,0,1,2022-05-09
AND,Andorra,77265.0,2022-04-24,1,152463.0,12.0,152463.0,152463,57866,53414,41183,12,9.857142857142858,1157.5,290.5357142857143,10.357142857142858,23.785714285714285,16.821428571428573,0.0127575782788362,0.0217710846714923,1.498090985569145,2,1.1071428571428572,1,0.8928571428571429,1,0,0,2022-05-09
ARG,Argentina,45195774.0,2022-04-29,1,105796364.4,7391185.400000006,98819521.0,98484287,41017420,37222311,20555542,79108,63413.642857142855,358555.1785714286,248174.07142857145,61931.25,104055.92857142857,83734.78571428571,0.1403087882887963,0.1852712727395391,0.79333784298379,2769,3459.25,8118,9129.07142857143,1,0,0,2022-05-09
ASM,American Samoa,55197.0,2022-04-21,1,,,,105958,44240,40593,21694,207,239.5,445.9642857142857,200.57142857142856,255.89285714285717,438.8928571428572,339.19642857142856,0.4339003931373082,0.614519681452667,0.8079502250381102,5,15.142857142857142,39,41.10714285714285,1,0,0,2022-05-09
AUT,Austria,8901064.0,2022-04-24,1,34687728.0,16435840.0,18261423.285714287,18254793,6821795,6605084,5178777,2905,8665.642857142857,133023.32142857142,25311.85714285714,8783.785714285714,4058.142857142857,6361.892857142857,0.0973551348147014,0.0714733975302599,1.4944653968174075,236,273.60714285714283,434,490.3571428571428,1,0,0,2022-05-09
BDI,Burundi,11890784.0,2022-05-08,1,586080.0,573456.0,,12651,12651,12078,0,27,6.678571428571429,163.46428571428572,56.392857142857146,0.0,19.75,13.214285714285714,5.6165946909568187e-05,0.00011113048319005467,0.0013747141123267,71,41.21428571428572,71,39.785714285714285,1,1,0,2022-05-09
BEL,Belgium,11522440.0,2022-04-24,1,25292607.0,2069.0,25308942.0,25292607,9227575,9129979,7125339,2069,17040.214285714286,140314.25,47350.5,17149.25,4015.607142857143,10527.910714285714,0.147887203454427,0.091368761428011,1.217747716629464,235,187.8571428571429,332,371.6071428571428,1,0,0,2022-05-09
BEN,Benin,12123200.0,2022-05-08,1,4823766.0,1672884.0,,3152148,3157864,2625325,0,1266,1265.4642857142858,41237.607142857145,2187.214285714286,1334.571428571429,1697.607142857143,1481.5357142857142,0.0104383684647146,0.0122206654537227,0.3401544735949019,2990,2989.5714285714284,3380,3379.464285714286,1,1,0,2022-05-09
BGR,Bulgaria,6951482.0,2022-04-24,1,11727810.0,7351171.0,4376629.5,4377438,2086746,2056552,739107,799,1132.321428571429,19972.39285714286,8736.67857142857,1282.9642857142858,1692.178571428571,1412.25,0.0162889212483241,0.0203158117938016,0.2873112935794534,109,152.32142857142858,132,189.64285714285717,1,0,0,2022-05-09
