The aim of this small notebook is to load CSV files from https://lwillem.shinyapps.io/socrates_rshiny/ and write them to separate sheets in an XLSX file.

In [31]:
import os

import pandas as pd

######### USER INPUT ###########
# Choose dataset
dataset = 'willem_2012'
# Choose age stratification: decades or life phases
stratification = 'phases'
######### USER INPUT ###########

# All possible subdirectories to loop over
# All XLSX files
locations = ['total', 'home', 'work', 'school', 'transport', 'leisure', 'otherplace']
# All sheets within XLSX files
contacts = ['all', 'physical_only', 'less_5_min', 'less_15_min', 'more_15_min', 'more_one_hour', 'more_four_hours']

# Define directory of raw data
interaction_raw = f'../../data/raw/interaction_matrices/{dataset}/CSVs/{stratification}'
# Define directory of interim data (target)
interaction_interim = f'../../data/interim/interaction_matrices/{dataset}/{stratification}'

# Define row names
row_names_phases = ['[0, 12)', '[12, 18)', '[18, 25)', '[25, 35)', '[35, 45)', '[45, 55)', '[55, 65)', '[65, 75)', '[75, 85)', '85+']
row_names_decades = ['[0, 10)', '[10, 20)', '[20, 30)', '[30, 40)', '[40, 50)', '[50, 60)', '[60, 70)', '[70, 80)', '80+']
    
row_names=dict({'phases' : row_names_phases,
                'decades': row_names_decades})

In [32]:
# Define function to create the more_5_min DataFrame
def create_more_5_min(CSV_dir, dataset, stratification, loc):
    contact_df_all = pd.read_csv(f'{CSV_dir}/{loc}/{dataset}_{stratification}_{loc}_all.csv')
    contact_df_less_5_min = pd.read_csv(f'{CSV_dir}/{loc}/{dataset}_{stratification}_{loc}_less_5_min.csv')
    contact_df_more_5_min = contact_df_all - contact_df_less_5_min
    return contact_df_more_5_min

def clean_df(raw_df, row_names):
    clean_df = raw_df.copy()
    clean_df['age_group']=row_names
    clean_df = clean_df.set_index('age_group')
    return clean_df

def create_XLSX(source_dir, target_dir, stratification, dataset, locations, contacts):
    for loc in locations:
        with pd.ExcelWriter(f'{target_dir}/{loc}_{stratification}.xlsx', engine='xlsxwriter') as XLSX:
            for con in contacts:
                # Create a sheet with name {con} inside the excel file XLSX
                contact_df = pd.read_csv(f'{source_dir}/{loc}/{dataset}_{stratification}_{loc}_{con}.csv')
                contact_df = clean_df(contact_df, row_names[stratification])
                contact_df.to_excel(XLSX, sheet_name=con)
                if con=='less_5_min':
                    # Create more_5_min sheet right after less_5_min
                    contact_df = create_more_5_min(source_dir, dataset, stratification, loc)
                    contact_df = clean_df(contact_df, row_names[stratification])
                    contact_df.to_excel(XLSX, sheet_name='more_5_min')
    return

In [33]:
# Create the XLSX files
create_XLSX(interaction_raw, interaction_interim, stratification, dataset, locations, contacts)