# Imports

In [None]:
import os
import pandas as pd
import numpy as np
from datetime import datetime

# Define country and parameters

In [None]:
# Select target country
country = 'Colombia'

# ISO 3166-1 codes for each supported country: (3-letter code, 2-letter code)
ISO_CODES = {
    'Argentina': ('ARG', 'AR'),
    'Chile': ('CHL', 'CL'),
    'Colombia': ('COL', 'CO'),
    # Uncomment the following if Mexico is to be included in the analysis
    # 'Mexico': ('MEX', 'MX'),
}

# Fail fast on an unsupported country. Without this check the code variables
# would be left undefined (or, in a live notebook session, silently keep the
# values from a previously selected country).
if country not in ISO_CODES:
    raise ValueError(
        'Unsupported country {!r}; expected one of {}'.format(
            country, sorted(ISO_CODES)
        )
    )

# country_short: ISO 3-letter code, country_code: ISO 2-letter code
country_short, country_code = ISO_CODES[country]

# Set working directory

In [None]:
# Working directory: the local OneDrive root followed by the project folder.
# The two parts are concatenated as-is (the root already ends with '/').
_onedrive_root = '/Users/carmen/Library/CloudStorage/OneDrive-TheUniversityofLiverpool/'
_project_dir = 'research/recast/latin-mobility-covid-local-files'
wd = _onedrive_root + _project_dir

# Initialise baseline population data

In [None]:
# Days of the week are coded according to the `datetime` library (0 = Monday, 6 = Sunday)

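### WARNING: UNCOMMENT THE LINES BELOW ONLY TO REBUILD THE BASELINE DATAFRAME FROM SCRATCH!
### Running them overwrites 'baseline_pop.csv' with an empty table and resets 'counter_pop.txt' to 0.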

# # Initialise an empty baseline population DataFrame with columns for each day of the week
# df_bl = pd.DataFrame({
#     'FID': [],  # Feature ID
#     '0': [],    # Monday
#     '1': [],    # Tuesday
#     '2': [],    # Wednesday
#     '3': [],    # Thursday
#     '4': [],    # Friday
#     '5': [],    # Saturday
#     '6': []     # Sunday
# })

# # Save the empty baseline DataFrame to the local working directory
# df_bl.to_csv(wd + '/baseline/baseline_pop.csv', index=False)

# # Create or overwrite the counter file used to track progress in processing
# filename = wd + '/baseline/counter_pop.txt'
# with open(filename, 'w') as file:
#     file.write(str(0))


# Update baseline population data using daily files

In [None]:
# This cell:
# - Reads the resume index from 'counter_pop.txt' (index of the next file to process)
# - Iterates through the daily population files in '/pop/' starting at that index
# - For each FID, fills in the baseline value for the file's day of the week;
#   a value that is already positive is never overwritten
# - After each file, saves the baseline to 'baseline_pop.csv' and then advances
#   'counter_pop.txt'

baseline_path = wd + '/baseline/baseline_pop.csv'

# Load the index to resume processing from
filename = wd + '/baseline/counter_pop.txt'
with open(filename, 'r') as fh:
    start_i = fh.read()
print(start_i)

# Load the existing baseline once. The in-memory frame is saved after every
# file, so it never needs to be re-read from disk inside the loop.
df_bl = pd.read_csv(baseline_path)

# List and sort all files in the 'pop' subdirectory of the working directory
files = sorted(os.listdir(wd + '/pop/'))

# Filter out hidden files (e.g., .DS_Store or other dotfiles)
files = [f for f in files if not f.startswith('.')]
n_files = len(files)

# Loop through the population files starting from the saved index
for i in range(int(start_i), n_files):

    if i % 50 == 0:
        print(i / n_files * 100)  # Progress indicator (percent of files)

    file = files[i]
    df_pops = pd.read_csv(wd + '/pop/' + file, index_col=0)

    # Drop rows without valid FID
    df_pops_bl = df_pops.dropna(subset=['FID']).reset_index(drop=True)

    # Extract weekday from the filename. The slice expects the date
    # (YYYY-MM-DD) to be followed by exactly 9 more characters, i.e. a short
    # suffix plus the extension (presumably something like '_HHMM.csv' --
    # TODO confirm against the actual file names).
    wday = datetime.strptime(file[-19:-9], "%Y-%m-%d").weekday()
    day_col = str(wday)  # Baseline columns are named '0'..'6'

    # Iterate through each (FID, baseline value) pair in the population file
    for fid, value in zip(df_pops_bl['FID'], df_pops_bl['n_baseline']):

        ID = int(fid)  # Area ID
        df_bl_t = df_bl[df_bl['FID'] == ID]  # Existing record(s) for this FID

        # If FID not found in baseline, add a new row holding the current
        # weekday's value and the -999 placeholder for every other weekday
        if len(df_bl_t) == 0:
            row = [ID] + [value if k == wday else -999 for k in range(7)]
            df_bl.loc[len(df_bl)] = row

        # If FID is found once, set the current weekday's value unless a
        # positive value is already stored. `not (x > 0)` is also true for
        # NaN and for the -999 placeholder, so both get replaced.
        elif len(df_bl_t) == 1:
            row_label = df_bl_t.index[0]
            if not df_bl.loc[row_label, day_col] > 0:
                df_bl.loc[row_label, day_col] = value

        else:
            print('more than one row in df_bl_t')

    # Save the updated baseline first ...
    df_bl.to_csv(baseline_path, index=False)

    # ... and only then advance the counter to the NEXT file to process.
    # If the run is interrupted between the two writes, the counter still
    # points at this file, which is simply processed again on resume (safe,
    # because values that are already set are not overwritten).
    with open(filename, 'w') as fh:
        fh.write(str(i + 1))
