# Imports

In [None]:
import pandas as pd
import os
from datetime import datetime

# Define country and parameters

In [None]:
# Select target country
country = 'Colombia'

# Country-specific parameters, keyed by country name:
# (ISO 3-letter code, ISO 2-letter code)
COUNTRY_CODES = {
    'Argentina': ('ARG', 'AR'),
    'Chile': ('CHL', 'CL'),
    'Colombia': ('COL', 'CO'),
    # Uncomment the following if Mexico is to be included in the analysis
    # 'Mexico': ('MEX', 'MX'),
}

# Fail immediately on an unsupported country. Without this check the codes
# would stay unset (or, in a live notebook session, keep the values of a
# previously selected country) and later cells would read the wrong folder.
if country not in COUNTRY_CODES:
    raise ValueError(
        f"Unsupported country {country!r}; expected one of {sorted(COUNTRY_CODES)}"
    )

# country_short: ISO 3-letter code, country_code: ISO 2-letter code
country_short, country_code = COUNTRY_CODES[country]

# Set working directory

In [None]:
# Working directory: the per-country outputs folder, i.e. the shared outputs
# root followed by the country's 3-letter code.
outputs_root = (
    '/Users/carmen/Library/CloudStorage/OneDrive-TheUniversityofLiverpool/'
    'Research/RECAST/latin-mobility-covid-local-files/data/outputs/'
)
wd = outputs_root + country_short

# Initialise baseline movement data

In [None]:
# Days of the week are coded according to the `datetime` library (0 = Monday, 6 = Sunday)

### ONLY UNCOMMENT THE LINES BELOW TO FILL IN BASELINE DATAFRAME FROM SCRATCH!!

# # Initialise an empty DataFrame for baseline movements
# # 'O' and 'D' represent origin and destination IDs respectively
# # Columns '0' through '6' correspond to days of the week (Monday=0, Sunday=6)
# df_bl = pd.DataFrame({
#     'O': [],  # Origin ID
#     'D': [],  # Destination ID
#     '0': [],  # Monday
#     '1': [],  # Tuesday
#     '2': [],  # Wednesday
#     '3': [],  # Thursday
#     '4': [],  # Friday
#     '5': [],  # Saturday
#     '6': []   # Sunday
# })

# # Save the empty baseline movements DataFrame to CSV
# df_bl.to_csv(wd + '/baseline/baseline_mov.csv', index=False)

# # Create or overwrite a counter file to track processing progress
# filename = wd + '/baseline/counter_mov.txt'
# with open(filename, 'w') as file:
#     file.write(str(0))



# Update baseline movement data using daily files

In [None]:
# This cell:
# - Loads the resume index from 'counter_mov.txt' (index of the next file to process)
# - Lists and iterates through daily movement files in the '/mov/' directory
# - For each file, fills in the baseline value of every origin-destination (O-D)
#   pair for that file's day of the week, unless a positive value is already stored
# - Saves the updated baseline movements to 'baseline_mov.csv' after each file
# - Only then records progress in 'counter_mov.txt', so an interrupted run resumes
#   at the first file that has not been fully processed and saved

# Read the index of the next file to process
filename = wd + '/baseline/counter_mov.txt'
with open(filename, 'r') as counter_file:
    start_i = int(counter_file.read().strip())
print(f"Resuming from file index: {start_i}")

# Load the existing baseline movements DataFrame
baseline_path = wd + '/baseline/baseline_mov.csv'
df_bl = pd.read_csv(baseline_path)

# List and sort movement files in the 'mov' directory, excluding hidden files
files = sorted(name for name in os.listdir(wd + '/mov/') if not name.startswith('.'))
n_files = len(files)

# Loop over files starting from the saved index
for i in range(start_i, n_files):

    if i % 20 == 0:
        print(f"Progress: {i / n_files * 100:.2f}%")

    # Reload the baseline from disk so the in-memory frame matches the file written
    # at the end of the previous iteration (read_csv also re-infers column dtypes)
    df_bl = pd.read_csv(baseline_path)

    mov_file = files[i]
    df_movs = pd.read_csv(wd + '/mov/' + mov_file, index_col=0)

    # Filter out records without valid start or end IDs
    df_movs_bl = df_movs.dropna(subset=['start_FID', 'end_FID'])
    # Optional filtering on movement length can be done here:
    # df_movs_bl = df_movs_bl[df_movs_bl['length_km'] < 70]

    # Extract weekday from filename: the 10 characters ending 9 characters before
    # the end of the name must be a date in format YYYY-MM-DD (0 = Monday, 6 = Sunday)
    wday = datetime.strptime(mov_file[-19:-9], "%Y-%m-%d").weekday()
    day_col = str(wday)  # baseline columns are named '0'..'6'

    # Iterate through all movement records in the file
    records = zip(
        df_movs_bl['start_FID'], df_movs_bl['end_FID'], df_movs_bl['n_baseline']
    )
    for start_fid, end_fid, value in records:

        O = int(start_fid)  # Origin ID
        D = int(end_fid)    # Destination ID

        # Index labels of existing baseline rows for this origin-destination pair
        matches = df_bl.index[(df_bl['O'] == O) & (df_bl['D'] == D)]

        if len(matches) == 0:
            # New O-D pair: store this weekday's value; -999 marks weekdays
            # for which no baseline has been seen yet
            df_bl.loc[len(df_bl)] = [O, D] + [
                value if k == wday else -999 for k in range(7)
            ]
        elif len(matches) == 1:
            # Existing pair: only fill the weekday if it has no positive value yet
            idx = matches[0]
            if df_bl.loc[idx, day_col] <= 0:
                df_bl.loc[idx, day_col] = value
        else:
            print(f"Warning: Multiple baseline rows for O={O}, D={D}")

    # Save the updated baseline first, so the counter never points past
    # work that has not been written to disk
    df_bl.to_csv(baseline_path, index=False)

    # Record the index of the NEXT file to process; storing `i` would make every
    # resume (and every re-run after completion) reprocess an already finished file
    with open(filename, 'w') as counter_file:
        counter_file.write(str(i + 1))
