# Imports

In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Define country and parameters

In [None]:
# Select target country
country = 'Colombia'

# ISO 3166-1 codes for each supported country: (alpha-3, alpha-2)
_COUNTRY_CODES = {
    'Argentina': ('ARG', 'AR'),
    'Chile': ('CHL', 'CL'),
    'Colombia': ('COL', 'CO'),
    # Uncomment the following if Mexico is to be included in the analysis
    # 'Mexico': ('MEX', 'MX'),
}

# Fail here, with a clear message, rather than with a NameError in a later
# cell when `country_short` / `country_code` would otherwise be undefined.
if country not in _COUNTRY_CODES:
    raise ValueError(
        f"Unsupported country {country!r}; "
        f"expected one of {sorted(_COUNTRY_CODES)}"
    )

# country_short: ISO 3-letter code, country_code: ISO 2-letter code
country_short, country_code = _COUNTRY_CODES[country]

# Set working directory

In [None]:
# Root folder holding the project's local data files; every path used in
# this notebook is built relative to it.
wd = ''.join((
    '/Users/carmen/Library/CloudStorage/OneDrive-TheUniversityofLiverpool/',
    'research/recast/latin-mobility-covid-local-files',
))

# Load baseline data
Two options:<br>
    - raw (reported) data only<br>
    - raw data plus estimates from the imputed baseline wherever raw data is missing<br>

In [None]:
# Data-source switch read by the cells below:
#   True  -> load and use the raw baseline movement table (baseline_mov.csv)
#   False -> load and use the imputed baseline movement sample instead
# Set to True or False according to desired option (see description above)
raw = True

In [None]:
# Folder holding every per-country output used in this cell
_outputs_root = os.path.join(wd, 'data', 'outputs', country_short)

# Load baseline movement data depending on the flag.
# Only ONE of `baseline_mov` / `baseline_mov_imput` is defined after this
# cell, so later cells must pick the table that matches `raw`.
if raw:
    baseline_mov = pd.read_csv(
        os.path.join(_outputs_root, 'baseline', 'baseline_mov.csv')
    )
else:
    baseline_mov_imput = pd.read_csv(
        os.path.join(
            _outputs_root, 'baseline',
            'movcell-baseline-imput-mov-dist-with-exo-var-flatten-sample.csv'
        )
    ).drop(columns='Unnamed: 0')  # discard the index column saved with the CSV

# Load imputed baseline population data with exogenous variables
# (loaded regardless of the `raw` flag)
baseline_pop_imput = gpd.read_file(
    os.path.join(
        _outputs_root,
        'grids-with-data', 'movcell-baseline-imput-pop-with-exo-var',
        'movcell-baseline-imput-pop-with-exo-var.gpkg'
    )
)

# Daily evolution of movements with distance > 0

In [None]:
# Define the directory containing daily movement data
directory = os.path.join(wd, 'data', 'outputs', country_short, 'mov')

# Get a sorted list of non-hidden files in the movement data directory
files = sorted(name for name in os.listdir(directory) if not name.startswith('.'))

# Without files there is no date range to build; say so explicitly instead of
# failing with a bare IndexError on files[0].
if not files:
    raise FileNotFoundError(f"No movement files found in {directory}")

# Extract start and end dates from filenames: the date is read from the fixed
# slice [-19:-9] of each name and must be formatted YYYY-MM-DD.
start_date = datetime.strptime(files[0][-19:-9], '%Y-%m-%d')
end_date = datetime.strptime(files[-1][-19:-9], '%Y-%m-%d')

# Compute the number of days in the movement data time span
delta = end_date - start_date

# One column name (YYYY-MM-DD) per day in the range, both ends included
columns = [
    (start_date + timedelta(days=offset)).strftime('%Y-%m-%d')
    for offset in range(delta.days + 1)
]

# Baseline table matching the `raw` flag; only that one has been loaded.
_baseline_source = baseline_mov if raw else baseline_mov_imput

# All-NaN frame with one row per baseline row and one column per day
df_mov_evo_dist = pd.DataFrame(
    np.nan, index=pd.RangeIndex(len(_baseline_source)), columns=columns
)

# Insert origin and destination columns (final order: O, D, dates...)
df_mov_evo_dist.insert(0, 'D', _baseline_source['D'])
df_mov_evo_dist.insert(0, 'O', _baseline_source['O'])

# Empty copy with the same layout, filled later with baseline values.
# The variable name depends on the flag because later cells refer to it so.
if raw:
    df_mov_evo_dist_baseline_from_mov = df_mov_evo_dist.copy()
else:
    df_mov_evo_dist_baseline_from_baseline = df_mov_evo_dist.copy()


In [None]:
# NOTE: This block is slow to run. Only run if you want to regenerate movement evolution files.

# Baseline table matching the `raw` flag. Only that table is loaded, and its
# rows are the ones `df_mov_evo_dist` was built from, so row labels line up.
# The previous code always read `baseline_mov_imput`, which is undefined when
# `raw` is True; the resulting NameError was swallowed for every row.
# NOTE(review): assumes the raw baseline table also has one column per weekday
# named '0'..'6' - confirm. If the raw baseline is meant to come from the
# movement files themselves, change the lookup below accordingly.
_baseline_table = baseline_mov if raw else baseline_mov_imput

# Map each (origin, destination) pair to its row label once, instead of
# scanning the whole frame for every movement row. `setdefault` keeps the
# first occurrence, matching the previous `df_od.index[0]` behaviour.
_od_to_index = {}
for _idx, _o, _d in zip(
    df_mov_evo_dist.index, df_mov_evo_dist['O'], df_mov_evo_dist['D']
):
    _od_to_index.setdefault((_o, _d), _idx)

for i, file in enumerate(files):
    # Print progress every 50 files
    if i % 50 == 0:
        print(f"Processing {i / len(files) * 100:.2f}%")

    file_path = os.path.join(directory, file)

    # Load movement file and drop unnecessary column
    df_movs = pd.read_csv(file_path).drop('Unnamed: 0', axis=1)

    # Extract date and weekday from filename
    date_str = file[-19:-9]
    wday = datetime.strptime(date_str, "%Y-%m-%d").weekday()
    wday_col = str(wday)

    # A missing weekday column is a schema problem, not a missing OD pair:
    # surface it instead of silently producing an all-NaN output.
    if wday_col not in _baseline_table.columns:
        raise KeyError(
            f"Baseline table has no weekday column {wday_col!r} "
            f"(needed for {date_str})"
        )

    # Only movements with a positive travelled distance are considered
    moving = df_movs[df_movs['length_km'] > 0]

    for j in moving.index:
        start_FID = moving.loc[j, 'start_FID']
        end_FID = moving.loc[j, 'end_FID']

        index = _od_to_index.get((start_FID, end_FID))
        if index is None:
            print('OD pair not found')
            continue

        # Get crisis and baseline values
        n_crisis = moving.loc[j, 'n_crisis']
        n_baseline = _baseline_table.loc[index, wday_col]

        if pd.notna(n_crisis) and pd.notna(n_baseline):
            df_mov_evo_dist.loc[index, date_str] = n_crisis

            if raw:
                df_mov_evo_dist_baseline_from_mov.loc[index, date_str] = n_baseline
        elif not raw:
            # No usable absolute count: rebuild the crisis value from the
            # baseline and the reported percent change.
            percent_change = moving.loc[j, 'percent_change']
            df_mov_evo_dist.loc[index, date_str] = (
                n_baseline * percent_change / 100 + n_baseline
            )

        if not raw:
            df_mov_evo_dist_baseline_from_baseline.loc[index, date_str] = n_baseline

# Save results to appropriate files based on raw flag
output_dir = os.path.join(wd, 'data', 'outputs', country_short, 'evo')
os.makedirs(output_dir, exist_ok=True)  # avoid losing a slow run to a missing folder

if raw:
    df_mov_evo_dist.to_csv(os.path.join(output_dir, 'mov_evo_dist_raw.csv'), index=False)
    df_mov_evo_dist_baseline_from_mov.to_csv(os.path.join(output_dir, 'mov_evo_baseline_dist_raw.csv'), index=False)
else:
    df_mov_evo_dist.to_csv(os.path.join(output_dir, 'mov_evo_dist_sample.csv'), index=False)
    df_mov_evo_dist_baseline_from_baseline.to_csv(os.path.join(output_dir, 'mov_evo_baseline_dist_sample.csv'), index=False)


# Load crisis data generated above for further adjustments

In [None]:
# Folder with the evolution files and the file-name suffix matching `raw`
_evo_dir = f"{wd}/data/outputs/{country_short}/evo"
_evo_suffix = "raw" if raw else "sample"

# `errors="ignore"`: the movement files written by the cell above use
# index=False and so have no "Unnamed: 0" column; an unconditional drop
# raises KeyError on them. Files that do carry the column still lose it.
df_pop_evo = pd.read_csv(
    f"{_evo_dir}/pop_evo_movcell.csv"
).drop(columns="Unnamed: 0", errors="ignore")

# Crisis-period movements per OD pair and day
df_mov_evo = pd.read_csv(
    f"{_evo_dir}/mov_evo_dist_{_evo_suffix}.csv"
).drop(columns="Unnamed: 0", errors="ignore")

# Matching baseline movements per OD pair and day
df_mov_evo_baseline = pd.read_csv(
    f"{_evo_dir}/mov_evo_baseline_dist_{_evo_suffix}.csv"
).drop(columns="Unnamed: 0", errors="ignore")

# Keep only the dates present in BOTH tables. The first column of
# df_pop_evo and the first two of df_mov_evo are skipped as non-date columns.
column_to_drop = [
    column for column in df_pop_evo.columns[1:]
    if column not in df_mov_evo.columns[2:]
]
df_pop_evo = df_pop_evo.drop(column_to_drop, axis=1)

column_to_drop = [
    column for column in df_mov_evo.columns[2:]
    if column not in df_pop_evo.columns[1:]
]
df_mov_evo = df_mov_evo.drop(column_to_drop, axis=1)

# Example of evolution of movements and movements/FBuser

In [None]:
# Pick one OD pair (by row label) as a worked example
row = 1750
origin = df_mov_evo.loc[row, "O"]

# Daily movement counts of that pair: every column after the first two
date_columns_example = df_mov_evo.columns[2:]
evo_movs = df_mov_evo.loc[row, date_columns_example]

# Plot the series against the day position
day_positions = np.arange(len(evo_movs))
plt.plot(day_positions, evo_movs)
plt.show()

In [None]:
# Population rows of the example origin tile, renumbered from 0
origin_mask = df_pop_evo["FID"] == origin
evo_pops = df_pop_evo[origin_mask].reset_index(drop=True)

# Daily population of the first matching row: every column after the first
evo_pops = evo_pops.loc[0, evo_pops.columns[1:]]

# Plot the series against the day position
pop_positions = np.arange(len(evo_pops))
plt.plot(pop_positions, evo_pops)
plt.show()

In [None]:
# Movements per unit of population for the example tile, day by day.
# Both series are labelled by date strings, so positions are read with
# `.iloc`: plain `series[i]` with an integer key on a non-integer index is a
# deprecated positional fallback in pandas.
evo_movs_per_pop = [
    evo_movs.iloc[i] / evo_pops.iloc[i] if pd.notna(evo_pops.iloc[i]) else np.nan
    for i in range(len(evo_pops))
]

fig, ax = plt.subplots()
ax.plot(np.arange(len(evo_movs_per_pop)), evo_movs_per_pop)
plt.show()

# Adjust FB movements by the ratio of the median daily total population (baseline) to each day's total population

In [None]:
df_mov_evo_adjust = df_mov_evo.copy()

# Drop movement columns not found in population evolution
_absent = [
    column for column in df_mov_evo_adjust.columns[2:]
    if column not in df_pop_evo.columns
]
df_mov_evo_adjust = df_mov_evo_adjust.drop(_absent, axis=1)

index_to_drop = []

# Compute median total population across all time points
median = np.median([
    np.sum(df_pop_evo[column])
    for column in df_pop_evo.columns[1:]
])

# Total population per date, computed ONCE. The previous code re-summed the
# whole column for every (row, date) cell inside the loops below.
_date_columns = list(df_mov_evo_adjust.columns[2:])
_pop_totals = {column: np.sum(df_pop_evo[column]) for column in _date_columns}

_n_rows = len(df_mov_evo_adjust)
for i in range(_n_rows):
    # Progress (percentage of rows processed) every 1000 rows
    if i % 1000 == 0:
        print(i / _n_rows * 100)

    origin = df_mov_evo_adjust.loc[i, "O"]

    # Rows whose origin is not a row label of the baseline population are
    # removed after the loop.
    if origin not in baseline_pop_imput.index:
        index_to_drop.append(i)
        continue

    for column in _date_columns:
        value = df_mov_evo_adjust.loc[i, column]
        if pd.isna(value):
            continue  # nothing to adjust

        # NOTE(review): `df_pop_evo.loc[origin, ...]` selects by ROW LABEL,
        # while the plotting cell selects the tile through the "FID" column -
        # confirm that row labels and FIDs coincide.
        if pd.notna(median) and pd.notna(df_pop_evo.loc[origin, column]):
            # Rescale by median daily total / that day's total population
            df_mov_evo_adjust.loc[i, column] = value * median / _pop_totals[column]
        else:
            df_mov_evo_adjust.loc[i, column] = np.nan

df_mov_evo_adjust = df_mov_evo_adjust.drop(index_to_drop).reset_index(drop=True)

df_mov_evo_adjust.to_csv(
    f"{wd}/data/outputs/{country_short}/evo/mov_evo_dist_adjust_sample.csv"
)

In [None]:
# NOTE(review): this cell recomputes the same adjustment as the previous cell
# and writes the same output file; it looks like a cleaned-up duplicate.
df_mov_evo_adjust = df_mov_evo.copy()

# Drop movement columns that are not present in population evolution
df_mov_evo_adjust.drop(
    [c for c in df_mov_evo_adjust.columns[2:] if c not in df_pop_evo.columns],
    axis=1,
    inplace=True,
)

index_to_drop = []

# Compute median total population over all dates
median = np.median([
    df_pop_evo[column].sum()
    for column in df_pop_evo.columns[1:]
])

# Per-date population totals, hoisted out of the row/column loops where they
# were previously recomputed for every single cell.
adjust_columns = list(df_mov_evo_adjust.columns[2:])
totals_by_date = {column: df_pop_evo[column].sum() for column in adjust_columns}

total_rows = len(df_mov_evo_adjust)
for i in range(total_rows):
    # Progress (percentage of rows processed) every 1000 rows
    if i % 1000 == 0:
        print(i / total_rows * 100)

    origin = df_mov_evo_adjust.loc[i, "O"]

    if origin in baseline_pop_imput.index:
        for column in adjust_columns:
            value = df_mov_evo_adjust.loc[i, column]
            # NOTE(review): row-label lookup; confirm labels equal tile FIDs.
            pop_value = df_pop_evo.loc[origin, column]

            if pd.notna(value) and pd.notna(median) and pd.notna(pop_value):
                # Rescale by median daily total / that day's total population
                df_mov_evo_adjust.loc[i, column] = (
                    value * median / totals_by_date[column]
                )
            else:
                df_mov_evo_adjust.loc[i, column] = np.nan
    else:
        # Origin is not a row label of the baseline population: drop later
        index_to_drop.append(i)

df_mov_evo_adjust.drop(index_to_drop, inplace=True)
df_mov_evo_adjust.reset_index(drop=True, inplace=True)

output_path = f"{wd}/data/outputs/{country_short}/evo/mov_evo_dist_adjust_sample.csv"
df_mov_evo_adjust.to_csv(output_path)

# Adjust FB movs evo baseline

In [None]:
# Adjust the BASELINE movement evolution. The previous version of this cell
# was a copy of the crisis adjustment: it started again from `df_mov_evo` and
# overwrote mov_evo_dist_adjust_sample.csv, so `df_mov_evo_baseline` was never
# adjusted. The result now lives in its own variable and file, leaving
# `df_mov_evo_adjust` and its CSV untouched.
# NOTE(review): the output file name below is a proposal - confirm it matches
# what downstream notebooks expect.
df_mov_evo_baseline_adjust = df_mov_evo_baseline.copy()

# Drop movement columns that are not present in population evolution
df_mov_evo_baseline_adjust.drop(
    [
        c for c in df_mov_evo_baseline_adjust.columns[2:]
        if c not in df_pop_evo.columns
    ],
    axis=1,
    inplace=True,
)

index_to_drop = []

# Compute median total population over all dates
median = np.median([
    df_pop_evo[column].sum()
    for column in df_pop_evo.columns[1:]
])

# Per-date population totals, computed once instead of once per cell
baseline_columns = list(df_mov_evo_baseline_adjust.columns[2:])
baseline_totals = {column: df_pop_evo[column].sum() for column in baseline_columns}

baseline_rows = len(df_mov_evo_baseline_adjust)
for i in range(baseline_rows):
    # Progress (percentage of rows processed) every 1000 rows
    if i % 1000 == 0:
        print(i / baseline_rows * 100)

    origin = df_mov_evo_baseline_adjust.loc[i, "O"]

    if origin in baseline_pop_imput.index:
        for column in baseline_columns:
            value = df_mov_evo_baseline_adjust.loc[i, column]
            # NOTE(review): row-label lookup; confirm labels equal tile FIDs.
            pop_value = df_pop_evo.loc[origin, column]

            if pd.notna(value) and pd.notna(median) and pd.notna(pop_value):
                # Rescale by median daily total / that day's total population
                df_mov_evo_baseline_adjust.loc[i, column] = (
                    value * median / baseline_totals[column]
                )
            else:
                df_mov_evo_baseline_adjust.loc[i, column] = np.nan
    else:
        # Origin is not a row label of the baseline population: drop later
        index_to_drop.append(i)

df_mov_evo_baseline_adjust.drop(index_to_drop, inplace=True)
df_mov_evo_baseline_adjust.reset_index(drop=True, inplace=True)

output_path = (
    f"{wd}/data/outputs/{country_short}/evo/mov_evo_baseline_dist_adjust_sample.csv"
)
df_mov_evo_baseline_adjust.to_csv(output_path)
  