# Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
import geopandas as gpd
import jenkspy
from datetime import datetime, timedelta
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings
warnings.filterwarnings("ignore")

# Define country and parameters

In [None]:
# Select target country
country = 'Colombia'

# ISO 3166-1 codes for every supported country: (alpha-3, alpha-2).
# To include another country, add an entry here (Mexico is kept as an
# example and is disabled by default).
country_iso_codes = {
    'Argentina': ('ARG', 'AR'),
    'Chile': ('CHL', 'CL'),
    'Colombia': ('COL', 'CO'),
    # 'Mexico': ('MEX', 'MX'),
}

# Fail immediately on an unsupported country instead of hitting a NameError
# later, when `country_short` is first used to build a file path.
if country not in country_iso_codes:
    raise ValueError(
        "Unsupported country '" + str(country) + "'; expected one of: "
        + ', '.join(sorted(country_iso_codes))
    )

# country_short: ISO 3-letter code; country_code: ISO 2-letter code
country_short, country_code = country_iso_codes[country]

# Set working directory

In [None]:
# Root folder of the project's local files; every input/output path used
# in this notebook is built by appending to it.
wd = (
    '/Users/carmen/Library/CloudStorage/OneDrive-TheUniversityofLiverpool'
    + '/research/recast/latin-mobility-covid-local-files'
)

# Define some functions to generate time series of inflows, outflows

In [None]:
def compute_flows(df_mov_evo, flow_type):
    """
    Compute either outflows or inflows aggregated by origin or destination.

    The aggregation is done with a single vectorised group-by instead of a
    Python loop over every ID and every time column.

    Parameters:
    - df_mov_evo: DataFrame whose first two columns are the 'O' and 'D' IDs and
      whose remaining columns hold the (numeric) time series values.
    - flow_type: str, either 'outflows' (sum by origin 'O') or 'inflows'
      (sum by destination 'D').

    Returns:
    - df_flows: DataFrame with one row per unique ID (sorted), the ID column
      ('O' or 'D') first, followed by one float column per time column.
      Missing values are ignored in each sum; a cell is NaN when no value was
      observed for that ID in that column. Rows whose ID is missing are
      ignored.

    Raises:
    - ValueError: if flow_type is neither 'outflows' nor 'inflows'.
    """
    id_column_by_flow_type = {'outflows': 'O', 'inflows': 'D'}
    if flow_type not in id_column_by_flow_type:
        raise ValueError("flow_type must be 'outflows' or 'inflows'")
    id_column = id_column_by_flow_type[flow_type]

    # Everything after the two ID columns is treated as time series data
    time_columns = list(df_mov_evo.columns[2:])
    if not time_columns:
        return pd.DataFrame({id_column: np.unique(df_mov_evo[id_column])})

    # min_count=1 makes an all-missing group sum to NaN rather than 0
    df_flows = (
        df_mov_evo.groupby(id_column, sort=True)[time_columns]
        .sum(min_count=1)
        .astype(float)
        .reset_index()
    )

    return df_flows

In [None]:
def _fill_zeros_and_nans(series, n_neighbors=15):
    """
    Return a copy of `series` in which zeros, NaNs and infinities are replaced
    by the mean of the `n_neighbors` valid observations closest in position.

    Ties in distance are resolved in favour of the earlier observation. The
    input series is not modified. If the series has no valid observation at
    all, the result is entirely NaN.
    """
    values = series.to_numpy(dtype=float, copy=True)
    values[(values == 0) | ~np.isfinite(values)] = np.nan

    missing_pos = np.flatnonzero(np.isnan(values))
    valid_pos = np.flatnonzero(~np.isnan(values))
    filled = values.copy()

    if valid_pos.size:
        valid_vals = values[valid_pos]
        for pos in missing_pos:
            # A stable sort keeps the earlier observation when distances tie
            nearest = np.argsort(np.abs(valid_pos - pos), kind='stable')[:n_neighbors]
            filled[pos] = valid_vals[nearest].mean()

    return pd.Series(filled, index=series.index, name=series.name)


def compute_df_ts(df_flows, df_flows_baseline, initial_col):
    """
    Compute time series of movement sums and baseline sums from flow DataFrames,
    fill missing and zero values using the nearest observations, and calculate
    rolling averages and percentage changes.

    Parameters:
    - df_flows: DataFrame with flow data over time columns starting at initial_col
    - df_flows_baseline: Baseline DataFrame with the same time columns as
      df_flows and rows in the same order (rows are paired by position)
    - initial_col: int, index of the first time column in the DataFrames

    Returns:
    - df_ts: DataFrame with one row per time column and the columns
        'date'              time column label
        'movements'         sum of flows over rows where both the flow and the
                            baseline are observed (NaN if there is no such row)
        'baseline'          sum of the baseline over the same rows
        'movements_fill'    'movements' with zeros/NaNs/infinities replaced by
                            the mean of the 15 nearest valid observations
        'rolling'           15-step rolling mean of 'movements_fill'
        'baseline_fill', 'rolling_baseline'   same, for the baseline
        'perchange'         100 * (movements_fill - baseline_fill) / baseline_fill
        'rolling_perchange' 30-step rolling mean of 'perchange'
    """
    time_columns = df_flows.columns[initial_col:]

    # Rows of the two frames are paired by position
    mov = df_flows[time_columns].to_numpy(dtype=float)
    base = df_flows_baseline[time_columns].to_numpy(dtype=float)[:len(mov)]

    # Only cells observed in BOTH frames contribute, so that the movement and
    # baseline totals always refer to the same set of rows
    both_valid = ~np.isnan(mov) & ~np.isnan(base)
    has_data = both_valid.any(axis=0)
    evo_movs = np.where(has_data, np.where(both_valid, mov, 0.0).sum(axis=0), np.nan)
    evo_movs_baseline = np.where(has_data, np.where(both_valid, base, 0.0).sum(axis=0), np.nan)

    # Create DataFrame with date and aggregated sums
    df_ts = pd.DataFrame({
        'date': time_columns,
        'movements': evo_movs,
        'baseline': evo_movs_baseline
    })

    # Fill gaps, then smooth with a trailing rolling mean
    df_ts['movements_fill'] = _fill_zeros_and_nans(df_ts['movements'])
    df_ts['rolling'] = df_ts['movements_fill'].rolling(window=15, min_periods=1).mean()

    df_ts['baseline_fill'] = _fill_zeros_and_nans(df_ts['baseline'])
    df_ts['rolling_baseline'] = df_ts['baseline_fill'].rolling(window=15, min_periods=1).mean()

    # Percentage change of movements relative to the baseline
    df_ts['perchange'] = (df_ts['movements_fill'] - df_ts['baseline_fill']) / df_ts['baseline_fill'] * 100
    df_ts['rolling_perchange'] = df_ts['perchange'].rolling(window=30, min_periods=1).mean()

    return df_ts

In [None]:
def compute_df_ts_weekly(df_ts):
    """
    Collapse a daily time series into consecutive 7-row blocks ("weeks").

    Parameters:
    - df_ts: daily DataFrame (default 0..n-1 index) holding at least the
             columns 'date', 'movements', 'baseline', 'movements_fill' and
             'baseline_fill'

    Returns:
    - df_ts_weekly: one row per complete 7-row block with the block number
      ('week_no'), the date of its first row ('week_start'), the block sums of
      the four value columns, and 'perchange', the percentage difference
      between the summed 'movements_fill' and 'baseline_fill'. Trailing rows
      that do not fill a whole block are dropped.
    """
    n_weeks = len(df_ts) // 7

    # Label of the first row of every complete block
    block_starts = [7 * week for week in range(n_weeks)]

    weekly = pd.DataFrame({'week_no': range(n_weeks)})
    weekly['week_start'] = [df_ts.loc[start, 'date'] for start in block_starts]

    # .loc slicing is inclusive, so start..start+6 spans exactly seven rows
    for name in ('movements', 'baseline', 'movements_fill', 'baseline_fill'):
        weekly[name] = [np.sum(df_ts.loc[start:start + 6, name]) for start in block_starts]

    # Weekly percentage change of the filled movements relative to the baseline
    filled_pairs = zip(weekly['movements_fill'].to_numpy(), weekly['baseline_fill'].to_numpy())
    weekly['perchange'] = [(mov - base) / base * 100 for mov, base in filled_pairs]

    return weekly

# Read some additional data

In [None]:
# Read the OWID COVID-19 table that carries the stringency index
stringency_csv = wd + '/data/inputs/covid-stringency/owid-covid-data.csv'
df_stringency = pd.read_csv(stringency_csv)

# Keep only the selected country's rows. The 'location' value must equal the
# country name with its first letter upper-cased and the rest lower-cased.
is_selected_country = df_stringency['location'] == str(country).capitalize()
df_stringency = df_stringency[is_selected_country].reset_index(drop=True)

In [None]:
# GeoPackage with the baseline population grid and its exogenous variables
baseline_pop_gpkg = (
    wd + '/data/outputs/' + country_short
    + '/grids-with-data/movcell-baseline-imput-pop-with-exo-var'
    + '/movcell-baseline-imput-pop-with-exo-var.gpkg'
)
baseline_pop_imput = gpd.read_file(baseline_pop_gpkg)

# Set filename suffixes based on processing options:
- `dist`: whether to include movements with distance >=0 (adds '_dist' if True) <br>
- `raw`: whether using raw data (adds '_raw' if True) <br>
- `adjust`: whether data is adjusted (adds '_adjust' if True) <br>

In [None]:
# Processing options (booleans)
dist, raw, adjust = True, False, True

# Replace each flag by its filename suffix: the suffix itself when the flag is
# set, an empty string otherwise. From here on the three names hold strings.
dist, raw, adjust = (
    suffix if enabled else ''
    for enabled, suffix in ((dist, '_dist'), (raw, '_raw'), (adjust, '_adjust'))
)

# Read movement data as time series for each tile

In [None]:
# Folder holding the per-pair movement time series for the selected country
evo_dir = wd + '/data/outputs/' + country_short + '/evo/'

# Observed movements; the leading 'Unnamed: 0' column is the index that was
# written out with the CSV and is not needed
df_mov_evo = pd.read_csv(evo_dir + 'mov_evo' + dist + raw + adjust + '.csv')
df_mov_evo = df_mov_evo.drop(columns='Unnamed: 0')

# Baseline movements (note: the baseline filename carries no 'adjust' suffix)
df_mov_evo_baseline = pd.read_csv(evo_dir + 'mov_evo_baseline' + dist + raw + '.csv')
df_mov_evo_baseline = df_mov_evo_baseline.drop(columns='Unnamed: 0')

In [None]:
# ------------------------------------------------------------
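# Only uncomment and run this block when needed — it may take time.
# NOTE: the files below are written with a '_sample' suffix, whereas the
# loading cell further down reads the same names without that suffix.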
# ------------------------------------------------------------

# Compute outflows and inflows from mobility evolution data
# df_outflows = compute_flows(df_mov_evo, 'outflows')
# df_inflows = compute_flows(df_mov_evo, 'inflows')

# Compute outflows and inflows from baseline mobility data
# df_outflows_baseline = compute_flows(df_mov_evo_baseline, 'outflows')
# df_inflows_baseline = compute_flows(df_mov_evo_baseline, 'inflows')

# Save the computed dataframes to CSV files
# df_outflows.to_csv(
#     wd + '/data/outputs/' + country_short + '/mov-analysis/outflows' +
#     dist + raw + adjust + '_sample.csv'
# )
# df_inflows.to_csv(
#     wd + '/data/outputs/' + country_short + '/mov-analysis/inflows' +
#     dist + raw + adjust + '_sample.csv'
# )
# df_outflows_baseline.to_csv(
#     wd + '/data/outputs/' + country_short + '/mov-analysis/outflows_baseline' +
#     dist + raw + '_sample.csv'
# )
# df_inflows_baseline.to_csv(
#     wd + '/data/outputs/' + country_short + '/mov-analysis/inflows_baseline' +
#     dist + raw + '_sample.csv'
# )

# Read movement data as time series for inflows or outflows

In [None]:
 # Set the type of flow to analyze ('movs', 'inflows', or 'outflows')
flows = 'outflows'  # change to 'movs', 'inflows', or 'outflows' as needed

# Load appropriate flow data depending on the selected flow type
if flows in ('inflows', 'outflows'):
    # Pre-computed aggregated flows and their baseline, stored as CSV; the
    # leading 'Unnamed: 0' column is the index written out with the file
    flows_dir = wd + '/data/outputs/' + country_short + '/mov-analysis/'

    df_flows = pd.read_csv(
        flows_dir + flows + dist + raw + adjust + '.csv'
    ).drop('Unnamed: 0', axis=1)

    df_flows_baseline = pd.read_csv(
        flows_dir + flows + '_baseline' + dist + raw + '.csv'
    ).drop('Unnamed: 0', axis=1)

    # One ID column ('O' or 'D') precedes the time columns
    initial_col = 1

elif flows == 'movs':
    # Use the origin-destination movement data directly
    df_flows = df_mov_evo
    df_flows_baseline = df_mov_evo_baseline

    # Two ID columns ('O' and 'D') precede the time columns
    initial_col = 2

else:
    # A misspelt option used to fall through to the 'movs' branch silently
    raise ValueError("flows must be 'movs', 'inflows' or 'outflows'")


# Compute total inflows or outflows, as a sum, handling missing values

And visualise

In [None]:
# Compute time-series data from flows and baseline
df_ts = compute_df_ts(df_flows, df_flows_baseline, initial_col)

# -----------------------------
# Plot 1: Raw Movements Over Time
# -----------------------------

fig, ax = plt.subplots()

# Uncomment below to set custom y-axis range
# ax.set_ylim([-2000, 45000])

# Gap-filled daily series, its rolling mean and the baseline rolling mean
days = np.arange(len(df_ts))
ax.plot(days, df_ts['movements_fill'], color='steelblue', lw=1, alpha=0.7)
ax.plot(days, df_ts['rolling'], color='darkred', lw=1.5)
ax.plot(days, df_ts['rolling_baseline'], color='darkblue', lw=1.5)

# Stringency index for every plotted date (first match per date). A date with
# no row or no value reuses the previous known value, or 0 if none exists yet.
# (The former per-date `.loc[0, ...]` lookup raised KeyError on a missing
# date, which the `except IndexError` clause never caught.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .fillna(0)
    .to_numpy(dtype=float)
)

# Add background shading by stringency level: one vertical band per day
if len(stringencies):
    s_min = stringencies.min()
    s_max = stringencies.max()
    s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero
    band_top = df_ts['movements_fill'].max()

    for k, s in enumerate(stringencies):
        rgba = matplotlib.cm.gist_heat(1 - (s - s_min) / s_scale)
        ax.fill_between(
            [k - 0.5, k + 0.5],
            0,
            band_top,
            color=rgba,
            alpha=0.6,
            edgecolor='None',
            linewidth=0,
            zorder=0
        )

plt.show()

# -----------------------------
# Plot 2: Weekly % Change
# -----------------------------

fig, ax = plt.subplots()

# Remove tick marks but style them
ax.tick_params(axis='both', which='both', width=0, length=0, labelsize=20, pad=9)

# Compute weekly aggregation and a 4-week rolling mean of the % change
df_ts_weekly = compute_df_ts_weekly(df_ts)
df_ts_weekly['rolling_perchange'] = df_ts_weekly['perchange'].rolling(window=4).mean()

n_weeks = len(df_ts_weekly)
n_days = len(df_ts)

# Plot rolling % change (smoothed); weeks are placed on the daily x-axis
ax.plot(
    np.arange(n_weeks) * 7,
    df_ts_weekly['rolling_perchange'],
    color='black',
    lw=2,
    zorder=3
)

# Add baseline line at 0
ax.plot(
    np.arange(n_weeks * 7),
    np.zeros(n_weeks * 7),
    linestyle=':',
    color='k'
)

# Stringency index for every plotted date (first match per date). A date with
# no row or no value reuses the previous known value, or 0 if none exists yet.
# (The former per-date `.loc[0, ...]` lookup raised KeyError on a missing
# date, which the `except IndexError` clause never caught.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .fillna(0)
    .to_numpy(dtype=float)
)

# Y-axis limits: at least [-100, 101], widened to fit the smoothed series
ymin = int(min(-100, df_ts_weekly['rolling_perchange'].min()))
ymax = int(max(101, df_ts_weekly['rolling_perchange'].max() + 1))

# Add background shading: one vertical band per day, spanning the whole
# y-range so that it also covers values beyond +/-100
if len(stringencies):
    s_min = stringencies.min()
    s_max = stringencies.max()
    s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero

    for k, s in enumerate(stringencies):
        rgba = matplotlib.cm.gist_heat(1 - (s - s_min) / s_scale)
        ax.fill_between(
            [k - 0.5, k + 0.5],
            ymin,
            ymax,
            color=rgba,
            alpha=0.6,
            edgecolor='None',
            linewidth=0,
            zorder=0
        )

# Format x-axis: one tick every 183 days with hard-coded labels
# (the number of labels must match the number of ticks)
xticks = list(range(0, n_days, 183))
xtick_labels = ['Apr 2020', 'Oct 2020', 'Apr 2021', 'Oct 2021', 'Apr 2022']
ax.set_xticks(xticks, xtick_labels)
ax.tick_params(axis='x', bottom=True, labelsize=10, pad=6, rotation=90)

# Format y-axis: a tick and a thin horizontal guide line at every multiple of 25
yticks = [value for value in range(ymin, ymax) if value % 25 == 0]
ax.set_yticks(yticks)
for y in yticks:
    ax.plot([0, n_days], [y, y], color='gray', lw=0.7, zorder=0)
ax.tick_params(axis='y', labelsize=10, pad=6)

# Save figure (uncomment when ready to save)
# plt.savefig(
#     wd + '/plots/evolution/' + flows + '/total/' + country_short +
#     '/evo' + dist + raw + adjust + '.pdf',
#     bbox_inches='tight'
# )

plt.show()

# By density class

In [None]:
# Number of classes for population density classification
n_class_density = 5

# Compute natural breaks (Jenks) for the 'density' column, ignoring NaNs
breaks_density = jenkspy.jenks_breaks(
    baseline_pop_imput.dropna(subset=['density'])['density'],
    n_classes=n_class_density
)

# pd.cut bins are open on the left, so lower the first break slightly to
# make sure the minimum value falls inside the first class
breaks_density[0] -= 10**(-10)

# Classify 'density' values into discrete bins labelled 0..n_class_density-1
baseline_pop_imput['class_density'] = pd.cut(
    baseline_pop_imput['density'],
    bins=breaks_density,
    labels=list(range(n_class_density))
)

# Ensure resulting class labels are treated as numeric
baseline_pop_imput['class_density'] = pd.to_numeric(baseline_pop_imput['class_density'])

# Count the classes actually present (excluding NaNs)
class_density = np.unique(baseline_pop_imput['class_density'])
n_class_density = len(class_density[~np.isnan(class_density)])

# One row per density class, one column per week
df_ts_weekly_class_density = np.zeros((n_class_density, len(df_ts_weekly)))

# Iterate over each density class
for i in range(n_class_density):

    # Index labels of the grid cells belonging to the current density class;
    # they are matched against the 'O'/'D' IDs of the flow data below
    indexes = set(baseline_pop_imput[baseline_pop_imput['class_density'] == i].index)

    # Create a mask to filter flow data depending on flow direction
    if flows == 'movs':
        mask = df_flows['O'].isin(indexes) | df_flows['D'].isin(indexes)
    elif flows == 'outflows':
        mask = df_flows['O'].isin(indexes)
    elif flows == 'inflows':
        mask = df_flows['D'].isin(indexes)

    # Filter both current and baseline flow datasets (same rows in both)
    df_flows_class_density = df_flows[mask].reset_index(drop=True)
    df_flows_class_density_baseline = df_flows_baseline[mask].reset_index(drop=True)

    # Compute time-series for this class
    df_ts_class_density = compute_df_ts(
        df_flows_class_density,
        df_flows_class_density_baseline,
        initial_col
    )

    # Store the weekly percent change for this class. This step was missing,
    # which left the array (and therefore every density plot) at zero.
    df_ts_weekly_class_density[i, :] = compute_df_ts_weekly(df_ts_class_density)['perchange']

In [None]:
# -----------------------------
# Plot option 1: weekly % change by density class
# -----------------------------

fig, ax = plt.subplots()
ax.tick_params(axis='both', which='both', width=0, length=0, labelsize=20, pad=9)

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases removed
viridis = plt.get_cmap('viridis')
norm = plt.Normalize(0, n_class_density - 1)

n_weeks = df_ts_weekly_class_density.shape[1]
n_days = len(df_ts)

# One line per density class: 4-week rolling mean of the weekly % change
for i in range(n_class_density):
    color = viridis(norm(i))
    df_class = pd.DataFrame({'perchange_class': df_ts_weekly_class_density[i, :]})
    df_class['rolling_perchange'] = df_class['perchange_class'].rolling(window=4).mean()
    ax.plot(np.arange(n_weeks) * 7, df_class['rolling_perchange'], color=color, lw=2, zorder=3)

# Zero reference line
ax.plot(np.arange(n_weeks * 7), np.zeros(n_weeks * 7), linestyle=':', color='k')

# Stringency shading: stringency index per plotted date (first match per
# date). A date with no row or no value reuses the previous known value;
# dates before the first known value stay NaN and are left unshaded.
# (The former per-date `.loc[0, ...]` lookup raised KeyError on a missing
# date, which the `except IndexError` clause never caught.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .to_numpy(dtype=float)
)
known = stringencies[~np.isnan(stringencies)]
s_min = known.min() if known.size else 0.0
s_max = known.max() if known.size else 0.0
s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero

ymin = int(min(-100, np.nanmin(df_ts_weekly_class_density)))
ymax = int(max(101, np.nanmax(df_ts_weekly_class_density)) + 1)

for k, s in enumerate(stringencies):
    if np.isnan(s):
        continue
    rgba = matplotlib.cm.gist_heat(1 - (s - s_min) / s_scale)
    ax.fill_between([k - 0.5, k + 0.5], ymin, ymax, color=rgba, alpha=0.6, edgecolor='none', linewidth=0, zorder=0)

# Axis ticks: one x tick every 183 days with hard-coded labels
# (the number of labels must match the number of ticks)
xticks = list(range(0, n_days, 183))
xtick_labels = ['Apr 2020', 'Oct 2020', 'Apr 2021', 'Oct 2021', 'Apr 2022']
ax.set_xticks(xticks, xtick_labels)
ax.tick_params(axis='x', bottom=True, labelsize=10, pad=6, rotation=90)

# A y tick and a thin horizontal guide line at every multiple of 25
yticks = [value for value in range(ymin, ymax) if value % 25 == 0]
ax.set_yticks(yticks)
for y in yticks:
    ax.plot([0, n_days], [y, y], color='gray', lw=0.7, zorder=0)
ax.tick_params(axis='y', labelsize=10, pad=6)

# plt.savefig(wd + '/plots/evolution/' + flows + '/by-density/' + country_short + '/evo' + dist + raw + adjust + '.pdf', bbox_inches='tight')
plt.show()

# Plotting weekly rolling % change in flows by population density class

And trend decomposition

In [None]:
# -----------------------------
# Plot option 2: weekly % change by density class
# -----------------------------

fig, axs = plt.subplots(
    1,
    n_class_density,
    sharey=True,
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
    figsize=(40, 7.5),
)
# With a single class plt.subplots returns a bare Axes rather than an array
axs = np.atleast_1d(axs)

# One row per class (filled below), one column per week start
df_trend = pd.DataFrame(columns=list(df_ts_weekly['week_start']))

# ---- Quantities shared by every panel (computed once) ----

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases removed
viridis = plt.get_cmap('viridis')
norm = plt.Normalize(0, n_class_density - 1)

n_weeks = df_ts_weekly_class_density.shape[1]
n_days = len(df_ts)
week_dates = pd.DatetimeIndex(pd.to_datetime(df_ts_weekly['week_start']))

# Stringency index per plotted date (first match per date). A date with no row
# or no value reuses the previous known value; dates before the first known
# value stay NaN and are left unshaded. (The former per-date `.loc[0, ...]`
# lookup raised KeyError on a missing date.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .to_numpy(dtype=float)
)
known = stringencies[~np.isnan(stringencies)]
s_min = known.min() if known.size else 0.0
s_max = known.max() if known.size else 0.0
s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero

# Y-range: at least [-100, 101]. The upper limit follows the largest value
# BELOW the overall maximum, so a single extreme peak does not stretch the axis.
observed = df_ts_weekly_class_density[~np.isnan(df_ts_weekly_class_density)]
lowest = observed.min() if observed.size else -100
ymin = int(min(-100, lowest)) if np.isfinite(lowest) else -100
below_peak = observed[observed < observed.max()] if observed.size else observed
ymax = 101
if below_peak.size and np.isfinite(below_peak.max()):
    ymax = int(max(101, below_peak.max() + 1))

xticks = list(range(0, n_days, 183))
xticks_labels = ['Apr 2020', 'Oct 2020', 'Apr 2021', 'Oct 2021', 'Apr 2022']
yticks = [value for value in range(ymin, ymax) if value % 50 == 0]

# ---- One panel per density class ----
for i in range(n_class_density):
    axs[i].tick_params(axis='both', which='both', width=0, length=0, color='k', labelsize=20, pad=9)

    color = viridis(norm(i))

    # 4-week rolling mean of the weekly % change, placed on the daily x-axis
    df_ts_weekly_class_density_plot = pd.DataFrame({'perchange_class': df_ts_weekly_class_density[i, :]})
    df_ts_weekly_class_density_plot['rolling_perchange'] = (
        df_ts_weekly_class_density_plot['perchange_class'].rolling(window=4).mean()
    )
    axs[i].plot(
        np.arange(n_weeks) * 7,
        df_ts_weekly_class_density_plot['rolling_perchange'],
        color=color,
        lw=8,
        zorder=6,
    )

    # Zero reference line
    axs[i].plot(
        np.arange(n_weeks * 7),
        np.zeros(n_weeks * 7),
        lw=4,
        linestyle=':',
        color='k',
        zorder=5,
    )

    # Trend component of the weekly % change for this class
    series = pd.DataFrame({'value': df_ts_weekly_class_density[i, :]}, index=week_dates)
    try:
        result = seasonal_decompose(series, model='additive', extrapolate_trend='freq')
        # Assign by position so the row does not depend on aligning the
        # datetime index of the trend with the column labels of df_trend
        df_trend.loc[i] = np.asarray(result.trend).ravel()
        # axs[i,j].plot(np.arange(len(df_ts_weekly['week_start']))*7, result.trend, color='white', lw=2, linestyle='-', zorder=4)
    except Exception as exc:
        # Best effort: a class whose series cannot be decomposed is skipped
        print('not possible for this ', i, exc)

    # Background shading by stringency level: one vertical band per day
    for day, s in enumerate(stringencies):
        if np.isnan(s):
            continue
        rgba = matplotlib.cm.gray(1 - (s - s_min) / s_scale)
        x = [day - 0.50, day + 0.50]
        axs[i].fill_between(x, ymin, ymax, color=rgba, alpha=0.4, edgecolor='None', linewidth=0, zorder=0)

    # X ticks every 183 days (the number of labels must match the ticks)
    axs[i].set_xticks(xticks, xticks_labels)
    axs[i].tick_params(axis='x', bottom=True, labelsize=35, pad=12, rotation=90)

    # Y ticks and horizontal guide lines at every multiple of 50
    axs[i].set_yticks(yticks, yticks)
    for y in yticks:
        axs[i].plot([0, n_days], [y, y], color='gray', lw=1.5, zorder=0)
    axs[i].tick_params(axis='y', labelsize=40, pad=12, rotation=0)

# plt.savefig(wd + '/plots/evolution/' + flows + '/by-density/' + country_short + '/evo' + dist + raw + adjust + '_by_origin.pdf', bbox_inches = 'tight')

plt.show()


# Transforming the trend DataFrame to long format and saving it as CSV

In [None]:
# Reshape the wide trend table (one row per class, one column per week) into
# long format with one row per (class, week).
time = []
cat = []
y = []

# Iterate over the row labels actually present: a class whose decomposition
# failed has no row, so looping over range(len(df_trend)) would raise KeyError
# or mislabel the remaining classes.
for class_label in df_trend.index:
    for j, week_column in enumerate(df_trend.columns):
        time.append(j)             # time index (column position)
        cat.append(class_label)    # category (class label)
        y.append(df_trend.loc[class_label, week_column])  # trend value

df_trend_long = pd.DataFrame({'time': time, 'cat': cat, 'y': y})

df_trend_long.to_csv(
    wd + '/data/outputs/' + country_short + '/mov-analysis/by-density/data_trend_new.csv'
)

# By rdi class

In [None]:
# Requested number of RDI classes
n_class_rdi = 3

# Jenks natural breaks over the non-missing 'rdi' values
rdi_observed = baseline_pop_imput.dropna(subset=['rdi'])['rdi']
breaks_rdi = jenkspy.jenks_breaks(rdi_observed, n_classes=n_class_rdi)

# Nudge the lowest break down so the minimum value lands in the first bin
breaks_rdi[0] -= 10**(-10)

# Bin 'rdi' into classes labelled 0..n_class_rdi-1 and store them as numbers
rdi_binned = pd.cut(
    baseline_pop_imput['rdi'],
    bins=breaks_rdi,
    labels=list(range(n_class_rdi)),
)
baseline_pop_imput['class_rdi'] = rdi_binned
baseline_pop_imput['class_rdi'] = pd.to_numeric(baseline_pop_imput['class_rdi'])

# Number of classes actually present (NaN does not count)
class_rdi = np.unique(baseline_pop_imput['class_rdi'])
n_class_rdi = len(class_rdi[~np.isnan(class_rdi)])

# Weekly % change per RDI class: one row per class, one column per week
df_ts_weekly_class_rdi = np.zeros((n_class_rdi, len(df_ts_weekly)))

for i in range(n_class_rdi):
    # Index labels of the grid cells in this class; matched against the
    # 'O'/'D' IDs of the flow data
    in_class = baseline_pop_imput['class_rdi'] == i
    indexes = set(baseline_pop_imput[in_class].index)

    # Rows to keep, depending on which ID column(s) the flow data carries
    if flows == 'movs':
        mask = df_flows['O'].isin(indexes) | df_flows['D'].isin(indexes)
    elif flows in ('outflows', 'inflows'):
        id_column = 'O' if flows == 'outflows' else 'D'
        mask = df_flows[id_column].isin(indexes)

    # Same row selection for the observed flows and for the baseline
    df_flows_class_rdi = df_flows[mask].reset_index(drop=True)
    df_flows_class_rdi_baseline = df_flows_baseline[mask].reset_index(drop=True)

    # Daily series for the class, then its weekly % change
    df_ts_class_rdi = compute_df_ts(df_flows_class_rdi, df_flows_class_rdi_baseline, initial_col)
    weekly_class_rdi = compute_df_ts_weekly(df_ts_class_rdi)
    df_ts_weekly_class_rdi[i, :] = weekly_class_rdi['perchange']


In [None]:
# -----------------------------
# Plot option 1: weekly % change by RDI class
# -----------------------------

fig, ax = plt.subplots()

# Configure axis ticks style
ax.tick_params(axis='both', which='both', width=0, length=0, color='k', labelsize=20, pad=9)

# Colormap and normalization for RDI classes
# (plt.get_cmap replaces plt.cm.get_cmap, removed in newer Matplotlib releases)
viridis = plt.get_cmap('viridis_r')
norm = plt.Normalize(0, n_class_rdi - 1)

n_weeks = df_ts_weekly_class_rdi.shape[1]
n_days = len(df_ts)

# Plot the 4-week rolling mean of the weekly % change for each RDI class
for i in range(n_class_rdi):
    color = viridis(norm(i))
    df_ts_weekly_class_rdi_plot = pd.DataFrame({'perchange_class': df_ts_weekly_class_rdi[i, :]})
    df_ts_weekly_class_rdi_plot['rolling_perchange'] = df_ts_weekly_class_rdi_plot['perchange_class'].rolling(window=4).mean()
    ax.plot(np.arange(n_weeks) * 7,
            df_ts_weekly_class_rdi_plot['rolling_perchange'],
            color=color, lw=2, zorder=3)

# Plot horizontal zero line
ax.plot(np.arange(n_weeks * 7), np.zeros(n_weeks * 7), linestyle=':', color='k')

# Stringency index per plotted date (first match per date). A date with no row
# or no value reuses the previous known value; dates before the first known
# value stay NaN and are left unshaded. (The former per-date `.loc[0, ...]`
# lookup raised KeyError on a missing date.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .to_numpy(dtype=float)
)
known = stringencies[~np.isnan(stringencies)]
s_min = known.min() if known.size else 0.0
s_max = known.max() if known.size else 0.0
s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero

# Y-range: at least [-100, 101]. The upper limit follows the largest RDI value
# BELOW the overall maximum, so a single extreme peak does not stretch the
# axis. (This previously read the density array by mistake.)
observed = df_ts_weekly_class_rdi[~np.isnan(df_ts_weekly_class_rdi)]
lowest = observed.min() if observed.size else -100
ymin = int(min(-100, lowest)) if np.isfinite(lowest) else -100
below_peak = observed[observed < observed.max()] if observed.size else observed
ymax = 101
if below_peak.size and np.isfinite(below_peak.max()):
    ymax = int(max(101, below_peak.max() + 1))

# Add shaded areas according to stringency index values: one band per day
for k, s in enumerate(stringencies):
    if np.isnan(s):
        continue
    rgba = matplotlib.cm.gist_heat(1 - (s - s_min) / s_scale)
    x = [k - 0.5, k + 0.5]
    ax.fill_between(x, ymin, ymax, color=rgba, alpha=0.6, edgecolor='None', linewidth=0, zorder=0)

# Set x-axis ticks and labels: one tick every 183 days
# (the number of labels must match the number of ticks)
xticks = list(range(0, n_days, 183))
xticks_labels = ['Apr 2020', 'Oct 2020', 'Apr 2021', 'Oct 2021', 'Apr 2022']
ax.set_xticks(xticks)
ax.set_xticklabels(xticks_labels)
ax.tick_params(axis='x', bottom=True, labelsize=10, pad=6, rotation=90)

# Set y-axis ticks and guide lines at every multiple of 25
yticks = [value for value in range(ymin, ymax) if value % 25 == 0]
ax.set_yticks(yticks)
ax.set_yticklabels(yticks)
for y in yticks:
    ax.plot([0, n_days], [y, y], color='gray', lw=0.7, zorder=0)
ax.tick_params(axis='y', labelsize=10, pad=6, rotation=0)

# Uncomment to save the figure
# plt.savefig(wd + '/plots/evolution/' + flows + '/by-rdi/' + country_short + '/evo' + dist + raw + adjust + '.pdf', bbox_inches='tight')

plt.show()


# Plotting weekly rolling % change in flows by RDI class
And trend decomposition

In [None]:
# -----------------------------
# Plot option 2: weekly % change by RDI class
# -----------------------------

fig, axs = plt.subplots(
    1,
    n_class_rdi,
    sharey=True,
    gridspec_kw={'hspace': 0.07, 'wspace': 0.07},
    figsize=(24, 7.5)
)
# With a single class plt.subplots returns a bare Axes rather than an array
axs = np.atleast_1d(axs)

# Prepare DataFrame to store trend components from seasonal decomposition:
# one row per class (filled below), one column per week start
df_trend = pd.DataFrame(columns=list(df_ts_weekly['week_start']))

# ---- Quantities shared by every panel (computed once) ----

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases removed
viridis = plt.get_cmap('viridis')
norm = plt.Normalize(0, n_class_rdi - 1)

n_weeks = df_ts_weekly_class_rdi.shape[1]
n_days = len(df_ts)
week_dates = pd.DatetimeIndex(pd.to_datetime(df_ts_weekly['week_start']))

# Stringency index per plotted date (first match per date). A date with no row
# or no value reuses the previous known value; dates before the first known
# value stay NaN and are left unshaded. (The former per-date `.loc[0, ...]`
# lookup raised KeyError on a missing date.)
stringencies = (
    df_stringency.drop_duplicates(subset='date')
    .set_index('date')['stringency_index']
    .reindex(df_ts['date'])
    .ffill()
    .to_numpy(dtype=float)
)
known = stringencies[~np.isnan(stringencies)]
s_min = known.min() if known.size else 0.0
s_max = known.max() if known.size else 0.0
s_scale = s_max if s_max != 0 else 1.0  # avoid dividing by zero

# Y-range: at least [-100, 101]. The upper limit follows the largest value
# BELOW the overall maximum, so a single extreme peak does not stretch the axis.
observed = df_ts_weekly_class_rdi[~np.isnan(df_ts_weekly_class_rdi)]
lowest = observed.min() if observed.size else -100
ymin = int(min(-100, lowest)) if np.isfinite(lowest) else -100
below_peak = observed[observed < observed.max()] if observed.size else observed
ymax = 101
if below_peak.size and np.isfinite(below_peak.max()):
    ymax = int(max(101, below_peak.max() + 1))

xticks = list(range(0, n_days, 183))
xticks_labels = ['Apr 2020', 'Oct 2020', 'Apr 2021', 'Oct 2021', 'Apr 2022']
yticks = [value for value in range(ymin, ymax) if value % 50 == 0]

# ---- One panel per RDI class ----
for i in range(n_class_rdi):
    # Customize tick appearance for each subplot
    axs[i].tick_params(axis='both', which='both', width=0, length=0, color='k', labelsize=20, pad=9)

    color = viridis(norm(i))

    # Create DataFrame with percent change data and calculate rolling mean
    df_ts_weekly_class_rdi_plot = pd.DataFrame({'perchange_class': df_ts_weekly_class_rdi[i, :]})
    df_ts_weekly_class_rdi_plot['rolling_perchange'] = df_ts_weekly_class_rdi_plot['perchange_class'].rolling(window=4).mean()

    # Plot rolling percent change, placed on the daily x-axis
    axs[i].plot(
        np.arange(n_weeks) * 7,
        df_ts_weekly_class_rdi_plot['rolling_perchange'],
        color=color, lw=7, zorder=6
    )

    # Plot horizontal zero line for reference
    axs[i].plot(
        np.arange(n_weeks * 7),
        np.zeros(n_weeks * 7),
        lw=4, linestyle=':', color='k', zorder=5
    )

    # Trend component of the weekly % change for this class
    series = pd.DataFrame({'value': df_ts_weekly_class_rdi[i, :]}, index=week_dates)
    try:
        result = seasonal_decompose(series, model='additive', extrapolate_trend='freq')
        # Assign by position so the row does not depend on aligning the
        # datetime index of the trend with the column labels of df_trend
        df_trend.loc[i] = np.asarray(result.trend).ravel()
    except Exception as exc:
        # Best effort: a class whose series cannot be decomposed is skipped
        print('not possible for this ', i, exc)

    # Add shaded background reflecting stringency levels: one band per day
    for day, s in enumerate(stringencies):
        if np.isnan(s):
            continue
        rgba = matplotlib.cm.gray(1 - (s - s_min) / s_scale)
        x = [day - 0.50, day + 0.50]
        axs[i].fill_between(x, ymin, ymax, color=rgba, alpha=0.4, edgecolor='None', linewidth=0, zorder=0)

    # X ticks every 183 days (the number of labels must match the ticks)
    axs[i].set_xticks(xticks, xticks_labels)
    axs[i].tick_params(axis='x', bottom=True, labelsize=35, pad=10, rotation=90)

    # Y ticks and horizontal guide lines at every multiple of 50
    axs[i].set_yticks(yticks, yticks)
    for y in yticks:
        axs[i].plot([0, n_days], [y, y], color='gray', lw=1.5, zorder=0)
    axs[i].tick_params(axis='y', labelsize=40, pad=10, rotation=0)

# Uncomment to save figure
# plt.savefig(wd + '/plots/evolution/' + flows + '/by-rdi/' + country_short + '/evo' + dist + raw + adjust + '_by_origin.pdf', bbox_inches='tight')

plt.show()

# Transforming the trend DataFrame to long format and saving it as CSV

In [None]:
# Prepare lists to convert wide-format df_trend (one row per class, one
# column per week) into a long-format DataFrame
time = []
cat = []
y = []

# Iterate over the row labels actually present: a class whose decomposition
# failed has no row, so looping over range(len(df_trend)) would raise KeyError
# or mislabel the remaining classes.
for class_label in df_trend.index:
    for j, week_column in enumerate(df_trend.columns):
        time.append(j)             # time index (column position)
        cat.append(class_label)    # category (class label)
        y.append(df_trend.loc[class_label, week_column])  # trend value

# Create long-format DataFrame suitable for analysis or plotting
df_trend_long = pd.DataFrame({'time': time, 'cat': cat, 'y': y})

# Save to CSV file
df_trend_long.to_csv(wd + '/data/outputs/' + country_short + '/mov-analysis/by-rdi/data_trend_new.csv', index=False)