In [1]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the per-year result files from the workspace results folder.
results_dir = '/workspace/results/'

# glob returns paths in arbitrary (filesystem-dependent) order. Sorting keeps the row
# order of the combined frame reproducible, which matters because later steps use
# order-dependent 'first' aggregations.
files = sorted(glob.glob(os.path.join(results_dir, 'final_results_*.csv')))
print(f"Found {len(files)} files")

if not files:
    # Fail with an actionable message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No 'final_results_*.csv' files found in {results_dir}")

all_data = []
for file in files:
    # The year is the last '_'-separated token of the file name, before the extension.
    year = int(os.path.basename(file).split('_')[-1].split('.')[0])
    df = pd.read_csv(file)
    df['year'] = year
    all_data.append(df)

combined_df = pd.concat(all_data, ignore_index=True)

Found 21 files


In [2]:
# Lookup from the detailed biome name to the coarse category used for grouping.
_BIOME_CATEGORIES = {
    "Mangroves": "Mangrove",
    "N/A": "Rock & Ice",
    "Unknown": "Rock & Ice",
    "Deserts & Xeric Shrublands": "Desert",
    "Tundra": "Tundra",
    "Tropical & Subtropical Coniferous Forests": "Tropical-Forests",
    "Tropical & Subtropical Moist Broadleaf Forests": "Tropical-Forests",
    "Tropical & Subtropical Dry Broadleaf Forests": "Tropical-Forests",
    "Mediterranean Forests, Woodlands & Scrub": "Temperate-Forests",
    "Temperate Conifer Forests": "Temperate-Forests",
    "Temperate Broadleaf & Mixed Forests": "Temperate-Forests",
    "Boreal Forests/Taiga": "Boreal-Forests",
    "Tropical & Subtropical Grasslands, Savannas & Shrublands": "Grassland-Shrubland",
    "Temperate Grasslands, Savannas & Shrublands": "Grassland-Shrubland",
    "Montane Grasslands & Shrublands": "Grassland-Shrubland",
    "Flooded Grasslands & Savannas": "Grassland-Shrubland",
}


def categorize_biome(biome):
    """Map a detailed biome name to a coarse biome category.

    Parameters
    ----------
    biome : str, float or None
        Detailed biome name. Missing values (None / NaN) are accepted.

    Returns
    -------
    str
        One of "Mangrove", "Rock & Ice", "Desert", "Tundra", "Tropical-Forests",
        "Temperate-Forests", "Boreal-Forests", "Grassland-Shrubland", or "Unknown"
        for any name that is not in the lookup table.

    Notes
    -----
    pandas.read_csv converts the text "N/A" to NaN by default, so a value that was
    "N/A" in the file arrives here as a float NaN rather than a string. Missing values
    are therefore mapped to "Rock & Ice", the same category as the literal "N/A".
    """
    # `biome != biome` is only true for NaN; this avoids needing math/numpy here.
    if biome is None or (isinstance(biome, float) and biome != biome):
        return "Rock & Ice"
    # Any other non-string value cannot match a biome name.
    if not isinstance(biome, str):
        return "Unknown"
    return _BIOME_CATEGORIES.get(biome, "Unknown")
        
# Derive the coarse biome category for every row.
combined_df['biome'] = combined_df['BIOME_NAME'].apply(categorize_biome)

# Give every row of a WDPA_PID the first SHARED_BOR value seen for that WDPA_PID.
shared_bor_by_pid = combined_df.groupby('WDPA_PID')['SHARED_BOR']
combined_df['SHARED_BOR'] = shared_bor_by_pid.transform('first')

# Export the full table before any biome is dropped.
combined_df.to_csv('/workspace/all_data.csv', index=False)

# Filter out low sample size biomes for plotting.
excluded_biomes = ['Rock & Ice', 'Unknown', 'Mangrove']
combined_df = combined_df[~combined_df['biome'].isin(excluded_biomes)]

In [None]:
# Metrics that take a different value in every zone
zone_cols = ['hm_mean', 'hm_median', 'hm_stddev',
             'gradient_mean', 'gradient_median', 'gradient_stddev']

# One output row per protected area and year
key_cols = ['WDPA_PID', 'year']

# Everything else is identical across zones, so one copy per key is enough
per_zone_names = zone_cols + ['zone']
static_cols = [name for name in combined_df.columns if name not in per_zone_names]

# First occurrence per WDPA_PID + year provides the static columns
wide_df = combined_df.groupby(key_cols).first().reset_index()[static_cols]

# Spread each zone metric into one column per zone and attach it to the wide table
for metric in zone_cols:
    pivoted = combined_df.pivot_table(
        index=key_cols,
        columns='zone',
        values=metric,
        aggfunc='first'
    )
    # Column names become <metric>_<zone>, e.g. gradient_mean_-1_1km
    pivoted.columns = [f'{metric}_{zone}' for zone in pivoted.columns]
    wide_df = wide_df.merge(pivoted.reset_index(), on=key_cols, how='left')

print(f"Original shape: {combined_df.shape}")
print(f"Wide shape: {wide_df.shape}")

In [None]:
# Edge indices: the -1_1km gradient mean divided by the average of a pair of zone
# gradient means (1_3km with -1_-3km, then 3_5km with -3_-5km).
wide_df['edge_index_outer'] = wide_df['gradient_mean_-1_1km'].div(
    wide_df['gradient_mean_1_3km'].add(wide_df['gradient_mean_-1_-3km']).div(2)
)
wide_df['edge_index_far_outer'] = wide_df['gradient_mean_-1_1km'].div(
    wide_df['gradient_mean_3_5km'].add(wide_df['gradient_mean_-3_-5km']).div(2)
)

In [None]:
import statsmodels.api as sm
from statsmodels.regression.mixed_linear_model import MixedLM

def _clean_model_frame(df, y_col, covariate_col):
    """Return the model columns of `df` with +/-inf replaced by NaN and incomplete rows dropped."""
    model_cols = [y_col, 'year', covariate_col, 'WDPA_PID']
    return df[model_cols].replace([np.inf, -np.inf], np.nan).dropna()


def _fit_year_trend(data, y_col, covariate_col, reml):
    """Fit y ~ year + covariate + (1|WDPA_PID) on `data`; return (fit result, design matrix)."""
    exog = sm.add_constant(data[['year', covariate_col]])
    model = MixedLM(data[y_col], exog, groups=data['WDPA_PID'])
    return model.fit(reml=reml), exog


def _predict_year_trend(result, exog, covariate_col, n_points):
    """Predict the fixed-effects trend across the observed years, covariate held at its mean.

    Returns (years, y_pred, se) where `se` is the standard error of the fixed-effects
    prediction, computed from the fixed-effects block of the parameter covariance.
    """
    years = np.linspace(exog['year'].min(), exog['year'].max(), n_points)
    X_pred = pd.DataFrame({'const': 1, 'year': years, covariate_col: exog[covariate_col].mean()})
    # Use the fitted design's column set and order so predictions line up with the coefficients.
    X_pred = X_pred[list(exog.columns)]
    y_pred = np.asarray(result.predict(X_pred))

    # cov_params() also covers the random-effects variance; the fixed effects come first.
    k_fe = len(result.fe_params)
    cov_fe = np.asarray(result.cov_params())[:k_fe, :k_fe]
    design = X_pred.to_numpy(dtype=float)
    se = np.sqrt(np.einsum('ij,jk,ik->i', design, cov_fe, design))
    return years, y_pred, se


def plot_mixed_model_trends(df, y_col, covariate_col, group_col=None, title='', ylabel='', figsize=(15, 8)):
    """Plot mixed effects model: y ~ year + covariate + (1|WDPA_PID)

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `y_col`, 'year', `covariate_col`, 'WDPA_PID' and, if given, `group_col`.
    y_col : str
        Response column.
    covariate_col : str
        Additional fixed-effect covariate; held at its mean when drawing the trend line.
    group_col : str or None
        If None, one model is fitted on all rows and drawn with the data points and a
        95% confidence band. Otherwise one model is fitted per distinct value of
        `group_col` and only the trend lines are drawn.
    title, ylabel : str
        Axes title and y-axis label.
    figsize : tuple
        Figure size passed to `plt.subplots`.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes.

    Raises
    ------
    ValueError
        In the overall case, when no complete finite rows are left to fit.

    Notes
    -----
    Rows with NaN or +/-inf in any model column are dropped before fitting.
    The overall model is fitted with REML (the statsmodels default) and the per-group
    models with ML (`reml=False`), as before. NOTE(review): confirm this is intentional.
    """
    if group_col is None:
        # Validate before creating the figure so a failure does not leave an empty figure open.
        clean_df = _clean_model_frame(df, y_col, covariate_col)
        if clean_df.empty:
            raise ValueError(
                f"No complete finite rows for '{y_col}', 'year', '{covariate_col}', 'WDPA_PID'"
            )

    fig, ax = plt.subplots(figsize=figsize)

    if group_col is None:
        # Overall model
        result, exog = _fit_year_trend(clean_df, y_col, covariate_col, reml=True)

        print(f"\nYear coef: {result.fe_params['year']:.6f} (p={result.pvalues['year']:.4f})")
        print(f"Converged: {result.converged}")

        # Predict and plot
        years, y_pred, se = _predict_year_trend(result, exog, covariate_col, 100)

        ax.scatter(clean_df['year'], clean_df[y_col], alpha=0.2, s=10, label='Data')
        ax.plot(years, y_pred, 'r-', linewidth=2, label=f'β={result.fe_params["year"]:.4f}, p={result.pvalues["year"]:.4f}')
        # Confidence band of the fitted mean trend. The previous band used the residual SD
        # (sqrt(result.scale)), which is not a confidence interval.
        ax.fill_between(years, y_pred - 1.96*se, y_pred + 1.96*se,
                       alpha=0.2, color='red', label='95% CI')
    else:
        # By group
        group_values = df[group_col].unique()
        colors = plt.cm.tab10(np.linspace(0, 1, len(group_values)))

        for (group, color) in zip(group_values, colors):
            gdf = _clean_model_frame(df[df[group_col] == group], y_col, covariate_col)

            # A random intercept needs several WDPA_PIDs; report the skip instead of hiding it.
            if gdf['WDPA_PID'].nunique() < 3:
                print(f"{group}: skipped (fewer than 3 WDPA_PID with complete data)")
                continue

            try:
                result, exog = _fit_year_trend(gdf, y_col, covariate_col, reml=False)
                years, y_pred, _ = _predict_year_trend(result, exog, covariate_col, 50)

                ax.plot(years, y_pred, color=color, linewidth=2,
                       label=f'{group} (β={result.fe_params["year"]:.4f}, p={result.pvalues["year"]:.3f})')

                print(f"{group}: β={result.fe_params['year']:.4f}, p={result.pvalues['year']:.4f}, converged={result.converged}")
            except Exception as e:
                # Best effort: one failing group must not prevent the others from being drawn.
                print(f"{group} error: {e}")

    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14)
    # Only draw a legend when something was plotted; avoids matplotlib's empty-legend warning.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    return fig, ax

# Plot by biome. The plotted column is the far-outer edge index, so the labels say so.
fig, ax = plot_mixed_model_trends(
    wide_df, 'edge_index_far_outer', 'hm_mean_3_5km', 'biome',
    'Edge Index Far Outer by Biome (Mixed Model)', 'Edge Index Far Outer'
)
plt.show()

# Overall (all biomes pooled)
fig2, ax2 = plot_mixed_model_trends(
    wide_df, 'edge_index_far_outer', 'hm_mean_3_5km', None,
    'Overall Edge Index Far Outer (Mixed Model)', 'Edge Index Far Outer'
)
plt.show()

In [None]:
# Median-based edge indices: the -1_1km gradient median divided by the average of a
# pair of zone gradient medians (1_3km with -1_-3km, then 3_5km with -3_-5km).
wide_df['edge_index_outer_median'] = wide_df['gradient_median_-1_1km'].div(
    wide_df['gradient_median_1_3km'].add(wide_df['gradient_median_-1_-3km']).div(2)
)
wide_df['edge_index_far_outer_median'] = wide_df['gradient_median_-1_1km'].div(
    wide_df['gradient_median_3_5km'].add(wide_df['gradient_median_-3_-5km']).div(2)
)

In [None]:
# Plot by biome (median-based far-outer edge index)
fig3, ax3 = plot_mixed_model_trends(
    wide_df, 'edge_index_far_outer_median', 'hm_mean_3_5km', 'biome',
    'Edge Index Far Outer by Biome Median (Mixed Model)', 'Edge Index Far Outer (Median)'
)
plt.show()

# Overall (all biomes pooled)
# NOTE(review): `fig24` looks like a typo for `fig4`; the name is kept in case other cells use it.
fig24, ax4 = plot_mixed_model_trends(
    wide_df, 'edge_index_far_outer_median', 'hm_mean_3_5km', None,
    'Overall Edge Index Far Outer Median (Mixed Model)', 'Edge Index Far Outer (Median)'
)
plt.show()