# Load the necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

## Load the data, compute seasonal anomalies, and plot them

In [None]:
# ===========================================
# Seasonal Anomalies with 5-Year Moving Average
# Traditional seasons only
# (CHL_cv in the first column)
# ===========================================


# --- GLOBAL FONT SIZE SETTINGS ---
TICK_FONTSIZE = 16    # Axis tick labels
YLABEL_FONTSIZE = 26  # Season name shown as the y-label of the first column
TITLE_FONTSIZE = 22   # Panel letters (a, b, c, ...) above the first row
SUPX_FONTSIZE = 18    # Shared x-axis label
# ----------------------------------

# -------------------------------------------
# 1. Load data and set coordinates
# -------------------------------------------
# Only the read itself can raise FileNotFoundError, so it is the only
# statement inside `try`; the post-processing of a successful read lives in
# the `else` branch.
try:
    df_all = pd.read_csv("final_merged_combined_all_cv_box_data.csv", parse_dates=['time'])

except FileNotFoundError:
    print("Warning: Main CSV file not found. Using generated synthetic data for demonstration.")
    date_rng = pd.date_range(start='1998-01-01', end='2024-12-31', freq='MS')
    n_months = len(date_rng)
    df_all = pd.DataFrame(date_rng, columns=['time'])

    # Same site coordinates the filter step selects on.
    df_all['Latitude_cv'] = 16.0
    df_all['Longitude_cv'] = -23.75

    # Fixed seed so the demonstration figure is identical on every run;
    # a local generator leaves NumPy's global random state untouched.
    rng = np.random.default_rng(0)

    # Phase of a 12-month cycle for every time step.
    annual_phase = np.arange(n_months) * (2 * np.pi / 12)
    # Annual cycle rescaled to the range [0, 1].
    month_cycle = (np.sin(annual_phase + np.pi) + 1) / 2

    # Column names must match the keys of the plotting dictionaries,
    # otherwise the corresponding panel is reported as "not found".
    df_all['SST_cv'] = (
        25
        + np.sin(annual_phase - np.pi / 2) * 3
        + rng.standard_normal(n_months) * 0.3
        + np.linspace(0, 1.5, n_months)  # slow linear increase over the record
    )
    df_all['CHL_cv'] = 0.05 + month_cycle * 0.4 + rng.random(n_months) * 0.05
    # Inflate 1998 so the synthetic series contains one anomalous year.
    df_all.loc[df_all['time'].dt.year == 1998, 'CHL_cv'] *= 1.8
    df_all['AOD_cv'] = 0.15 + rng.random(n_months) * 0.1
    df_all['Wind_Speed_cv'] = 5 + (1 - month_cycle) * 3 + rng.random(n_months) * 1.5
    df_all['wet_deposition_cv'] = (0.5 + month_cycle * 1.5 + rng.random(n_months) * 0.2) * 1e-10
    # Was 'dry_deposition_cv1', which never matched the plotted variable name.
    df_all['dry_deposition_cv'] = (1.0 + (1 - month_cycle) * 2.0 + rng.random(n_months) * 0.5) * 1e-10

else:
    # Set time as index to resample
    df_all.set_index('time', inplace=True)
    # Re-stamp every month at its first day ('MS') and average the rows in it
    df_all = df_all.resample('MS').mean()
    # Restore 'time' as a column so the lat/lon filter can still use it
    df_all.reset_index(inplace=True)
    print("Successfully loaded and resampled main CSV.")

# Coordinates for Cabo Verde
lat = 16.0
lon = -23.75

# Filter data
# The coordinate columns are floats and, for the CSV path, have been through
# a monthly mean, so match them with a tolerance instead of exact equality.
at_site = np.isclose(df_all['Latitude_cv'], lat) & np.isclose(df_all['Longitude_cv'], lon)
df = df_all[at_site].copy()
# Index by time (sorted) so later steps can use the DatetimeIndex directly.
df.set_index('time', inplace=True)
df.sort_index(inplace=True)

# -------------------------------------------
# 2. Define colors and titles
# -------------------------------------------
# One row per plotted variable: (column name, colour for positive anomalies,
# colour for negative anomalies). Row order is the left-to-right panel order,
# with CHL_cv first.
color_table = [
    ('CHL_cv', '#2ca02c', "#b2c09c"),
    ('SST_cv', "#ff260e", "#68b4ea"),
    ('AOD_cv', "#4f0906", "#393ed6bf"),
    ('Wind_Speed_cv', "#ca2b0b", "#3d7bf0"),
    ('wet_deposition_cv', "#0F0064", "#515BE1"),
    ('dry_deposition_cv', "#95613C", '#DEB887'),
]

variables_colors = {
    name: {'pos': positive, 'neg': negative}
    for name, positive, negative in color_table
}

# Panel letters 'a)' ... 'f)', assigned in the same order as variables_colors.
plot_titles = {
    name: f'{letter})'
    for name, letter in zip(variables_colors, 'abcdef')
}

# -------------------------------------------
# 3. Define traditional seasons
# -------------------------------------------
def assign_standard_season(month):
    """Return the season name for a calendar month number.

    12, 1, 2 -> 'Winter'; 3-5 -> 'Spring'; 6-8 -> 'Summer'.
    Every other value (including 9, 10, 11) -> 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    # Anything not matched above falls through to Fall.
    return 'Fall'

# Row order of the figure: one row per traditional season.
seasons = ['Winter', 'Spring', 'Summer', 'Fall']
# Column order of the figure: follows the key order of variables_colors.
variables = [*variables_colors]

# -------------------------------------------
# 4. Create figure (one row per season, one column per variable)
# -------------------------------------------
n_rows = len(seasons)
n_cols = len(variables)
fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(25, 14), constrained_layout=True)
# Force a (rows, columns) array so axs[i, j] indexing is always valid.
axs = axs.reshape(n_rows, n_cols)

# Figure title, anchored at the left edge (currently left blank).
suptitle_style = {
    'fontsize': 30,
    'fontfamily': 'Times New Roman',
    'x': 0.01,
    'ha': 'left',
}
fig.suptitle("", **suptitle_style)

# -------------------------------------------
# 5. Main plotting loop
# -------------------------------------------
# Years (inclusive) whose seasonal means define the climatology that the
# anomalies are measured against.
BASELINE_START_YEAR = 1998
BASELINE_END_YEAR = 2020
# Number of consecutive yearly values averaged by the centred moving average.
MOVING_AVG_WINDOW = 5
# Fixed x-range so every panel covers the same span of years.
YEAR_AXIS_LIMITS = (1997.5, 2024.5)

# The per-variable preparation does not depend on the season, so it is done
# once per variable here instead of once per (season, variable) panel.
prepared = {}
for var in variables:
    if var not in df.columns:
        continue  # reported on each affected panel in the loop below

    var_df = df[[var]].dropna().copy()

    if var == 'CHL_cv':
        # Floor at 0.05 and cap at the series' own 99th percentile.
        upper_clip = var_df[var].quantile(0.99)
        var_df[var] = var_df[var].clip(lower=0.05, upper=upper_clip)

    # Tag every monthly value with its season (requires a DatetimeIndex).
    var_df['season_label'] = var_df.index.month.map(assign_standard_season)
    prepared[var] = var_df

last_row = len(seasons) - 1

for i, season in enumerate(seasons):
    for j, var in enumerate(variables):
        ax = axs[i, j]

        # Variable missing from the dataframe: blank the panel and say why.
        if var not in prepared:
            ax.set_title(f'Variable {var} not found')
            ax.axis('off')
            continue

        # Seasonal subset
        var_df = prepared[var]
        season_data = var_df[var_df['season_label'] == season]

        if season_data.empty:
            ax.set_title(f'No {season} data for {var}')
            ax.axis('off')
            continue

        # Seasonal mean per calendar year.
        # NOTE(review): grouping by calendar year puts December in the same
        # 'Winter' value as the January/February of that same year, not with
        # the January/February that follow it -- confirm this is intended.
        yearly_seasonal_mean = season_data.groupby(season_data.index.year)[var].mean()

        # Climatology = mean of the seasonal means inside the baseline period.
        years = yearly_seasonal_mean.index
        in_baseline = (years >= BASELINE_START_YEAR) & (years <= BASELINE_END_YEAR)

        if not in_baseline.any():
            ax.set_title(f'No baseline data ({BASELINE_START_YEAR}-{BASELINE_END_YEAR}) for {var}')
            ax.axis('off')
            continue

        climatology = yearly_seasonal_mean[in_baseline].mean()
        anomalies = yearly_seasonal_mean - climatology

        # Bars: zero line plus one colour for >= 0 and another for < 0.
        ax.axhline(0, color='k', linestyle='--', alpha=0.6)
        positive_mask = anomalies >= 0

        ax.bar(anomalies.index[positive_mask], anomalies[positive_mask],
               color=variables_colors[var]['pos'], width=0.8)
        ax.bar(anomalies.index[~positive_mask], anomalies[~positive_mask],
               color=variables_colors[var]['neg'], width=0.8)

        # Centred moving average. The window counts yearly values, so it spans
        # MOVING_AVG_WINDOW calendar years only where no year is missing;
        # min_periods=1 keeps the line defined at both ends of the record.
        moving_avg = anomalies.rolling(window=MOVING_AVG_WINDOW, center=True, min_periods=1).mean()
        ax.plot(moving_avg.index, moving_avg.values, color='black', linestyle='--',
                linewidth=2, marker='o', markersize=3)

        # Labels and formatting: panel letter on the first row only,
        # season name on the first column only.
        if i == 0:
            ax.set_title(plot_titles.get(var, var), fontsize=TITLE_FONTSIZE, fontweight='bold',
                         loc='left', fontfamily='Times New Roman')
        if j == 0:
            ax.set_ylabel(season, fontsize=YLABEL_FONTSIZE, fontweight='bold', fontfamily='Times New Roman')

        ax.grid(axis='y', linestyle='--', alpha=0.5)
        ax.set_xlim(*YEAR_AXIS_LIMITS)
        ax.xaxis.set_major_locator(plt.MaxNLocator(integer=True, nbins=15))

        # Cap the number of y ticks so the labels do not overlap.
        ax.yaxis.set_major_locator(plt.MaxNLocator(nbins=6))
        ax.tick_params(axis='y', labelsize=TICK_FONTSIZE)

        # Show year tick labels only on the bottom row.
        if i == last_row:
            ax.tick_params(axis='x', rotation=90, labelsize=TICK_FONTSIZE)
        else:
            ax.tick_params(axis='x', labelbottom=False)

# -------------------------------------------
# 6. Shared x-axis label
# -------------------------------------------
shared_xlabel_style = {
    'fontsize': SUPX_FONTSIZE,
    'fontweight': 'bold',
    'fontfamily': 'Times New Roman',
}
fig.supxlabel("Year", **shared_xlabel_style)

# -------------------------------------------
# 7. Save or show
# -------------------------------------------
# Uncomment to also write the figure to disk (the target directory must exist):
# plt.savefig('SEASONAL_ANOMALIES_CHECKED_GOOD/Seasonal_Anomalies_CV_box_OX_6vars_CHL_first_readabl.png', dpi=360, bbox_inches='tight', facecolor='white')
plt.show()