In [None]:
# -----********************-----

# Created Time: 2024/10/15

# Author: Tara Liu, Yiyi He

# Use Case

# This notebook conducts exploratory data analysis (EDA) on ERA5 climate data and hourly ESMI voltage data and creates plots.
# 1. Load ERA5 climate data and ESMI voltage data
# 2. Plot and save seasonal trends of climate data over multiple years
# 3. Choropleth map of climate data


# -----********************-----

In [None]:
from shapely.geometry import Point, Polygon
import geopandas as gpd
import calendar
import pandas as pd
import numpy as np
import os
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
# Raise pandas' display limits so wide or long frames are shown without truncation.
pd.set_option('display.max_columns', 500, 'display.max_rows', 1000)

In [None]:

# Load the station metadata and all climate files for one station, then index
# the combined records by timestamp.
stations = pd.read_csv('../../data/STATION.csv')

input_dir = '../../data/yearly_climate_by_station/1'

# os.listdir() returns entries in arbitrary order and also yields hidden
# entries (e.g. `.ipynb_checkpoints`, `.DS_Store`). Keep only regular,
# non-hidden files and sort them so the row order is reproducible.
files = sorted(
    name for name in os.listdir(input_dir)
    if not name.startswith('.')
    and os.path.isfile(os.path.join(input_dir, name))
)
if not files:
    raise FileNotFoundError(f'No climate files found in {input_dir!r}')

# Read each file once and concatenate in a single call; growing a frame with
# pd.concat inside the loop re-copies every accumulated row on each iteration.
df = pd.concat([pd.read_csv(os.path.join(input_dir, name)) for name in files])

df = df.drop(columns=['From date', 'To date', 'ESMI_ID'])

# The parse format below is YYYYMMDD + HHMM. `time` is left-padded to four
# characters first (presumably it is stored as an integer, which loses
# leading zeros — TODO confirm against the raw files).
df['date'] = df['date'].astype(str)
df['time'] = df['time'].astype(str).str.zfill(4)
df['datetime'] = pd.to_datetime(df['date'] + df['time'], format='%Y%m%d%H%M')

df.info()
df.set_index('datetime', inplace=True)

In [None]:


# Derive calendar year and month columns from the datetime index for grouping.
df['year'], df['month'] = df.index.year, df.index.month

def get_season(month):
    """Map a calendar month number (1-12) to the season label used in the plots.

    The grouping is by calendar quarter:
        1-3   -> 'Winter'
        4-6   -> 'Summer'
        7-9   -> 'Monsoon'
        10-12 -> 'Post-monsoon'

    NOTE(review): these quarters differ from the India Meteorological
    Department convention (Jan-Feb, Mar-May, Jun-Sep, Oct-Dec) — confirm the
    quarterly split is intended.

    Parameters
    ----------
    month : int
        Month number, 1 (January) through 12 (December).

    Returns
    -------
    str
        One of 'Winter', 'Summer', 'Monsoon', 'Post-monsoon'.

    Raises
    ------
    ValueError
        If ``month`` is not in 1..12 (including NaN), instead of silently
        labelling it 'Post-monsoon'.
    """
    if month in (1, 2, 3):
        return 'Winter'
    if month in (4, 5, 6):
        return 'Summer'
    if month in (7, 8, 9):
        return 'Monsoon'
    if month in (10, 11, 12):
        return 'Post-monsoon'
    raise ValueError(f'month must be in 1..12, got {month!r}')


# Tag every record with its season so the plots can colour by it.
df['season'] = df['month'].apply(get_season)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so a hard-coded 'seaborn-darkgrid'
# raises OSError there. Use whichever name this installation provides and
# fall back to the default style if neither exists.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# 2x2 grid: one panel per climate variable.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle(
    'Seasonal Trends of ERA5-Land Data Over Multiple Years', fontsize=16, y=1.02)

# One fixed colour per season, shared by all panels and all years.
season_colors = {'Winter': 'tab:blue', 'Summer': 'tab:orange',
                 'Monsoon': 'tab:green', 'Post-monsoon': 'tab:red'}


def plot_seasonal_trend(ax, variable, title, ylabel, data=None, colors=None):
    """Plot one variable against day-of-year, one line per (year, season).

    Lines are coloured by season. Each season gets exactly one legend entry
    (added the first time that season is drawn), so the legend does not grow
    with the number of years.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    variable : str
        Name of the column in ``data`` to plot on the y-axis.
    title : str
        Panel title.
    ylabel : str
        Y-axis label.
    data : pandas.DataFrame, optional
        Frame with a DatetimeIndex and ``'year'`` and ``'season'`` columns.
        Defaults to the notebook-level ``df``.
    colors : dict, optional
        Mapping of season label to line colour. Defaults to the
        notebook-level ``season_colors``.
    """
    if data is None:
        data = df
    if colors is None:
        colors = season_colors

    # The original compared `year` with the minimum year of its own per-year
    # group, which is always true, so every line was labelled. Track labelled
    # seasons explicitly instead.
    labelled = set()
    for _, yearly in data.groupby('year'):
        for season, chunk in yearly.groupby('season'):
            # Matplotlib leaves empty-string labels out of the legend.
            label = "" if season in labelled else season
            labelled.add(season)
            ax.plot(chunk.index.dayofyear, chunk[variable],
                    color=colors[season], label=label)

    ax.set_title(title, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True)


# One panel per climate variable: (column, panel title, y-axis label).
# NOTE(review): the labels state °C and mm; no unit conversion is visible in
# this notebook, so confirm the input columns are already in those units.
panel_specs = [
    ('t2m', 'Temperature (t2m)', 'Temperature (°C)'),
    ('u10', 'Wind U Component', 'Wind U (m/s)'),
    ('v10', 'Wind V Component', 'Wind V (m/s)'),
    ('tp', 'Total Precipitation (tp)', 'Precipitation (mm)'),
]

# axes.flat walks the 2x2 grid row by row, matching the order of the specs.
for ax, (column, panel_title, y_label) in zip(axes.flat, panel_specs):
    plot_seasonal_trend(ax, column, panel_title, y_label)
    ax.set_xlabel('Day of Year', fontsize=12)

plt.tight_layout(pad=2)

# The figure title is placed at y=1.02, above the top edge of the canvas.
# Without bbox_inches='tight' the saved PNG is cropped to the canvas and the
# title is cut off.
plt.savefig('../../data/seasonal_trends_multiple_years_stack.png', dpi=300,
            bbox_inches='tight')