In [22]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import calendar
import folium
import matplotlib.cm as cm
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import os
import pandas as pd
import re

# Global default font size for matplotlib text.
# NOTE(review): setup_plot() later calls sns.set_theme(style="ticks"), which
# presumably re-applies seaborn's own rc defaults (font sizes included) —
# confirm this setting still has an effect on the saved figures.
plt.rcParams.update({"font.size": 24})


In [23]:
def read_water_gage_station(filename):
    """Load one station CSV into a DataFrame indexed by its ``t`` column.

    The columns ``s``, ``f_x``, ``g``, ``f_y`` and ``direction`` are skipped
    while parsing; every other column is kept.

    Args:
        filename: Passed straight to ``pandas.read_csv`` as the input source.

    Returns:
        The parsed DataFrame with ``t`` as its index.
    """
    skipped = ("s", "f_x", "g", "f_y", "direction")

    def keep(column):
        # read_csv calls this once per header name; True means "load it".
        return column not in skipped

    return pd.read_csv(filename, usecols=keep, index_col="t")


# Load the five station CSVs (paths are relative to the working directory);
# each table keeps its own module-level name.
(
    WaterGageStation8771972,
    WaterGageStation8772440,
    WaterGageStation8772447,
    WaterGageStation8772471,
    WaterGageStation8773767,
) = map(
    read_water_gage_station,
    (
        "WaterGageStation8771972.csv",
        "WaterGageStation8772440.csv",
        "WaterGageStation8772447.csv",
        "WaterGageStation8772471.csv",
        "WaterGageStation8773767.csv",
    ),
)


In [24]:
# Display name -> station table, in the order the stations are processed
# and plotted.
datasets = dict(
    [
        ("Station 8771972", WaterGageStation8771972),
        ("Station 8772440", WaterGageStation8772440),
        ("Station 8772447", WaterGageStation8772447),
        ("Station 8772471", WaterGageStation8772471),
        ("Station 8773767", WaterGageStation8773767),
    ]
)


In [25]:
# Month names in calendar order.
month_order = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# Month number (1-12) -> month name, derived from the ordered list so the
# names are written down only once.
month_map = dict(enumerate(month_order, start=1))

# Raw CSV column name -> label used for the column from here on.
rename_columns = dict(
    water_level="Water Level (m)",
    wind_speed="Wind Speed (m/s)",
    wind_direction="Wind Direction (degrees)",
)


In [26]:
# Clean every station table in place: rename the measurement columns, coerce
# them to numbers, parse the index as datetimes and add categorical
# "Year"/"Month" columns at the front.
for dataset_name, df in datasets.items():
    # The frames are mutated in place, so a second run of this cell would find
    # "Year"/"Month" already present and DataFrame.insert would raise
    # ValueError. Dropping leftovers first keeps the cell re-runnable.
    df.drop(columns=["Year", "Month"], errors="ignore", inplace=True)

    df.rename(columns=rename_columns, inplace=True)
    for col in rename_columns.values():
        # Unparseable readings become NaN instead of raising.
        df[col] = pd.to_numeric(df[col], errors="coerce")

    df.index = pd.to_datetime(df.index)
    df.insert(0, "Year", df.index.year)
    df.insert(1, "Month", df.index.month)
    df["Month"] = df["Month"].map(month_map)
    # Ordered categorical so months sort in calendar order, not alphabetically.
    df["Month"] = pd.Categorical(df["Month"], categories=month_order, ordered=True)
    df["Year"] = pd.Categorical(df["Year"])

    # No write-back into `datasets` is needed: `df` is the very object stored
    # under `dataset_name`, and every step above modified it in place.


In [27]:
def create_directory(base_path, subfolder):
    """Make sure ``base_path/subfolder`` exists and return that path.

    When either argument is empty or ``None`` nothing is created and
    ``base_path`` is returned unchanged.

    Args:
        base_path: Parent directory (may be ``None``).
        subfolder: Name of the directory to create inside ``base_path``.

    Returns:
        The joined path if both arguments were given, otherwise ``base_path``.
    """
    if not (base_path and subfolder):
        return base_path

    target = os.path.join(base_path, subfolder)
    os.makedirs(target, exist_ok=True)
    return target


def adjust_dataframe(df, direction_column, orientation=90 - 34):
    """Return a copy of ``df`` with the direction column rotated by ``orientation``.

    Each value becomes ``(value - orientation) % 360``, so the result always
    lies in ``[0, 360)``. The input frame is not modified.

    Args:
        df: Source DataFrame.
        direction_column: Name of the column holding directions in degrees.
        orientation: Offset in degrees subtracted from every direction. The
            default ``90 - 34`` (= 56) is the value previously hard-coded here.
            NOTE(review): presumably derived from the island's orientation —
            confirm and document where the 34 comes from.

    Returns:
        A new DataFrame with only ``direction_column`` changed.

    Raises:
        KeyError: If ``direction_column`` is not a column of ``df``.
    """
    rotated = df.copy()
    rotated[direction_column] = (rotated[direction_column] - orientation) % 360
    return rotated


def setup_plot(df, direction_column, magnitude_column):
    """Create an empty seaborn JointGrid for a direction/magnitude pair.

    Applies the seaborn "ticks" theme, then builds the grid with
    ``direction_column`` on x and ``magnitude_column`` on y. Nothing is drawn.

    Args:
        df: DataFrame supplying both columns.
        direction_column: Column placed on the x-axis.
        magnitude_column: Column placed on the y-axis.

    Returns:
        The new ``seaborn.JointGrid``.
    """
    sns.set_theme(style="ticks")
    grid_options = dict(marginal_ticks=True, space=0.5)
    return sns.JointGrid(
        data=df, x=direction_column, y=magnitude_column, **grid_options
    )


def customize_plot(g, df, direction_column, magnitude_column, title, joint_kind="hist"):
    """Draw and style the joint direction/magnitude figure on an existing grid.

    Fills the joint axes (2-D histogram with a colorbar, or a filled KDE),
    draws gray marginal histograms from ``df``, switches the magnitude axis to
    a log scale, labels the direction ticks in radians, resizes the figure to
    20 cm x 20 cm and sets the figure title.

    Args:
        g: ``seaborn.JointGrid`` to draw on; modified in place.
        df: DataFrame supplying the marginal histogram data.
        direction_column: Column with directions in degrees (x-axis).
        magnitude_column: Column with magnitudes (y-axis).
        title: Text used as the figure title.
        joint_kind: ``"hist"`` (default, the behaviour this function always
            had) or ``"kde"`` for the filled-contour alternative that used to
            sit in an unreachable branch.

    Raises:
        ValueError: If ``joint_kind`` is neither ``"hist"`` nor ``"kde"``.
    """
    if joint_kind not in ("hist", "kde"):
        raise ValueError(f"joint_kind must be 'hist' or 'kde', got {joint_kind!r}")

    cmap = sns.diverging_palette(230, 20, as_cmap=True)

    if joint_kind == "hist":
        # Inset axes for the colorbar, positioned in figure-fraction coordinates.
        cax = g.figure.add_axes([0.15, 0.55, 0.02, 0.2])
        g.plot_joint(
            sns.histplot, stat="percent", cmap=cmap, cbar=True, cbar_ax=cax, fill=True
        )
        # Labelled once, after the colorbar has been drawn into `cax`.
        cax.set_ylabel("Percent", rotation=90, fontsize=20)
    else:
        g.plot_joint(sns.kdeplot, fill=True, thresh=0, levels=10, cmap=cmap)

    g.ax_joint.set(yscale="log")
    g.ax_marg_y.set_ylim(1, 20)
    g.ax_marg_x.set_xlim(0, 361)

    g.ax_marg_x.hist(
        df[direction_column], color="gray", alpha=1, bins=360 // 36, density=True
    )
    magnitude = df[magnitude_column]
    # Roughly 0.5-wide bins, but never fewer than one: a data range narrower
    # than 0.5 would otherwise request 0 bins, which hist() rejects.
    magnitude_bins = max(1, int((magnitude.max() - magnitude.min()) / 0.5))
    g.ax_marg_y.hist(
        magnitude,
        color="gray",
        alpha=1,
        bins=magnitude_bins,
        orientation="horizontal",
        density=True,
    )

    # NOTE(review): both marginals are drawn with density=True yet labelled
    # "Percent" — confirm which of the two is intended.
    g.ax_marg_x.set_xlabel("Percent")
    g.ax_marg_y.set_ylabel("Percent")
    g.set_axis_labels("Wind Approaching Island From ", "Wind Speed (m/s)")

    # Direction ticks every 90 degrees, labelled in radians.
    tick_labels = {
        0: "0",
        90: r"$\frac{\pi}{2}$",
        180: r"$\pi$",
        270: r"$\frac{3\pi}{2}$",
        360: r"$2\pi$",
    }
    g.ax_joint.xaxis.set_major_locator(ticker.FixedLocator(list(tick_labels)))
    g.ax_joint.xaxis.set_major_formatter(
        ticker.FuncFormatter(lambda val, pos: tick_labels.get(val, ""))
    )

    # matplotlib sizes figures in inches. The old local name `cm` shadowed the
    # imported `matplotlib.cm` module inside this function.
    inches_per_cm = 1 / 2.54
    g.figure.set_size_inches(20 * inches_per_cm, 20 * inches_per_cm)
    # Address the grid's own figure rather than pyplot's "current" figure.
    g.figure.suptitle(title, horizontalalignment="center", verticalalignment="top")


def save_plot(g, full_path, title):
    """Write the grid's figure to ``<full_path>/<title>_Heatmap.svg`` and close it.

    Previously ``full_path`` was ignored and every figure was written to the
    current working directory, leaving the per-station folders empty.

    Args:
        g: ``seaborn.JointGrid`` whose figure is saved.
        full_path: Output directory; created if missing. When empty or
            ``None`` the file is written relative to the working directory.
        title: Used as the file-name stem.
    """
    filename = f"{title}_Heatmap.svg"
    if full_path:
        os.makedirs(full_path, exist_ok=True)
        save_path = os.path.join(full_path, filename)
    else:
        save_path = filename

    # Save and close this grid's figure explicitly instead of relying on
    # pyplot's current-figure state.
    g.figure.savefig(save_path)
    plt.close(g.figure)


def plot_kde(
    df, direction_column, magnitude_column, title, base_path=None, subfolder=None
):
    """Build, style and save the joint direction/magnitude figure for one dataset.

    Steps, in order: prepare the output directory, rotate the direction
    column, create the grid, draw and style it, then save and close the figure.

    Args:
        df: Source DataFrame; left unmodified.
        direction_column: Column with directions in degrees.
        magnitude_column: Column with magnitudes.
        title: Figure title, also handed to ``save_plot``.
        base_path: Optional parent directory for output.
        subfolder: Optional directory name under ``base_path``.
    """
    output_dir = create_directory(base_path, subfolder)
    rotated = adjust_dataframe(df, direction_column)
    grid = setup_plot(rotated, direction_column, magnitude_column)
    customize_plot(grid, rotated, direction_column, magnitude_column, title)
    save_plot(grid, output_dir, title)


In [28]:
# Produce one figure per station for each (direction, magnitude) column pair.
base_path = os.getcwd()
column_pairs = [("Wind Direction (degrees)", "Wind Speed (m/s)")]

for dataset_name, df in datasets.items():
    for direction_column, magnitude_column in column_pairs:
        # The station name doubles as figure title and output subfolder.
        title = dataset_name
        plot_kde(
            df,
            direction_column,
            magnitude_column,
            title,
            base_path=base_path,
            subfolder=dataset_name,
        )
