In [37]:
import matplotlib.pyplot as plt
import numpy as np
import ptitprince as pt
import numpy as np
import seaborn as sns
import pandas as pd
import os

# Notebook-wide default font size for matplotlib figures.
plt.rcParams["font.size"] = 20


In [38]:
def read_water_gage_station(filename):
    """Load one station CSV into a DataFrame indexed by its "Datetime" column.

    Parameters
    ----------
    filename
        Passed straight to ``pandas.read_csv``; the file must contain a
        "Datetime" column.

    Returns
    -------
    pandas.DataFrame
        The file contents with "Datetime" as the index. No date parsing is
        requested here, so the index holds whatever ``read_csv`` infers.
    """
    return pd.read_csv(filename, index_col="Datetime")


# Read the three station exports, in this order, from the working directory.
WaveDataStation42002, WaveDataStation42019, WaveDataStation42020 = map(
    read_water_gage_station,
    (
        "WaveDataStation42002.csv",
        "WaveDataStation42019.csv",
        "WaveDataStation42020.csv",
    ),
)


In [39]:
# Station label -> loaded frame. The label is later used as a directory name
# for that station's plots.
datasets = dict(
    zip(
        ("Station 42002", "Station 42019", "Station 42020"),
        (WaveDataStation42002, WaveDataStation42019, WaveDataStation42020),
    )
)


In [40]:
# Numeric columns to plot, grouped by physical quantity. The names match the
# descriptive column names the frames carry after renaming.
parameters = (
    # Wind
    [
        "Wind Speed (m/s)",
        "Peak Gust Speed (m/s)",
    ]
    # Waves
    + [
        "Significant Wave Height (meters)",
        "Dominant Wave Period (seconds)",
        "Average Wave Period (seconds)",
        "Wave Direction (degrees)",
    ]
    # Temperatures
    + [
        "Air Temperature (Celsius)",
        "Sea Surface Temperature (Celsius)",
        "Dewpoint Temperature (Celsius)",
    ]
)


In [41]:
# Month names in calendar order; used as the category order when the month
# column is turned into an ordered categorical.
month_order = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# Month number (1-12) -> month name, derived from month_order so the two
# cannot drift apart.
month_map = dict(enumerate(month_order, start=1))

# Raw column header -> descriptive column name used throughout the notebook.
rename_columns = dict(
    # Timestamp components
    YYYY="Year",
    MM="Month",
    DD="Day",
    hh="Hour",
    mn="Minute",
    # Wind
    WDIR="Wind Direction (degrees)",
    WSPD="Wind Speed (m/s)",
    GST="Peak Gust Speed (m/s)",
    # Waves
    WVHT="Significant Wave Height (meters)",
    DPD="Dominant Wave Period (seconds)",
    APD="Average Wave Period (seconds)",
    MWD="Wave Direction (degrees)",
    # "BAR" is mapped onto another short code rather than a descriptive name.
    BAR="PRES",
    # Temperatures
    ATMP="Air Temperature (Celsius)",
    WTMP="Sea Surface Temperature (Celsius)",
    DEWP="Dewpoint Temperature (Celsius)",
)


In [42]:
# Normalise every station frame in place: descriptive column names, month
# names as an ordered categorical, and years as a categorical.
for df in datasets.values():
    df.rename(columns=rename_columns, inplace=True)
    # Series.map turns every value that is not a key of month_map into NaN.
    # Once the column already holds month names (i.e. this cell has run
    # before), mapping again would silently wipe it, so only translate month
    # numbers while the column has not been converted yet.
    if not isinstance(df["Month"].dtype, pd.CategoricalDtype):
        df["Month"] = df["Month"].map(month_map)
    df["Month"] = pd.Categorical(df["Month"], categories=month_order, ordered=True)
    df["Year"] = pd.Categorical(df["Year"])
    # The frame is mutated in place, so `datasets` already refers to the
    # updated object; no re-assignment into the dict is needed.


In [43]:
def create_directory_for_plots(dataset_name, categorical_parameter):
    """Create (if needed) and return the output directory for one dataset's plots.

    The directory is ``Results/Plots/<dataset_name>/monthly`` when grouping by
    "Month" and ``Results/Plots/<dataset_name>/yearly`` when grouping by
    "Year", relative to the current working directory.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset; used as a path component.
    categorical_parameter : str
        Grouping column, either "Month" or "Year".

    Returns
    -------
    str
        Path of the directory, which exists when the function returns.

    Raises
    ------
    ValueError
        If ``categorical_parameter`` is neither "Month" nor "Year".
    """
    subdirectories = {"Month": "monthly", "Year": "yearly"}
    # Fail with a clear message instead of hitting an unbound local below.
    if categorical_parameter not in subdirectories:
        raise ValueError(
            f"Unsupported categorical parameter {categorical_parameter!r}; "
            f"expected one of {sorted(subdirectories)}"
        )

    base_directory_path = "Results/Plots"
    directory = os.path.join(
        base_directory_path, dataset_name, subdirectories[categorical_parameter]
    )
    os.makedirs(directory, exist_ok=True)
    return directory


def generate_filenames(numerical_parameter, categorical_parameter, directory):
    """Build the output path of the raincloud plot for one parameter.

    The result is ``<directory>/<parameter>_yearly_raincloud_plot.png`` for
    "Year" and ``<directory>/<parameter>_monthly_raincloud_plot.png`` for
    "Month". Unit suffixes that contain a slash are rewritten in the parameter
    name so they are not read as path separators: "(m/s)" becomes "m_s" and
    "(km/h)" becomes "kmh".

    Parameters
    ----------
    numerical_parameter : str
        Name of the plotted column; becomes the start of the file name.
    categorical_parameter : str
        Grouping column, either "Year" or "Month".
    directory : str
        Directory part of the path; used exactly as given.

    Returns
    -------
    str
        Path of the PNG file to write.

    Raises
    ------
    ValueError
        If ``categorical_parameter`` is neither "Year" nor "Month".
    """
    period_labels = {"Year": "yearly", "Month": "monthly"}
    # Fail with a clear message instead of returning an unbound local.
    if categorical_parameter not in period_labels:
        raise ValueError(
            f"Unsupported categorical parameter {categorical_parameter!r}; "
            f"expected one of {sorted(period_labels)}"
        )

    # Sanitise only the parameter name, never the directory, so the returned
    # path always points into the directory that was passed in. The "(m/s)"
    # replacement is a plain ASCII underscore.
    safe_parameter = numerical_parameter.replace("(m/s)", "m_s").replace(
        "(km/h)", "kmh"
    )
    period = period_labels[categorical_parameter]
    return f"{directory}/{safe_parameter}_{period}_raincloud_plot.png"


In [44]:
def clean_data(df, numerical_parameter):
    """Return the rows of ``df`` that have a value in ``numerical_parameter``.

    Rows where that column is missing are dropped; missing values in other
    columns are ignored. The input frame is not modified.
    """
    required_columns = [numerical_parameter]
    return df.dropna(subset=required_columns)


In [45]:
def plot_raincloud(
    categorical_parameter,
    numerical_parameter,
    df,
    sigma=0.1,
    ort="h",
    pal="Set2",
    filename="raincloud_plot.png",
):
    """Draw a raincloud plot of one column grouped by another and save it.

    Rows with a missing ``numerical_parameter`` are dropped with
    ``clean_data`` before plotting. The figure is written to ``filename`` and
    then closed, also when plotting or saving fails. Nothing is returned.

    Parameters
    ----------
    categorical_parameter : str
        Column passed to ``RainCloud`` as ``x``. "Year" and "Month" also set
        the global matplotlib font size (16 and 20 respectively); any other
        value leaves the font size untouched.
    numerical_parameter : str
        Column passed to ``RainCloud`` as ``y``; also used in the title.
    df : pandas.DataFrame
        Source data; not modified.
    sigma : float
        Forwarded to ``RainCloud`` as ``bw``.
    ort : str
        Forwarded to ``RainCloud`` as ``orient``.
    pal
        Forwarded to ``RainCloud`` as ``palette``.
    filename : str
        Path the figure is saved to.
    """
    # NOTE: this changes matplotlib's global rcParams and the change persists
    # after the call; kept as-is because later plots may rely on it.
    if categorical_parameter == "Year":
        plt.rcParams.update({"font.size": 16})
    elif categorical_parameter == "Month":
        plt.rcParams.update({"font.size": 20})

    df = clean_data(df, numerical_parameter)
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        pt.RainCloud(
            x=categorical_parameter,
            y=numerical_parameter,
            data=df,
            bw=sigma,
            width_viol=0.7,
            width_box=0.15,
            orient=ort,
            move=0.0,
            palette=pal,
            pointplot=True,
            point_size=0.01,
            box_showfliers=True,
            box_flierprops={"markerfacecolor": "r", "marker": "o", "markersize": 3},
            # Draw on the axes created above instead of relying on pyplot's
            # notion of the "current" axes.
            ax=ax,
        )

        ax.set_title(f"Raincloud Plot of {numerical_parameter} ")
        ax.set_ylabel("")
        fig.tight_layout()
        fig.savefig(filename)
    finally:
        # Always release the figure; this function is called in a loop and
        # unclosed figures accumulate in memory.
        plt.close(fig)


In [46]:
def process_datasets_and_plot(datasets, parameters, categorical_parameters=("Year",)):
    """Save one raincloud plot per dataset, numeric parameter and grouping.

    For every combination the output directory is created, the file name is
    generated, and the plot is rendered and saved. Each dataset name is
    printed as processing of that dataset starts.

    Parameters
    ----------
    datasets : dict
        Maps a dataset name to its DataFrame.
    parameters : iterable of str
        Numeric columns to plot.
    categorical_parameters : str or iterable of str
        Grouping column(s) to plot against. Defaults to ``("Year",)``, which
        is the grouping this function used before it became configurable;
        pass ``("Month", "Year")`` to produce both sets of plots.
    """
    # Accept a bare string so "Year" is not iterated character by character.
    if isinstance(categorical_parameters, str):
        categorical_parameters = (categorical_parameters,)

    for dataset_name, df in datasets.items():
        print(dataset_name)
        for numerical_parameter in parameters:
            for categorical_parameter in categorical_parameters:
                directory = create_directory_for_plots(
                    dataset_name, categorical_parameter
                )
                filename = generate_filenames(
                    numerical_parameter, categorical_parameter, directory
                )
                plot_raincloud(
                    categorical_parameter,
                    numerical_parameter,
                    df,
                    sigma=0.1,
                    ort="h",
                    pal="Set2",
                    filename=filename,
                )
        

In [47]:
# Render and save the raincloud plots for every station and parameter; the
# function prints each station name as it starts working on it.
process_datasets_and_plot(datasets, parameters)


Station 42002
Station 42019
Station 42020
