In [1]:
import matplotlib.pyplot as plt
import numpy as np
import ptitprince as pt
import numpy as np
import seaborn as sns
import pandas as pd
import os

# Set matplotlib's default font size to 16 for every figure created below.
plt.rcParams.update({"font.size": 16})


In [2]:
def read_water_gage_station(filename):
    """Load one station CSV file into a DataFrame indexed by its ``time`` column.

    Parameters
    ----------
    filename : str or file-like
        CSV source handed straight to ``pandas.read_csv``; it must contain a
        ``time`` column.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``time`` as the index. No date parsing is
        requested here, so the index keeps the values as read from the file.
    """
    station_frame = pd.read_csv(filename, index_col="time")
    return station_frame


# Load the CSV file of each station. The bare file names are resolved relative to the
# current working directory.
MeteostatStationLNFT5 = read_water_gage_station("Meteostat_LNFT5.csv")
MeteostatStationKBYY0 = read_water_gage_station("Meteostat_KBYY0.csv")
MeteostatStationKLBX0 = read_water_gage_station("Meteostat_KLBX0.csv")


In [3]:
# Display name -> station DataFrame. The keys are printed while plotting and are also
# used as sub-directory names for the saved figures.
datasets = {
    "Station LNFT5": MeteostatStationLNFT5,
    "Station KBYY0": MeteostatStationKBYY0,
    "Station KLBX0": MeteostatStationKLBX0,
}


In [4]:
# Numeric columns to plot, given by their display names (i.e. the column names after
# renaming). The wind speed entry is in m/s because the km/h column is converted and
# relabelled before plotting.
parameters = [
    "Air Temperature (°C)",
    "Dew Point (°C)",
    "Relative Humidity (%)",
    "Total Precipitation (mm)",
    "Wind (From) Direction (Degrees)",
    "Average Wind Speed (m/s)",
    "Sea-Level Air Pressure (hPa)",
]


In [5]:
# Month names in calendar order; used as the category order of the "Month" column.
month_order = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# Month number (1-12) -> month name, derived from the ordered list above so the two
# structures cannot drift apart.
month_map = dict(enumerate(month_order, start=1))

# Raw CSV column name -> display name including the unit. Every display name listed
# here is later looked up in each station frame for numeric conversion, so all of
# these columns are expected to be present in every file.
rename_columns = {
    "air_temperature": "Air Temperature (°C)",
    "dewpoint": "Dew Point (°C)",
    "relative_humidity": "Relative Humidity (%)",
    "total_precipitation": "Total Precipitation (mm)",
    "snow_depth": "Snow Depth (m)",
    "wind_direction": "Wind (From) Direction (Degrees)",
    # Converted to m/s and relabelled accordingly after the renaming step.
    "average_wind_speed": "Average Wind Speed (km/h)",
    "wind_peak_gust": "Wind Peak Gust (km/h)",
    "sea_level_air_pressure": "Sea-Level Air Pressure (hPa)",
    "total_sunshine_duration": "Total Sunshine Duration (Minutes)",
    "weather_condition_code": "Weather Condition Code",
}


In [6]:
# Prepare every station frame for plotting: display column names, numeric values,
# a datetime index, categorical "Year"/"Month" columns and wind speed in m/s.
# The frames are modified in place, so `datasets` already refers to the prepared data
# and no re-assignment into the dict is needed.
for dataset_name, df in datasets.items():
    # The last step below relabels the wind speed column to m/s, so its presence means
    # this frame has already been prepared. Skipping it keeps the cell re-runnable:
    # a second pass would otherwise fail because the km/h column no longer exists and
    # DataFrame.insert refuses to add an already existing "Year" column.
    if "Average Wind Speed (m/s)" in df.columns:
        continue

    df.rename(columns=rename_columns, inplace=True)
    # errors="coerce" turns entries that cannot be parsed as numbers into NaN.
    for col in rename_columns.values():
        df[col] = pd.to_numeric(df[col], errors="coerce")

    df.index = pd.to_datetime(df.index)
    df.insert(0, "Year", df.index.year)
    df.insert(1, "Month", df.index.month)
    # Month numbers -> names, stored as an ordered categorical following month_order.
    df["Month"] = df["Month"].map(month_map)
    df["Month"] = pd.Categorical(df["Month"], categories=month_order, ordered=True)
    df["Year"] = pd.Categorical(df["Year"])

    # km/h -> m/s, then relabel the column so its name matches the new unit.
    df["Average Wind Speed (km/h)"] = df["Average Wind Speed (km/h)"] * 1000 / 3600
    df.rename(
        columns={"Average Wind Speed (km/h)": "Average Wind Speed (m/s)"}, inplace=True
    )


In [7]:
def create_directory_for_plots(
    dataset_name, categorical_parameter, base_directory_path="Results/Plots"
):
    """Create (if necessary) and return the output directory for one dataset and grouping.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset; used as a sub-directory of ``base_directory_path``.
    categorical_parameter : str
        ``"Month"`` or ``"Year"``; selects the ``monthly`` or ``yearly`` sub-directory.
    base_directory_path : str, optional
        Root directory below which all plot directories are created.
        Defaults to ``"Results/Plots"``.

    Returns
    -------
    str
        Path of the directory, which exists when the function returns.

    Raises
    ------
    ValueError
        If ``categorical_parameter`` is neither ``"Month"`` nor ``"Year"``.
    """
    subdirectory_names = {"Month": "monthly", "Year": "yearly"}
    if categorical_parameter not in subdirectory_names:
        # Fail with a clear message instead of hitting an unbound local variable below.
        raise ValueError(
            f"Unsupported categorical parameter {categorical_parameter!r}; "
            f"expected one of {sorted(subdirectory_names)}"
        )

    directory = os.path.join(
        base_directory_path, dataset_name, subdirectory_names[categorical_parameter]
    )
    os.makedirs(directory, exist_ok=True)
    return directory


def generate_filenames(numerical_parameter, categorical_parameter, directory):
    """Build the PNG path for one raincloud plot.

    Parameters
    ----------
    numerical_parameter : str
        Display name of the plotted column; it becomes the start of the file name.
    categorical_parameter : str
        ``"Year"`` or ``"Month"``; selects the ``yearly`` / ``monthly`` infix.
    directory : str
        Directory part of the returned path; it is used unchanged.

    Returns
    -------
    str
        ``"<directory>/<parameter>_<yearly|monthly>_raincloud_plot.png"``.

    Raises
    ------
    ValueError
        If ``categorical_parameter`` is neither ``"Year"`` nor ``"Month"``.
    """
    period_labels = {"Year": "yearly", "Month": "monthly"}
    if categorical_parameter not in period_labels:
        # Fail with a clear message instead of hitting an unbound local variable below.
        raise ValueError(
            f"Unsupported categorical parameter {categorical_parameter!r}; "
            f"expected one of {sorted(period_labels)}"
        )

    # A "/" inside a unit label would act as a path separator, so these units are
    # rewritten. "(m/s)" maps to "m_s" with a plain ASCII underscore; an earlier
    # replacement used U+0640 (ARABIC TATWEEL), which only looks like an underscore.
    # Only the parameter name is rewritten, never the directory.
    safe_parameter = numerical_parameter.replace("(km/h)", "kmh").replace("(m/s)", "m_s")

    period = period_labels[categorical_parameter]
    return f"{directory}/{safe_parameter}_{period}_raincloud_plot.png"


In [8]:
def clean_data(df, numerical_parameter):
    """Return ``df`` without the rows whose ``numerical_parameter`` value is missing.

    Missing values in other columns do not cause a row to be removed, and the
    frame passed in is left untouched (``dropna`` returns a new object).
    """
    required_columns = [numerical_parameter]
    return df.dropna(subset=required_columns)

In [9]:
def plot_raincloud(
    categorical_parameter,
    numerical_parameter,
    df,
    sigma=0.1,
    ort="h",
    pal="Set2",
    filename="raincloud_plot.png",
):
    """Draw a raincloud plot of one numeric column per category and save it to disk.

    Parameters
    ----------
    categorical_parameter : str
        Column of ``df`` that defines the groups (passed to RainCloud as ``x``).
    numerical_parameter : str
        Column of ``df`` whose distribution is drawn (passed to RainCloud as ``y``).
        Rows where this column is missing are dropped before plotting.
    df : pandas.DataFrame
        Source data; it is not modified.
    sigma : float, optional
        Passed to RainCloud as ``bw``.
    ort : str, optional
        Passed to RainCloud as ``orient``.
    pal : str, optional
        Passed to RainCloud as ``palette``.
    filename : str, optional
        Path of the image file to write.

    Returns
    -------
    None
        The figure is written to ``filename`` and then closed.
    """
    df = clean_data(df, numerical_parameter)
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        pt.RainCloud(
            x=categorical_parameter,
            y=numerical_parameter,
            data=df,
            bw=sigma,
            width_viol=0.7,
            width_box=0.15,
            orient=ort,
            move=0.0,
            palette=pal,
            pointplot=True,
            point_size=0.01,
            box_showfliers=True,
            box_flierprops={"markerfacecolor": "r", "marker": "o", "markersize": 3},
            # Draw on the axes created above instead of relying on the current axes.
            ax=ax,
        )

        ax.set_title(f"Raincloud Plot of {numerical_parameter} ")
        ax.set_ylabel("")
        fig.tight_layout()
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails; this function
        # is called in a loop and unclosed figures would pile up in memory.
        plt.close(fig)


In [10]:
def process_datasets_and_plot(datasets, parameters, categorical_parameters=("Month", "Year")):
    """Save one raincloud plot per dataset, numeric parameter and grouping.

    Parameters
    ----------
    datasets : dict
        Maps a dataset name to its DataFrame. Each name is printed when its
        dataset is processed and is used for the output directory.
    parameters : iterable of str
        Numeric columns to plot.
    categorical_parameters : iterable of str, optional
        Grouping columns to plot against. Defaults to ``("Month", "Year")``.

    Returns
    -------
    None
    """
    for dataset_name, df in datasets.items():
        print(dataset_name)

        # The output directory depends only on the dataset and the grouping, so it is
        # created once per dataset instead of once per numeric parameter.
        directories = {
            categorical_parameter: create_directory_for_plots(
                dataset_name, categorical_parameter
            )
            for categorical_parameter in categorical_parameters
        }

        for numerical_parameter in parameters:
            for categorical_parameter, directory in directories.items():
                filename = generate_filenames(
                    numerical_parameter, categorical_parameter, directory
                )
                plot_raincloud(
                    categorical_parameter,
                    numerical_parameter,
                    df,
                    sigma=0.1,
                    ort="h",
                    pal="Set2",
                    filename=filename,
                )
        

In [11]:
# Generate and save the monthly and yearly raincloud plots for every station and parameter.
process_datasets_and_plot(datasets, parameters)


Station LNFT5
Station KBYY0
Station KLBX0
