# This program visualizes the data and saves the figures.

## Use the separate per-county CSV files to manipulate the data.

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Pick the county to analyse; its processed CSV is expected under ../data/processed/.
county = "Kaohsiung_City"
path = f"../data/processed/{county}.csv"

# Only the read can raise FileNotFoundError, so keep the try body to that line.
try:
    df = pd.read_csv(path)
except FileNotFoundError as err:
    # Stop here with a clear message: carrying on without `df` would only
    # fail in a later cell with an unrelated-looking NameError.
    raise FileNotFoundError(
        f"Can't find the county name: no file for {county!r} at {path}"
    ) from err
else:
    # Parse the timestamps so date comparisons and the `.dt` accessor work.
    df["date"] = pd.to_datetime(df["date"])

In [None]:
# Print a concise summary of the loaded DataFrame as a quick sanity check.
df.info()

In [None]:
# List every distinct value of the "sitename" column.
df["sitename"].unique()

In [None]:
# The county is already fixed by the file we loaded, so the column is not needed.
# errors="ignore" keeps this cell safe to re-run once the column is already gone.
df = df.drop(columns="county", errors="ignore")

In [None]:
# Count the missing (NaN) values in each column before going further.
df.isna().sum()

In [None]:
# Work on an independent deep copy so the loaded data in `df` stays untouched.
df_copy = df.copy(deep=True)

In [None]:
# Set the time period (both days inclusive) and the sitename
start_time = "2024-08-01"
end_time = "2024-08-31"
sitename = "Zuoying"

# Compare on each timestamp's calendar day (time of day reset to midnight).
# Comparing the raw timestamps against the bare end date would silently drop
# every reading taken during the final day of the period.
day_start = df_copy["date"].dt.normalize()
in_period = day_start.between(start_time, end_time)
at_site = df_copy["sitename"] == sitename

# .copy() yields an independent frame, so adding the columns below does not
# write into a slice of df_copy (which would raise SettingWithCopyWarning).
df_plot = df_copy[in_period & at_site].copy()

# Derive the grouping keys used by the day / month / year plots
df_plot["day"] = df_plot["date"].dt.date
df_plot["month"] = df_plot["date"].dt.month
df_plot["year"] = df_plot["date"].dt.year
df_plot

# Draw the daily data.

In [None]:
# Sort the rows by day, earliest first
df_plot = df_plot.sort_values(by="day", ascending=True)

# Mean AQI per day; used only to choose the colour of each day's box
y_label = "aqi"
df_mean = df_plot.groupby("day", as_index=False)[y_label].mean()

# Normalize rescales the daily means linearly onto the 0-1 range
norm = plt.Normalize(df_mean[y_label].min(), df_mean[y_label].max())

# as_cmap turns the seaborn palette into a colormap; calling it on the
# normalized means gives one colour per day
colors = sns.color_palette("coolwarm", as_cmap=True)(norm(df_mean[y_label]))

# Upper limit of the y axis, shared by ylim and the tick positions
y_max = 100

# Draw the Boxplot
plt.figure(figsize=(14, 10))
ax = sns.boxplot(
    data=df_plot,
    x="day",
    y=y_label,
    # dict builds the day -> colour mapping
    palette=dict(zip(df_mean["day"], colors)),
    hue="day",
    legend=False,
)
plt.xticks(rotation=90)
plt.xlabel("DAY", fontsize=14)
plt.ylabel("AQI(Air Quality Index)", fontsize=14)
plt.ylim((0, y_max))
# arange excludes its stop value, so go one past y_max to get the top tick
plt.yticks(np.arange(0, y_max + 1, 5))
# Take the site from `sitename` so the title always names the plotted site
plt.title(f"AQI Distribution by Day in {sitename} District ({start_time} - {end_time})", fontsize=18)
plt.grid(linestyle="--", linewidth=1, alpha=0.3)

# Save figure (create the output folder first if it does not exist)
path = f"../figure/{county}_{sitename}_{y_label}_{start_time}_day.png"
os.makedirs(os.path.dirname(path), exist_ok=True)
plt.savefig(path, dpi=600, bbox_inches="tight")
print("Figure has been saved.")

# show() must come last, after the figure has been saved
plt.show()

# Draw the monthly data.

In [None]:
# Sort the rows by month, lowest month number first
df_plot = df_plot.sort_values(by="month", ascending=True)

# Mean AQI per month; used only to choose the colour of each month's box
y_label = "aqi"
df_mean = df_plot.groupby("month", as_index=False)[y_label].mean()

# Normalize rescales the monthly means linearly onto the 0-1 range
norm = plt.Normalize(df_mean[y_label].min(), df_mean[y_label].max())

# as_cmap turns the seaborn palette into a colormap; calling it on the
# normalized means gives one colour per month
colors = sns.color_palette("coolwarm", as_cmap=True)(norm(df_mean[y_label]))

# Upper limit of the y axis, shared by ylim and the tick positions
y_max = 180

# Draw the Boxplot
plt.figure(figsize=(14, 10))
ax = sns.boxplot(
    data=df_plot,
    x="month",
    y=y_label,
    # dict builds the month -> colour mapping
    palette=dict(zip(df_mean["month"], colors)),
    hue="month",
    legend=False,
)
plt.xlabel("Month", fontsize=14)
plt.ylabel("AQI(Air Quality Index)", fontsize=14)
plt.ylim((0, y_max))
# arange excludes its stop value, so go one past y_max to get the top tick
plt.yticks(np.arange(0, y_max + 1, 5))
# Take the site from `sitename` so the title always names the plotted site
plt.title(f"AQI Distribution by Month in {sitename} District ({start_time} - {end_time})", fontsize=18)
plt.grid(linestyle="--", linewidth=1, alpha=0.3)

# Save figure (create the output folder first if it does not exist)
path = f"../figure/{county}_{sitename}_{y_label}_{start_time}_month.png"
os.makedirs(os.path.dirname(path), exist_ok=True)
plt.savefig(path, dpi=600, bbox_inches="tight")
print("Figure has been saved.")

# show() must come last, after the figure has been saved
plt.show()

# Draw the yearly data.

In [None]:
# Sort the rows by year, earliest first
df_plot = df_plot.sort_values(by="year", ascending=True)

# Mean AQI per year; used only to choose the colour of each year's box
y_label = "aqi"
df_mean = df_plot.groupby("year", as_index=False)[y_label].mean()

# Normalize rescales the yearly means linearly onto the 0-1 range
norm = plt.Normalize(df_mean[y_label].min(), df_mean[y_label].max())

# as_cmap turns the seaborn palette into a colormap; calling it on the
# normalized means gives one colour per year
colors = sns.color_palette("coolwarm", as_cmap=True)(norm(df_mean[y_label]))

# Upper limit of the y axis, shared by ylim and the tick positions
y_max = 230

# Draw the Boxplot
plt.figure(figsize=(14, 10))
ax = sns.boxplot(
    data=df_plot,
    x="year",
    y=y_label,
    # dict builds the year -> colour mapping
    palette=dict(zip(df_mean["year"], colors)),
    hue="year",
    legend=False,
)
plt.xlabel("Year", fontsize=14)
plt.ylabel("AQI(Air Quality Index)", fontsize=14)
plt.ylim((0, y_max))
# arange excludes its stop value, so go one past y_max to get the top tick
plt.yticks(np.arange(0, y_max + 1, 5))
# Take the site from `sitename` so the title always names the plotted site
plt.title(f"AQI Distribution by Year in {sitename} District ({start_time} - {end_time})", fontsize=18)
plt.grid(linestyle="--", linewidth=1, alpha=0.3)

# Save figure (create the output folder first if it does not exist)
path = f"../figure/{county}_{sitename}_{y_label}_{start_time}_year.png"
os.makedirs(os.path.dirname(path), exist_ok=True)
plt.savefig(path, dpi=600, bbox_inches="tight")
print("Figure has been saved.")

# show() must come last, after the figure has been saved
plt.show()