In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import os

# Move to the project root so the relative 'data' and 'output' paths used below resolve.
# The location can be overridden with the AUS_METEO_ROOT environment variable; the
# original checkout path is kept as the default so existing setups behave as before.
project_root = os.environ.get(
    'AUS_METEO_ROOT', '/Users/joaquinx/Documents/GitHub/aus-meteo-research'
)
if os.path.isdir(project_root):
    if os.getcwd() != project_root:
        os.chdir(project_root)
        print("Working directory set to:", os.getcwd())
else:
    # Do not crash on machines where the default path does not exist; relative
    # paths are then resolved against the current working directory instead.
    print("Project root not found, staying in:", os.getcwd())

# Divisor applied to the 9am wind speed (km/h) to get the arrow length in the wind panel
scaling_factor = 22
# 16-point compass labels mapped to bearings in degrees (clockwise from north)
direction_map = {'N': 0, 'NNE': 22.5, 'NE': 45, 'ENE': 67.5, 'E': 90, 'ESE': 112.5, 'SE': 135, 'SSE': 157.5,
                 'S': 180, 'SSW': 202.5, 'SW': 225, 'WSW': 247.5, 'W': 270, 'WNW': 292.5, 'NW': 315, 'NNW': 337.5}

# Configure Seaborn theme, font, and palette.
# Style and font are set in ONE call: sns.set() is an alias of sns.set_theme(), so a
# separate sns.set(font=...) would re-apply the defaults and undo style="whitegrid".
sns.set_theme(style="whitegrid", font="DejaVu Sans")
sns.set_palette("crest")

In [10]:
def preprocess_data(file_path):
    """Load a weather-observation CSV and prepare it for plotting.

    Steps performed:
      * read the CSV as ISO-8859-1 text;
      * parse the 'Date' column, drop rows whose date cannot be parsed, and use
        the dates as the index;
      * replace the literal value 'Calm' with 0 everywhere in the frame;
      * add a '9am wind angle' column by mapping '9am wind direction' through the
        module-level ``direction_map`` (unmapped values become NaN);
      * coerce the known measurement columns to numeric (unparseable -> NaN).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame indexed by 'Date'.

    Raises
    ------
    KeyError
        If the file has no 'Date' or '9am wind direction' column.
    """
    # Both spellings of the temperature headers are listed because the plotting
    # code accepts either '(°C)' or '(C)'.
    numeric_columns = [
        'Minimum temperature (°C)', 'Maximum temperature (°C)',
        'Minimum temperature (C)', 'Maximum temperature (C)',
        'Rainfall (mm)', 'Speed of maximum wind gust (km/h)',
        '9am Temperature (°C)', '9am Temperature (C)', '9am relative humidity (%)',
        '9am wind speed (km/h)', '9am MSL pressure (hPa)',
        '3pm Temperature (°C)', '3pm Temperature (C)', '3pm relative humidity (%)',
        '3pm wind speed (km/h)', '3pm MSL pressure (hPa)',
    ]

    data = pd.read_csv(file_path, encoding='ISO-8859-1')
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
    # A row without a valid date cannot be placed on a time axis, so drop it
    # instead of carrying a NaT entry in the index.
    data = data.dropna(subset=['Date']).set_index('Date')
    data = data.replace('Calm', 0)
    data['9am wind angle'] = data['9am wind direction'].map(direction_map)

    for column in numeric_columns:
        if column in data.columns:
            data[column] = pd.to_numeric(data[column], errors='coerce')
    return data

In [11]:
def _format_date_axis(ax):
    """Rotate x tick labels by 45 degrees and show one 'MM-DD' tick per day."""
    ax.tick_params(axis='x', labelrotation=45)
    ax.xaxis.set_major_locator(mdates.DayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))


def plot_weather_data(data, title, output_path):
    """Draw a six-panel weather summary and save it as an image.

    Panels, top to bottom: min/max temperature, rainfall, wind speeds,
    relative humidity, MSL pressure, and 9am wind arrows. All panels share a
    real date x-axis, so the 'MM-DD' tick labels correspond to the data's dates.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime index and the columns referenced below,
        including '9am wind angle' (degrees) and '9am wind speed (km/h)'.
    title : str
        Figure-level title.
    output_path : str or path-like
        Destination passed to ``Figure.savefig``.

    Notes
    -----
    Wind arrows treat '9am wind angle' as a compass bearing (0 = north = up,
    90 = east = right) and point TOWARD that bearing; their length is
    speed / ``scaling_factor``.
    NOTE(review): if the source data reports the direction the wind blows
    *from*, the arrows point upwind - confirm the desired convention.
    """
    # Determine column names (files may spell the unit with or without the degree sign)
    min_temp_col = 'Minimum temperature (°C)' if 'Minimum temperature (°C)' in data.columns else 'Minimum temperature (C)'
    max_temp_col = 'Maximum temperature (°C)' if 'Maximum temperature (°C)' in data.columns else 'Maximum temperature (C)'

    fig, axes = plt.subplots(6, 1, figsize=(15, 24))
    try:
        temp_ax, rain_ax, wind_ax, humidity_ax, pressure_ax, direction_ax = axes

        # Temperature Plot
        sns.lineplot(data=data, x=data.index, y=min_temp_col, label='Minimum Temperature (°C)', ax=temp_ax)
        sns.lineplot(data=data, x=data.index, y=max_temp_col, label='Maximum Temperature (°C)', ax=temp_ax)
        temp_ax.set_ylabel('Temperature (°C)')
        temp_ax.legend()
        _format_date_axis(temp_ax)

        # Rainfall Plot. Drawn with Axes.bar on the real dates: a categorical
        # bar plot places bars at 0..n-1, which a date formatter would label
        # as days in January 1970.
        rain_ax.bar(data.index, data['Rainfall (mm)'], color="skyblue")
        rain_ax.set_xlabel(data.index.name or '')
        rain_ax.set_ylabel('Rainfall (mm)')
        _format_date_axis(rain_ax)

        # Wind Speeds Plot
        sns.lineplot(data=data, x=data.index, y='Speed of maximum wind gust (km/h)', label='Max Wind Gust Speed (km/h)', ax=wind_ax)
        sns.lineplot(data=data, x=data.index, y='9am wind speed (km/h)', label='9am Wind Speed (km/h)', linestyle='--', ax=wind_ax)
        sns.lineplot(data=data, x=data.index, y='3pm wind speed (km/h)', label='3pm Wind Speed (km/h)', linestyle=':', ax=wind_ax)
        wind_ax.set_ylabel('Wind Speed (km/h)')
        wind_ax.legend()
        _format_date_axis(wind_ax)

        # Relative Humidity Plot
        sns.lineplot(data=data, x=data.index, y='9am relative humidity (%)', label='9am Relative Humidity (%)', linestyle='--', ax=humidity_ax)
        sns.lineplot(data=data, x=data.index, y='3pm relative humidity (%)', label='3pm Relative Humidity (%)', linestyle=':', ax=humidity_ax)
        humidity_ax.set_ylabel('Relative Humidity (%)')
        humidity_ax.legend()
        _format_date_axis(humidity_ax)

        # MSL Pressure Plot
        sns.lineplot(data=data, x=data.index, y='9am MSL pressure (hPa)', label='9am MSL Pressure (hPa)', linestyle='--', ax=pressure_ax)
        sns.lineplot(data=data, x=data.index, y='3pm MSL pressure (hPa)', label='3pm MSL Pressure (hPa)', linestyle=':', ax=pressure_ax)
        pressure_ax.set_ylabel('MSL Pressure (hPa)')
        pressure_ax.legend()
        _format_date_axis(pressure_ax)

        # Wind Direction and Speed Plot: one arrow per day, anchored at y=0 on
        # that day's date so it lines up with the panels above.
        day_numbers = np.asarray(mdates.date2num(data.index), dtype=float)
        bearings = pd.to_numeric(data['9am wind angle'], errors='coerce').to_numpy(dtype=float)
        speeds = pd.to_numeric(data['9am wind speed (km/h)'], errors='coerce').to_numpy(dtype=float)
        # Skip days where the date, the direction or the speed is missing
        usable = ~(np.isnan(day_numbers) | np.isnan(bearings) | np.isnan(speeds))
        if usable.any():
            theta = np.radians(bearings[usable])
            length = speeds[usable] / scaling_factor
            # Bearings run clockwise from north, so the east (x) component is
            # sin and the north (y) component is cos - not the other way round.
            dx = np.sin(theta) * length
            dy = np.cos(theta) * length
            direction_ax.quiver(day_numbers[usable], np.zeros(int(usable.sum())), dx, dy,
                                angles='xy', scale_units='xy', scale=1, width=0.003,
                                color='blue', alpha=0.7)
        direction_ax.set_title('9am Wind Direction and Speed')
        direction_ax.set_ylabel('Wind Direction (9am)')
        # Use the same date range as the temperature panel so arrows line up
        direction_ax.set_xlim(temp_ax.get_xlim())
        direction_ax.set_ylim(-1, 1)
        _format_date_axis(direction_ax)

        fig.suptitle(title, fontsize=16)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        fig.savefig(output_path)  # Save the figure to the output path
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

In [12]:
# Ensure the root-level 'output' folder is present; this is a no-op when it already exists
os.makedirs(name='output', exist_ok=True)

In [17]:
# Process each CSV file in the data folder and save one figure per file in the
# root output folder.
data_folder = 'data'
output_folder = 'output'
# Sorted so files are processed in a reproducible order (os.listdir order is arbitrary)
for filename in sorted(os.listdir(data_folder)):
    # splitext strips only the real extension; str.replace('.csv', '') would
    # also remove any '.csv' occurring in the middle of the name.
    stem, extension = os.path.splitext(filename)
    if extension.lower() != '.csv':
        continue

    file_path = os.path.join(data_folder, filename)
    data = preprocess_data(file_path)
    title = f"Weather Observations - {stem}"

    # Generate output file path in the root output folder
    output_path = os.path.join(output_folder, f"{stem}_weather_plot.png")

    # Plot and save the image
    plot_weather_data(data, title, output_path)