In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os

# Column names as variables
date_col = 'date'
avg_temp_col = 'avg_temperature'
precipitation_col = 'precipitation'
avg_pressure_col = 'avg_pressure_sea'
solar_radiation_col = 'solar_radiation'
max_temp_col = 'max_temperature'
min_temp_col = 'min_temperature'
max_wind_speed_col = 'max_wind_speed'
avg_humidity_col = 'avg_relative_humidity'
max_humidity_col = 'max_relative_humidity'
min_humidity_col = 'min_relative_humidity'
extreme_weather_event_treshold=0.99
# Load the dataset
file_path = '/content/weatherstats_ottawa_daily.csv'
data = pd.read_csv(file_path)

# Create a directory for saving plots
plots_dir = "/content/drive/MyDrive/CSI6900/plots"
os.makedirs(plots_dir, exist_ok=True)

# Convert the date column to datetime
data[date_col] = pd.to_datetime(data[date_col])

# Define seasons
def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Fall'
data['year'] = data[date_col].dt.year
data['month'] = data[date_col].dt.month
data['season'] = data['month'].apply(get_season)

# Set uniform plot style
plt.style.use('seaborn-darkgrid')
font = {'family': 'serif', 'weight': 'bold', 'size': 14}
plt.rc('font', **font)

# Define a uniform color palette
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Plot 1: Time series of average daily temperature
plt.figure(figsize=(14, 7))
plt.plot(data[date_col], data[avg_temp_col], label='Average Temperature', color=colors[0])
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.title('Average Daily Temperature Over Time')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'average_daily_temperature.png'))
plt.close()

# Plot 2: Time series of daily precipitation
plt.figure(figsize=(14, 7))
plt.plot(data[date_col], data[precipitation_col], label='Precipitation', color=colors[1])
plt.xlabel('Date')
plt.ylabel('Precipitation (mm)')
plt.title('Daily Precipitation Over Time')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'daily_precipitation.png'))
plt.close()

# Plot 3: Time series of average daily pressure
plt.figure(figsize=(14, 7))
plt.plot(data[date_col], data[avg_pressure_col], label='Average Pressure at Sea Level', color=colors[2])
plt.xlabel('Date')
plt.ylabel('Pressure (kPa)')
plt.title('Average Daily Pressure Over Time')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'average_daily_pressure.png'))
plt.close()

# Plot 4: Time series of daily solar radiation
plt.figure(figsize=(14, 7))
plt.plot(data[date_col], data[solar_radiation_col], label='Solar Radiation', color=colors[3])
plt.xlabel('Date')
plt.ylabel('Solar Radiation (MJ/m²)')
plt.title('Daily Solar Radiation Over Time')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'daily_solar_radiation.png'))
plt.close()

# Plot 5: Histogram of average daily temperature
plt.figure(figsize=(14, 7))
plt.hist(data[avg_temp_col].dropna(), bins=30, edgecolor='k', alpha=0.7, color=colors[0])
plt.xlabel('Average Temperature (°C)')
plt.ylabel('Frequency')
plt.title('Distribution of Average Daily Temperature')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'distribution_avg_temp.png'))
plt.close()

# Plot 6: Histogram of daily precipitation
plt.figure(figsize=(14, 7))
plt.hist(data[precipitation_col].dropna(), bins=30, edgecolor='k', alpha=0.7, color=colors[1])
plt.xlabel('Precipitation (mm)')
plt.ylabel('Frequency')
plt.title('Distribution of Daily Precipitation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'distribution_precipitation.png'))
plt.close()

# Plot 7: Histogram of average daily pressure
plt.figure(figsize=(14, 7))
plt.hist(data[avg_pressure_col].dropna(), bins=30, edgecolor='k', alpha=0.7, color=colors[2])
plt.xlabel('Pressure (kPa)')
plt.ylabel('Frequency')
plt.title('Distribution of Average Daily Pressure')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'distribution_avg_pressure.png'))
plt.close()

# Plot 8: Histogram of daily solar radiation
plt.figure(figsize=(14, 7))
plt.hist(data[solar_radiation_col].dropna(), bins=30, edgecolor='k', alpha=0.7, color=colors[3])
plt.xlabel('Solar Radiation (MJ/m²)')
plt.ylabel('Frequency')
plt.title('Distribution of Daily Solar Radiation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'distribution_solar_radiation.png'))
plt.close()

# Plot 9: Heatmap of correlations between key variables
plt.figure(figsize=(12, 10))
corr_matrix = data[[avg_temp_col, precipitation_col, avg_pressure_col, solar_radiation_col]].corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap of Key Variables')
plt.savefig(os.path.join(plots_dir, 'correlation_heatmap.png'))
plt.close()

# Plot 10: Scatter plot of average temperature vs precipitation
plt.figure(figsize=(14, 7))
sns.scatterplot(x=avg_temp_col, y=precipitation_col, data=data, alpha=0.5, color=colors[0])
plt.xlabel('Average Temperature (°C)')
plt.ylabel('Precipitation (mm)')
plt.title('Average Temperature vs Precipitation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'scatter_temp_vs_precip.png'))
plt.close()

# Plot 11: Scatter plot of average temperature vs solar radiation
plt.figure(figsize=(14, 7))
sns.scatterplot(x=avg_temp_col, y=solar_radiation_col, data=data, alpha=0.5, color=colors[3])
plt.xlabel('Average Temperature (°C)')
plt.ylabel('Solar Radiation (MJ/m²)')
plt.title('Average Temperature vs Solar Radiation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'scatter_temp_vs_solar.png'))
plt.close()

# Plot 12: Scatter plot of average pressure vs solar radiation
plt.figure(figsize=(14, 7))
sns.scatterplot(x=avg_pressure_col, y=solar_radiation_col, data=data, alpha=0.5, color=colors[2])
plt.xlabel('Average Pressure (kPa)')
plt.ylabel('Solar Radiation (MJ/m²)')
plt.title('Average Pressure vs Solar Radiation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'scatter_pressure_vs_solar.png'))
plt.close()

# Plot 13: Monthly average temperature trend over years
data['year'] = data[date_col].dt.year
data['month'] = data[date_col].dt.month
monthly_avg_temp = data.groupby(['year', 'month'])[avg_temp_col].mean().unstack()

plt.figure(figsize=(14, 7))
sns.heatmap(monthly_avg_temp, cmap='coolwarm', annot=False)
plt.title('Monthly Average Temperature Trend Over Years')
plt.xlabel('Month')
plt.ylabel('Year')
plt.savefig(os.path.join(plots_dir, 'monthly_avg_temp_trend.png'))
plt.close()

# Plot 14: Monthly total precipitation over years
monthly_total_precip = data.groupby(['year', 'month'])[precipitation_col].sum().unstack()

plt.figure(figsize=(14, 7))
sns.heatmap(monthly_total_precip, cmap='Blues', annot=False)
plt.title('Monthly Total Precipitation Over Years')
plt.xlabel('Month')
plt.ylabel('Year')
plt.savefig(os.path.join(plots_dir, 'monthly_total_precipitation.png'))
plt.close()

# Plot 15: Box plot of average temperature by month
plt.figure(figsize=(14, 7))
sns.boxplot(x='month', y=avg_temp_col, data=data, palette=colors)
plt.xlabel('Month')
plt.ylabel('Average Temperature (°C)')
plt.title('Average Temperature by Month')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'boxplot_avg_temp.png'))
plt.close()

# Plot 16: Box plot of precipitation by month
plt.figure(figsize=(14, 7))
sns.boxplot(x='month', y=precipitation_col, data=data, palette=colors)
plt.xlabel('Month')
plt.ylabel('Precipitation (mm)')
plt.title('Precipitation by Month')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'boxplot_precipitation.png'))
plt.close()

# Plot 17: Monthly max temperature trend over years
monthly_max_temp = data.groupby(['year', 'month'])[max_temp_col].max().unstack()

plt.figure(figsize=(14, 7))
sns.heatmap(monthly_max_temp, cmap='coolwarm', annot=False)
plt.title('Monthly Max Temperature Trend Over Years')
plt.xlabel('Month')
plt.ylabel('Year')
plt.savefig(os.path.join(plots_dir, 'monthly_max_temp_trend.png'))
plt.close()

# Plot 18: Annual average precipitation trends
annual_avg_precip = data.groupby('year')[precipitation_col].mean()

plt.figure(figsize=(14, 7))
annual_avg_precip.plot(kind='bar', color=colors[1])
plt.xlabel('Year')
plt.ylabel('Average Precipitation (mm)')
plt.title('Annual Average Precipitation Trends')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'annual_avg_precipitation.png'))
plt.close()

# Plot 19: Seasonal total precipitation trends
seasonal_total_precip = data.groupby(['year', 'season'])[precipitation_col].sum().unstack()

plt.figure(figsize=(14, 7))
ax = seasonal_total_precip.plot(kind='bar', stacked=True, color=colors[:4], figsize=(14, 7))
plt.xlabel('Year')
plt.ylabel('Total Precipitation (mm)')
plt.title('Seasonal Total Precipitation Trends')
plt.legend(title='Season')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'seasonal_total_precipitation.png'))
plt.close()

# Plot 20: Monthly average solar radiation
monthly_avg_solar_radiation = data.groupby('month')[solar_radiation_col].mean()

plt.figure(figsize=(14, 7))
monthly_avg_solar_radiation.plot(kind='bar', color=colors[3])
plt.xlabel('Month')
plt.ylabel('Average Solar Radiation (MJ/m²)')
plt.title('Monthly Average Solar Radiation')
plt.grid(True)
plt.savefig(os.path.join(plots_dir, 'monthly_avg_solar_radiation.png'))
plt.close()

# Define thresholds for extreme events
extreme_max_temp_threshold = data[max_temp_col].quantile(extreme_weather_event_treshold)
extreme_min_temp_threshold = data[min_temp_col].quantile(1-extreme_weather_event_treshold)
extreme_precipitation_threshold = data[precipitation_col].quantile(extreme_weather_event_treshold)
extreme_wind_speed_threshold = data[max_wind_speed_col].quantile(extreme_weather_event_treshold)

# Calculate the number of extreme events per year
extreme_events = data.copy()
extreme_events['extreme_max_temp'] = extreme_events[max_temp_col] > extreme_max_temp_threshold
extreme_events['extreme_min_temp'] = extreme_events[min_temp_col] < extreme_min_temp_threshold
extreme_events['extreme_precipitation'] = extreme_events[precipitation_col] > extreme_precipitation_threshold
extreme_events['extreme_wind_speed'] = extreme_events[max_wind_speed_col] > extreme_wind_speed_threshold

extreme_events_per_year = extreme_events.groupby('year').agg({
    'extreme_max_temp': 'sum',
    'extreme_min_temp': 'sum',
    'extreme_precipitation': 'sum',
    'extreme_wind_speed': 'sum'
})

# Plot the trends of extreme weather events
fig, axes = plt.subplots(4, 1, figsize=(14, 28), sharex=True)

# Extreme max temperature events
axes[0].bar(extreme_events_per_year.index, extreme_events_per_year['extreme_max_temp'], color=colors[3])
axes[0].set_ylabel('Extreme Max Temp Events')
axes[0].set_title('Extreme Max Temperature Events per Year')

# Extreme min temperature events
axes[1].bar(extreme_events_per_year.index, extreme_events_per_year['extreme_min_temp'], color=colors[0])
axes[1].set_ylabel('Extreme Min Temp Events')
axes[1].set_title('Extreme Min Temperature Events per Year')

# Extreme precipitation events
axes[2].bar(extreme_events_per_year.index, extreme_events_per_year['extreme_precipitation'], color=colors[1])
axes[2].set_ylabel('Extreme Precipitation Events')
axes[2].set_title('Extreme Precipitation Events per Year')

# Extreme wind speed events
axes[3].bar(extreme_events_per_year.index, extreme_events_per_year['extreme_wind_speed'], color=colors[2])
axes[3].set_xlabel('Year')
axes[3].set_ylabel('Extreme Wind Speed Events')
axes[3].set_title('Extreme Wind Speed Events per Year')

plt.tight_layout()
plt.savefig(os.path.join(plots_dir, 'extreme_weather_events.png'))
plt.close()

# Calculate annual average, maximum, and minimum relative humidity
annual_avg_humidity = data.groupby('year')[avg_humidity_col].mean()
annual_max_humidity = data.groupby('year')[max_humidity_col].mean()
annual_min_humidity = data.groupby('year')[min_humidity_col].mean()

# Plot the trends of humidity over years
fig, axes = plt.subplots(3, 1, figsize=(14, 21), sharex=True)

# Annual average relative humidity
axes[0].plot(annual_avg_humidity.index, annual_avg_humidity, marker='o', color=colors[0])
axes[0].set_ylabel('Average Relative Humidity (%)')
axes[0].set_title('Annual Average Relative Humidity')
axes[0].grid(True)

# Annual maximum relative humidity
axes[1].plot(annual_max_humidity.index, annual_max_humidity, marker='o', color=colors[2])
axes[1].set_ylabel('Maximum Relative Humidity (%)')
axes[1].set_title('Annual Maximum Relative Humidity')
axes[1].grid(True)

# Annual minimum relative humidity
axes[2].plot(annual_min_humidity.index, annual_min_humidity, marker='o', color=colors[3])
axes[2].set_xlabel('Year')
axes[2].set_ylabel('Minimum Relative Humidity (%)')
axes[2].set_title('Annual Minimum Relative Humidity')
axes[2].grid(True)

plt.tight_layout()
plt.savefig(os.path.join(plots_dir, 'annual_humidity_trends.png'))
plt.close()





  data = pd.read_csv(file_path)
  plt.style.use('seaborn-darkgrid')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='month', y=avg_temp_col, data=data, palette=colors)
The palette list has fewer values (10) than needed (12) and will cycle, which may produce an uninterpretable plot.
  sns.boxplot(x='month', y=avg_temp_col, data=data, palette=colors)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='month', y=precipitation_col, data=data, palette=colors)
The palette list has fewer values (10) than needed (12) and will cycle, which may produce an uninterpretable plot.
  sns.boxplot(x='month', y=precipitation_col, data=data, palette=colors)


<Figure size 1400x700 with 0 Axes>