In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV at the relative path below into the DataFrame used by the rest of the notebook.
file_path = '../../data/2009_SCIENCE_GARDEN.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview the first five rows to sanity-check the loaded columns and values.
df.head(n=5)


In [None]:
# Parse the 'DATE' column into datetime values; later cells rely on this for
# the .dt accessor and for plotting against a time axis.
df['DATE'] = pd.to_datetime(arg=df['DATE'])

In [None]:
# Display summary statistics for the dataset's columns, using
# DataFrame.describe() with its default settings.
df.describe()

In [None]:
# Line chart of daily rainfall over time, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['DATE'], df['DAILY RAINFALL'], color='blue', label='Daily Rainfall')
ax.set(
    title='Daily Rainfall in 2009',
    xlabel='Date',
    ylabel='Rainfall (mm)',
)
ax.legend()
plt.show()

In [None]:
# Same daily rainfall line chart, with a dashed vertical marker drawn at
# 2009-09-27 and labelled 'Tropical Cyclone Ondoy' in the legend.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['DATE'], df['DAILY RAINFALL'], color='blue', label='Daily Rainfall')
ax.axvline(
    pd.Timestamp('2009-09-27'),
    color='red',
    linestyle='--',
    label='Tropical Cyclone Ondoy',
)
ax.set(
    title='Daily Rainfall in 2009',
    xlabel='Date',
    ylabel='Rainfall (mm)',
)
ax.legend()
plt.show()

In [None]:
# 1. Distribution of daily rainfall: 30-bin histogram with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=df, x='DAILY RAINFALL', bins=30, kde=True, color='blue', ax=ax)
ax.set(
    title='Histogram of Daily Rainfall in 2009',
    xlabel='Daily Rainfall (mm)',
    ylabel='Frequency',
)
plt.show()

In [None]:
# 2. Bar chart of total rainfall per calendar month.
# Derive the month number from 'DATE' and sum the daily rainfall within each month.
df['Month'] = df['DATE'].dt.month
monthly_rainfall = df.groupby('Month')['DAILY RAINFALL'].sum()

fig, ax = plt.subplots(figsize=(10, 5))
monthly_rainfall.plot.bar(ax=ax, color='green')
ax.set(
    title='Total Rainfall per Month in 2009',
    xlabel='Month',
    ylabel='Total Rainfall (mm)',
)
# Keep the month tick labels horizontal.
ax.tick_params(axis='x', labelrotation=0)
plt.show()

In [None]:
# 3. Column chart that accumulates rainfall per season
# Define seasons based on given criteria
def get_season(date):
    """Return the season label for a date, based on its calendar month.

    Parameters
    ----------
    date : object exposing a ``month`` attribute
        For example a ``pd.Timestamp`` or ``datetime.date``.

    Returns
    -------
    str
        'Wet Season (JJAS)' for June through September,
        'Cool Dry Season (ONDJF)' for October through February,
        'Hot Dry Season (MAM)' for any other month value.
    """
    month = date.month
    if month in (6, 7, 8, 9):
        return 'Wet Season (JJAS)'
    if month in (10, 11, 12, 1, 2):
        # The abbreviation lists all five months in this group (Oct-Feb),
        # including January.
        return 'Cool Dry Season (ONDJF)'
    # Everything not matched above (i.e. March, April, May) falls through here.
    return 'Hot Dry Season (MAM)'

# Label every row with its season, then total the daily rainfall per season.
df['Season'] = df['DATE'].apply(get_season)
seasonal_rainfall = df.groupby('Season')['DAILY RAINFALL'].sum()

fig, ax = plt.subplots(figsize=(10, 5))
seasonal_rainfall.plot.bar(ax=ax, color='purple')
ax.set(
    title='Total Rainfall per Season in 2009',
    xlabel='Season',
    ylabel='Total Rainfall (mm)',
)
# Keep the season tick labels horizontal.
ax.tick_params(axis='x', labelrotation=0)
plt.show()

In [None]:
# Box plot of mean temperature grouped by month.
# Requires the 'Month' column added in the monthly rainfall cell.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='Month', y='TEMP MEAN', ax=ax)
ax.set(
    title='Monthly Boxplots for Mean Temperature in 2009',
    xlabel='Month',
    ylabel='Mean Temperature (°C)',
)
plt.show()
