In [None]:
import pandas as pd
# Load the raw workbook; the path is relative to the notebook's working directory.
df = pd.read_excel('./data/data.xlsx')

# Convert the 'date' column to pandas datetimes so it can be plotted and sorted.
df['date'] = pd.to_datetime(df['date'])

# Basic info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()
print(df.describe())

In [None]:
# Count the missing entries in each column before any cleaning is applied.
missing_per_column = df.isna().sum()
print(missing_per_column)

## Clean NaN values

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna()

# Re-count the missing entries per column and report how many rows are left.
remaining_missing = df.isna().sum()
print(remaining_missing)
rows_left = df.shape[0]
print("Length of data after dropping missing values: ", rows_left)

In [None]:
# Summary statistics of the frame as it stands at this point in the notebook.
summary_stats = df.describe()
print(summary_stats)

In [None]:
import matplotlib.pyplot as plt

# Figure 1: the 'precipitation + irrigation (mm)' column plotted against date.
fig_water, ax_water = plt.subplots(figsize=(10, 4))
ax_water.plot(
    df['date'],
    df['precipitation + irrigation (mm)'],
    label='Precip + Irrigation',
)
ax_water.set_title('Daily Water Input')
ax_water.set_xlabel('Date')
ax_water.set_ylabel('mm')
ax_water.legend()
plt.show()

# Figure 2: one line per depth column, all drawn on the same axes.
depth_columns = ('depth 10cm', 'depth 30cm', 'depth 60cm', 'depth 90cm')
fig_moisture, ax_moisture = plt.subplots(figsize=(10, 6))
for depth_name in depth_columns:
    ax_moisture.plot(df['date'], df[depth_name], label=depth_name)
ax_moisture.set_title('Soil Moisture at Different Depths')
ax_moisture.set_xlabel('Date')
ax_moisture.set_ylabel('Moisture Level')
ax_moisture.legend()
plt.show()


In [None]:
import seaborn as sns

# Columns that take part in the pairwise correlation matrix.
cols = [
    'precipitation + irrigation (mm)',
    'potential evapotranspiration (mm)',
    'depth 10cm',
    'depth 30cm',
    'depth 60cm',
    'depth 90cm',
    'actual evapotranspiration (mm)',
    'groundwater recharge (mm)',
]
selected_columns = df[cols]
corr = selected_columns.corr()

# Annotated heatmap of the matrix; each cell shows the coefficient to 2 decimals.
fig_corr, ax_corr = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', ax=ax_corr)
ax_corr.set_title('Correlation Matrix')
plt.show()


In [None]:
# Write the current frame to CSV, leaving the row index out of the file.
cleaned_csv_path = './data/cleaned_data.csv'
df.to_csv(cleaned_csv_path, index=False)

# Seasonal Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Short aliases for the verbose column headers, used throughout the seasonal analysis.
short_names = {
    'precipitation + irrigation (mm)': 'precip',
    'potential evapotranspiration (mm)': 'pet',
    'depth 10cm': 'moisture_10cm',
    'depth 30cm': 'moisture_30cm',
    'depth 60cm': 'moisture_60cm',
    'depth 90cm': 'moisture_90cm',
    'actual evapotranspiration (mm)': 'aet',
    'groundwater recharge (mm)': 'gw_recharge',
}

# Parse dates as YYYY-MM-DD; errors='coerce' turns unparseable values into NaT
# instead of raising.
df['date'] = pd.to_datetime(df['date'], errors='coerce', format='%Y-%m-%d')

# Order the rows chronologically and renumber the index from zero.
chronological = df.sort_values(by='date')
df = chronological.reset_index(drop=True)

# The seasonal frame is the sorted frame with the short column names applied.
df_seasonal = df.rename(columns=short_names)

print(df_seasonal.head())


In [None]:
def get_season(row_date):
    """
    Assigns a season based on the month (meteorological seasons).
    - Winter: Dec (12), Jan (1), Feb (2)
    - Spring: Mar (3), Apr (4), May (5)
    - Summer: Jun (6), Jul (7), Aug (8)
    - Autumn: Sep (9), Oct (10), Nov (11)

    Parameters
    ----------
    row_date : object exposing a ``.month`` attribute (e.g. ``pd.Timestamp``,
        ``datetime.date``), or a missing value (``pd.NaT``, ``None``, NaN).

    Returns
    -------
    str or None
        'Winter', 'Spring', 'Summer' or 'Autumn'; ``None`` when the date is
        missing.
    """
    # NaT.month is NaN, which matches none of the month groups and would
    # otherwise fall through to the 'Autumn' default and be counted as a
    # real autumn observation.
    if pd.isna(row_date):
        return None

    month = row_date.month
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    # Only September, October and November remain for a valid date.
    return 'Autumn'

# Calendar order of the meteorological seasons. Without an explicit order the
# string labels are sorted alphabetically (Autumn, Spring, Summer, Winter),
# which scrambles both the summary table and any plot drawn along the season axis.
season_order = ['Winter', 'Spring', 'Summer', 'Autumn']

# Create a new column for Season, stored as an ordered categorical so that
# grouping, sorting and plotting all follow `season_order`.
df_seasonal['season'] = pd.Categorical(
    df_seasonal['date'].apply(get_season),
    categories=season_order,
    ordered=True,
)

# Variables whose per-season mean is reported.
value_columns = [
    'precip',
    'pet',
    'moisture_10cm',
    'moisture_30cm',
    'moisture_60cm',
    'moisture_90cm',
    'aet',
    'gw_recharge',
]

# Group by Season and take the mean of every variable.
# observed=True keeps only the seasons that actually occur in the data, so a
# season with no rows does not appear as an all-NaN line in the result.
seasonal_stats = (
    df_seasonal
    .groupby('season', observed=True)[value_columns]
    .mean()
    .reset_index()
)

print("Seasonal Mean Values:")
print(seasonal_stats)

In [None]:
# Draw the seasonal mean of every variable as its own line on one shared set of axes.
trend_variables = (
    'precip', 'pet',
    'moisture_10cm', 'moisture_30cm', 'moisture_60cm', 'moisture_90cm',
    'aet', 'gw_recharge',
)
fig_trend, ax_trend = plt.subplots(figsize=(12, 6))
for variable in trend_variables:
    sns.lineplot(
        data=seasonal_stats,
        x='season',
        y=variable,
        marker='o',
        label=variable,
        ax=ax_trend,
    )
ax_trend.set_title('Seasonal Trends')
# The y-axis carries several variables, so the per-column label is replaced by a generic one.
ax_trend.set_ylabel('Mean Value')
ax_trend.legend(title='Variables')
plt.show()

In [None]:
# One boxplot figure per variable, showing its distribution within each season.
boxplot_variables = [
    'precip', 'pet',
    'moisture_10cm', 'moisture_30cm', 'moisture_60cm', 'moisture_90cm',
    'aet', 'gw_recharge',
]
for variable in boxplot_variables:
    ax_box = sns.boxplot(data=df_seasonal, x='season', y=variable)
    ax_box.set_title(f'Seasonal Distribution of {variable}')
    ax_box.set_ylabel(variable)
    # Show the figure inside the loop so each variable gets its own plot.
    plt.show()