# Having a look at the data



In [20]:

# packages
import os
import glob
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
from load_levels import load_levels
from load_ea_rainfall import load_earain


## River Levels


In [16]:
# Define the folder path
folder_path = '../data/river'

# Find all CSV files in the folder. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to keep the row order of the
# combined frame identical on every run and machine.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))

# Fail with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when the folder is missing or empty.
if not csv_files:
    raise FileNotFoundError(f"No river level CSV files found in '{folder_path}'")

# Load and append all dataframes
df_list = [load_levels(file) for file in csv_files]
dfLevels = pd.concat(df_list, ignore_index=True)


# Convert dateTime column to datetime format.
# errors='coerce' turns unparseable values into NaT rather than raising.
dfLevels['dateTime'] = pd.to_datetime(dfLevels['dateTime'], errors='coerce')

# Add a 'season' column based on month
def get_season(month):
    """Return the season name for a calendar month number.

    Parameters
    ----------
    month : int or float
        Month number. Whole-number floats such as ``8.0`` compare equal to
        their integer counterparts and are treated the same way.

    Returns
    -------
    str or None
        'Winter' for 12, 1, 2; 'Spring' for 3, 4, 5; 'Summer' for 6, 7, 8;
        'Autumn' for any other non-missing value. ``None`` when ``month``
        is missing (``None`` / NaN).
    """
    # Timestamps in this notebook are parsed with errors='coerce', so bad
    # values become NaT and their .dt.month is NaN. Without this guard NaN
    # falls through to the catch-all branch and is mislabelled 'Autumn'.
    if pd.isna(month):
        return None
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Autumn'

# Derive each reading's season from the month of its timestamp
dfLevels['season'] = (
    dfLevels['dateTime']
    .dt.month
    .map(get_season)
)

# Leave the combined frame as the cell's displayed result
dfLevels

Unnamed: 0,measure,dateTime,date,value,station,season
0,level,2025-08-16 00:00:00,2025-08-16,-0.082,Nutclough,Summer
1,level,2025-08-16 00:15:00,2025-08-16,-0.082,Nutclough,Summer
2,level,2025-08-16 00:30:00,2025-08-16,-0.082,Nutclough,Summer
3,level,2025-08-16 00:45:00,2025-08-16,-0.082,Nutclough,Summer
4,level,2025-08-16 01:00:00,2025-08-16,-0.082,Nutclough,Summer
...,...,...,...,...,...,...
13727,level,2025-09-13 14:30:00,2025-09-13,0.093,Mytholmroyd-Dauber-Bridge,Autumn
13728,level,2025-09-13 14:45:00,2025-09-13,0.097,Mytholmroyd-Dauber-Bridge,Autumn
13729,level,2025-09-13 15:00:00,2025-09-13,0.098,Mytholmroyd-Dauber-Bridge,Autumn
13730,level,2025-09-13 15:15:00,2025-09-13,0.098,Mytholmroyd-Dauber-Bridge,Autumn


In [17]:
# simple plot

# Water level time series, drawn as one coloured line per station
fig = px.line(
    dfLevels,
    x='dateTime',
    y='value',
    color='station',
    labels={
        'dateTime': 'Date and Time',
        'value': 'Water Level (m)',
        'station': 'Monitoring Station',
    },
    title='Water Level Over Time by Monitoring Station',
)

# Render the figure in the cell output
fig.show()


## Rainfall

### EA Rainfall Gauges

In [21]:
# Define the folder path
folder_path = '../data/weather'

# Find all matching rainfall CSV files in the folder. glob returns matches in
# arbitrary, filesystem-dependent order, so sort them to keep the row order
# of the combined frame identical on every run and machine.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*-rainfall-15min-Qualified.csv')))

# Fail with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when nothing matches the pattern.
if not csv_files:
    raise FileNotFoundError(
        f"No '*-rainfall-15min-Qualified.csv' files found in '{folder_path}'"
    )

# Load and append all dataframes
df_list = [load_earain(file) for file in csv_files]
dfEARain = pd.concat(df_list, ignore_index=True)


# Convert dateTime column to datetime format.
# errors='coerce' turns unparseable values into NaT rather than raising.
dfEARain['dateTime'] = pd.to_datetime(dfEARain['dateTime'], errors='coerce')

# Add a 'season' column based on month
# NOTE(review): this redefines the get_season already declared in the
# River Levels cell; consider keeping a single definition (or moving it to a
# shared module) so the two copies cannot drift apart.
def get_season(month):
    """Return the season name for a calendar month number.

    Parameters
    ----------
    month : int or float
        Month number. Whole-number floats such as ``8.0`` compare equal to
        their integer counterparts and are treated the same way.

    Returns
    -------
    str or None
        'Winter' for 12, 1, 2; 'Spring' for 3, 4, 5; 'Summer' for 6, 7, 8;
        'Autumn' for any other non-missing value. ``None`` when ``month``
        is missing (``None`` / NaN).
    """
    # Timestamps in this notebook are parsed with errors='coerce', so bad
    # values become NaT and their .dt.month is NaN. Without this guard NaN
    # falls through to the catch-all branch and is mislabelled 'Autumn'.
    if pd.isna(month):
        return None
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Autumn'

# Derive each reading's season from the month of its timestamp
dfEARain['season'] = (
    dfEARain['dateTime']
    .dt.month
    .map(get_season)
)

# Leave the combined frame as the cell's displayed result
dfEARain

Unnamed: 0,measure,dateTime,date,value,station,season
0,rainfall,2025-08-16 00:00:00,2025-08-16,0.0,Walshaw-Dean-Lodge,Summer
1,rainfall,2025-08-16 00:15:00,2025-08-16,0.0,Walshaw-Dean-Lodge,Summer
2,rainfall,2025-08-16 00:30:00,2025-08-16,0.0,Walshaw-Dean-Lodge,Summer
3,rainfall,2025-08-16 00:45:00,2025-08-16,0.0,Walshaw-Dean-Lodge,Summer
4,rainfall,2025-08-16 01:00:00,2025-08-16,0.0,Walshaw-Dean-Lodge,Summer
...,...,...,...,...,...,...
10975,rainfall,2025-09-13 15:00:00,2025-09-13,0.2,Bacup,Autumn
10976,rainfall,2025-09-13 15:15:00,2025-09-13,0.0,Bacup,Autumn
10977,rainfall,2025-09-13 15:30:00,2025-09-13,0.0,Bacup,Autumn
10978,rainfall,2025-09-13 15:45:00,2025-09-13,0.0,Bacup,Autumn


In [32]:
# simple plot

# Rainfall time series, drawn as one stacked panel (facet row) per station
fig = px.line(
    dfEARain,
    x='dateTime',
    y='value',
    facet_row='station',
    labels={
        'dateTime': 'Date and Time',
        'value': 'Rainfall (mm)',
        'station': 'Monitoring Station',
    },
    title='Rainfall Over Time by Monitoring Station',
)

# Render the figure in the cell output
fig.show()