In [1]:
import numpy as np
import pandas as pd

# Cleaning Data

In [2]:
# Read spreadsheet columns A:I from the workbook, skipping its first row.
original_df = pd.read_excel('Original-River-Data.xlsx', usecols='A:I', skiprows=1)

### Data Exploration

In [3]:
original_df.head(20)

Unnamed: 0.1,Unnamed: 0,Crakehill,Skip Bridge,Westwick,Skelton,Arkengarthdale,East Cowton,Malham Tarn,Snaizeholme
0,1993-01-01,10.4,4.393,9.291,26.1,0.0,0.0,0.0,4.0
1,1993-01-02,9.95,4.239,8.622,24.86,0.0,0.0,0.8,0.0
2,1993-01-03,9.46,4.124,8.057,23.6,0.0,0.0,0.8,0.0
3,1993-01-04,9.41,4.363,7.925,23.47,2.4,24.8,0.8,61.6
4,1993-01-05,26.3,11.962,58.704,60.7,11.2,5.6,33.6,111.2
5,1993-01-06,32.1,10.237,34.416,98.01,0.0,0.0,1.6,0.8
6,1993-01-07,19.3,7.254,22.263,56.99,5.6,4.0,17.6,36.0
7,1993-01-08,22.0,7.266,29.587,56.66,1.6,0.0,1.6,2.4
8,1993-01-09,35.5,8.153,60.253,78.1,14.4,0.8,55.2,104.8
9,1993-01-10,51.0,13.276,93.951,125.7,20.8,2.4,76.0,136.8


In [4]:
original_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Unnamed: 0      1461 non-null   datetime64[ns]
 1   Crakehill       1461 non-null   float64       
 2   Skip Bridge     1461 non-null   object        
 3   Westwick        1461 non-null   float64       
 4   Skelton         1461 non-null   object        
 5   Arkengarthdale  1461 non-null   float64       
 6   East Cowton     1461 non-null   object        
 7   Malham Tarn     1461 non-null   float64       
 8   Snaizeholme     1461 non-null   float64       
dtypes: datetime64[ns](1), float64(5), object(3)
memory usage: 102.9+ KB


In [5]:
# Work on a copy so that original_df keeps the data exactly as it was loaded.
river_data = original_df.copy()
river_data.head(10)

Unnamed: 0.1,Unnamed: 0,Crakehill,Skip Bridge,Westwick,Skelton,Arkengarthdale,East Cowton,Malham Tarn,Snaizeholme
0,1993-01-01,10.4,4.393,9.291,26.1,0.0,0.0,0.0,4.0
1,1993-01-02,9.95,4.239,8.622,24.86,0.0,0.0,0.8,0.0
2,1993-01-03,9.46,4.124,8.057,23.6,0.0,0.0,0.8,0.0
3,1993-01-04,9.41,4.363,7.925,23.47,2.4,24.8,0.8,61.6
4,1993-01-05,26.3,11.962,58.704,60.7,11.2,5.6,33.6,111.2
5,1993-01-06,32.1,10.237,34.416,98.01,0.0,0.0,1.6,0.8
6,1993-01-07,19.3,7.254,22.263,56.99,5.6,4.0,17.6,36.0
7,1993-01-08,22.0,7.266,29.587,56.66,1.6,0.0,1.6,2.4
8,1993-01-09,35.5,8.153,60.253,78.1,14.4,0.8,55.2,104.8
9,1993-01-10,51.0,13.276,93.951,125.7,20.8,2.4,76.0,136.8


In [6]:
# Renaming headers: column 0 becomes "Date", columns 1-4 receive the
# mean-daily-flow suffix and every remaining column the daily-rainfall suffix.
new_columns = {'Unnamed: 0': 'Date'}
for position, col in enumerate(river_data.columns[1:], start=1):
    if position < 5:
        new_columns[col] = f"{col} MDF (Cumecs)"
    else:
        new_columns[col] = f"{col} DRT (mm)"

river_data = river_data.rename(columns=new_columns)
river_data.head()

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
0,1993-01-01,10.4,4.393,9.291,26.1,0.0,0.0,0.0,4.0
1,1993-01-02,9.95,4.239,8.622,24.86,0.0,0.0,0.8,0.0
2,1993-01-03,9.46,4.124,8.057,23.6,0.0,0.0,0.8,0.0
3,1993-01-04,9.41,4.363,7.925,23.47,2.4,24.8,0.8,61.6
4,1993-01-05,26.3,11.962,58.704,60.7,11.2,5.6,33.6,111.2


##### MDF - Mean Daily Flow
##### DRT - Daily Rainfall Total

In [7]:
river_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Date                      1461 non-null   datetime64[ns]
 1   Crakehill MDF (Cumecs)    1461 non-null   float64       
 2   Skip Bridge MDF (Cumecs)  1461 non-null   object        
 3   Westwick MDF (Cumecs)     1461 non-null   float64       
 4   Skelton MDF (Cumecs)      1461 non-null   object        
 5   Arkengarthdale DRT (mm)   1461 non-null   float64       
 6   East Cowton DRT (mm)      1461 non-null   object        
 7   Malham Tarn DRT (mm)      1461 non-null   float64       
 8   Snaizeholme DRT (mm)      1461 non-null   float64       
dtypes: datetime64[ns](1), float64(5), object(3)
memory usage: 102.9+ KB


In [8]:
# Converting the non-numeric (object) columns to numeric columns.
# errors='coerce' turns any entry that cannot be parsed into NaN/NaT.
for text_col in ('Skip Bridge MDF (Cumecs)', 'Skelton MDF (Cumecs)', 'East Cowton DRT (mm)'):
    river_data[text_col] = pd.to_numeric(river_data[text_col], errors='coerce')
river_data['Date'] = pd.to_datetime(river_data['Date'], errors='coerce')
river_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Date                      1461 non-null   datetime64[ns]
 1   Crakehill MDF (Cumecs)    1461 non-null   float64       
 2   Skip Bridge MDF (Cumecs)  1460 non-null   float64       
 3   Westwick MDF (Cumecs)     1461 non-null   float64       
 4   Skelton MDF (Cumecs)      1460 non-null   float64       
 5   Arkengarthdale DRT (mm)   1461 non-null   float64       
 6   East Cowton DRT (mm)      1460 non-null   float64       
 7   Malham Tarn DRT (mm)      1461 non-null   float64       
 8   Snaizeholme DRT (mm)      1461 non-null   float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 102.9 KB


In [9]:
river_data.isna().sum()

Date                        0
Crakehill MDF (Cumecs)      0
Skip Bridge MDF (Cumecs)    1
Westwick MDF (Cumecs)       0
Skelton MDF (Cumecs)        1
Arkengarthdale DRT (mm)     0
East Cowton DRT (mm)        1
Malham Tarn DRT (mm)        0
Snaizeholme DRT (mm)        0
dtype: int64

In [10]:
river_data.describe()

Unnamed: 0,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
count,1461.0,1460.0,1461.0,1460.0,1461.0,1460.0,1461.0,1461.0
mean,17.519213,6.929692,21.59704,47.093886,8.519233,11.466301,68.89692,9.725394
std,52.587125,28.182097,28.35579,55.712853,133.950452,235.722765,2092.760651,20.984849
min,-999.0,-999.0,1.954,3.694,-999.0,0.0,0.0,0.0
25%,5.22,2.166,5.673,12.4425,0.0,0.0,0.0,0.0
50%,10.1,3.4945,10.391,24.26,0.8,0.0,1.6,0.8
75%,21.9,8.89225,26.736,59.3575,6.4,5.6,18.4,10.4
max,220.0,80.244,374.061,448.1,5000.0,9000.0,80000.0,268.8


### Removing Outliers and Null Values

In [11]:
# Dropping null values
flow_cols = list(river_data.columns[1:5])  # columns 1-4: the "MDF (Cumecs)" flow columns
rain_cols = list(river_data.columns[5:])  # columns 5 onwards: the "DRT (mm)" rainfall columns

# Boolean mask that is True for every row containing at least one null value.
null_values = river_data.isna().any(axis=1)
river_data[null_values] # Rows with at least 1 null value

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
96,1993-04-07,22.9,4.159,26.603,,2.4,0.0,0.0,3.2
789,1995-03-01,80.6,19.096,100.761,174.0,17.6,,51.2,21.6
1134,1996-02-09,12.7,,7.576,20.58,0.0,37.6,12.0,14.4


In [12]:
# Drop every row that contains at least one null value.
river_data.dropna(how="any", inplace=True)
# Verify the drop with a freshly computed mask. The earlier `null_values`
# mask was built before the drop, so its index no longer matches the frame
# and pandas would have to reindex it (emitting a UserWarning).
river_data[river_data.isna().any(axis=1)]

  


Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)


In [13]:
# Replacing negative values
# This cell only lists the rows holding at least one negative value.
# `axis` is passed by keyword because pandas 2.0 no longer accepts it positionally.
river_data[(river_data[flow_cols + rain_cols] < 0).any(axis=1)]

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
43,1993-02-13,-999.0,5.476,11.051,30.73,0.0,0.0,0.0,0.0
73,1993-03-15,-999.0,2.546,7.179,18.06,11.2,0.0,86.0,19.2
74,1993-03-16,-999.0,2.494,7.232,17.16,0.8,0.0,12.8,8.0
116,1993-04-27,24.4,11.684,22.181,73.96,-999.0,0.0,0.0,0.0
1203,1996-04-18,7.61,-999.0,13.918,16.84,0.8,4.8,47.2,32.0


In [14]:
# Replace every negative value with zero. The keep-condition is `>= 0` so it
# agrees exactly with the `< 0` check below; the previous `> -1` threshold
# would have left values in the open interval (-1, 0) untouched.
measurement_cols = flow_cols + rain_cols
river_data[measurement_cols] = river_data[measurement_cols].where(river_data[measurement_cols] >= 0, 0)
# Verification: no row should contain a negative value any more.
river_data[(river_data[measurement_cols] < 0).any(axis=1)]

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)


In [15]:
# Dropping rows with rainfall outliers
# A row is flagged when any rainfall column exceeds 400.
# `axis` is passed by keyword because pandas 2.0 no longer accepts it positionally.
rainfall_outliers = river_data[(river_data[rain_cols] > 400).any(axis=1)]
rainfall_outliers

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
771,1995-02-11,65.0,31.496,108.575,136.7,5000.0,15.2,108.4,80.8
788,1995-02-28,44.6,11.563,52.105,93.91,19.2,9000.0,46.4,47.2
1104,1996-01-10,32.4,10.548,30.086,84.33,0.8,0.0,80000.0,0.8


In [16]:
# Drop the rows that have rainfall outliers.
river_data.drop(rainfall_outliers.index, inplace=True)
# Verification: no row should exceed the rainfall threshold any more.
river_data[(river_data[rain_cols] > 400).any(axis=1)]

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)


In [17]:
# Dropping rows with river flow outliers
# A row is flagged when any flow column is exactly zero.
# `axis` is passed by keyword because pandas 2.0 no longer accepts it positionally.
river_flow_outliers = river_data[(river_data[flow_cols] == 0).any(axis=1)]
river_flow_outliers

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
43,1993-02-13,0.0,5.476,11.051,30.73,0.0,0.0,0.0,0.0
73,1993-03-15,0.0,2.546,7.179,18.06,11.2,0.0,86.0,19.2
74,1993-03-16,0.0,2.494,7.232,17.16,0.8,0.0,12.8,8.0
1203,1996-04-18,7.61,0.0,13.918,16.84,0.8,4.8,47.2,32.0


In [18]:
# Drop the rows that have a zero flow reading.
river_data.drop(river_flow_outliers.index, inplace=True)
# Verification: no row should contain a zero flow value any more.
river_data[(river_data[flow_cols] == 0).any(axis=1)]

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)


In [19]:
river_data.describe()

Unnamed: 0,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
count,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0
mean,19.522233,7.600946,21.494272,46.977784,5.784149,5.295107,13.986492,9.635837
std,25.249077,10.013759,28.263025,55.691669,13.262708,12.296973,25.04585,20.934958
min,2.06,1.002,1.954,3.694,0.0,0.0,0.0,0.0
25%,5.24,2.1585,5.6325,12.385,0.0,0.0,0.0,0.0
50%,10.1,3.492,10.379,24.25,0.8,0.0,1.6,0.8
75%,21.75,8.833,26.652,59.15,6.4,5.6,18.4,10.4
max,220.0,80.244,374.061,448.1,225.2,165.6,252.0,268.8


### Exporting Cleaned Data Set

In [113]:
# Export the cleaned data. This must actually run: the next section reloads
# 'River-Data-Cleaned.xlsx' from disk, so a fresh "Restart & Run All" fails
# if the file was never written.
# The index is written as the first column; the loading cell drops it again.
export_data = river_data.copy()
export_data["Date"] = export_data["Date"].astype("string")
export_data.to_excel('River-Data-Cleaned.xlsx')

# Standardisation, Lagging and Splitting

In [20]:
# Reload the exported workbook and discard the "Unnamed: 0" column it contains.
clean_df = pd.read_excel('River-Data-Cleaned.xlsx').drop(columns=["Unnamed: 0"])
clean_df.head(20)

Unnamed: 0,Date,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
0,1993-01-01,10.4,4.393,9.291,26.1,0.0,0.0,0.0,4.0
1,1993-01-02,9.95,4.239,8.622,24.86,0.0,0.0,0.8,0.0
2,1993-01-03,9.46,4.124,8.057,23.6,0.0,0.0,0.8,0.0
3,1993-01-04,9.41,4.363,7.925,23.47,2.4,24.8,0.8,61.6
4,1993-01-05,26.3,11.962,58.704,60.7,11.2,5.6,33.6,111.2
5,1993-01-06,32.1,10.237,34.416,98.01,0.0,0.0,1.6,0.8
6,1993-01-07,19.3,7.254,22.263,56.99,5.6,4.0,17.6,36.0
7,1993-01-08,22.0,7.266,29.587,56.66,1.6,0.0,1.6,2.4
8,1993-01-09,35.5,8.153,60.253,78.1,14.4,0.8,55.2,104.8
9,1993-01-10,51.0,13.276,93.951,125.7,20.8,2.4,76.0,136.8


In [21]:
clean_df.describe()

Unnamed: 0,Crakehill MDF (Cumecs),Skip Bridge MDF (Cumecs),Westwick MDF (Cumecs),Skelton MDF (Cumecs),Arkengarthdale DRT (mm),East Cowton DRT (mm),Malham Tarn DRT (mm),Snaizeholme DRT (mm)
count,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0
mean,19.522233,7.600946,21.494272,46.977784,5.784149,5.295107,13.986492,9.635837
std,25.249077,10.013759,28.263025,55.691669,13.262708,12.296973,25.04585,20.934958
min,2.06,1.002,1.954,3.694,0.0,0.0,0.0,0.0
25%,5.24,2.1585,5.6325,12.385,0.0,0.0,0.0,0.0
50%,10.1,3.492,10.379,24.25,0.8,0.0,1.6,0.8
75%,21.75,8.833,26.652,59.15,6.4,5.6,18.4,10.4
max,220.0,80.244,374.061,448.1,225.2,165.6,252.0,268.8


In [22]:
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1451 entries, 0 to 1450
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Date                      1451 non-null   object 
 1   Crakehill MDF (Cumecs)    1451 non-null   float64
 2   Skip Bridge MDF (Cumecs)  1451 non-null   float64
 3   Westwick MDF (Cumecs)     1451 non-null   float64
 4   Skelton MDF (Cumecs)      1451 non-null   float64
 5   Arkengarthdale DRT (mm)   1451 non-null   float64
 6   East Cowton DRT (mm)      1451 non-null   float64
 7   Malham Tarn DRT (mm)      1451 non-null   float64
 8   Snaizeholme DRT (mm)      1451 non-null   float64
dtypes: float64(8), object(1)
memory usage: 102.1+ KB


In [23]:
# Function for standardising and unstandardising columns
def standardise_columns(df, cols):
    """Min-max scale the selected columns into the range [0.1, 0.9].

    Each selected column is scaled with its own minimum and maximum:
    ``0.8 * (x - min) / (max - min) + 0.1``.

    Args:
        df: DataFrame holding the data to scale (not modified).
        cols: Column label(s) to select from ``df``.

    Returns:
        A new object containing only the scaled ``cols``.
    """
    selected = df[cols]
    col_min = selected.min()
    col_range = selected.max() - col_min
    unit_scaled = (selected - col_min) / col_range
    return 0.8 * unit_scaled + 0.1

def unstandardise_columns(df, cols, max_val, min_val):
    """Invert the [0.1, 0.9] min-max scaling applied by ``standardise_columns``.

    Computes ``(x - 0.1) / 0.8 * (max_val - min_val) + min_val``.

    The offset removed is the fixed 0.1 used by the forward transform, not
    the minimum of the values passed in. Using the data's own minimum is only
    correct when the input happens to contain the scaled minimum, and it
    shifts every value otherwise (e.g. for a subset of rows).

    Args:
        df: DataFrame holding standardised values (not modified).
        cols: Column label(s) to select from ``df``.
        max_val: Maximum of the original, unscaled data.
        min_val: Minimum of the original, unscaled data.

    Returns:
        A new object containing the selected ``cols`` in original units.
    """
    subset_df = df[cols]
    return ((subset_df - 0.1) / 0.8) * (max_val - min_val) + min_val

### Lagging Data

In [76]:
# Start the lagged frame from the date and the last flow column of clean_df.
lagged_df = clean_df[["Date", flow_cols[-1]]].copy()
lagged_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs)
0,1993-01-01,26.1
1,1993-01-02,24.86
2,1993-01-03,23.6
3,1993-01-04,23.47
4,1993-01-05,60.7
5,1993-01-06,98.01
6,1993-01-07,56.99
7,1993-01-08,56.66
8,1993-01-09,78.1
9,1993-01-10,125.7


In [77]:
# Lagging rainfall and flow columns by 1 to 3 days
# (flow columns first, then rainfall; the unit suffix is replaced by "(t-k)").
for source_cols, unit in ((flow_cols, "(Cumecs)"), (rain_cols, "(mm)")):
    for lag in (1, 2, 3):
        for col in source_cols:
            lagged_df[col.replace(unit, f"(t-{lag})")] = clean_df[col].shift(lag)

lagged_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,
1,1993-01-02,24.86,10.4,4.393,9.291,26.1,,,,,...,0.0,4.0,,,,,,,,
2,1993-01-03,23.6,9.95,4.239,8.622,24.86,10.4,4.393,9.291,26.1,...,0.8,0.0,0.0,0.0,0.0,4.0,,,,
3,1993-01-04,23.47,9.46,4.124,8.057,23.6,9.95,4.239,8.622,24.86,...,0.8,0.0,0.0,0.0,0.8,0.0,0.0,0.0,0.0,4.0
4,1993-01-05,60.7,9.41,4.363,7.925,23.47,9.46,4.124,8.057,23.6,...,0.8,61.6,0.0,0.0,0.8,0.0,0.0,0.0,0.8,0.0
5,1993-01-06,98.01,26.3,11.962,58.704,60.7,9.41,4.363,7.925,23.47,...,33.6,111.2,2.4,24.8,0.8,61.6,0.0,0.0,0.8,0.0
6,1993-01-07,56.99,32.1,10.237,34.416,98.01,26.3,11.962,58.704,60.7,...,1.6,0.8,11.2,5.6,33.6,111.2,2.4,24.8,0.8,61.6
7,1993-01-08,56.66,19.3,7.254,22.263,56.99,32.1,10.237,34.416,98.01,...,17.6,36.0,0.0,0.0,1.6,0.8,11.2,5.6,33.6,111.2
8,1993-01-09,78.1,22.0,7.266,29.587,56.66,19.3,7.254,22.263,56.99,...,1.6,2.4,5.6,4.0,17.6,36.0,0.0,0.0,1.6,0.8
9,1993-01-10,125.7,35.5,8.153,60.253,78.1,22.0,7.266,29.587,56.66,...,55.2,104.8,1.6,0.0,1.6,2.4,5.6,4.0,17.6,36.0


In [80]:
# Correlations between lagged columns
# "Date" is an object column. pandas >= 2.0 no longer drops non-numeric
# columns silently in corr(), so restrict the frame to numeric columns first.
lagged_df.select_dtypes(include="number").corr()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),Crakehill MDF (t-3),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
Skelton MDF (Cumecs),1.0,0.881752,0.881944,0.908287,0.886878,0.723829,0.73283,0.731264,0.749477,0.626238,...,0.489,0.577851,0.407372,0.25053,0.404737,0.480735,0.317296,0.187745,0.333385,0.38591
Crakehill MDF (t-1),0.881752,1.0,0.930827,0.899135,0.97289,0.817739,0.805517,0.856354,0.79899,0.636757,...,0.306172,0.364734,0.533147,0.388672,0.484966,0.586026,0.366427,0.231089,0.356624,0.426056
Skip Bridge MDF (t-1),0.881944,0.930827,1.0,0.865032,0.948227,0.805629,0.861204,0.825034,0.814771,0.661861,...,0.324782,0.363945,0.485849,0.328659,0.434301,0.502084,0.397495,0.230546,0.360419,0.410911
Westwick MDF (t-1),0.908287,0.899135,0.865032,1.0,0.886743,0.665608,0.676689,0.740998,0.689073,0.559045,...,0.480368,0.555791,0.473601,0.26372,0.501717,0.602304,0.300092,0.158808,0.332633,0.367421
Skelton MDF (t-1),0.886878,0.97289,0.948227,0.886743,1.0,0.881751,0.881943,0.90829,0.886878,0.723827,...,0.292292,0.343805,0.505105,0.340767,0.48899,0.577844,0.407362,0.250528,0.404731,0.480728
Crakehill MDF (t-2),0.723829,0.817739,0.805629,0.665608,0.881751,1.0,0.930826,0.899136,0.972892,0.817739,...,0.164204,0.200821,0.330429,0.238076,0.306152,0.364719,0.533138,0.388671,0.484965,0.586023
Skip Bridge MDF (t-2),0.73283,0.805517,0.861204,0.676689,0.881943,0.930826,1.0,0.865033,0.948228,0.805629,...,0.159569,0.185451,0.370347,0.256995,0.324764,0.36393,0.48584,0.328657,0.434296,0.502076
Westwick MDF (t-2),0.731264,0.856354,0.825034,0.740998,0.90829,0.899136,0.865033,1.0,0.886739,0.665591,...,0.176608,0.194085,0.472362,0.293623,0.480338,0.555769,0.473583,0.263719,0.501684,0.60228
Skelton MDF (t-2),0.749477,0.79899,0.814771,0.689073,0.886878,0.972892,0.948228,0.886739,1.0,0.881747,...,0.183922,0.225514,0.296278,0.183452,0.292256,0.343778,0.50509,0.340767,0.488964,0.577824
Crakehill MDF (t-3),0.626238,0.636757,0.661861,0.559045,0.723827,0.817739,0.805629,0.665591,0.881747,1.0,...,0.137964,0.19311,0.147801,0.060246,0.164147,0.200777,0.330403,0.238075,0.306081,0.364666


In [81]:
# Rows with null values (displayed here; they are dropped in the next cell)
lagged_df[lagged_df.isna().any(axis=1)]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,
1,1993-01-02,24.86,10.4,4.393,9.291,26.1,,,,,...,0.0,4.0,,,,,,,,
2,1993-01-03,23.6,9.95,4.239,8.622,24.86,10.4,4.393,9.291,26.1,...,0.8,0.0,0.0,0.0,0.0,4.0,,,,


In [82]:
# Remove every row that still contains a null, then confirm none remain.
lagged_df = lagged_df.dropna(axis="index", how="any")
lagged_df.loc[lagged_df.isnull().any(axis="columns")]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)


In [83]:
lagged_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),Crakehill MDF (t-3),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
count,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,...,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0
mean,47.023622,19.537023,7.606707,21.517077,47.012696,19.532983,7.605369,21.513186,47.006191,19.531809,...,14.010497,9.652486,5.783978,5.295028,14.007182,9.649724,5.781768,5.291713,14.007182,9.651934
std,55.74023,25.272741,10.02328,28.287685,55.743271,25.273807,10.023601,28.289072,55.745308,25.274192,...,25.065902,20.9533,13.274272,12.308373,25.067332,20.954309,13.274969,12.309152,25.067332,20.953544
min,3.694,2.06,1.002,1.954,3.694,2.06,1.002,1.954,3.694,2.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,12.3775,5.215,2.1545,5.60925,12.3775,5.215,2.1545,5.60925,12.3775,5.215,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,24.245,10.05,3.4805,10.385,24.23,10.0,3.4805,10.3545,24.23,10.0,...,1.6,0.8,0.8,0.0,1.6,0.8,0.8,0.0,1.6,0.8
75%,59.27,21.8,8.85525,26.724,59.27,21.8,8.85525,26.724,59.27,21.8,...,18.4,10.4,6.4,5.3,18.4,10.4,6.4,5.3,18.4,10.4
max,448.1,220.0,80.244,374.061,448.1,220.0,80.244,374.061,448.1,220.0,...,252.0,268.8,225.2,165.6,252.0,268.8,225.2,165.6,252.0,268.8


### Moving Averages

In [136]:
# Start the moving-average frame from the date and the last flow column of clean_df.
moving_avg_df = clean_df[["Date", flow_cols[-1]]].copy()
moving_avg_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs)
0,1993-01-01,26.1
1,1993-01-02,24.86
2,1993-01-03,23.6
3,1993-01-04,23.47
4,1993-01-05,60.7
5,1993-01-06,98.01
6,1993-01-07,56.99
7,1993-01-08,56.66
8,1993-01-09,78.1
9,1993-01-10,125.7


In [137]:
# Rolling means over windows of 3 to 7 rows for every flow column, then every
# rainfall column; the unit suffix is replaced by "(MAk)".
for source_cols, unit in ((flow_cols, "(Cumecs)"), (rain_cols, "(mm)")):
    for window in range(3, 8):
        for col in source_cols:
            moving_avg_df[col.replace(unit, f"(MA{window})")] = clean_df[col].rolling(window).mean()

moving_avg_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,
1,1993-01-02,24.86,,,,,,,,,...,,,,,,,,,,
2,1993-01-03,23.6,9.936667,4.252,8.656667,24.853333,,,,,...,,,,,,,,,,
3,1993-01-04,23.47,9.606667,4.242,8.201333,23.976667,9.805,4.27975,8.47375,24.5075,...,,,,,,,,,,
4,1993-01-05,60.7,15.056667,6.816333,24.895333,35.923333,13.78,6.172,20.827,33.1575,...,7.2,35.36,,,,,,,,
5,1993-01-06,98.01,22.603333,8.854,33.681667,60.726667,19.3175,7.6715,27.2755,51.445,...,7.52,34.72,2.266667,5.066667,6.266667,29.6,,,,
6,1993-01-07,56.99,25.9,9.817667,38.461,71.9,21.7775,8.454,30.827,59.7925,...,10.88,41.92,3.2,5.733333,9.2,34.933333,2.742857,4.914286,7.885714,30.514286
7,1993-01-08,56.66,24.466667,8.252333,28.755333,70.553333,24.925,9.17975,36.2425,68.09,...,11.04,42.4,3.466667,5.733333,9.333333,35.333333,2.971429,4.914286,8.114286,30.285714
8,1993-01-09,78.1,25.6,7.557667,37.367667,63.916667,27.225,8.2275,36.62975,72.44,...,21.92,51.04,5.866667,5.866667,18.4,52.8,5.028571,5.028571,15.885714,45.257143
9,1993-01-10,125.7,36.166667,9.565,61.263667,86.82,31.95,8.98725,51.5135,79.3625,...,30.4,56.16,8.933333,2.133333,30.933333,65.333333,8.0,5.371429,26.628571,64.8


In [138]:
# "Date" is an object column. pandas >= 2.0 no longer drops non-numeric
# columns silently in corr(), so restrict the frame to numeric columns first.
moving_avg_df.select_dtypes(include="number").corr()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),Crakehill MDF (MA5),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)
Skelton MDF (Cumecs),1.0,0.938738,0.914594,0.949677,0.929391,0.904795,0.881894,0.922418,0.893389,0.874747,...,0.593746,0.690495,0.603329,0.413266,0.591107,0.693164,0.599033,0.412839,0.588475,0.695555
Crakehill MDF (MA3),0.938738,1.0,0.950469,0.94916,0.982764,0.981097,0.933412,0.954153,0.954356,0.948594,...,0.565631,0.668877,0.606963,0.443224,0.569525,0.675134,0.600626,0.442991,0.566959,0.678565
Skip Bridge MDF (MA3),0.914594,0.950469,1.0,0.917204,0.964027,0.942131,0.984918,0.929325,0.944836,0.919828,...,0.552858,0.623684,0.628179,0.41385,0.565639,0.640302,0.626908,0.412714,0.571864,0.652073
Westwick MDF (MA3),0.949677,0.94916,0.917204,1.0,0.952057,0.919614,0.890707,0.976818,0.91727,0.889173,...,0.650361,0.741363,0.621496,0.380418,0.646721,0.741301,0.614665,0.385091,0.639734,0.739875
Skelton MDF (MA3),0.929391,0.982764,0.964027,0.952057,1.0,0.984274,0.962192,0.974207,0.987378,0.966673,...,0.585141,0.683508,0.615161,0.406951,0.599004,0.700393,0.616254,0.411281,0.604288,0.710965
Crakehill MDF (MA4),0.904795,0.981097,0.942131,0.919614,0.984274,1.0,0.954259,0.956906,0.985032,0.986174,...,0.548924,0.65599,0.608214,0.43306,0.567151,0.677195,0.60958,0.439739,0.572173,0.686348
Skip Bridge MDF (MA4),0.881894,0.933412,0.984918,0.890707,0.962192,0.954259,1.0,0.928832,0.967195,0.948292,...,0.533751,0.606885,0.621442,0.402606,0.558722,0.635438,0.629137,0.408651,0.572193,0.653145
Westwick MDF (MA4),0.922418,0.954153,0.929325,0.976818,0.974207,0.956906,0.928832,1.0,0.963056,0.935725,...,0.646111,0.741925,0.634039,0.377911,0.65369,0.750984,0.630628,0.385298,0.65295,0.753843
Skelton MDF (MA4),0.893389,0.954356,0.944836,0.91727,0.987378,0.985032,0.967195,0.963056,1.0,0.986461,...,0.559852,0.660197,0.606439,0.390913,0.588233,0.692538,0.616661,0.402705,0.601889,0.710214
Crakehill MDF (MA5),0.874747,0.948594,0.919828,0.889173,0.966673,0.986174,0.948292,0.935725,0.986461,1.0,...,0.512522,0.620434,0.593858,0.411721,0.552462,0.666047,0.608434,0.428696,0.568862,0.686776


In [139]:
# Rows with null values (displayed here; they are dropped in the next cell)
moving_avg_df[moving_avg_df.isna().any(axis=1)]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,
1,1993-01-02,24.86,,,,,,,,,...,,,,,,,,,,
2,1993-01-03,23.6,9.936667,4.252,8.656667,24.853333,,,,,...,,,,,,,,,,
3,1993-01-04,23.47,9.606667,4.242,8.201333,23.976667,9.805,4.27975,8.47375,24.5075,...,,,,,,,,,,
4,1993-01-05,60.7,15.056667,6.816333,24.895333,35.923333,13.78,6.172,20.827,33.1575,...,7.2,35.36,,,,,,,,
5,1993-01-06,98.01,22.603333,8.854,33.681667,60.726667,19.3175,7.6715,27.2755,51.445,...,7.52,34.72,2.266667,5.066667,6.266667,29.6,,,,


In [140]:
# Remove every row that still contains a null, then confirm none remain.
moving_avg_df = moving_avg_df.dropna(axis="index", how="any")
moving_avg_df.loc[moving_avg_df.isnull().any(axis="columns")]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)


In [141]:
moving_avg_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),Crakehill MDF (MA5),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)
count,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,...,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0
mean,46.995172,19.545218,7.608666,21.515503,47.028298,19.546846,7.60942,21.521166,47.033714,19.547596,...,14.027183,9.613619,5.792203,5.292826,14.02713,9.621961,5.791162,5.291626,14.02709,9.628314
std,55.777355,23.160818,9.371964,25.103715,52.756141,22.399448,9.116826,24.211841,51.530965,21.768351,...,15.169586,12.909387,7.301313,6.078797,14.46201,12.375209,6.983505,5.732045,13.87434,11.911054
min,3.694,2.083333,1.012667,1.990333,3.733,2.095,1.01475,2.00375,3.83425,2.11,...,0.0,0.0,0.0,0.0,8.65974e-15,0.0,5.075305e-16,0.0,5.77316e-15,0.0
25%,12.37,5.383333,2.176333,5.825667,12.82,5.585,2.1795,5.85,13.05,5.696,...,1.76,0.8,0.666667,0.933333,2.133333,1.2,0.8,1.257143,2.171429,1.371429
50%,24.24,10.276667,3.568667,10.934333,24.773333,10.435,3.59325,11.279,25.365,10.806,...,9.76,4.96,3.466667,3.466667,10.33333,5.333333,3.771429,3.657143,10.51429,5.828571
75%,59.05,23.966667,9.565,29.075333,62.006667,25.05,9.58225,29.579,63.0125,25.38,...,21.12,13.28,8.4,7.266667,21.4,13.333333,8.342857,7.142857,21.02857,13.371429
max,448.1,176.666667,67.550333,251.526667,361.066667,158.25,65.92175,216.44025,337.3,157.96,...,90.24,105.04,73.4,40.4,97.33333,87.8,69.65714,35.2,104.0571,75.257143


In [142]:
# Creating lagged moving averages
lagged_ma_df = pd.DataFrame()
lagged_ma_df["Date"] = moving_avg_df["Date"]
lagged_ma_df[flow_cols[-1]] = moving_avg_df[flow_cols[-1]]
lagged_ma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs)
6,1993-01-07,56.99
7,1993-01-08,56.66
8,1993-01-09,78.1
9,1993-01-10,125.7
10,1993-01-11,195.9
11,1993-01-12,125.4
12,1993-01-13,161.5
13,1993-01-14,204.0
14,1993-01-15,200.6
15,1993-01-16,234.4


In [143]:
# Lagging moving averages by 1 day
## lagging them by more than 1 day results in much weaker correlations
# Pick the moving-average columns by name instead of by hard-coded position
# (previously columns[2:22] and columns[22:]), so the selection stays correct
# if the number of stations or window sizes changes. Column order is kept.
mdf_cols = [col for col in moving_avg_df.columns if "MDF (MA" in col]
drt_cols = [col for col in moving_avg_df.columns if "DRT (MA" in col]

# Flow averages first, then rainfall averages, each shifted down by one row.
for col in mdf_cols + drt_cols:
    lagged_ma_df[f"{col} (t-1)"] = moving_avg_df[col].shift(1)

lagged_ma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3) (t-1),Skip Bridge MDF (MA3) (t-1),Westwick MDF (MA3) (t-1),Skelton MDF (MA3) (t-1),Crakehill MDF (MA4) (t-1),Skip Bridge MDF (MA4) (t-1),Westwick MDF (MA4) (t-1),Skelton MDF (MA4) (t-1),...,Malham Tarn DRT (MA5) (t-1),Snaizeholme DRT (MA5) (t-1),Arkengarthdale DRT (MA6) (t-1),East Cowton DRT (MA6) (t-1),Malham Tarn DRT (MA6) (t-1),Snaizeholme DRT (MA6) (t-1),Arkengarthdale DRT (MA7) (t-1),East Cowton DRT (MA7) (t-1),Malham Tarn DRT (MA7) (t-1),Snaizeholme DRT (MA7) (t-1)
6,1993-01-07,56.99,,,,,,,,,...,,,,,,,,,,
7,1993-01-08,56.66,25.9,9.817667,38.461,71.9,21.7775,8.454,30.827,59.7925,...,10.88,41.92,3.2,5.733333,9.2,34.933333,2.742857,4.914286,7.885714,30.514286
8,1993-01-09,78.1,24.466667,8.252333,28.755333,70.553333,24.925,9.17975,36.2425,68.09,...,11.04,42.4,3.466667,5.733333,9.333333,35.333333,2.971429,4.914286,8.114286,30.285714
9,1993-01-10,125.7,25.6,7.557667,37.367667,63.916667,27.225,8.2275,36.62975,72.44,...,21.92,51.04,5.866667,5.866667,18.4,52.8,5.028571,5.028571,15.885714,45.257143
10,1993-01-11,195.9,36.166667,9.565,61.263667,86.82,31.95,8.98725,51.5135,79.3625,...,30.4,56.16,8.933333,2.133333,30.933333,65.333333,8.0,5.371429,26.628571,64.8
11,1993-01-12,125.4,50.666667,15.663333,74.569,133.233333,43.5,13.564,63.3235,114.09,...,32.48,61.6,8.8,3.866667,27.333333,51.466667,9.142857,4.114286,28.228571,60.0
12,1993-01-13,161.5,49.5,19.850667,67.989333,149.0,46.0,16.92625,66.05525,131.275,...,29.12,59.2,10.0,4.533333,27.2,55.333333,8.571429,3.885714,23.542857,47.542857
13,1993-01-14,204.0,53.933333,27.605333,72.256667,160.933333,53.2,24.023,77.68025,152.125,...,45.36,82.32,15.6,5.333333,38.066667,69.0,14.171429,5.142857,35.142857,64.285714
14,1993-01-15,200.6,55.133333,28.570667,69.197333,163.633333,57.725,27.81825,69.27375,171.7,...,35.92,63.92,16.266667,5.6,39.133333,70.733333,14.171429,4.8,33.771429,60.971429
15,1993-01-16,234.4,76.0,32.919333,103.079,188.7,65.0,29.86825,87.43775,172.875,...,35.12,50.96,19.866667,7.466667,41.933333,65.266667,19.085714,6.514286,43.828571,70.914286


In [144]:
# Show the rows that contain at least one null value
rows_with_nulls = lagged_ma_df.isna().any(axis=1)
lagged_ma_df[rows_with_nulls]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3) (t-1),Skip Bridge MDF (MA3) (t-1),Westwick MDF (MA3) (t-1),Skelton MDF (MA3) (t-1),Crakehill MDF (MA4) (t-1),Skip Bridge MDF (MA4) (t-1),Westwick MDF (MA4) (t-1),Skelton MDF (MA4) (t-1),...,Malham Tarn DRT (MA5) (t-1),Snaizeholme DRT (MA5) (t-1),Arkengarthdale DRT (MA6) (t-1),East Cowton DRT (MA6) (t-1),Malham Tarn DRT (MA6) (t-1),Snaizeholme DRT (MA6) (t-1),Arkengarthdale DRT (MA7) (t-1),East Cowton DRT (MA7) (t-1),Malham Tarn DRT (MA7) (t-1),Snaizeholme DRT (MA7) (t-1)
6,1993-01-07,56.99,,,,,,,,,...,,,,,,,,,,


In [145]:
# Remove every row holding at least one null value, then display whatever
# null-containing rows remain (an empty frame is expected).
lagged_ma_df.dropna(axis="index", how="any", inplace=True)
remaining_null_rows = lagged_ma_df.isna().any(axis=1)
lagged_ma_df[remaining_null_rows]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (MA3) (t-1),Skip Bridge MDF (MA3) (t-1),Westwick MDF (MA3) (t-1),Skelton MDF (MA3) (t-1),Crakehill MDF (MA4) (t-1),Skip Bridge MDF (MA4) (t-1),Westwick MDF (MA4) (t-1),Skelton MDF (MA4) (t-1),...,Malham Tarn DRT (MA5) (t-1),Snaizeholme DRT (MA5) (t-1),Arkengarthdale DRT (MA6) (t-1),East Cowton DRT (MA6) (t-1),Malham Tarn DRT (MA6) (t-1),Snaizeholme DRT (MA6) (t-1),Arkengarthdale DRT (MA7) (t-1),East Cowton DRT (MA7) (t-1),Malham Tarn DRT (MA7) (t-1),Snaizeholme DRT (MA7) (t-1)


In [146]:
# Summary statistics of the lagged moving-average frame after null rows were dropped
lagged_ma_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (MA3) (t-1),Skip Bridge MDF (MA3) (t-1),Westwick MDF (MA3) (t-1),Skelton MDF (MA3) (t-1),Crakehill MDF (MA4) (t-1),Skip Bridge MDF (MA4) (t-1),Westwick MDF (MA4) (t-1),Skelton MDF (MA4) (t-1),Crakehill MDF (MA5) (t-1),...,Malham Tarn DRT (MA5) (t-1),Snaizeholme DRT (MA5) (t-1),Arkengarthdale DRT (MA6) (t-1),East Cowton DRT (MA6) (t-1),Malham Tarn DRT (MA6) (t-1),Snaizeholme DRT (MA6) (t-1),Arkengarthdale DRT (MA7) (t-1),East Cowton DRT (MA7) (t-1),Malham Tarn DRT (MA7) (t-1),Snaizeholme DRT (MA7) (t-1)
count,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,...,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0
mean,46.988251,19.548435,7.609933,21.521265,47.037087,19.550722,7.610739,21.527241,47.043488,19.551895,...,14.033352,9.618393,5.792336,5.291043,14.03389,9.627054,5.791848,5.290305,14.03419,9.633478
std,55.796058,23.168519,9.375087,25.111456,52.77336,22.406724,9.119847,24.219128,51.547477,21.775279,...,15.173028,12.912583,7.303841,6.080526,14.46474,12.377981,6.985875,5.733811,13.87652,11.913562
min,3.694,2.083333,1.012667,1.990333,3.733,2.095,1.01475,2.00375,3.83425,2.11,...,0.0,0.0,0.0,0.0,8.65974e-15,0.0,5.075305e-16,0.0,5.77316e-15,0.0
25%,12.3525,5.3775,2.17425,5.824167,12.803333,5.574375,2.178812,5.848125,13.049875,5.694,...,1.76,0.8,0.666667,0.933333,2.133333,1.2,0.8,1.257143,2.171429,1.371429
50%,24.23,10.263333,3.567667,10.922667,24.756667,10.43,3.592375,11.25975,25.34125,10.803,...,9.76,4.96,3.466667,3.466667,10.33333,5.333333,3.771429,3.657143,10.51429,5.828571
75%,59.1,24.016667,9.570917,29.109167,62.124167,25.05,9.585,29.5895,63.085,25.39,...,21.14,13.3,8.4,7.266667,21.4,13.333333,8.342857,7.1,21.04286,13.371429
max,448.1,176.666667,67.550333,251.526667,361.066667,158.25,65.92175,216.44025,337.3,157.96,...,90.24,105.04,73.4,40.4,97.33333,87.8,69.65714,35.2,104.0571,75.257143


In [147]:
# Creating weighted moving averages
# Seed the frame with the date and the last flow column taken from clean_df;
# the weighted-average columns are appended to it afterwards.
weighted_ma_df = clean_df[["Date", flow_cols[-1]]].copy()
weighted_ma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs)
0,1993-01-01,26.1
1,1993-01-02,24.86
2,1993-01-03,23.6
3,1993-01-04,23.47
4,1993-01-05,60.7
5,1993-01-06,98.01
6,1993-01-07,56.99
7,1993-01-08,56.66
8,1993-01-09,78.1
9,1993-01-10,125.7


In [148]:
# Exponentially weighted means (pandas ewm) for spans 3 through 7.
# Flow columns are processed for every span before the rainfall columns, so the
# resulting column order is: all flow WMAs, followed by all rainfall WMAs.
for source_cols, unit_tag in ((flow_cols, "(Cumecs)"), (rain_cols, "(mm)")):
    for span in range(3, 8):
        wma_tag = f"(WMA{span})"
        for source_col in source_cols:
            # The unit suffix in the column name is swapped for the WMA tag.
            weighted_ma_df[source_col.replace(unit_tag, wma_tag)] = (
                clean_df[source_col].ewm(span=span).mean()
            )

weighted_ma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (WMA3),Skip Bridge MDF (WMA3),Westwick MDF (WMA3),Skelton MDF (WMA3),Crakehill MDF (WMA4),Skip Bridge MDF (WMA4),Westwick MDF (WMA4),Skelton MDF (WMA4),...,Malham Tarn DRT (WMA5),Snaizeholme DRT (WMA5),Arkengarthdale DRT (WMA6),East Cowton DRT (WMA6),Malham Tarn DRT (WMA6),Snaizeholme DRT (WMA6),Arkengarthdale DRT (WMA7),East Cowton DRT (WMA7),Malham Tarn DRT (WMA7),Snaizeholme DRT (WMA7)
0,1993-01-01,26.1,10.4,4.393,9.291,26.1,10.4,4.393,9.291,26.1,...,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0
1,1993-01-02,24.86,10.1,4.290333,8.845,25.273333,10.11875,4.29675,8.872875,25.325,...,0.48,1.6,0.0,0.0,0.466667,1.666667,0.0,0.0,0.457143,1.714286
2,1993-01-03,23.6,9.734286,4.195286,8.394714,24.317143,9.782653,4.208612,8.456612,24.444898,...,0.631579,0.842105,0.0,0.0,0.616514,0.917431,0.0,0.0,0.605405,0.972973
3,1993-01-04,23.47,9.561333,4.284733,8.1442,23.865333,9.611397,4.279562,8.212305,23.996875,...,0.701538,26.08,0.927027,9.579279,0.687387,24.356757,0.877714,9.069714,0.676571,23.145143
4,1993-01-05,60.7,18.200645,8.247194,34.239581,42.876774,16.849688,7.61164,30.111895,39.915996,...,13.330806,58.756398,4.532554,8.182663,12.238795,54.83631,4.261204,7.932394,11.468374,52.008195
5,1993-01-06,98.01,25.260635,9.257889,34.32919,70.880952,23.248349,8.713177,31.917793,64.29083,...,9.044211,37.578346,3.039206,5.486709,8.733616,37.032886,2.965251,5.519929,8.467122,36.434333
6,1993-01-07,56.99,22.25685,8.248055,28.248591,63.880787,21.623524,8.112697,27.944653,61.286393,...,12.073434,37.019524,3.847544,5.017416,11.532371,36.706846,3.725407,5.081412,11.102064,36.309023
7,1993-01-08,56.66,22.127922,7.755102,28.92042,60.256235,21.776687,7.768232,28.612815,59.404222,...,8.440539,25.011134,3.158713,3.47967,8.488282,26.19243,3.134942,3.669731,8.46227,26.888668
8,1993-01-09,78.1,28.827045,7.95444,44.617368,69.195577,27.321895,7.923706,41.397731,66.958665,...,24.443295,52.31773,6.533868,2.675109,22.513302,49.79406,6.179831,2.894057,21.095246,47.947713
9,1993-01-10,125.7,39.92436,10.617821,69.308296,97.475406,36.850755,10.077648,62.54692,90.598138,...,41.932146,80.975455,10.755867,2.593692,38.342455,75.543086,10.052984,2.763172,35.64053,71.486327


In [149]:
# Pairwise correlations between the numeric columns of weighted_ma_df.
# Numeric columns are selected explicitly so the "Date" column is never handed
# to corr(): older pandas dropped non-numeric columns silently, newer releases
# (numeric_only defaults to False) do not.
weighted_ma_df.select_dtypes(include="number").corr()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (WMA3),Skip Bridge MDF (WMA3),Westwick MDF (WMA3),Skelton MDF (WMA3),Crakehill MDF (WMA4),Skip Bridge MDF (WMA4),Westwick MDF (WMA4),Skelton MDF (WMA4),Crakehill MDF (WMA5),...,Malham Tarn DRT (WMA5),Snaizeholme DRT (WMA5),Arkengarthdale DRT (WMA6),East Cowton DRT (WMA6),Malham Tarn DRT (WMA6),Snaizeholme DRT (WMA6),Arkengarthdale DRT (WMA7),East Cowton DRT (WMA7),Malham Tarn DRT (WMA7),Snaizeholme DRT (WMA7)
Skelton MDF (Cumecs),1.0,0.974858,0.943348,0.961817,0.964263,0.959379,0.927569,0.956856,0.942997,0.943325,...,0.581981,0.679102,0.618884,0.429512,0.597209,0.695978,0.628888,0.439717,0.606593,0.705803
Crakehill MDF (WMA3),0.974858,1.0,0.951375,0.94742,0.983057,0.995477,0.945789,0.95459,0.970526,0.985648,...,0.530627,0.636607,0.592102,0.426059,0.55044,0.658424,0.607264,0.44224,0.563608,0.67223
Skip Bridge MDF (WMA3),0.943348,0.951375,1.0,0.918588,0.964938,0.953549,0.996072,0.931227,0.958334,0.949768,...,0.527492,0.60047,0.609613,0.400521,0.551243,0.625423,0.627806,0.415421,0.568711,0.642895
Westwick MDF (WMA3),0.961817,0.94742,0.918588,1.0,0.950522,0.939728,0.909495,0.995165,0.936605,0.929457,...,0.641513,0.740023,0.641326,0.391134,0.657147,0.756075,0.6517,0.402871,0.666904,0.76528
Skelton MDF (WMA3),0.964263,0.983057,0.964938,0.950522,1.0,0.990035,0.968178,0.968198,0.996575,0.989463,...,0.54389,0.643068,0.584155,0.377091,0.569737,0.671346,0.60433,0.395595,0.588314,0.691008
Crakehill MDF (WMA4),0.959379,0.995477,0.953549,0.939728,0.990035,1.0,0.955557,0.954987,0.985055,0.997129,...,0.5139,0.621543,0.575006,0.400609,0.537278,0.647295,0.593927,0.419793,0.553756,0.66473
Skip Bridge MDF (WMA4),0.927569,0.945789,0.996072,0.909495,0.968178,0.955557,1.0,0.929044,0.968286,0.957745,...,0.509787,0.584196,0.588081,0.374791,0.536581,0.612353,0.609905,0.392426,0.55694,0.632866
Westwick MDF (WMA4),0.956856,0.95459,0.931227,0.995165,0.968198,0.954987,0.929044,1.0,0.960908,0.950664,...,0.622815,0.721922,0.627399,0.371261,0.643388,0.743384,0.64212,0.385909,0.657478,0.757292
Skelton MDF (WMA4),0.942997,0.970526,0.958334,0.936605,0.996575,0.985055,0.968286,0.960908,1.0,0.990605,...,0.524147,0.624568,0.564142,0.351382,0.552866,0.655936,0.587393,0.37209,0.57427,0.678627
Crakehill MDF (WMA5),0.943325,0.985648,0.949768,0.929457,0.989463,0.997129,0.957745,0.950664,0.990605,1.0,...,0.497648,0.606465,0.557391,0.376371,0.523517,0.635018,0.579098,0.397596,0.542468,0.655207


In [150]:
# Summary statistics of the exponentially weighted averages
weighted_ma_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (WMA3),Skip Bridge MDF (WMA3),Westwick MDF (WMA3),Skelton MDF (WMA3),Crakehill MDF (WMA4),Skip Bridge MDF (WMA4),Westwick MDF (WMA4),Skelton MDF (WMA4),Crakehill MDF (WMA5),...,Malham Tarn DRT (WMA5),Snaizeholme DRT (WMA5),Arkengarthdale DRT (WMA6),East Cowton DRT (WMA6),Malham Tarn DRT (WMA6),Snaizeholme DRT (WMA6),Arkengarthdale DRT (WMA7),East Cowton DRT (WMA7),Malham Tarn DRT (WMA7),Snaizeholme DRT (WMA7)
count,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,...,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0,1451.0
mean,46.977784,19.519175,7.600024,21.492349,46.971894,19.518767,7.599538,21.492851,46.971182,19.518829,...,13.984398,9.651831,5.775847,5.286105,13.985534,9.659651,5.775058,5.284842,13.986942,9.668315
std,55.691669,22.509756,9.131015,24.494882,51.546413,21.708574,8.85412,23.552924,50.203609,21.070805,...,14.834316,12.682914,7.1172,5.928408,14.046123,12.084998,6.759876,5.554247,13.408911,11.612662
min,3.694,2.106613,1.013019,1.988822,3.784612,2.124633,1.015611,2.003615,3.813018,2.143048,...,0.0,2.3e-05,0.0,0.0,0.0,0.00013,0.0,0.0,0.0,0.000424
25%,12.385,5.537138,2.189022,5.931549,12.991008,5.66471,2.20073,6.034924,13.232278,5.781697,...,2.396532,1.361718,1.034738,1.297775,2.864697,1.573018,1.227319,1.504496,3.248877,1.729078
50%,24.25,10.408988,3.59544,11.193894,25.273333,10.764707,3.623453,11.656633,25.870124,11.025246,...,9.522856,5.152346,3.629764,3.47967,10.051803,5.627342,3.795612,3.669731,10.562586,5.856426
75%,59.15,24.015073,9.343599,29.361253,63.598434,24.935907,9.732283,29.933482,62.757663,24.765063,...,21.230186,12.726148,8.114486,7.154646,21.351626,12.777683,8.044823,7.163909,21.049487,12.844838
max,448.1,171.548253,66.767533,283.847215,358.363081,163.374574,63.330198,252.563531,342.37138,155.556128,...,105.652838,97.166216,83.577971,51.966182,100.117414,86.24775,77.825716,45.74493,95.347821,80.502521


In [151]:
# Creating lagged weighted moving averages
# Seed the frame with the date and the last flow column from weighted_ma_df;
# the lagged weighted-average columns are appended to it afterwards.
lagged_wma_df = weighted_ma_df[["Date", flow_cols[-1]]].copy()
lagged_wma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs)
0,1993-01-01,26.1
1,1993-01-02,24.86
2,1993-01-03,23.6
3,1993-01-04,23.47
4,1993-01-05,60.7
5,1993-01-06,98.01
6,1993-01-07,56.99
7,1993-01-08,56.66
8,1993-01-09,78.1
9,1993-01-10,125.7


In [152]:
# Lagging weighted moving averages
# Columns 2-21 of weighted_ma_df are the MDF averages; columns 22 onward are the
# DRT averages (positions 0 and 1 hold the date and the unshifted flow column).
w_mdf_cols = weighted_ma_df.columns[2:22].tolist()
w_drt_cols = weighted_ma_df.columns[22:].tolist()

# Each row receives the preceding row's weighted average; MDF columns are added
# first, then DRT columns, so the column order is unchanged.
for source_col in [*w_mdf_cols, *w_drt_cols]:
    lagged_wma_df[source_col + " (t-1)"] = weighted_ma_df[source_col].shift(1)

lagged_wma_df.head(20)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,
1,1993-01-02,24.86,10.4,4.393,9.291,26.1,10.4,4.393,9.291,26.1,...,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0
2,1993-01-03,23.6,10.1,4.290333,8.845,25.273333,10.11875,4.29675,8.872875,25.325,...,0.48,1.6,0.0,0.0,0.466667,1.666667,0.0,0.0,0.457143,1.714286
3,1993-01-04,23.47,9.734286,4.195286,8.394714,24.317143,9.782653,4.208612,8.456612,24.444898,...,0.631579,0.842105,0.0,0.0,0.616514,0.917431,0.0,0.0,0.605405,0.972973
4,1993-01-05,60.7,9.561333,4.284733,8.1442,23.865333,9.611397,4.279562,8.212305,23.996875,...,0.701538,26.08,0.927027,9.579279,0.687387,24.356757,0.877714,9.069714,0.676571,23.145143
5,1993-01-06,98.01,18.200645,8.247194,34.239581,42.876774,16.849688,7.61164,30.111895,39.915996,...,13.330806,58.756398,4.532554,8.182663,12.238795,54.83631,4.261204,7.932394,11.468374,52.008195
6,1993-01-07,56.99,25.260635,9.257889,34.32919,70.880952,23.248349,8.713177,31.917793,64.29083,...,9.044211,37.578346,3.039206,5.486709,8.733616,37.032886,2.965251,5.519929,8.467122,36.434333
7,1993-01-08,56.66,22.25685,8.248055,28.248591,63.880787,21.623524,8.112697,27.944653,61.286393,...,12.073434,37.019524,3.847544,5.017416,11.532371,36.706846,3.725407,5.081412,11.102064,36.309023
8,1993-01-09,78.1,22.127922,7.755102,28.92042,60.256235,21.776687,7.768232,28.612815,59.404222,...,8.440539,25.011134,3.158713,3.47967,8.488282,26.19243,3.134942,3.669731,8.46227,26.888668
9,1993-01-10,125.7,28.827045,7.95444,44.617368,69.195577,27.321895,7.923706,41.397731,66.958665,...,24.443295,52.31773,6.533868,2.675109,22.513302,49.79406,6.179831,2.894057,21.095246,47.947713


In [154]:
# Show the rows that contain at least one null value.
# The axis is passed by keyword: DataFrame.any() no longer accepts it
# positionally in pandas 2.0+, and this matches the equivalent check on
# lagged_ma_df earlier in the notebook.
lagged_wma_df[lagged_wma_df.isna().any(axis=1)]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)
0,1993-01-01,26.1,,,,,,,,,...,,,,,,,,,,


In [155]:
# Remove every row holding at least one null value, then display whatever
# null-containing rows remain (an empty frame is expected).
lagged_wma_df.dropna(how="any", inplace=True)
# The axis is passed by keyword: DataFrame.any() no longer accepts it
# positionally in pandas 2.0+.
lagged_wma_df[lagged_wma_df.isna().any(axis=1)]

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)


In [156]:
# Pairwise correlations between the numeric columns of lagged_wma_df.
# Numeric columns are selected explicitly so the "Date" column is never handed
# to corr(): older pandas dropped non-numeric columns silently, newer releases
# (numeric_only defaults to False) do not.
lagged_wma_df.select_dtypes(include="number").corr()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),Crakehill MDF (WMA5) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)
Skelton MDF (Cumecs),1.0,0.858361,0.846748,0.900415,0.848036,0.844495,0.830926,0.88528,0.832012,0.831944,...,0.626056,0.735049,0.64546,0.449179,0.627424,0.736302,0.644561,0.450985,0.62658,0.734637
Crakehill MDF (WMA3) (t-1),0.858361,1.0,0.951374,0.947425,0.983057,0.995477,0.945788,0.954596,0.970526,0.985648,...,0.530624,0.636615,0.592122,0.42614,0.550444,0.658436,0.607284,0.442328,0.563615,0.672244
Skip Bridge MDF (WMA3) (t-1),0.846748,0.951374,1.0,0.918592,0.964938,0.953548,0.996073,0.931233,0.958334,0.949767,...,0.52749,0.600474,0.609632,0.400599,0.551248,0.625432,0.627826,0.415506,0.568721,0.642907
Westwick MDF (WMA3) (t-1),0.900415,0.947425,0.918592,1.0,0.950524,0.939731,0.909502,0.995164,0.936606,0.929459,...,0.641481,0.740004,0.641372,0.391264,0.657119,0.75606,0.651746,0.40301,0.666878,0.765267
Skelton MDF (WMA3) (t-1),0.848036,0.983057,0.964938,0.950524,1.0,0.990035,0.968178,0.968202,0.996575,0.989463,...,0.54388,0.643068,0.584178,0.377179,0.569733,0.671351,0.604354,0.395689,0.588315,0.691016
Crakehill MDF (WMA4) (t-1),0.844495,0.995477,0.953548,0.939731,0.990035,1.0,0.955557,0.954991,0.985055,0.997129,...,0.513888,0.621542,0.575028,0.400695,0.537272,0.647299,0.593949,0.419887,0.553754,0.664736
Skip Bridge MDF (WMA4) (t-1),0.830926,0.945788,0.996073,0.909502,0.968178,0.955557,1.0,0.929053,0.968287,0.957745,...,0.509793,0.584208,0.588096,0.374858,0.536595,0.61237,0.609921,0.392499,0.556959,0.632886
Westwick MDF (WMA4) (t-1),0.88528,0.954596,0.931233,0.995164,0.968202,0.954991,0.929053,1.0,0.960911,0.950667,...,0.622777,0.721899,0.627446,0.371392,0.643355,0.743365,0.642168,0.386049,0.657448,0.757275
Skelton MDF (WMA4) (t-1),0.832012,0.970526,0.958334,0.936606,0.996575,0.985055,0.968287,0.960911,1.0,0.990605,...,0.524132,0.624563,0.564165,0.351469,0.552858,0.655937,0.587417,0.372185,0.574267,0.678632
Crakehill MDF (WMA5) (t-1),0.831944,0.985648,0.949767,0.929459,0.989463,0.997129,0.957745,0.950667,0.990605,1.0,...,0.497633,0.606462,0.557413,0.376457,0.523508,0.63502,0.579121,0.397688,0.542463,0.655212


In [157]:
# Summary statistics of the lagged weighted averages after null rows were dropped
lagged_wma_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),Crakehill MDF (WMA5) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)
count,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,...,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0
mean,46.992182,19.522053,7.601166,21.497952,46.979458,19.521839,7.60047,21.498375,46.978713,19.52184,...,13.990728,9.656912,5.775426,5.284441,13.991815,9.664587,5.774654,5.283198,13.993078,9.673086
std,55.708181,22.517255,9.134062,24.502403,51.563392,21.715748,8.857104,23.560109,50.220109,21.077762,...,14.837473,12.685812,7.119637,5.930115,14.04893,12.087704,6.762191,5.55581,13.411499,11.615246
min,3.694,2.106613,1.013019,1.988822,3.784612,2.124633,1.015611,2.003615,3.813018,2.143048,...,0.0,2.3e-05,0.0,0.0,0.0,0.00013,0.0,0.0,0.0,0.000424
25%,12.3825,5.53447,2.188817,5.928907,12.988892,5.652475,2.199878,6.030254,13.230015,5.778349,...,2.392643,1.36039,1.033939,1.293475,2.855575,1.571982,1.227186,1.504081,3.244706,1.72884
50%,24.245,10.407141,3.595122,11.170101,25.272767,10.740245,3.621673,11.629449,25.859974,11.015578,...,9.528469,5.157073,3.628686,3.47835,10.064458,5.628753,3.792445,3.664996,10.593781,5.863176
75%,59.2,24.05189,9.386454,29.367607,63.638607,24.975226,9.753887,29.937759,62.787376,24.787001,...,21.253441,12.729215,8.1188,7.14641,21.354523,12.78622,8.049764,7.160675,21.058371,12.84806
max,448.1,171.548253,66.767533,283.847215,358.363081,163.374574,63.330198,252.563531,342.37138,155.556128,...,105.652838,97.166216,83.577971,51.966182,100.117414,86.24775,77.825716,45.74493,95.347821,80.502521


### Exporting Datasets

In [160]:
# Summary statistics of the lagged dataset, reviewed before it is exported
lagged_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),Crakehill MDF (t-3),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
count,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,...,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0
mean,47.023622,19.537023,7.606707,21.517077,47.012696,19.532983,7.605369,21.513186,47.006191,19.531809,...,14.010497,9.652486,5.783978,5.295028,14.007182,9.649724,5.781768,5.291713,14.007182,9.651934
std,55.74023,25.272741,10.02328,28.287685,55.743271,25.273807,10.023601,28.289072,55.745308,25.274192,...,25.065902,20.9533,13.274272,12.308373,25.067332,20.954309,13.274969,12.309152,25.067332,20.953544
min,3.694,2.06,1.002,1.954,3.694,2.06,1.002,1.954,3.694,2.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,12.3775,5.215,2.1545,5.60925,12.3775,5.215,2.1545,5.60925,12.3775,5.215,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,24.245,10.05,3.4805,10.385,24.23,10.0,3.4805,10.3545,24.23,10.0,...,1.6,0.8,0.8,0.0,1.6,0.8,0.8,0.0,1.6,0.8
75%,59.27,21.8,8.85525,26.724,59.27,21.8,8.85525,26.724,59.27,21.8,...,18.4,10.4,6.4,5.3,18.4,10.4,6.4,5.3,18.4,10.4
max,448.1,220.0,80.244,374.061,448.1,220.0,80.244,374.061,448.1,220.0,...,252.0,268.8,225.2,165.6,252.0,268.8,225.2,165.6,252.0,268.8


In [161]:
# Summary statistics of the moving-average dataset, reviewed before it is exported
moving_avg_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (MA3),Skip Bridge MDF (MA3),Westwick MDF (MA3),Skelton MDF (MA3),Crakehill MDF (MA4),Skip Bridge MDF (MA4),Westwick MDF (MA4),Skelton MDF (MA4),Crakehill MDF (MA5),...,Malham Tarn DRT (MA5),Snaizeholme DRT (MA5),Arkengarthdale DRT (MA6),East Cowton DRT (MA6),Malham Tarn DRT (MA6),Snaizeholme DRT (MA6),Arkengarthdale DRT (MA7),East Cowton DRT (MA7),Malham Tarn DRT (MA7),Snaizeholme DRT (MA7)
count,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,...,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0,1445.0
mean,46.995172,19.545218,7.608666,21.515503,47.028298,19.546846,7.60942,21.521166,47.033714,19.547596,...,14.027183,9.613619,5.792203,5.292826,14.02713,9.621961,5.791162,5.291626,14.02709,9.628314
std,55.777355,23.160818,9.371964,25.103715,52.756141,22.399448,9.116826,24.211841,51.530965,21.768351,...,15.169586,12.909387,7.301313,6.078797,14.46201,12.375209,6.983505,5.732045,13.87434,11.911054
min,3.694,2.083333,1.012667,1.990333,3.733,2.095,1.01475,2.00375,3.83425,2.11,...,0.0,0.0,0.0,0.0,8.65974e-15,0.0,5.075305e-16,0.0,5.77316e-15,0.0
25%,12.37,5.383333,2.176333,5.825667,12.82,5.585,2.1795,5.85,13.05,5.696,...,1.76,0.8,0.666667,0.933333,2.133333,1.2,0.8,1.257143,2.171429,1.371429
50%,24.24,10.276667,3.568667,10.934333,24.773333,10.435,3.59325,11.279,25.365,10.806,...,9.76,4.96,3.466667,3.466667,10.33333,5.333333,3.771429,3.657143,10.51429,5.828571
75%,59.05,23.966667,9.565,29.075333,62.006667,25.05,9.58225,29.579,63.0125,25.38,...,21.12,13.28,8.4,7.266667,21.4,13.333333,8.342857,7.142857,21.02857,13.371429
max,448.1,176.666667,67.550333,251.526667,361.066667,158.25,65.92175,216.44025,337.3,157.96,...,90.24,105.04,73.4,40.4,97.33333,87.8,69.65714,35.2,104.0571,75.257143


In [163]:
# Summary statistics of the lagged moving-average dataset, reviewed before it is exported
lagged_ma_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (MA3) (t-1),Skip Bridge MDF (MA3) (t-1),Westwick MDF (MA3) (t-1),Skelton MDF (MA3) (t-1),Crakehill MDF (MA4) (t-1),Skip Bridge MDF (MA4) (t-1),Westwick MDF (MA4) (t-1),Skelton MDF (MA4) (t-1),Crakehill MDF (MA5) (t-1),...,Malham Tarn DRT (MA5) (t-1),Snaizeholme DRT (MA5) (t-1),Arkengarthdale DRT (MA6) (t-1),East Cowton DRT (MA6) (t-1),Malham Tarn DRT (MA6) (t-1),Snaizeholme DRT (MA6) (t-1),Arkengarthdale DRT (MA7) (t-1),East Cowton DRT (MA7) (t-1),Malham Tarn DRT (MA7) (t-1),Snaizeholme DRT (MA7) (t-1)
count,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,...,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0,1444.0
mean,46.988251,19.548435,7.609933,21.521265,47.037087,19.550722,7.610739,21.527241,47.043488,19.551895,...,14.033352,9.618393,5.792336,5.291043,14.03389,9.627054,5.791848,5.290305,14.03419,9.633478
std,55.796058,23.168519,9.375087,25.111456,52.77336,22.406724,9.119847,24.219128,51.547477,21.775279,...,15.173028,12.912583,7.303841,6.080526,14.46474,12.377981,6.985875,5.733811,13.87652,11.913562
min,3.694,2.083333,1.012667,1.990333,3.733,2.095,1.01475,2.00375,3.83425,2.11,...,0.0,0.0,0.0,0.0,8.65974e-15,0.0,5.075305e-16,0.0,5.77316e-15,0.0
25%,12.3525,5.3775,2.17425,5.824167,12.803333,5.574375,2.178812,5.848125,13.049875,5.694,...,1.76,0.8,0.666667,0.933333,2.133333,1.2,0.8,1.257143,2.171429,1.371429
50%,24.23,10.263333,3.567667,10.922667,24.756667,10.43,3.592375,11.25975,25.34125,10.803,...,9.76,4.96,3.466667,3.466667,10.33333,5.333333,3.771429,3.657143,10.51429,5.828571
75%,59.1,24.016667,9.570917,29.109167,62.124167,25.05,9.585,29.5895,63.085,25.39,...,21.14,13.3,8.4,7.266667,21.4,13.333333,8.342857,7.1,21.04286,13.371429
max,448.1,176.666667,67.550333,251.526667,361.066667,158.25,65.92175,216.44025,337.3,157.96,...,90.24,105.04,73.4,40.4,97.33333,87.8,69.65714,35.2,104.0571,75.257143


In [164]:
# Summary statistics of the lagged weighted-average dataset, reviewed before it is exported
lagged_wma_df.describe()

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (WMA3) (t-1),Skip Bridge MDF (WMA3) (t-1),Westwick MDF (WMA3) (t-1),Skelton MDF (WMA3) (t-1),Crakehill MDF (WMA4) (t-1),Skip Bridge MDF (WMA4) (t-1),Westwick MDF (WMA4) (t-1),Skelton MDF (WMA4) (t-1),Crakehill MDF (WMA5) (t-1),...,Malham Tarn DRT (WMA5) (t-1),Snaizeholme DRT (WMA5) (t-1),Arkengarthdale DRT (WMA6) (t-1),East Cowton DRT (WMA6) (t-1),Malham Tarn DRT (WMA6) (t-1),Snaizeholme DRT (WMA6) (t-1),Arkengarthdale DRT (WMA7) (t-1),East Cowton DRT (WMA7) (t-1),Malham Tarn DRT (WMA7) (t-1),Snaizeholme DRT (WMA7) (t-1)
count,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,...,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0,1450.0
mean,46.992182,19.522053,7.601166,21.497952,46.979458,19.521839,7.60047,21.498375,46.978713,19.52184,...,13.990728,9.656912,5.775426,5.284441,13.991815,9.664587,5.774654,5.283198,13.993078,9.673086
std,55.708181,22.517255,9.134062,24.502403,51.563392,21.715748,8.857104,23.560109,50.220109,21.077762,...,14.837473,12.685812,7.119637,5.930115,14.04893,12.087704,6.762191,5.55581,13.411499,11.615246
min,3.694,2.106613,1.013019,1.988822,3.784612,2.124633,1.015611,2.003615,3.813018,2.143048,...,0.0,2.3e-05,0.0,0.0,0.0,0.00013,0.0,0.0,0.0,0.000424
25%,12.3825,5.53447,2.188817,5.928907,12.988892,5.652475,2.199878,6.030254,13.230015,5.778349,...,2.392643,1.36039,1.033939,1.293475,2.855575,1.571982,1.227186,1.504081,3.244706,1.72884
50%,24.245,10.407141,3.595122,11.170101,25.272767,10.740245,3.621673,11.629449,25.859974,11.015578,...,9.528469,5.157073,3.628686,3.47835,10.064458,5.628753,3.792445,3.664996,10.593781,5.863176
75%,59.2,24.05189,9.386454,29.367607,63.638607,24.975226,9.753887,29.937759,62.787376,24.787001,...,21.253441,12.729215,8.1188,7.14641,21.354523,12.78622,8.049764,7.160675,21.058371,12.84806
max,448.1,171.548253,66.767533,283.847215,358.363081,163.374574,63.330198,252.563531,342.37138,155.556128,...,105.652838,97.166216,83.577971,51.966182,100.117414,86.24775,77.825716,45.74493,95.347821,80.502521


In [169]:
# Exporting datasets with lags and moving averages
# Each frame is written to its own workbook in the current working directory
# (the DataFrame index is included, as to_excel does by default).
lagged_df.to_excel('River-Data-Lagged.xlsx')
moving_avg_df.to_excel('River-Data-MA.xlsx')
lagged_ma_df.to_excel('River-Data-MA-Lagged.xlsx')
weighted_ma_df.to_excel('River-Data-WMA.xlsx')
# The lagged *weighted* averages go here; this file was previously written from
# lagged_ma_df, which duplicated River-Data-MA-Lagged.xlsx.
lagged_wma_df.to_excel('River-Data-WMA-Lagged.xlsx')