# ML for Hydrology - Model Proposals

In [None]:
# Install the third-party packages this notebook imports:
# scikit-learn (imported as `sklearn`) and pandas.
%pip install scikit-learn
%pip install pandas

In [None]:
# DATA INITIALIZATION
import pandas as pd

# Load the Platte River dataset and flag missing entries before any modelling
df = pd.read_csv('./PlatteRiverDataset.csv')
if df.isnull().sum().any():
    print("[WARNING]: Data contains null values")

# Regression targets, one entry per CSV row
expected_stage_values = list(df['Stage'].values)
expected_discharge_values = list(df['Discharge'].values)

# Image statistics used as model inputs. The same ten statistics appear three
# times in the CSV: unsuffixed, with a '0' suffix (the above-weir list) and
# with a '1' suffix (the below-weir list).
generic_feature_names = [
    'grayMean', 'graySigma',
    'entropyMean', 'entropySigma',
    'hMean', 'hSigma',
    'sMean', 'sSigma',
    'vMean', 'vSigma',
]
above_weir_feature_names = [f'{name}0' for name in generic_feature_names]
below_weir_feature_names = [f'{name}1' for name in generic_feature_names]
selected_feature_names = [
    *generic_feature_names,
    *above_weir_feature_names,
    *below_weir_feature_names,
]

# One feature row per CSV row, columns ordered as in selected_feature_names
feature_values = list(df[selected_feature_names].values)

In [None]:
# ML INITIALIZATION
from sklearn.metrics import mean_squared_error

def train_and_test_models(stage_model, discharge_model, percent_train=0.8):
    """Fit both regressors on the leading rows and report their MSE on the rest.

    The module-level ``feature_values``, ``expected_stage_values`` and
    ``expected_discharge_values`` are split by position, without shuffling:
    the first ``percent_train`` fraction of rows is used for fitting and the
    remaining rows for evaluation. The mean squared error of each model on the
    held-out rows is printed.

    Args:
        stage_model: Estimator exposing ``fit(X, y)`` and ``predict(X)``;
            fitted in place against the stage targets.
        discharge_model: Estimator exposing ``fit(X, y)`` and ``predict(X)``;
            fitted in place against the discharge targets.
        percent_train: Fraction of the rows, counted from the start of the
            dataset, used for training. Defaults to 0.8.

    Raises:
        ValueError: If the split would leave the training or the test
            partition empty.
    """
    num_samples = len(expected_stage_values)
    num_train = int(percent_train * num_samples)
    if not 0 < num_train < num_samples:
        # Fail early with a clear message instead of an opaque estimator error
        raise ValueError(
            "percent_train=%r leaves an empty train or test partition "
            "for %d samples" % (percent_train, num_samples)
        )

    # Positional split: leading rows train, trailing rows test
    train_features = feature_values[:num_train]
    test_features = feature_values[num_train:]
    train_stage = expected_stage_values[:num_train]
    test_stage = expected_stage_values[num_train:]
    train_discharge = expected_discharge_values[:num_train]
    test_discharge = expected_discharge_values[num_train:]

    # Train models
    print("Training models...")
    stage_model.fit(train_features, train_stage)
    discharge_model.fit(train_features, train_discharge)
    print("Training complete")

    # Test the models on rows they have not seen
    print("Testing models...")
    stage_predictions = stage_model.predict(test_features)
    discharge_predictions = discharge_model.predict(test_features)
    print("Testing complete")

    # Calculate the error
    stage_mse = mean_squared_error(test_stage, stage_predictions)
    discharge_mse = mean_squared_error(test_discharge, discharge_predictions)
    print(f"[Stage] Mean squared error: {stage_mse:.3f}")
    print(f"[Discharge] Mean squared error: {discharge_mse:.3f}")


In [None]:
# Proposal 1: Support Vector Regression for both stage and discharge
# NOTE(review): the features are passed in unscaled; SVR is usually sensitive
# to feature scale, so consider standardizing the inputs - confirm intent.
from sklearn.svm import SVR

# One independent, default-configured regressor per target
stage_model, discharge_model = (SVR() for _ in range(2))
train_and_test_models(stage_model, discharge_model)

In [None]:
# Proposal 2: Random Forest regression for both stage and discharge
from sklearn.ensemble import RandomForestRegressor

# One independent, default-configured regressor per target
stage_model, discharge_model = (RandomForestRegressor() for _ in range(2))
train_and_test_models(stage_model, discharge_model)

In [None]:
# Proposal 4: Multi-Layer Perceptron regression for both stage and discharge
# NOTE(review): the numbering skips Proposal 3 - confirm whether a proposal
# was removed or this should be renumbered.
from sklearn.neural_network import MLPRegressor

# One independent, default-configured regressor per target
stage_model, discharge_model = (MLPRegressor() for _ in range(2))
train_and_test_models(stage_model, discharge_model)