In [1]:
import wntr
import pandas as pd

ModuleNotFoundError: No module named 'wntr'

# Custom function library

In [None]:
from math import sqrt

def distance_between_points(x1: float, y1: float, x2: float, y2: float) -> float:
    """
    Returns the straight-line (Euclidean) distance between 2 points on a flat
    plane, i.e. the Pythagorean theorem applied to the coordinate deltas.
    Arguments: x1, y1 - first point coordinates (floats)
               x2, y2 - second point coordinates (floats)
    Returns: distance on a flat plane (float), in the same units as the inputs
    """
    # imported locally so this function does not depend on the cell-level imports
    from math import hypot

    # hypot scales its arguments internally, so very large or very small deltas
    # do not overflow/underflow the way squaring them by hand does
    return hypot(x1 - x2, y1 - y2)

def get_closest_nodes(water_network: wntr.network.WaterNetworkModel, base_node_name: str, max_distance: float) -> list:
    """
    Finds the nodes that lie within max_distance of the base node.
    Arguments: water_network - WNTR Water Network Model (wntr.network.WaterNetworkModel)
               base_node_name - reference node name (str)
               max_distance - search radius, in the units of the node coordinates
                              (meters according to the original notes) (float)
    Returns:   list of strings (names of the nodes within range, in the order of
               water_network.node_name_list; the base node itself is never included)
    """

    # read the coordinates from the model that was passed in, not from a
    # notebook-level global, so the function works for any network
    x_base, y_base = water_network.get_node(base_node_name).coordinates

    # check each node if it's in base node's proximity, if so append to list
    closest_nodes = []
    for node_name in water_network.node_name_list:
        # the base node is at distance 0 from itself and is not its own neighbour;
        # skipping it here (instead of list.remove afterwards) also means a
        # negative max_distance yields an empty list rather than a ValueError
        if node_name == base_node_name:
            continue
        x, y = water_network.get_node(node_name).coordinates
        if distance_between_points(x_base, y_base, x, y) <= max_distance:
            closest_nodes.append(node_name)

    return closest_nodes

# Read a water network model, simulate without leaks

In [None]:
# network description file, resolved relative to the notebook's working directory
inp_file = 'Walkerton_v1.inp'
# build the WNTR model from that file
wn = wntr.network.WaterNetworkModel(inp_file)
# baseline run: no leak has been added to this model
sim = wntr.sim.WNTRSimulator(wn)
results = sim.run_sim()

Which node will have its pressure predicted?

In [None]:
# name of the node whose pressure the regression model will learn to predict
main_node = 'J126'

# pressure time series of that node, taken from the leak-free simulation
main_node_pressure = results.node['pressure'][main_node]
main_node_pressure

Which nodes are the closest to the main node?

In [None]:
# search radius around the main node: how far-away nodes are we considering?
max_distance = 400.0

# names of every node inside that radius (the main node itself is excluded)
closest_nodes = get_closest_nodes(wn, main_node, max_distance)
print(f'{len(closest_nodes)} nodes found.')

Get pressure over time for each close node

In [None]:
# select the neighbours' pressure columns in one label-based lookup instead of
# collecting one Series per node and concatenating them; column order follows
# closest_nodes, and an empty neighbour list no longer makes pd.concat raise
closest_nodes_pressure = results.node['pressure'].loc[:, closest_nodes]

# add main node to have a complete dataset in single DataFrame
# (features first, the target column last)
noLeaks_data = pd.concat([closest_nodes_pressure, main_node_pressure], axis=1)

noLeaks_data

# Implement linear regression

Shuffling dataset in order not to get a skewed test set

In [None]:
from sklearn.utils import shuffle

# a fixed seed keeps the row order, and therefore the train/test split and every
# metric below, identical on each Restart & Run All
noLeaks_data_shuffled = shuffle(noLeaks_data, random_state=42)
noLeaks_data_shuffled

Divide dataset into train and test subsets

In [None]:
test_set_size = 70                            # rows held out for testing (author's note: 673 records overall)
data_size = noLeaks_data_shuffled.shape[0]    # total number of rows

# the split only makes sense when both subsets end up non-empty
assert 0 < test_set_size < data_size, 'test_set_size must be between 1 and data_size - 1'

# separate X from Y
noLeaks_x = noLeaks_data_shuffled.loc[:, noLeaks_data_shuffled.columns != main_node]
noLeaks_y = noLeaks_data_shuffled.loc[:, noLeaks_data_shuffled.columns == main_node]

# train data (prenorm - pre normalization): everything except the last test_set_size rows;
# both subsets are cut at the same position so they cannot overlap or leave a gap
noLeaks_xtrain_prenorm = noLeaks_x.iloc[:data_size - test_set_size]
noLeaks_ytrain = noLeaks_y.iloc[:data_size - test_set_size]

# test data: the last test_set_size rows
noLeaks_xtest_prenorm = noLeaks_x.iloc[data_size - test_set_size:]
noLeaks_ytest = noLeaks_y.iloc[data_size - test_set_size:]

# normalization stats for later: computed from the training rows only, so no
# information about the held-out test rows leaks into the fitted model
norm_mean = noLeaks_xtrain_prenorm.mean()
norm_std = noLeaks_xtrain_prenorm.std()

noLeaks_xtrain_prenorm

Normalize X dataset

In [None]:
# z-score every feature column with the previously stored mean/std; train and
# test share the same statistics so both end up on the same scale
noLeaks_xtrain = noLeaks_xtrain_prenorm.sub(norm_mean).div(norm_std)
noLeaks_xtest = noLeaks_xtest_prenorm.sub(norm_mean).div(norm_std)

Convert data to numpy array

In [None]:
# plain numpy arrays for the regression cells below
# (the y frames hold a single column, the main node)
noLeaks_xtrain_array = noLeaks_xtrain.to_numpy()
noLeaks_xtest_array = noLeaks_xtest.to_numpy()
noLeaks_ytrain_array = noLeaks_ytrain.to_numpy()
noLeaks_ytest_array = noLeaks_ytest.to_numpy()

# show the training features as a sanity check
noLeaks_xtrain_array

Create regression model, fit the data

In [None]:
from sklearn import linear_model
# ordinary least squares: main-node pressure as a linear function of the
# normalized pressures at the neighbouring nodes
regression = linear_model.LinearRegression()
# fitted on leak-free data only
regression.fit(noLeaks_xtrain_array, noLeaks_ytrain_array)

# Review the model

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# predictions for the held-out (leak-free) test rows
regression_y_pred = regression.predict(noLeaks_xtest_array)

# one coefficient per neighbouring node, followed by the test-set error metrics
print(f'Coefficients: {regression.coef_}\n')
print(f'Mean squared error: {mean_squared_error(noLeaks_ytest_array, regression_y_pred)}\n')
print(f'Coefficient of determination R^2: {r2_score(noLeaks_ytest_array, regression_y_pred)}\n')

See the errors

In [None]:
# residuals on the test set: label minus prediction, keeping the time index of noLeaks_ytest
errors_linear = noLeaks_ytest - regression_y_pred
errors_linear

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# reshape to 1-dimensional numpy array
predictions = regression_y_pred.flatten()

# convert it to DataFrame so the predictions carry the same time index as the labels
predictions_df = pd.DataFrame(predictions, index=noLeaks_ytest.index.copy())

# plot setup: create a dedicated figure instead of relying on pyplot's implicit
# "current axes", so this plot can never be drawn onto another cell's figure
fig, ax = plt.subplots()
ax.set_title(label='Predictions and labels')
ax.set_xlabel(xlabel='Time [s]')
ax.set_ylabel(ylabel='Pressure [m]')
# colour-only legend entries, matching the scatter colours used below
red_patch = mpatches.Patch(color='red', label='Predictions')
blue_patch = mpatches.Patch(color='blue', label='Labels')
ax.legend(handles=[red_patch, blue_patch])

# plot labels
ax.scatter(noLeaks_ytest.index, noLeaks_ytest, color='blue', marker='x')

# plot predictions (trailing semicolon hides the PathCollection repr)
ax.scatter(predictions_df.index, predictions_df, color='red', marker='.');

# Read a water network model, simulate with leaks (leak on main node)

Read water network model

In [None]:
# load a fresh copy of the network so the leak added below does not touch the baseline model `wn`
inp_file = 'Walkerton_v1.inp'
wn_leaks = wntr.network.WaterNetworkModel(inp_file)

Add a leak on main node

In [None]:
# the leaking node is the same node whose pressure the model predicts
leak_node = wn_leaks.get_node(main_node)
# leak active from hour 50 to hour 90 (times are given in seconds);
# area is presumably in m^2 - TODO confirm against the WNTR add_leak documentation
leak_node.add_leak(wn_leaks, area=0.01, start_time=50*3600, end_time=90*3600)

Run the simulation

In [None]:
# simulate the network that contains the main-node leak
sim_leaks = wntr.sim.WNTRSimulator(wn_leaks)
results_leaks = sim_leaks.run_sim()
# display the pressure table (the code below selects node names as its columns)
results_leaks.node['pressure']

Get pressures in closest nodes

In [None]:
# one pressure Series per neighbouring node, taken from the main-node-leak run
closest_nodes_pressure_leak = [
    results_leaks.node['pressure'].loc[:, node] for node in closest_nodes
]

# neighbours' pressures side by side: the model inputs for this scenario
Leaks_xtest_prenorm = pd.concat(closest_nodes_pressure_leak, axis=1)

# pressures for main node (the one with the leak): the values to compare predictions against
Leaks_ytest = results_leaks.node['pressure'].loc[:, main_node]

# scale the inputs with the same mean/std that were applied to the training features
Leaks_xtest = Leaks_xtest_prenorm.sub(norm_mean).div(norm_std)

Leaks_xtest

# Review model in case of main node leak 

Only the time period with a leak will really matter here

In [None]:
# predict from a plain array, the same input type the model was fitted and tested on;
# handing over the DataFrame makes scikit-learn warn about unexpected feature names
leak_regression_y_pred = regression.predict(Leaks_xtest.to_numpy())

# metrics cover the whole simulation, not only the hours in which the leak is active
print(f'Mean squared error: {mean_squared_error(Leaks_ytest, leak_regression_y_pred)}\n')
print(f'Coefficient of determination R^2: {r2_score(Leaks_ytest, leak_regression_y_pred)}\n')

In [None]:
# get predictions on leak data (plain array input, matching how the model was fitted)
leak_regression_y_pred = regression.predict(Leaks_xtest.to_numpy())

# reshape to 1-dimensional numpy array
leak_predictions = leak_regression_y_pred.flatten()

# convert it to DataFrame so the predictions share the simulation time index
leak_predictions_df = pd.DataFrame(leak_predictions, index=Leaks_ytest.index.copy())

# plot setup: a dedicated figure, so this plot never lands on the axes of an
# earlier cell when pyplot's "current axes" is still alive
leak_fig, leak_ax = plt.subplots()
leak_ax.set_title(label='Predictions and real values')
leak_ax.set_xlabel(xlabel='Time [s]')
leak_ax.set_ylabel(ylabel='Pressure [m]')
# colour-only legend entries, matching the scatter colours used below
leak_red_patch = mpatches.Patch(color='red', label='Predictions')
leak_blue_patch = mpatches.Patch(color='blue', label='Real values')
leak_ax.legend(handles=[leak_red_patch, leak_blue_patch])

# plot labels
leak_ax.scatter(Leaks_ytest.index, Leaks_ytest, color='blue', marker='x')

# plot predictions (trailing semicolon hides the PathCollection repr)
leak_ax.scatter(leak_predictions_df.index, leak_predictions_df, color='red', marker='.');

You can clearly see the time period when the leak happened: the difference between the real value and the prediction is about 10 m.

# Read a water network model, simulate with leaks (leaks on nearby nodes)

Read water network model

In [None]:
# another fresh copy of the network, this time for the nearby-node leak scenario
inp_file = 'Walkerton_v1.inp'
wn_leaks2 = wntr.network.WaterNetworkModel(inp_file)

Add a leak on a nearby node

In [None]:
# node that gets the leak in this scenario; the author picked it from the nearby nodes list
nearby_node = 'J125'                          # from nearby nodes list
leak_node2 = wn_leaks2.get_node(nearby_node)
# same leak parameters and time window (hour 50 to hour 90, in seconds) as the main-node scenario
leak_node2.add_leak(wn_leaks2, area=0.01, start_time=50*3600, end_time=90*3600)

Run the simulation

In [None]:
# simulate the network that contains the nearby-node leak
sim_leaks2 = wntr.sim.WNTRSimulator(wn_leaks2)
results_leaks2 = sim_leaks2.run_sim()
# display the pressure table (the code below selects node names as its columns)
results_leaks2.node['pressure']

Get pressures in closest nodes

In [None]:
# one pressure Series per neighbouring node, taken from the nearby-node-leak run
closest_nodes_pressure_leak2 = [
    results_leaks2.node['pressure'].loc[:, node] for node in closest_nodes
]

# neighbours' pressures side by side: the model inputs for this scenario
Leaks_xtest_prenorm2 = pd.concat(closest_nodes_pressure_leak2, axis=1)

# pressures for main node (in this scenario the leak is on nearby_node, not on the main node)
Leaks_ytest2 = results_leaks2.node['pressure'].loc[:, main_node]

# scale the inputs with the same mean/std that were applied to the training features
Leaks_xtest2 = Leaks_xtest_prenorm2.sub(norm_mean).div(norm_std)

Leaks_xtest2

# Review model in case of nearby node leak 

Only the time period with a leak will really matter here

In [None]:
# predictions must come from THIS scenario's inputs (Leaks_xtest2); scoring the
# first scenario's predictions against Leaks_ytest2 would report meaningless metrics.
# A plain array is passed, the same input type the model was fitted on.
leak_regression_y_pred2 = regression.predict(Leaks_xtest2.to_numpy())

# metrics cover the whole simulation, not only the hours in which the leak is active
print(f'Mean squared error: {mean_squared_error(Leaks_ytest2, leak_regression_y_pred2)}\n')
print(f'Coefficient of determination R^2: {r2_score(Leaks_ytest2, leak_regression_y_pred2)}\n')

In [None]:
# get predictions on leak data (plain array input, matching how the model was fitted)
leak_regression_y_pred2 = regression.predict(Leaks_xtest2.to_numpy())

# reshape to 1-dimensional numpy array
leak_predictions2 = leak_regression_y_pred2.flatten()

# convert it to DataFrame so the predictions share the simulation time index
leak_predictions_df2 = pd.DataFrame(leak_predictions2, index=Leaks_ytest2.index.copy())

# plot setup: a dedicated figure, so this plot never lands on the axes of an
# earlier cell when pyplot's "current axes" is still alive
leak_fig2, leak_ax2 = plt.subplots()
leak_ax2.set_title(label='Predictions and real values')
leak_ax2.set_xlabel(xlabel='Time [s]')
leak_ax2.set_ylabel(ylabel='Pressure [m]')
# colour-only legend entries, matching the scatter colours used below
leak_red_patch2 = mpatches.Patch(color='red', label='Predictions')
leak_blue_patch2 = mpatches.Patch(color='blue', label='Real values')
leak_ax2.legend(handles=[leak_red_patch2, leak_blue_patch2])

# plot labels
leak_ax2.scatter(Leaks_ytest2.index, Leaks_ytest2, color='blue', marker='x')

# plot predictions (trailing semicolon hides the PathCollection repr)
leak_ax2.scatter(leak_predictions_df2.index, leak_predictions_df2, color='red', marker='.');

The difference is smaller, but still very noticeable.