In [1]:
import numpy as np
import pandas as pd
import os
import copy
import seaborn as sbn
import matplotlib.pylab as plt

from sklearn.preprocessing import MinMaxScaler, StandardScaler,PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression

## Loading the datafiles for training and testing models

In [2]:
# Getting path for the 'parent folder'
path_cwd = os.getcwd()
path_parent = os.path.abspath(os.path.join(path_cwd, os.pardir))

# Getting path for the data files
datafiles_folder_name = 'Data_files'
trainfile = 'data_base_demand_train_1.csv'
testfile = 'data_base_demand_test.csv'
path_trainfile = os.path.join(path_parent,datafiles_folder_name,trainfile)
path_testfile = os.path.join(path_parent,datafiles_folder_name,testfile)

In [3]:
#Loading datafiles
operating_scenarios_df_train = pd.read_csv(path_trainfile)
operating_scenarios_df_test = pd.read_csv(path_testfile)

In [4]:
# Slight change in column names to maintain consistency that may be useful later
operating_scenarios_df_train = operating_scenarios_df_train.rename(columns={'Res_Head':'Node_head1'})
operating_scenarios_df_test = operating_scenarios_df_test.rename(columns={'Res_Head':'Node_head1'})

In [5]:
# Initializing a few parameters
num_demand = 32
num_head = 32
num_flow = 34

In [6]:
# Seperating out the column names in the datafiles among demand, head and flow value column headers
column_demand = operating_scenarios_df_train.columns[:num_demand]
column_head = operating_scenarios_df_train.columns[num_demand:num_demand+num_head]
column_flow = operating_scenarios_df_train.columns[num_demand+num_head:]

## The model is a linear regression model, detailed below:
* Subnetowrk level prediction of nodal heads and flowrates
* Input: Only head and flowrate just outside of the network
* Output: One output at a time, either flow in a link or prssure head at a node in the network

In [7]:
# Function to extract data for model training and testing
# Pred type could be 'head' or 'flow'
# Pred num is the number of the node for 'head' type and number of the link for 'flow' type
# Input is the demand values across the subnetwork and and flow and head outside the subnetwork

def data_for_model(operating_scenarios_df,pred_type,pred_num,input_head,input_flow):
    
    headlist = ['Node_head'+str(i) for i in input_head]
    flowlist = ['Link_flow'+str(i) for i in input_flow]
    inputlist = headlist+flowlist
    
    data_in = np.array(operating_scenarios_df[inputlist])
    
    if pred_type == 'head':
        data_out = np.array(operating_scenarios_df['Node_head'+str(pred_num)]).reshape(-1,1)
    else:
        data_out = np.array(operating_scenarios_df['Link_flow'+str(pred_num)]).reshape(-1,1)
        
    data_model = np.hstack((data_in,data_out))
    
    return data_in, data_out

## Function that works as a linear regression model

In [8]:
# polynomial fit
def polymodel(n,X_train,y_train):
    
    poly = PolynomialFeatures(degree=n)
    X_train_poly = poly.fit_transform(X_train)    
    lin_model = LinearRegression()
    lin_model.fit(X_train_poly,y_train)    
    
    return lin_model

## Below code uses the 'polymodel' function iteratively to train a regression model and make predictions in each iteration. Iterations are made over all the links within the subnetwork. Flowrate in the link is learnt and predicted

In [9]:
flow_error_mean=[]
flow_error_std=[]
flow_error_r2=[]
Fytest = {}
Fypred = {}

in_flows = [23,26]
in_heads = [20,26]
out_flows = [24,25,29,30,31,32,33,34]
    
for i in out_flows:
    X_train,y_train = data_for_model(operating_scenarios_df_train,'flow',i,in_heads,in_flows)
    X_test,y_test = data_for_model(operating_scenarios_df_test,'flow',i,in_heads,in_flows)
    
    lin_model = polymodel(2,X_train,y_train)
    poly = PolynomialFeatures(degree=2)
    X_test_poly = poly.fit_transform(X_test)
    Fypred[i] = lin_model.predict(X_test_poly)
    Fytest[i] = y_test
    error = Fytest[i] - Fypred[i]
    flow_error_mean.append(np.mean(error))
    flow_error_std.append(np.std(error))
    flow_error_r2.append(r2_score(Fytest[i],Fypred[i]))

In [10]:
PolyReg_flows = pd.DataFrame(columns=['link','mean','Std','R2'])
PolyReg_flows['link']=out_flows
PolyReg_flows['mean']=flow_error_mean
PolyReg_flows['Std']=flow_error_std
PolyReg_flows['R2']=flow_error_r2

In [11]:
PolyReg_flows

Unnamed: 0,link,mean,Std,R2
0,24,9.5e-05,0.014328,0.993217
1,25,0.002776,0.021757,0.97735
2,29,-0.003721,0.032936,0.736956
3,30,0.007637,0.020917,0.584218
4,31,-9.5e-05,0.026086,-0.03971
5,32,0.002357,0.037232,0.119113
6,33,0.005633,0.048519,0.330999
7,34,0.011252,0.035344,0.805189


## Below code uses the 'polymodel' function iteratively to train a regression model and make predictions in each iteration. Iterations are made over all the nodes and pressure head is learnt and predicted

In [15]:
head_error_mean=[]
head_error_std=[]
head_error_r2=[]
Hytest={}
Hypred={}

in_demands = [23,24,25,28,29,30,31,32]
in_flows = [23,26]
in_heads = [20,26]
out_heads = [23,24,25,28,29,30,31,32] 


for i in out_heads:
    X_train,y_train = data_for_model(operating_scenarios_df_train,'head',i,in_heads,in_flows)
    X_test,y_test = data_for_model(operating_scenarios_df_test,'head',i,in_heads,in_flows)
    
    lin_model = polymodel(2,X_train,y_train)
    poly = PolynomialFeatures(degree=2)
    X_test_poly = poly.fit_transform(X_test)
    Hypred[i] = lin_model.predict(X_test_poly)
    Hytest[i] = y_test
    error = Hytest[i] - Hypred[i]
    head_error_mean.append(np.mean(error))
    head_error_std.append(np.std(error))
    head_error_r2.append(r2_score(Hytest[i],Hypred[i]))

In [16]:
PolyReg_heads = pd.DataFrame(columns=['mean','Std','R2'])
PolyReg_heads['mean']=head_error_mean
PolyReg_heads['Std']=head_error_std
PolyReg_heads['R2']=head_error_r2

In [17]:
PolyReg_heads

Unnamed: 0,mean,Std,R2
0,0.000362,0.0017,1.0
1,0.012227,0.182381,0.999991
2,-0.000453,0.001446,1.0
3,0.32464,3.10492,0.997381
4,-0.306108,3.810728,0.996088
5,-0.481903,2.998411,0.997523
6,-0.5184,2.563962,0.998159
7,-0.296795,0.948324,0.999733
