In [8]:
import numpy as np
import pandas as pd
import os
import copy
import seaborn as sbn
import matplotlib.pylab as plt

from sklearn.preprocessing import MinMaxScaler, StandardScaler,PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from itertools import permutations, combinations

## Load the leak data files

In [9]:
# Locate the directory one level above the current working directory
path_cwd = os.getcwd()
path_parent = os.path.abspath(os.path.join(path_cwd, os.pardir))

# Folder (inside the parent directory) and file names of the three leak datasets
datafiles_folder_name = 'Data_files'
file14, file24, file31 = (
    'data_leak_in_14.csv',
    'data_leak_in_24.csv',
    'data_leak_in_31.csv',
)

# Full path of each data file: <parent>/<data folder>/<file name>
path_file14, path_file24, path_file31 = (
    os.path.join(path_parent, datafiles_folder_name, file_name)
    for file_name in (file14, file24, file31)
)

In [10]:
# Read each leak dataset into its own dataframe
leak_file_14 = pd.read_csv(filepath_or_buffer=path_file14)
leak_file_24 = pd.read_csv(filepath_or_buffer=path_file24)
leak_file_31 = pd.read_csv(filepath_or_buffer=path_file31)

In [11]:
# Distinct leak sizes present in the first leak file
leak_file_14['leak_area'].unique()

array([0.0001, 0.0005, 0.001 , 0.002 , 0.003 , 0.004 , 0.005 ])

## The function below takes a leak file and a leak size and, for each link (flow) and each node (head), calculates the mean and standard deviation of the parameter with and without the leak. The results are returned as two dataframes: one for the node heads and one for the link flows.

In [12]:
def _last_consecutive_index(columns, prefixes, start):
    """Return the last index i >= start for which every prefix+str(i) is a column.

    Indices are probed one by one from ``start``; ``start - 1`` is returned when
    not even ``start`` is present (which yields an empty range in the caller).
    """
    available = set(columns)
    index = start
    while all(prefix + str(index) in available for prefix in prefixes):
        index += 1
    return index - 1


def _summarise_columns(frame, indices, normal_prefix, leak_prefix,
                       id_column, quantity, scale=None):
    """Build one row per index with mean / std-dev of the normal and leak columns."""
    summary = {
        id_column: [],
        'Mean_' + quantity + '_normal': [],
        'StdDev_' + quantity + '_normal': [],
        'Mean_' + quantity + '_leak': [],
        'StdDev_' + quantity + '_leak': [],
    }
    for i in indices:
        values_normal = frame[normal_prefix + str(i)]
        values_leak = frame[leak_prefix + str(i)]
        if scale is not None:
            values_normal = values_normal * scale
            values_leak = values_leak * scale
        summary[id_column].append(i)
        summary['Mean_' + quantity + '_normal'].append(np.mean(values_normal))
        summary['StdDev_' + quantity + '_normal'].append(np.std(values_normal))
        summary['Mean_' + quantity + '_leak'].append(np.mean(values_leak))
        summary['StdDev_' + quantity + '_leak'].append(np.std(values_leak))
    return pd.DataFrame(summary)


def leakimpact(leakfile, leaksize, num_links=None, num_nodes=None):
    """Summarise flows and heads with and without a leak for one leak size.

    Rows of ``leakfile`` whose ``leak_area`` matches ``leaksize`` are selected.
    For every link ``i`` in ``1..num_links`` the columns ``Link_flow<i>`` and
    ``leak_flow_<i>`` are multiplied by 1000 and their mean and standard
    deviation are recorded; for every node ``i`` in ``2..num_nodes`` the same is
    done (unscaled) for ``Node_head<i>`` and ``leak_head_<i>``.

    Parameters
    ----------
    leakfile : pandas.DataFrame
        Must contain ``leak_area`` plus the flow/head columns named above.
    leaksize : float
        Leak size to select. Compared with a tight relative tolerance rather
        than exact ``==`` so that a computed value (e.g. ``0.1 + 0.2``) still
        matches the stored one.
    num_links, num_nodes : int, optional
        Highest link / node index to summarise. When omitted, a notebook-level
        global of the same name is used if one is defined (the behaviour this
        function originally relied on); otherwise the value is inferred as the
        last consecutive index for which both the normal and the leak column
        exist in ``leakfile``.

    Returns
    -------
    (df_head, df_flow) : tuple of pandas.DataFrame
        ``df_head`` has columns ``Node_no``, ``Mean_head_normal``,
        ``StdDev_head_normal``, ``Mean_head_leak``, ``StdDev_head_leak``;
        ``df_flow`` has the corresponding ``Link_no`` / ``*_flow_*`` columns.
    """
    # Resolution order: explicit argument -> legacy module global -> column names.
    if num_links is None:
        num_links = globals().get('num_links')
    if num_links is None:
        num_links = _last_consecutive_index(
            leakfile.columns, ('Link_flow', 'leak_flow_'), 1)
    if num_nodes is None:
        num_nodes = globals().get('num_nodes')
    if num_nodes is None:
        num_nodes = _last_consecutive_index(
            leakfile.columns, ('Node_head', 'leak_head_'), 2)

    # Purely relative tolerance: robust to float round-off without merging
    # genuinely different (small) leak sizes.
    size_matches = np.isclose(
        leakfile['leak_area'].to_numpy(), leaksize, rtol=1e-9, atol=0.0)
    leakfile_onesize = leakfile.loc[size_matches]

    # flowrates -- the factor 1000 is presumably a unit conversion; TODO confirm
    df_flow = _summarise_columns(
        leakfile_onesize, range(1, num_links + 1),
        'Link_flow', 'leak_flow_', 'Link_no', 'flow', scale=1000)

    # heads -- numbering starts at node 2; node 1 is skipped (presumably the
    # source node, verify against the dataset)
    df_head = _summarise_columns(
        leakfile_onesize, range(2, num_nodes + 1),
        'Node_head', 'leak_head_', 'Node_no', 'head')

    return df_head, df_flow

In [13]:
# Summarise heads and flows at leak size 0.002 for each of the three leak files
df_head_14, df_flow_14 = leakimpact(leakfile=leak_file_14, leaksize=0.002)
df_head_24, df_flow_24 = leakimpact(leakfile=leak_file_24, leaksize=0.002)
df_head_31, df_flow_31 = leakimpact(leakfile=leak_file_31, leaksize=0.002)

In [14]:
# Per-link flow summary (normal vs. leak) computed from leak_file_14 at leak size 0.002
df_flow_14

Unnamed: 0,Link_no,Mean_flow_normal,StdDev_flow_normal,Mean_flow_leak,StdDev_flow_leak
0,1,6225.348594,697.276419,6289.532566,698.672062
1,2,5965.039431,673.830602,6029.22338,675.387282
2,3,2409.998405,281.016644,2444.257441,281.645069
3,4,2352.870824,266.640071,2387.129854,267.218864
4,5,2132.789074,240.924031,2167.048088,241.533862
5,6,1833.402285,211.56059,1867.661307,212.029695
6,7,1430.633479,195.001739,1464.892505,195.223113
7,8,1256.785724,168.811988,1291.044747,168.954102
8,9,1094.355028,138.82851,1128.614061,138.664033
9,10,627.546907,78.891873,627.546905,78.891871


In [15]:
# Per-node head summary (normal vs. leak) computed from leak_file_14 at leak size 0.002
df_head_14

Unnamed: 0,Node_no,Mean_head_normal,StdDev_head_normal,Mean_head_leak,StdDev_head_leak
0,2,216.39929,60.015035,216.331064,59.992821
1,3,171.674568,57.557807,170.718034,57.250562
2,4,166.105249,57.387959,165.002547,57.037451
3,5,159.302167,57.222608,158.016479,56.818478
4,6,152.15131,57.08044,150.653277,56.614789
5,7,150.473533,57.058456,148.917522,56.576232
6,8,148.463444,57.038528,146.81868,56.530936
7,9,146.882818,57.032606,145.158485,56.50249
8,10,145.733276,57.036947,143.94233,56.487903
9,11,143.754672,57.093597,141.963693,56.5456


## For better interpretation of the 'relative' impact of a leak on flow and pressure across the network, the absolute 'change' in mean values, i.e. mean_without_leak minus mean_with_leak, is scaled between 0 and 1 separately for each leak. This helps in making the observation that different leaks impact different parts of the network differently. In later experiments, this differential impact forms the basis for identifying the leak location.

In [16]:
# Absolute change in mean (normal minus leak), one column per leak file,
# then min-max scaled to [0, 1] column by column.

# --- heads ---
df_leakimpact_mean_change_head = pd.DataFrame({
    'Leak14_head': df_head_14['Mean_head_normal'] - df_head_14['Mean_head_leak'],
    'Leak24_head': df_head_24['Mean_head_normal'] - df_head_24['Mean_head_leak'],
    'Leak31_head': df_head_31['Mean_head_normal'] - df_head_31['Mean_head_leak'],
}).abs()
scaler_head = MinMaxScaler()
df_leakimpact_mean_change_head = pd.DataFrame(
    scaler_head.fit_transform(df_leakimpact_mean_change_head.values),
    columns=['Leak14head', 'Leak24head', 'Leak31head'],
)

# --- flows ---
df_leakimpact_mean_change_flow = pd.DataFrame({
    'Leak14_flow': df_flow_14['Mean_flow_normal'] - df_flow_14['Mean_flow_leak'],
    'Leak24_flow': df_flow_24['Mean_flow_normal'] - df_flow_24['Mean_flow_leak'],
    'Leak31_flow': df_flow_31['Mean_flow_normal'] - df_flow_31['Mean_flow_leak'],
}).abs()
scaler_flow = MinMaxScaler()
df_leakimpact_mean_change_flow = pd.DataFrame(
    scaler_flow.fit_transform(df_leakimpact_mean_change_flow.values),
    columns=['Leak14flow', 'Leak24flow', 'Leak31flow'],
)


In [28]:
# Min-max scaled absolute change in mean flow; rows follow the row order of df_flow_*, one column per leak file
df_leakimpact_mean_change_flow

Unnamed: 0,Leak14flow,Leak24flow,Leak31flow
0,1.0,1.0,1.0
1,1.0,1.0,1.0
2,0.538,0.078,0.102
3,0.538,0.078,0.102
4,0.538,0.078,0.102
5,0.538,0.078,0.102
6,0.538,0.078,0.102
7,0.538,0.078,0.102
8,0.538,0.078,0.102
9,0.0,0.0,0.0


In [29]:
# Min-max scaled absolute change in mean head; rows follow the row order of df_head_*, one column per leak file
df_leakimpact_mean_change_head

Unnamed: 0,Leak14head,Leak24head,Leak31head
0,0.0,0.0,0.0
1,0.233,0.39,0.147
2,0.272,0.4,0.152
3,0.321,0.411,0.158
4,0.377,0.425,0.165
5,0.392,0.428,0.166
6,0.415,0.434,0.169
7,0.436,0.439,0.172
8,0.454,0.443,0.174
9,0.454,0.443,0.174
