In [22]:
import yaml
import epynet
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import numpy as np
import networkx as nx
from lsdlm import utils, lsdlm
import time
import argparse
from scipy import sparse
from matplotlib.colors import LogNorm
import seaborn as sns
from sklearn.metrics import mean_absolute_error

In [23]:
# Load the 2018 predictions table; the CSV's 'Unnamed: 0' column becomes the row index.
# NOTE(review): this is an absolute, machine-specific path, while the SCADA workbooks
# further down are read from a relative 'data/' directory — consider one shared data root.
predictions_csv = 'C:/Users/aznur/PycharmProjects/research_project/proposed_method/data/2018_predictions.csv'
df = pd.read_csv(predictions_csv, index_col='Unnamed: 0')
print(df)

               0         1         2         3         4         5         6  \
0       0.082900  0.063666  0.074538  0.180708  0.237690  0.126197  0.032904   
1       0.082918  0.063741  0.074854  0.181041  0.238176  0.126304  0.032966   
2       0.083072  0.063827  0.074768  0.181019  0.238423  0.126383  0.033033   
3       0.083204  0.063935  0.075009  0.181165  0.238817  0.126321  0.033028   
4       0.083304  0.064024  0.074947  0.181381  0.238947  0.126869  0.033135   
...          ...       ...       ...       ...       ...       ...       ...   
105112  0.078915  0.060505  0.071069  0.177497  0.233391  0.123255  0.030928   
105113  0.078920  0.060517  0.071173  0.177713  0.233517  0.123550  0.030937   
105114  0.079128  0.060729  0.071360  0.177768  0.233923  0.123727  0.031089   
105115  0.079252  0.060776  0.071537  0.178021  0.234118  0.123776  0.031146   
105116  0.079347  0.060940  0.071508  0.178132  0.234448  0.123987  0.031178   

               7         8         9  .

In [24]:
# Relabel the zero-based column headers ('0', '1', ...) as one-based node names ('n1', 'n2', ...).
# The guard keeps the cell safe to re-run: once the labels already start with 'n',
# int('n1') would raise ValueError.
if not all(str(label).startswith('n') for label in df.columns):
    df.columns = [f'n{int(node) + 1}' for node in df.columns]

# Attach a 5-minute timestamp index that starts at midnight on 2018-01-01.
# NOTE(review): the MAE cell compares df row-for-row with df_2018[3:]. If df_2018 starts at
# 2018-01-01 00:00 in 5-minute steps, row 0 of df corresponds to 00:15 rather than 00:00 —
# confirm the intended start timestamp.
df.index = pd.date_range(start='2018-01-01 00:00:00', periods=len(df), freq='5min')

In [25]:
print('Importing dataset configuration...\n')

# Parse the dataset configuration (YAML) into a dictionary.
# NOTE(review): the path is absolute and machine-specific. Also, if the file only holds
# plain mappings/lists/scalars, yaml.safe_load would be the more conservative loader — confirm.
config_path = 'C:/Users/aznur/PycharmProjects/research_project/proposed_method/data/dataset_configuration.yaml'
with open(config_path) as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

# Turn each pressure-sensor label into an integer by removing its "n" characters,
# giving the node numbers at which a pressure sensor is present.
sensors = [
    int(label.replace("n", ""))
    for label in config['pressure_sensors']
]

Importing dataset configuration...



In [26]:
# Show the integer node numbers parsed from config['pressure_sensors'].
print(sensors)

[1, 4, 31, 54, 105, 114, 163, 188, 215, 229, 288, 296, 332, 342, 410, 415, 429, 458, 469, 495, 506, 516, 519, 549, 613, 636, 644, 679, 722, 726, 740, 752, 769]


In [27]:
# Column labels ('n<number>') for every node that has a pressure sensor.
# Built with a comprehension so no loop variable is left behind in the notebook namespace.
new_list = [f'n{sensor}' for sensor in sensors]
print(new_list)

['n1', 'n4', 'n31', 'n54', 'n105', 'n114', 'n163', 'n188', 'n215', 'n229', 'n288', 'n296', 'n332', 'n342', 'n410', 'n415', 'n429', 'n458', 'n469', 'n495', 'n506', 'n516', 'n519', 'n549', 'n613', 'n636', 'n644', 'n679', 'n722', 'n726', 'n740', 'n752', 'n769']


In [28]:
# Keep only the columns whose label is in new_list (nodes with a pressure sensor).
# All other columns are removed in a single call using the `columns=` keyword: the
# positional form df.drop(i, 1, ...) raises a FutureWarning on pandas 1.x and is rejected
# by pandas 2.x, where every argument of drop except `labels` is keyword-only.
df = df.drop(columns=[column for column in df.columns if column not in new_list])

print(df)

  df.drop(i, 1, inplace=True)


                           n1        n4       n31       n54      n105  \
2018-01-01 00:00:00  0.082900  0.180708  0.248038  0.255949  0.528629   
2018-01-01 00:05:00  0.082918  0.181041  0.248374  0.256698  0.528781   
2018-01-01 00:10:00  0.083072  0.181019  0.248525  0.256125  0.528480   
2018-01-01 00:15:00  0.083204  0.181165  0.248683  0.256359  0.528659   
2018-01-01 00:20:00  0.083304  0.181381  0.249035  0.256985  0.528864   
...                       ...       ...       ...       ...       ...   
2018-12-31 23:20:00  0.078915  0.177497  0.244821  0.248733  0.525919   
2018-12-31 23:25:00  0.078920  0.177713  0.244544  0.249554  0.526198   
2018-12-31 23:30:00  0.079128  0.177768  0.245110  0.249944  0.526361   
2018-12-31 23:35:00  0.079252  0.178021  0.245139  0.250753  0.526391   
2018-12-31 23:40:00  0.079347  0.178132  0.245500  0.250481  0.526459   

                         n114      n163      n188      n215      n229  ...  \
2018-01-01 00:00:00  0.593807  0.566884  0.62

In [29]:
# Average each describe() statistic (count, mean, std, min, quartiles, max) across the columns of df.
df.describe().mean(axis='columns')

count    105117.000000
mean          0.436744
std           0.008859
min           0.407310
25%           0.429963
50%           0.436012
75%           0.443707
max           0.454615
dtype: float64

In [30]:
# Read the 'Pressures (m)' sheet of the 2018 and 2019 SCADA workbooks,
# using the first column of each sheet as the row index.
df_2018 = pd.read_excel(
    'data/2018_SCADA_data.xlsx',
    sheet_name='Pressures (m)',
    index_col=0,
)
df_2019 = pd.read_excel(
    'data/2019_SCADA_data.xlsx',
    sheet_name='Pressures (m)',
    index_col=0,
)

In [31]:
# Read 'Sheet1' of the separately stored 2018 predictions workbook (first column as index).
df_2018_mine = pd.read_excel(
    '2018_predictions_mine.xlsx',
    sheet_name='Sheet1',
    index_col=0,
)

In [32]:
# Average each describe() statistic across the columns of the 2018 SCADA pressure table.
df_2018.describe().mean(axis='columns')

count    105120.000000
mean         46.236477
std           0.481325
min          44.442121
25%          45.873030
50%          46.201515
75%          46.613333
max          47.196061
dtype: float64

In [33]:
# Average each describe() statistic across the columns of the 2019 SCADA pressure table.
df_2019.describe().mean(axis='columns')

count    105120.000000
mean         45.774177
std           0.651158
min          43.965758
25%          45.230606
50%          45.779091
75%          46.297576
max          47.152121
dtype: float64

In [36]:
# Mean absolute error between the predictions in df and the 2018 SCADA pressures with
# their first three rows skipped (the recorded row counts are 105117 vs 105120).
# NOTE(review): the recorded summaries put df at ~0.44 on average and df_2018 at ~46.2,
# and the recorded MAE (45.80) is essentially that gap — the predictions look normalised
# while the SCADA sheet is 'Pressures (m)'. Confirm both are on the same scale.
# NOTE(review): values are paired by position, not by label — confirm that df_2018's
# columns are the same sensors in the same order as df's, and that dropping the first
# three rows is the intended time alignment.
error = mean_absolute_error(df, df_2018[3:])
print(f'MAE: {error:.2f}\n')

MAE: 45.80



In [19]:
# Average each describe() statistic across the columns of df_2018_mine.
df_2018_mine.describe().mean(axis='columns')

count    105114.000000
mean         42.640453
std           1.659716
min          34.882731
25%          41.744491
50%          42.636193
75%          43.860041
max          46.834732
dtype: float64

In [9]:
# Write df to the 'predictions' sheet of 2018_gardar.xlsx.
# NOTE(review): this cell's execution count (9) is older than the cells above, so the state
# of df when the file was last written cannot be determined from here — re-run in order.
df.to_excel(excel_writer="2018_gardar.xlsx", sheet_name='predictions')