# Extract Data
We want to extract the 'state' of the network at each time step. The state of the network is defined by the following variables:
- pressure_value
- base_demand
- demand_value

In [1]:
import pandas as pd

In [6]:
# Location of the per-node CSV export for this network run.
path_rand_demand = "dati/networks/dataset-network-out/NET_0_bd_0_005_h_720_leak_area_0_nodes.csv"
# The file is semicolon-separated, so the separator has to be given explicitly.
data = pd.read_csv(path_rand_demand, sep=";")
columns = data.columns.tolist()
# Quick inventory of what was loaded: available columns and distinct node IDs.
print("COLUMNS in the dataset: ", columns)
print(f"{len(data['nodeID'].unique())} NODES in the dataset: {data['nodeID'].unique()}")
# Last expression of the cell: rich preview of the first rows.
data.head()

COLUMNS in the dataset:  ['hour', 'nodeID', 'base_demand', 'demand_value', 'head_value', 'pressure_value', 'x_pos', 'y_pos', 'node_type', 'has_leak', 'leak_area_value', 'leak_discharge_value', 'leak_demand_value']
13 nodes in the dataset: [880000 880001 880002 880003 880004 880005 880006 880007 880008 880009
 880010 880011 880012]


Unnamed: 0,hour,nodeID,base_demand,demand_value,head_value,pressure_value,x_pos,y_pos,node_type,has_leak,leak_area_value,leak_discharge_value,leak_demand_value
0,0:00:00,880000,0.004848,0.000867,129.078246,4.078242,4308452.95,3781906.0,Junction,False,0.0,0.0,0.0
1,0:00:00,880001,0.004199,0.001251,129.076928,5.076912,4307717.23,3781182.0,Junction,False,0.0,0.0,0.0
2,0:00:00,880002,0.00225,-0.0,129.078357,3.078366,4308600.43,3781645.0,Junction,False,0.0,0.0,0.0
3,0:00:00,880003,0.004689,-0.0,129.077265,1.077267,4307885.2,3780861.0,Junction,False,0.0,0.0,0.0
4,0:00:00,880004,0.003701,-0.0,129.077772,1.077774,4307947.41,3780894.0,Junction,False,0.0,0.0,0.0


In [3]:
# Distinct hour components of the 'hour' column, i.e. the text before the first ":".
hour_labels = data['hour'].map(lambda stamp: stamp.partition(":")[0])
hour_labels.unique()

array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
       '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34',
       '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45',
       '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56',
       '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67',
       '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78',
       '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89',
       '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100',
       '101', '102', '103', '104', '105', '106', '107', '108', '109',
       '110', '111', '112', '113', '114', '115', '116', '117', '118',
       '119', '120', '121', '122', '123', '124', '125', '126', '127',
       '128', '129', '130', '131', '132', '133', '134', '135', '136',
       '137', '138', '139', '140', '141', '142', '143', '144

In [4]:
# Long -> wide: one row per hour, one column per (state variable, node) pair.
# Short prefixes keep the flattened column labels compact: p = pressure, b = base demand, d = demand.
short_names = {'pressure_value': 'p', 'base_demand': 'b', 'demand_value': 'd'}
state_long = data[['hour', 'nodeID', *short_names]].rename(columns=short_names)

# NOTE: pivot_table aggregates with the mean by default, so duplicated
# (hour, nodeID) rows would be averaged rather than reported.
df = state_long.pivot_table(
    index='hour',
    columns='nodeID',
    values=list(short_names.values()),
)

# Flatten the (variable, nodeID) column MultiIndex into labels such as "p_880000".
df.columns = ['_'.join([feature, str(node)]) for feature, node in df.columns]

# The hour labels are strings like "0:00:00"; keep only the integer hour so that
# sorting is numeric instead of lexicographic.
df.index = df.index.map(lambda stamp: int(stamp.split(":")[0]))
df = df.sort_index()
df.head()

Unnamed: 0_level_0,b_880000,b_880001,b_880002,b_880003,b_880004,b_880005,b_880006,b_880007,b_880008,b_880009,...,p_880003,p_880004,p_880005,p_880006,p_880007,p_880008,p_880009,p_880010,p_880011,p_880012
hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.004848,0.004199,0.00225,0.004689,0.003701,0.00204,0.001341,0.002578,0.004318,0.00476,...,1.077267,1.077774,3.079432,5.07397,2.089346,2.076619,2.084041,-3.080648,4.092576,0.0
1,0.001145,0.001529,0.000318,5e-06,0.004973,0.004885,0.000945,0.000108,0.003758,0.002758,...,2.506159,2.506302,4.506773,6.50319,3.518131,3.505721,3.512335,-3.080464,5.521658,0.0
2,0.004169,0.003663,0.003149,0.002765,0.000915,0.003125,0.002784,0.000433,0.002716,0.001469,...,1.159793,1.16047,3.16268,5.147269,2.174552,2.15795,2.16823,-3.080634,4.178399,0.0
3,0.001266,0.004262,0.000741,0.001314,0.004219,0.004595,0.004942,0.001024,0.004139,0.004099,...,0.955383,0.956376,2.959617,4.924084,1.972065,1.951049,1.965521,-3.080668,3.976046,0.0
4,0.001318,0.00174,0.000669,0.004558,0.002984,0.003776,0.001387,0.002647,0.004804,0.001025,...,1.756036,1.756243,3.756923,5.751432,2.763513,2.755369,2.760093,-3.08055,4.765597,0.0


In [25]:
# Build the per-time-step state tensor from the wide frame.
#
# The columns of `df` are labelled "<feature>_<nodeID>" and grouped by feature
# (b_*, then d_*, then p_*), so a row can be folded into (feature, node) and then
# transposed to (node, feature). The sizes are derived from the column labels
# instead of being hard-coded, so networks with a different number of nodes
# (or state variables) are handled without silently misaligning values.
feature_names = list(dict.fromkeys(col.split("_", 1)[0] for col in df.columns))
n_features = len(feature_names)
n_nodes, leftover = divmod(df.shape[1], n_features)
assert leftover == 0, "each state variable must have exactly one column per node"

# (time, feature, node) -> (time, node, feature)
states = df.to_numpy().reshape(-1, n_features, n_nodes).swapaxes(1, 2)

# State of the network at the first time step: one row per node, columns in
# `feature_names` order.
states[0]

array([[ 4.84838000e-03,  8.67030000e-04,  4.07824187e+00],
       [ 4.19857000e-03,  1.25103000e-03,  5.07691223e+00],
       [ 2.24972000e-03,  0.00000000e+00,  3.07836562e+00],
       [ 4.68926000e-03,  0.00000000e+00,  1.07726671e+00],
       [ 3.70115000e-03,  0.00000000e+00,  1.07777389e+00],
       [ 2.04014000e-03,  0.00000000e+00,  3.07943181e+00],
       [ 1.34067000e-03,  3.99100000e-04,  5.07397010e+00],
       [ 2.57832000e-03,  0.00000000e+00,  2.08934647e+00],
       [ 4.31797000e-03,  0.00000000e+00,  2.07661934e+00],
       [ 4.75999000e-03,  0.00000000e+00,  2.08404060e+00],
       [ 3.42589000e-03,  0.00000000e+00, -3.08064820e+00],
       [ 3.65509000e-03,  6.61920000e-04,  4.09257567e+00],
       [ 0.00000000e+00,  3.17908000e-03,  0.00000000e+00]])