## Imports

In [16]:
import geojson as gj
import matplotlib.pyplot as plt
import folium as fl
import geopandas as gpd
import json
import pandas as pd
import networkx as nx
import pickle
import haversine as hs

In [20]:
import json

try:
    borderpoints_gdf['param'] = borderpoints_gdf['param'].apply(lambda x: json.loads(x.replace("'", '"')))
    param_df = pd.json_normalize(borderpoints_gdf['param'])
    borderpoints_gdf = borderpoints_gdf.drop(columns=['param']).join(param_df)

    borderpoints_gdf['uncertainty'] = borderpoints_gdf['uncertainty'].apply(lambda x: json.loads(x.replace("'", '"')))
    uncertainty_df = pd.json_normalize(borderpoints_gdf['uncertainty'])
    uncertainty_df.columns = ['uncertainty_' + col for col in uncertainty_df.columns]
    borderpoints_gdf = borderpoints_gdf.drop(columns=['uncertainty']).join(uncertainty_df)

    borderpoints_gdf['method'] = borderpoints_gdf['method'].apply(lambda x: json.loads(x.replace("'", '"')))
    method_df = pd.json_normalize(borderpoints_gdf['method'])
    method_df.columns = ['method_' + col for col in method_df.columns]
    borderpoints_gdf = borderpoints_gdf.drop(columns=['method']).join(method_df)

except json.JSONDecodeError:
    pass

borderpoints_gdf.head()

Unnamed: 0,id,name,source_id,node_id,lat,long,country_code,comment,param,uncertainty,method,tags
0,INET_BP_0,Almeria,['INET_BP_0'],['INET_N_38'],36.7348,-2.3036,ES,,"{'end_year': 2050, 'pipe_name': None, 'start_y...","{'end_year': 20, 'pipe_name': None, 'start_yea...","{'end_year': 'make_Attrib(const)', 'pipe_name'...",{}
1,INET_BP_1,Alveringem,['INET_BP_1'],['INET_N_1146'],50.926317,2.315596,BE,,"{'end_year': 2050, 'pipe_name': 'Alveringem-Ma...","{'end_year': 20, 'pipe_name': 0, 'start_year':...","{'end_year': 'make_Attrib(const)', 'pipe_name'...",{}
2,INET_BP_2,Arnoldstein,['INET_BP_2'],['INET_N_76'],46.54999,13.705762,AT,,"{'end_year': 2050, 'pipe_name': 'medgaz', 'sta...","{'end_year': 20, 'pipe_name': 0, 'start_year':...","{'end_year': 'make_Attrib(const)', 'pipe_name'...",{}
3,INET_BP_3,Backi Breg,['INET_BP_3'],['INET_N_1387'],45.78,19.12,RS,,"{'end_year': 2050, 'pipe_name': None, 'start_y...","{'end_year': 20, 'pipe_name': None, 'start_yea...","{'end_year': 'make_Attrib(const)', 'pipe_name'...",{}
4,INET_BP_4,Badajoz,['INET_BP_4'],['INET_N_108'],38.87665,-6.9714,ES,,"{'end_year': 2050, 'pipe_name': None, 'start_y...","{'end_year': 20, 'pipe_name': None, 'start_yea...","{'end_year': 'make_Attrib(const)', 'pipe_name'...",{}


## Nodes

In [4]:
# Load the file "/Users/johanmasvie/programming/tiø4900/master_thesis/Scigrid_data/IGGIN_Nodes.geojson" into a GeoDataFrame
nodes_gdf = gpd.read_file('/Users/johanmasvie/programming/tiø4900/master_thesis/Scigrid_data/IGGIN_Nodes.geojson')

nodes_gdf.head()

Unnamed: 0,name,id,country_code,tags,param,method,geometry
0,N_0,NO_N_0,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (6.79720 65.10640)
1,N_1,NO_N_1,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (5.19357 59.33286)
2,N_2,NO_N_2,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (2.89575 60.56023)
3,N_3,NO_N_3,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (2.89614 60.78219)
4,N_4,NO_N_4,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (3.29837 54.78877)


## Pipelines

In [5]:
# Open the file: "/Users/johanmasvie/programming/tiø4900/master_thesis/Scigrid_data/IGGIN_PipeSegments.csv" as a GeoDataFrame: gdf
pipelines_gdf = gpd.read_file('/Users/johanmasvie/programming/tiø4900/master_thesis/Scigrid_data/IGGIN_PipeSegments.csv')

# Split the 'param' column into separate columns
pipelines_gdf['param'] = pipelines_gdf['param'].apply(lambda x: json.loads(x.replace("'", '"')))
param_df = pd.json_normalize(pipelines_gdf['param'])
pipelines_gdf = pipelines_gdf.drop(columns=['param']).join(param_df)

# Split the 'uncertainty' column into separate columns and add them to the GeoDataFrame, prefaced by the string 'uncertainty_'
pipelines_gdf['uncertainty'] = pipelines_gdf['uncertainty'].apply(lambda x: json.loads(x.replace("'", '"')))
uncertainty_df = pd.json_normalize(pipelines_gdf['uncertainty'])
uncertainty_df.columns = ['uncertainty_' + col for col in uncertainty_df.columns]
pipelines_gdf = pipelines_gdf.drop(columns=['uncertainty']).join(uncertainty_df)

# Split the 'method' column into separate columns and add them to the GeoDataFrame, prefaced by the string 'method_'
pipelines_gdf['method'] = pipelines_gdf['method'].apply(lambda x: json.loads(x.replace("'", '"')))
method_df = pd.json_normalize(pipelines_gdf['method'])
method_df.columns = ['method_' + col for col in method_df.columns]
pipelines_gdf = pipelines_gdf.drop(columns=['method']).join(method_df)

# Split the "country_code" column into two separate columns. "['ES', 'ES']" is an example of a value in the "country_code" column
pipelines_gdf['country_code'] = pipelines_gdf['country_code'].apply(lambda x: x.replace('[', '').replace(']', '').replace("'", '').split(', '))
country_code_df = pd.DataFrame(pipelines_gdf['country_code'].to_list(), columns=['country_code_1', 'country_code_2'])
pipelines_gdf = pipelines_gdf.drop(columns=['country_code']).join(country_code_df)

# Split the node_id column into two separate columns. "['INET_N_23', 'INET_N_295']" is an example of a value in the "node_id" column
pipelines_gdf['node_id'] = pipelines_gdf['node_id'].apply(lambda x: x.replace('[', '').replace(']', '').replace("'", '').split(', '))
node_id_df = pd.DataFrame(pipelines_gdf['node_id'].to_list(), columns=['node_id_1', 'node_id_2'])
pipelines_gdf = pipelines_gdf.drop(columns=['node_id']).join(node_id_df)

# Print the first few rows of the GeoDataFrame
pipelines_gdf.head()

Unnamed: 0,id,name,source_id,lat,long,comment,tags,geometry,diameter_mm,end_year,...,method_long_mean,method_max_cap_M_m3_per_d,method_max_pressure_bar,method_num_compressor,method_start_year,method_waterDepth_m,country_code_1,country_code_2,node_id_1,node_id_2
0,INET_PL_1,AlcazarDeSanJuan_Chinchilla,['INET_PL_1'],"[39.3900748, 38.892435]","[-3.3500984, -1.719831]",,{},,923.792529,2050,...,make_Attrib(latlong_mean),Lasso,Median,Median,make_Attrib(const),Lasso,ES,ES,INET_N_23,INET_N_295
1,INET_PL_3,Tarancon_Madrid,['INET_PL_3'],"[40.01, 40.223]","[-3.207, -3.681]",,{},,923.792529,2050,...,make_Attrib(latlong_mean),Lasso,Median,Median,make_Attrib(const),Lasso,ES,ES,INET_N_1460,INET_N_904
2,INET_PL_5,Alessandria_Cortemaggiore,['INET_PL_5'],"[44.991076, 44.90008]","[9.920891, 8.60944]",,{},,914.4,2050,...,make_Attrib(latlong_mean),Lasso,raw,Median,make_Attrib(const),Lasso,IT,IT,INET_N_317,INET_N_26
3,INET_PL_7,Almendralejo_Badajoz,['INET_PL_7'],"[38.711202, 38.87665]","[-6.382539, -6.9714]",,{},,923.792529,2050,...,make_Attrib(latlong_mean),Lasso,Median,Median,make_Attrib(const),Lasso,ES,ES,INET_N_36,INET_N_108
4,INET_PL_9,Almendralejo_Zamora,['INET_PL_9'],"[38.711202, 41.506]","[-6.382539, -5.644]",,{},,923.792529,2050,...,make_Attrib(latlong_mean),Lasso,Median,Median,make_Attrib(const),Lasso,ES,ES,INET_N_36,INET_N_1653


## Creating a NetworkX graph

In [6]:
# Create a graph of the pipeline network
G = nx.Graph()

# Add the nodes to the graph
for i, row in nodes_gdf.iterrows():
    G.add_node(row['id'], pos=(row['geometry'].x, row['geometry'].y))

# Add the edges to the graph
for i, row in pipelines_gdf.iterrows():
    G.add_edge(row['node_id_1'], row['node_id_2'])

# Save the graph object to a file
with open('pipeline_network.pkl', 'wb') as f:
    pickle.dump(G, f)

## Connected components

In [7]:
# Add a column to the nodes_gdf GeoDataFrame that contains a number symbolizing the connected component the node is a part of
nodes_gdf['connected_component'] = 0
for i, component in enumerate(nx.connected_components(G)):
    for node in component:
        nodes_gdf.loc[nodes_gdf['id'] == node, 'connected_component'] = i

## Plotting the network

In [8]:
# Plot the nodes and pipeline segments in a folium map
m = fl.Map(location=[nodes_gdf['geometry'].y.mean(), nodes_gdf['geometry'].x.mean()], zoom_start=5)

# Create a list of colors for each connected component
colors = ['red', 'darkblue', 'darkgreen', 'gray', 'white', 'beige']

# Create a feature group for the node markers
node_markers = fl.FeatureGroup(name='Node Markers')

for i, row in nodes_gdf.iterrows():
    # Get the connected component of the node
    component = nodes_gdf.loc[nodes_gdf['id'] == row['id'], 'connected_component'].values[0]
    # Assign a color based on the connected component
    color = colors[component % len(colors)]
    fl.Marker([row['geometry'].y, row['geometry'].x], popup=row['id'], icon=fl.Icon(color=color)).add_to(node_markers)

# Add the node markers feature group to the map
node_markers.add_to(m)

# Create a feature group for the pipeline segments
pipeline_segments = fl.FeatureGroup(name='Pipeline Segments')

for i, row in pipelines_gdf.iterrows():
    # Get the connected component of the pipeline segment
    component = nodes_gdf.loc[nodes_gdf['id'] == row['node_id_1'], 'connected_component'].values[0]
    # Assign a color based on the connected component
    color = colors[component % len(colors)]
    fl.PolyLine([(nodes_gdf[nodes_gdf['id'] == row['node_id_1']]['geometry'].y.values[0], nodes_gdf[nodes_gdf['id'] == row['node_id_1']]['geometry'].x.values[0]),
                 (nodes_gdf[nodes_gdf['id'] == row['node_id_2']]['geometry'].y.values[0], nodes_gdf[nodes_gdf['id'] == row['node_id_2']]['geometry'].x.values[0])],
                color=color).add_to(pipeline_segments)

# Add the pipeline segments feature group to the map
pipeline_segments.add_to(m)

# Add the layer control to the map
fl.LayerControl().add_to(m)

m.save('pipeline_network.html')

KeyboardInterrupt: 

## Identifying duplicate nodes

### Dunkerque: INET_N_407 and NO_N_33

![Image Description](Screenshots/1.png)

### Zeebrugge: INET_N_1656 and NO_N_5
![Image Description](Screenshots/2.png)

### Dornmum: INET_N_379, NO_N_27 and Storages_0
![Image Description](Screenshots/3.png)

### Emden: INET_N_436 and NO_N_19
![Image Description](Screenshots/4.png)

### North Sea: NO_N_17 and NO_N_32
![Image Description](Screenshots/5.png)

### North Sea: NO_N_48 and NO_N_16
![image.png](Screenshots/6.png)

### North Sea: NO_N_15 and NO_N_22
![image.png](Screenshots/7.png)

### Easington: INET_N_410 and NO_N_39
![image.png](Screenshots/8.png)

### St. Fergus: INET_N_1401 and NO_N_24
![image.png](Screenshots/9.png)

## Combining nodes

In [None]:
# Drop the connected_component column from the nodes_gdf GeoDataFrame
nodes_gdf = nodes_gdf.drop(columns=['connected_component'])

nodes_gdf.head()

Unnamed: 0,name,id,country_code,tags,param,method,geometry
0,N_0,NO_N_0,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (6.79720 65.10640)
1,N_1,NO_N_1,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (5.19357 59.33286)
2,N_2,NO_N_2,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (2.89575 60.56023)
3,N_3,NO_N_3,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (2.89614 60.78219)
4,N_4,NO_N_4,NO,{},"{'eic_code': None, 'elevation_m': None, 'exact...","{'eic_code': None, 'elevation_m': None, 'exact...",POINT (3.29837 54.78877)


In [None]:
def combine_nodes(node_ids):

    # Get the rows corresponding to the node ids
    rows = nodes_gdf[nodes_gdf['id'].isin(node_ids)]

    # Print the columns and their type
    
    
    # # Create a new combined node
    # combined_node = {
    #     'id': '-'.join(node_ids),
    #     'geometry': rows['geometry'].unary_union,
    #     # Add other columns as needed
    # }
    
    # # Drop the rows corresponding to the individual nodes
    # nodes_gdf.drop(nodes_gdf[nodes_gdf['id'].isin(node_ids)].index, inplace=True)
    
    # # Append the combined node to the dataframe
    # nodes_gdf = nodes_gdf.append(combined_node, ignore_index=True)
    
    # return nodes_gdf

test = ['INET_N_407', 'NO_N_33']

combine_nodes(test)

          name          id country_code tags  \
32        N_53     NO_N_33           FR   {}   
262  Dunkerque  INET_N_407           FR   {}   

                                                 param  \
32   {'eic_code': None, 'elevation_m': None, 'exact...   
262  {'eic_code': None, 'elevation_m': 1, 'exact': ...   

                                                method  \
32   {'eic_code': None, 'elevation_m': None, 'exact...   
262  {'eic_code': None, 'elevation_m': 'bing API', ...   

                     geometry  
32   POINT (2.25136 51.04555)  
262  POINT (2.19479 51.03313)  
