This notebook is used to randomly select a certain number of buses (among those that have GPS coordinates) and save the corresponding variable names to a JSON file to be used in training a CNN for momentum estimation.

In [None]:
import os
import sys
import json
import pickle
import numpy as np
from numpy.random import RandomState, SeedSequence, MT19937
from sklearn.cluster import KMeans, SpectralClustering, DBSCAN
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import CRS

In [None]:
import networkx as nx
# Add the parent directory to the module search path (only once, so that
# re-running the cell does not append duplicates); presumably this is where
# the `pfcommon` module imported below lives - TODO confirm
if '..' not in sys.path:
    sys.path.append('..')
from pfcommon import SiteNode, LineSitesEdge

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn as sns
# Global figure style: one font size and one line width shared by all plots
fontsize = 9
lw = 0.75
matplotlib.rc('font', family='Arial', size=fontsize)
matplotlib.rc('axes', linewidth=0.75, labelsize=fontsize)
# tick labels use the same size as the axis labels
matplotlib.rc('xtick', labelsize=fontsize)
matplotlib.rc('ytick', labelsize=fontsize)
# major ticks on both axes, minor ticks on the y axis only
matplotlib.rc('xtick.major', width=lw, size=3)
matplotlib.rc('ytick.major', width=lw, size=3)
matplotlib.rc('ytick.minor', width=lw, size=1.5)

### The graph of the network

In [None]:
def build_graph(all_edges, vratings=None, graph_class=nx.MultiGraph, weight=None):
    """Build a networkx graph from a collection of edge objects.

    NOTE: a function with the same name but a different return value is
    defined again in a later cell of this notebook; after that cell has been
    run, the later definition is the one in effect.

    Parameters
    ----------
    all_edges : iterable
        Edge objects exposing `node1`, `node2` (each with a `name`),
        `length`, `vrating` and `name`.
    vratings : collection or None
        If not None, only the edges whose `vrating` is contained in it are
        added to the graph.
    graph_class : callable
        Called with no arguments to create the (empty) graph.
    weight : {None, 'length', 'vrating'}
        If 'length', each edge gets a `weight` attribute equal to its length;
        if 'vrating', equal to the reciprocal of its voltage rating;
        if None, no `weight` attribute is set.

    Returns
    -------
    The graph, with one edge between the names of `node1` and `node2` for
    each retained edge object; every edge carries the attributes `length`,
    `vrating` and `label` (and `weight`, if requested).

    Raises
    ------
    ValueError
        If `weight` is neither None nor one of the supported strings
        (raised when the first retained edge is processed).
    """
    G = graph_class()
    # Iterate over the function argument: the loop used to read the
    # notebook-level variable `edges`, silently ignoring `all_edges`.
    for e in all_edges:
        # skip self-loops and edges with an unwanted voltage rating
        if e.node1 != e.node2 and (vratings is None or e.vrating in vratings):
            kwargs = {'length': e.length, 'vrating': e.vrating, 'label': e.name}
            if weight is not None:
                if weight == 'length':
                    wgt = e.length
                elif weight == 'vrating':
                    wgt = 1/e.vrating
                else:
                    raise ValueError(f"Unknown weight '{weight}'")
                kwargs['weight'] = wgt
            G.add_edge(e.node1.name, e.node2.name, **kwargs)
    return G

Load the data about nodes and edges:

In [None]:
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source. The file is opened in a `with` block so that the
# handle is closed as soon as the data have been read.
with open('../V2020_Rete_Sardegna_2021_06_03cr_GRAPH.pkl', 'rb') as fid:
    nodes,edges = pickle.load(fid)

Build the graph and print some basic information about it:

In [None]:
# graph restricted to the edges with voltage rating 400, weighted by 1/vrating
G = build_graph(edges, vratings=[400], weight='vrating', graph_class=nx.MultiGraph)

print('No. of nodes: {}.'.format(len(G.nodes)))
print('No. of edges: {}.'.format(len(G.edges)))
# Report the connectivity *before* asserting it: with the assertion placed
# first, the "not connected" message below could never be printed.
graph_is_connected = nx.is_connected(G)
if graph_is_connected:
    print('The graph is connected.')
else:
    print('The graph is not connected. No. of connected components: {}'.\
          format(nx.number_connected_components(G)))
# same requirement as before: stop here if the graph is not connected
assert graph_is_connected, 'The graph is not connected'

#### Plots of the graph

Here we plot the graph including only a subset of edges, based on their voltage rating.

In [None]:
# position of each node as a (lon, lat) array, with the same keys as `nodes`
geo_pos = {key: np.array([node.lon, node.lat]) for key, node in nodes.items()}
# alternative positions computed by networkx's spectral layout
pos = nx.spectral_layout(G)

In [None]:
# drawing colour (RGB) and line width associated with each voltage rating
vratings_cmap = {150: [0.6,0.6,0.6], 220: [.8,0,.8], 400: [0,0,0]}
vratings_wmap = {400: 3, 220: 1.5, 150: 0.5}
# voltage rating of every edge, in the order in which G.edges iterates
edge_vratings = [int(G.get_edge_data(*e)['vrating']) for e in G.edges]
edge_colors = [vratings_cmap[rating] for rating in edge_vratings]
edge_widths = [vratings_wmap[rating] for rating in edge_vratings]

In [None]:
# draw the graph with the nodes placed at their (lon, lat) coordinates
fig, ax = plt.subplots(figsize=(6, 6))
nx.draw_networkx_edges(G, geo_pos, ax=ax,
                       edge_color=edge_colors, width=edge_widths)
nx.draw_networkx_nodes(G, geo_pos, ax=ax,
                       node_size=10, node_color='tab:green')
ax.axis('equal')
ax.set_axis_off()
fig.tight_layout()

In [None]:
# dense normalized Laplacian of G, using the `length` edge attribute as weight
M = nx.normalized_laplacian_matrix(G, weight='length').toarray()
# validate the matrix *before* the eigen-decomposition, so that NaNs are
# reported by this assertion rather than surfacing inside the solver
assert np.all(~np.isnan(M))
# G is an undirected graph, hence M is symmetric: eigvalsh returns real
# eigenvalues in ascending order, whereas the general-purpose eigvals may
# return spurious complex values due to round-off
eig = np.linalg.eigvalsh(M)

In [None]:
# show the sparsity pattern (i.e., which entries are non-zero) of the matrix M
plt.spy(M)

In [None]:
# histogram of the eigenvalues over [0, 2], with bins of width dx
dx = 0.1
bins = np.arange(0, 2+dx/2, dx)
n,_ = np.histogram(eig, bins=bins)
fig, ax = plt.subplots(figsize=(5, 3))
# one bar per bin, anchored at the left edge of the bin
ax.bar(bins[:-1], n, width=0.8*dx, align='edge', color='k')
ax.set(xlabel='Eigenvalue', ylabel='Count')
sns.despine()
fig.tight_layout()

In [None]:
N_clusters = 3
# Fixed seed: without it the label assignment changes from run to run and the
# figure below cannot be reproduced with Restart & Run All.
spectral_seed = 1000
# NOTE(review): with affinity='precomputed' the input matrix is interpreted as
# a similarity matrix, while M is the normalized Laplacian (shifted by 1 here)
# - confirm that this is intended.
clustering = SpectralClustering(N_clusters, affinity='precomputed',
                                random_state=spectral_seed).fit(M+1)
clustering.labels_

In [None]:
# same geographic plot as above, with the nodes coloured by cluster label
fig, ax = plt.subplots(figsize=(6, 6))
nx.draw_networkx_edges(G, geo_pos, ax=ax,
                       edge_color=edge_colors, width=edge_widths)
nx.draw_networkx_nodes(G, geo_pos, ax=ax, node_size=10,
                       node_color=clustering.labels_, cmap=plt.cm.coolwarm,
                       vmin=0, vmax=N_clusters-1)
ax.axis('equal')
ax.set_axis_off()
fig.tight_layout()

In [None]:
def plot_graph(G, pos=None, edges_vrat=None, edges_labels=None, communities=None, layout_algo='kamada_kawai',
               ax=None, figsize=(6,6), vratings_cmap=None, **layout_algo_args):
    """Draw the graph `G`, optionally grouping the edges by voltage rating and
    colouring the nodes by community.

    NOTE: `plot_graph` is defined again in a later cell of this notebook;
    after that cell has been run, the later definition is the one in effect.

    Parameters
    ----------
    G : networkx graph
        The graph to draw.
    pos : dict or None
        Node positions passed to the networkx drawing functions. If None, they
        are computed with `nx.<layout_algo>_layout(G, **layout_algo_args)`.
    edges_vrat : dict or None
        Maps each voltage rating to the list of edges to draw for that rating.
        If None, all the edges of `G` are drawn with a single call.
    edges_labels : dict or None
        If `edges_vrat` is None, the edge labels passed as they are to
        `nx.draw_networkx_edge_labels`. Otherwise, a dictionary mapping a
        voltage rating to the labels of the corresponding edges: only the
        ratings present in this dictionary are labelled.
    communities : iterable or None
        Groups of nodes, each drawn with a different colour of the `tab10`
        colormap. If None, all nodes are drawn in red.
    layout_algo : str
        Name (without the `_layout` suffix) of the networkx layout function
        used when `pos` is None.
    ax : matplotlib axes or None
        Axes to draw on; if None, a new figure of size `figsize` is created.
    figsize : tuple
        Size of the figure created when `ax` is None.
    vratings_cmap : callable or None
        Maps a voltage rating to an edge width; if None, `vrat/100` is used.
    """
    if pos is None:
        func = getattr(nx, layout_algo + '_layout')
        pos = func(G, **layout_algo_args)
    if ax is None:
        _,ax = plt.subplots(1, 1, figsize=figsize)
    if edges_vrat is None:
        nx.draw_networkx_edges(G, pos, ax=ax)
        if edges_labels is not None:
            nx.draw_networkx_edge_labels(G, pos, edge_labels=edges_labels, ax=ax, font_size=7)
    else:
        for vrat,lst in edges_vrat.items():
            width = vratings_cmap(vrat) if vratings_cmap is not None else vrat/100
            nx.draw_networkx_edges(G, pos, edgelist=lst, width=width, ax=ax)
            if edges_labels is not None and vrat in edges_labels:
                nx.draw_networkx_edge_labels(G, pos, edge_labels=edges_labels[vrat], ax=ax, font_size=6)
    if communities is None:
        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=20, node_color='tab:red')
    else:
        cmap = plt.get_cmap('tab10')
        for i,members in enumerate(communities):
            # cycle through the colormap: an index beyond its last entry would
            # otherwise be clipped, giving the same colour to all the
            # communities after the tenth
            col = [cmap(i % cmap.N)[:3]]
            # ax=ax was missing here, so the nodes were drawn on the current
            # axes instead of the requested ones
            nx.draw_networkx_nodes(G, pos, nodelist=members, node_size=30, node_color=col, ax=ax)
    ax.axis('off')

In [None]:
def build_graph(all_edges, vratings, graph_class=nx.MultiGraph, weight=None):
    """Build a graph from the edge objects having one of the given voltage ratings.

    Self-loops (edges whose two nodes are equal) are discarded.

    Parameters
    ----------
    all_edges : iterable
        Edge objects exposing `node1`, `node2` (each with a `name`),
        `length`, `vrating` and `name`.
    vratings : iterable
        The voltage ratings to keep.
    graph_class : callable
        Called with no arguments to create the (empty) graph.
    weight : {None, 'length', 'vrating'}
        Edge weight: the length of the edge, the reciprocal of its voltage
        rating, or no weight at all. Any other value raises an Exception when
        the first retained edge is processed.

    Returns
    -------
    G : the graph
    edges_vrat : dict mapping each voltage rating to the list of its edges,
        each stored as [name of node 1, name of node 2(, weight)]
    edges_labels : dict mapping each voltage rating to a dictionary
        {(name of node 1, name of node 2): first 12 characters of the edge name}
    """
    per_rating_edges = {rating: [] for rating in vratings}
    per_rating_labels = {rating: {} for rating in vratings}
    edge_list = []
    for e in all_edges:
        if not (e.node1 != e.node2 and e.vrating in vratings):
            continue
        src, dst = e.node1.name, e.node2.name
        per_rating_labels[e.vrating][(src, dst)] = e.name[:12]
        record = [src, dst]
        if weight == 'length':
            record.append(e.length)
        elif weight == 'vrating':
            record.append(1/e.vrating)
        elif weight is not None:
            raise Exception(f"Unknown weight '{weight}'")
        # the very same list is stored in both containers
        edge_list.append(record)
        per_rating_edges[e.vrating].append(record)
    G = graph_class()
    add_edges = G.add_edges_from if weight is None else G.add_weighted_edges_from
    add_edges(edge_list)
    return G, per_rating_edges, per_rating_labels

def plot_graph(G, pos=None, edges_vrat=None, edges_labels=None, communities=None, layout_algo='kamada_kawai',
               ax=None, figsize=(6,6), vratings_cmap=None, **layout_algo_args):
    """Draw the graph `G`, optionally grouping the edges by voltage rating and
    colouring the nodes by community.

    NOTE: `plot_graph` is also defined in an earlier cell of this notebook;
    this definition replaces the earlier one.

    Parameters
    ----------
    G : networkx graph
        The graph to draw.
    pos : dict or None
        Node positions passed to the networkx drawing functions. If None, they
        are computed with `nx.<layout_algo>_layout(G, **layout_algo_args)`.
    edges_vrat : dict or None
        Maps each voltage rating to the list of edges to draw for that rating.
        If None, all the edges of `G` are drawn with a single call.
    edges_labels : dict or None
        If `edges_vrat` is None, the edge labels passed as they are to
        `nx.draw_networkx_edge_labels`. Otherwise, a dictionary mapping a
        voltage rating to the labels of the corresponding edges: only the
        ratings present in this dictionary are labelled.
    communities : iterable or None
        Groups of nodes, each drawn with a different colour of the `tab10`
        colormap. If None, all nodes are drawn in red.
    layout_algo : str
        Name (without the `_layout` suffix) of the networkx layout function
        used when `pos` is None.
    ax : matplotlib axes or None
        Axes to draw on; if None, a new figure of size `figsize` is created.
    figsize : tuple
        Size of the figure created when `ax` is None.
    vratings_cmap : callable or None
        Maps a voltage rating to an edge width; if None, `vrat/100` is used.
    """
    if pos is None:
        func = getattr(nx, layout_algo + '_layout')
        pos = func(G, **layout_algo_args)
    if ax is None:
        _,ax = plt.subplots(1, 1, figsize=figsize)
    if edges_vrat is None:
        nx.draw_networkx_edges(G, pos, ax=ax)
        if edges_labels is not None:
            nx.draw_networkx_edge_labels(G, pos, edge_labels=edges_labels, ax=ax, font_size=7)
    else:
        for vrat,lst in edges_vrat.items():
            width = vratings_cmap(vrat) if vratings_cmap is not None else vrat/100
            nx.draw_networkx_edges(G, pos, edgelist=lst, width=width, ax=ax)
            if edges_labels is not None and vrat in edges_labels:
                nx.draw_networkx_edge_labels(G, pos, edge_labels=edges_labels[vrat], ax=ax, font_size=6)
    if communities is None:
        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=20, node_color='tab:red')
    else:
        cmap = plt.get_cmap('tab10')
        for i,members in enumerate(communities):
            # cycle through the colormap: an index beyond its last entry would
            # otherwise be clipped, giving the same colour to all the
            # communities after the tenth
            col = [cmap(i % cmap.N)[:3]]
            # ax=ax was missing here, so the nodes were drawn on the current
            # axes instead of the requested ones
            nx.draw_networkx_nodes(G, pos, nodelist=members, node_size=30, node_color=col, ax=ax)
    ax.axis('off')
#     fig.tight_layout()
def cmap(vrat):
    """Return the line width used to draw the edges with voltage rating `vrat`.

    Only the ratings 400, 220 and 150 are known: any other value raises a KeyError.
    """
    widths = {400: 4, 220: 1.5, 150: 0.5}
    return widths[vrat]

In [None]:
# position of each node as a (lon, lat) array, with the same keys as `nodes`
coords_pos = {key: np.array([node.lon, node.lat]) for key, node in nodes.items()}

In [None]:
G,edges_vrat,edges_lbls = build_graph(edges, [400,], weight='length', graph_class=nx.MultiGraph)
# Fixed seed: the Louvain algorithm is randomized, so without it the
# communities (and the figure) change from run to run.
louvain_seed = 1000
# comms = nx.community.greedy_modularity_communities(G)
comms = nx.community.louvain_communities(G, resolution=0.5, seed=louvain_seed)
plot_graph(G, pos=coords_pos, edges_vrat=edges_vrat, edges_labels=edges_lbls, communities=comms, figsize=(5,5))

In [None]:
G,edges_vrat,edges_lbls = build_graph(edges, [400,220], weight='length', graph_class=nx.MultiGraph)
# Fixed seed: the Louvain algorithm is randomized, so without it the
# communities (and the figure) change from run to run.
louvain_seed = 1000
# comms = nx.community.greedy_modularity_communities(G)
comms = nx.community.louvain_communities(G, resolution=2, seed=louvain_seed)
# only the edges with voltage rating 400 are labelled
plot_graph(G, pos=coords_pos, edges_vrat=edges_vrat, edges_labels={400: edges_lbls[400]},
           communities=comms, vratings_cmap=cmap)

In [None]:
# Keep the labels returned by this call: the third return value used to be
# discarded, so the `edges_lbls` used below was whatever a previously run
# cell had left in the namespace.
G,edges_vrat,edges_lbls = build_graph(edges, [400,220,150], weight='length', graph_class=nx.MultiGraph)
comms = nx.community.greedy_modularity_communities(G)
# comms = nx.community.louvain_communities(G, resolution=1.5)
fig,ax = plt.subplots(1, 1, figsize=(4,6))
# only the edges with voltage rating 400 are labelled
plot_graph(G, pos=coords_pos, edges_vrat=edges_vrat, edges_labels={400: edges_lbls[400]},
           communities=comms, vratings_cmap=cmap, ax=ax)
ax.axis('equal')
fig.tight_layout()

In [None]:
L = nx.normalized_laplacian_matrix(G)
# G is an undirected graph, hence L is symmetric: eigvalsh returns real
# eigenvalues in ascending order, whereas the general-purpose eigvals may
# return spurious complex values due to round-off
eig = np.linalg.eigvalsh(L.toarray())

In [None]:
# distribution (100 bins) of the eigenvalues stored in `eig`
plt.hist(eig, bins=100)

#### The coordinate reference systems

In [None]:
# EPSG codes of some coordinate reference systems (CRS):
# 3035: Lambert azimuthal equal area
# 3857: spherical Mercator projection
# 4326: world geodetic system 1984
source_coord_ref = 4326  # CRS assigned to the coordinates read from file
coord_ref = 3857         # CRS to which all the GeoDataFrames are converted

Load the file containing all coordinates:

In [None]:
info_file = os.path.join('..','V2020_Rete_Sardegna_2021_06_03cr_FULL_INFO.json')
# open the file in a `with` block so that the handle is closed after reading
with open(info_file) as fid:
    info = json.load(fid)

Make dataframes for sites, synchronous machines and terminals:

In [None]:
def make_df(info, obj_type, source_coord_ref, dst_coord_ref, remove_max=True):
    """Build a GeoDataFrame of points from the objects of a given type.

    Parameters
    ----------
    info : dict
        `info[obj_type]` maps the name of each object to a dictionary with a
        'coords' entry, read here as a (latitude, longitude) pair.
    obj_type : str
        Key of `info` selecting the objects to use.
    source_coord_ref, dst_coord_ref
        Coordinate reference system of the stored coordinates and EPSG code of
        the one the GeoDataFrame is converted to, respectively.
    remove_max : bool
        If True, discard the objects whose first coordinate equals the maximum
        over all objects.

    Returns
    -------
    The GeoDataFrame (columns 'name' and 'geometry') in the destination
    reference system, and the array of the original coordinates of the
    objects that were kept.
    """
    objects = info[obj_type]
    all_names = list(objects.keys())
    XY = np.array([obj['coords'] for obj in objects.values()])
    if remove_max:
        # this is how the terminals in Corsica are removed
        keep, = np.where(XY[:,0] != np.max(XY[:,0]))
    else:
        keep = np.arange(len(all_names))
    kept_XY = XY[keep]
    kept_names = [all_names[k] for k in keep]
    # shapely points are (x, y), i.e., (longitude, latitude)
    points = [Point(lon, lat) for lat, lon in zip(kept_XY[:,0], kept_XY[:,1])]
    gdf = gpd.GeoDataFrame(data={'name': kept_names, 'geometry': points})
    gdf.crs = CRS.from_user_input(source_coord_ref)
    return gdf.to_crs(epsg=dst_coord_ref), kept_XY

# one GeoDataFrame (plus the array of original coordinates) for each type of
# object; `remove_max` is left to its default value (True)
site_gdf, site_XY = make_df(info, 'ElmSite', source_coord_ref, coord_ref)
SM_gdf, SM_XY = make_df(info, 'ElmSym', source_coord_ref, coord_ref)
terminal_gdf, terminal_XY = make_df(info, 'ElmTerm', source_coord_ref, coord_ref)
site_gdf.head()

In [None]:
lines_info = info['ElmLne']
# only the lines that come with a non-empty list of coordinates are used
names = [name for name, line in lines_info.items() if len(line['coords']) > 0]
# the two columns of the stored coordinates are swapped before building the geometry
coords = [LineString(np.fliplr(lines_info[name]['coords'])) for name in names]
vrating = [lines_info[name]['vrating'] for name in names]
gdf = gpd.GeoDataFrame(data={'name': names, 'vrating': vrating, 'geometry': coords})
gdf.crs = CRS.from_user_input(source_coord_ref)
# convert to the reference system used for plotting
line_gdf = gdf.to_crs(epsg=coord_ref)
line_gdf.head()

### Cluster the sites

First of all, remove duplicate coordinates and perform K-Means clustering with a variable number of clusters to choose how many we shall use:

In [None]:
kmeans_seed = 1000
# unique coordinate pairs, and the index of their first occurrence in site_XY
XY,XY_index = np.unique(site_XY, return_index=True, axis=0)
max_N_clusters = 30
N_clusters = np.arange(max_N_clusters) + 1
# The inertia must be stored in a floating-point array: np.zeros_like(N_clusters)
# inherits the integer dtype of N_clusters, which truncates every value
# assigned to it.
inertia = np.zeros(N_clusters.size)
for i,nc in enumerate(N_clusters):
    km = KMeans(n_clusters=int(nc), random_state=kmeans_seed).fit(XY)
    inertia[i] = km.inertia_

In [None]:
# inertia as a function of the number of clusters, on a logarithmic y axis
fig, ax = plt.subplots(figsize=(4, 2.5))
ax.plot(N_clusters, inertia, 'k', lw=1.5)
ax.set_yscale('log')
ax.set(xlabel='# of clusters', ylabel='Inertia')
sns.despine()
fig.tight_layout()

10-15 clusters looks like a reasonable range. Perform the actual clustering and find the site closest to the center of each cluster:

In [None]:
N_clusters = 15
km = KMeans(n_clusters=N_clusters, random_state=kmeans_seed).fit(XY)
# for each cluster, the index (into XY) of the point closest to its center
site_idx = []
for label, center in enumerate(km.cluster_centers_):
    members = np.flatnonzero(km.labels_ == label)
    dist = np.linalg.norm(XY[members] - center, axis=1)
    site_idx.append(members[np.argmin(dist)])
# map the indexes back to the rows of site_XY (XY contains only unique rows)
selected_sites = np.sort(XY_index[site_idx])

Plot the results of the clustering with each cluster's center and selected site shown with a black dot and red cross, respectively:

In [None]:
fig,ax = plt.subplots(1, 1,  figsize=(3,4.5))
# 'tab20' provides 20 distinct colours: with the 12 colours of 'Paired', the
# indexes beyond the last entry were clipped and the last clusters all shared
# the same colour. A dedicated name is used so that the `cmap` helper defined
# earlier in the notebook (line widths) is not overwritten.
cluster_cmap = plt.get_cmap('tab20')
for i in range(N_clusters):
    idx, = np.where(km.labels_ == i)
    # the points of the cluster (the second column of XY is used as x)...
    ax.plot(XY[idx,1], XY[idx,0], '.', color=cluster_cmap(i % cluster_cmap.N), ms=4)
    # ... and the site selected for the cluster
    ax.plot(XY[site_idx[i],1], XY[site_idx[i],0], 'x', color='tab:red', ms=7, lw=1)
# centers of the clusters
ax.plot(km.cluster_centers_[:,1], km.cluster_centers_[:,0], 'ko',
        markerfacecolor='w', markeredgewidth=1.5, ms=5)
ax.axis('equal')
ax.axis('off')
fig.tight_layout()

Pick `N_buses` terminals that have coordinates:

### Map of Sardinia

First define the bounding box:

In [None]:
# Two corners of the bounding box, as Point(x, y) in the source reference
# system (presumably 'WS' = west-south and 'EN' = east-north corner, with
# x = longitude and y = latitude in degrees - TODO confirm)
limits = {'WS': Point(8, 38.75), 'EN': Point(10, 41)}
# one row per corner, indexed by the corner name
bbox = gpd.GeoDataFrame(data=limits.values(),
                        index=pd.Index(data=limits.keys(), name='name'),
                        columns=['geometry'])
bbox.crs = CRS.from_user_input(source_coord_ref)
# express the corners in the reference system used for plotting
bbox = bbox.to_crs(epsg=coord_ref)

Then load the geo data of Europe and keep only those coordinates that fall within the bounding box:

In [None]:
scale = 1 # 1 : 1,000,000
year = 2021
europe_folder = f'geography/ref-nuts-{year}-{scale:02d}m'
N_levels = 4
map_types = 'BN', #'LB' # BN: boundary, LB: label, RG: region
europe = {map_type: {} for map_type in map_types}
# extent of the bounding box along x and y, used to crop each map
x_range = slice(bbox.loc['WS','geometry'].x, bbox.loc['EN','geometry'].x)
y_range = slice(bbox.loc['WS','geometry'].y, bbox.loc['EN','geometry'].y)
for level in range(N_levels):
    for map_type in map_types:
        # the name of the label files does not contain the scale
        europe_file = f'{europe_folder}/NUTS_{map_type}_{year}_{coord_ref}_LEVL_{level}.json' \
            if map_type == 'LB' else \
            f'{europe_folder}/NUTS_{map_type}_{scale:02d}M_{year}_{coord_ref}_LEVL_{level}.json'
        nuts_gdf = gpd.read_file(europe_file)
        nuts_gdf.crs = CRS.from_user_input(coord_ref)
        # keep only the geometries that fall within the bounding box
        europe[map_type][level] = nuts_gdf.cx[x_range, y_range]

In [None]:
ms = 8
# figure size (the earlier 3.5 x 2.75 assignment was immediately overwritten)
width,height = 5,4
fig,ax = plt.subplots(1, 1, figsize=(width, height))
light_gray = .8 + np.zeros(3)
dark_gray = .2 + np.zeros(3)
# boundaries at levels 0 and 3
europe['BN'][0].plot(ax=ax, lw=1, color=dark_gray)
europe['BN'][3].plot(ax=ax, lw=0.5, color=light_gray)
# Lines, with a width proportional to their voltage rating. The loop variable
# has its own name, so that the `vrating` list built together with `line_gdf`
# is not overwritten; the leftover debugging print was removed.
for line_vrating in np.unique(line_gdf.vrating):
    idx = line_gdf.vrating == line_vrating
    line_gdf.loc[idx,:].plot(ax=ax, color=[.6,1,.6], lw=line_vrating/200)
site_gdf.iloc[selected_sites,:].plot(marker='o', ax=ax, markersize=ms,
                                     color=[1,0,1], label='Selected site')
site_gdf.plot(marker='o', ax=ax, markersize=ms/10, color=[.2,.2,.2], label='Site')
# terminal_gdf.plot(marker='o', ax=ax, markersize=ms/4, color=light_gray-0.3, label='Terminal')
# terminal_gdf.iloc[terminals_idx,:].plot(marker='o', ax=ax, markersize=ms*2, color='tab:red',
#                                         label='Selected terminal')
SM_gdf.plot(marker='s', ax=ax, markersize=ms, color='k', facecolor='k',
            lw=1, label='Synch. generator')
ax.legend(loc='lower left', bbox_to_anchor=(-0.85, 0.5, 0.5, 0.3), fontsize=8, frameon=False)
ax.axis('off')
# NOTE(review): dx=1 treats one data unit as one metre; in a Mercator
# projection distances are stretched away from the equator, so the length of
# the bar is only approximate - confirm that this is acceptable
ax.add_artist(ScaleBar(dx=1, fixed_value=50, fixed_units='km', location='lower right'))
fig.tight_layout()
# plt.savefig(f'Sardinia_geo_with_selected_terminals_{seed}.pdf')