This notebook is used to randomly select a certain number of buses (among those that have GPS coordinates) and save the corresponding variable names to a JSON file to be used in training a CNN for momentum estimation.

In [None]:
import os
import sys
import json
import numpy as np
from numpy.random import RandomState, SeedSequence, MT19937
from sklearn.cluster import KMeans
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from pyproj import CRS

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn as sns
# Typography and line widths shared by every figure in this notebook.
fontsize = 9
lw = 0.75
matplotlib.rc('font', family='Arial', size=fontsize)
matplotlib.rc('axes', linewidth=0.75, labelsize=fontsize)
# Both tick axes get the same label size and major-tick geometry.
for _tick_axis in ('xtick', 'ytick'):
    matplotlib.rc(_tick_axis, labelsize=fontsize)
    matplotlib.rc(f'{_tick_axis}.major', width=lw, size=3)
# Only the y axis has its minor ticks customised.
matplotlib.rc('ytick.minor', width=lw, size=1.5)

#### The coordinate reference systems

In [None]:
# EPSG codes of interest:
#   3035 -> Lambert azimuthal equal area
#   3857 -> spherical Mercator projection
#   4326 -> world geodetic system 1984
# Raw coordinates are expressed in `source_coord_ref`; everything is
# reprojected to `coord_ref` before being used.
source_coord_ref, coord_ref = 4326, 3857

Load the file containing all coordinates:

In [None]:
# JSON file with the coordinates of all objects, located one directory up.
info_file = os.path.join('..','V2020_Rete_Sardegna_2021_06_03cr_FULL_INFO.json')
# Open the file in a context manager so that the handle is closed as soon as
# parsing is done instead of being left to the garbage collector.
with open(info_file) as fid:
    info = json.load(fid)

Make dataframes for sites, synchronous machines and terminals:

In [None]:
def make_df(info, obj_type, source_coord_ref, dst_coord_ref, remove_max=True):
    """Build a reprojected GeoDataFrame for all objects of type ``obj_type``.

    Parameters
    ----------
    info : mapping
        ``info[obj_type]`` maps each object name to a record whose
        ``'coords'`` entry holds the object's coordinates.
        NOTE(review): the first value is treated as latitude and the second
        as longitude when the points are built -- confirm against the
        producer of the file.
    obj_type : str
        Key of ``info`` selecting the group of objects to use.
    source_coord_ref
        Coordinate reference system the raw coordinates are expressed in.
    dst_coord_ref
        EPSG code of the coordinate reference system of the returned frame.
    remove_max : bool
        When true, drop every object whose first coordinate equals the
        largest first coordinate found (used to discard the terminals
        located in Corsica).

    Returns
    -------
    tuple
        The GeoDataFrame (columns ``name`` and ``geometry``) reprojected to
        ``dst_coord_ref``, and the raw coordinate rows of the kept objects.
    """
    entries = info[obj_type]
    all_names = list(entries)
    XY = np.array([record['coords'] for record in entries.values()])

    if remove_max:
        first_coord = XY[:, 0]
        keep = np.flatnonzero(first_coord != first_coord.max())
    else:
        keep = np.arange(len(all_names))

    kept_XY = XY[keep]
    gdf = gpd.GeoDataFrame(data={
        'name': [all_names[k] for k in keep],
        # Point expects (x, y), hence the second coordinate goes first.
        'geometry': [Point(row[1], row[0]) for row in kept_XY],
    })
    gdf.crs = CRS.from_user_input(source_coord_ref)
    return gdf.to_crs(epsg=dst_coord_ref), kept_XY

# All three frames share the same source and destination reference systems.
_crs_pair = (source_coord_ref, coord_ref)
site_gdf, site_XY = make_df(info, 'ElmSite', *_crs_pair, remove_max=True)
SM_gdf, SM_XY = make_df(info, 'ElmSym', *_crs_pair, remove_max=True)
terminal_gdf, terminal_XY = make_df(info, 'ElmTerm', *_crs_pair, remove_max=True)
# Last expression of the cell: show the first rows of the sites frame.
site_gdf.head()

### Cluster the sites

First of all, remove duplicate coordinates and perform K-Means clustering with a variable number of clusters to choose how many we shall use:

In [None]:
# Seed passed to every KMeans instance so that the runs are reproducible.
kmeans_seed = 1000
# Drop duplicated coordinate rows; XY_index[k] is the row of site_XY where
# the k-th unique row first occurs.
XY,XY_index = np.unique(site_XY, return_index=True, axis=0)
max_N_clusters = 30
N_clusters = np.arange(max_N_clusters) + 1
# The inertia is a floating-point quantity: allocate a float array.
# np.zeros_like(N_clusters) would inherit the integer dtype of N_clusters and
# silently truncate every value stored in it (small inertias would become 0,
# which cannot be shown on a logarithmic axis).
inertia = np.zeros(N_clusters.size, dtype=float)
for i,nc in enumerate(N_clusters):
    km = KMeans(n_clusters=nc, random_state=kmeans_seed).fit(XY)
    inertia[i] = km.inertia_

In [None]:
# Inertia as a function of the number of clusters, on a logarithmic y axis.
fig, ax = plt.subplots(figsize=(4, 2.5))
ax.plot(N_clusters, inertia, 'k', lw=1.5)
ax.set(yscale='log', xlabel='# of clusters', ylabel='Inertia')
sns.despine()
fig.tight_layout()

Between 10 and 15 clusters looks like a reasonable choice. Perform the actual clustering and find the site closest to the center of each cluster:

In [None]:
N_clusters = 15
km = KMeans(n_clusters=N_clusters, random_state=kmeans_seed).fit(XY)
# For every cluster, keep the row of XY that lies closest to its center.
site_idx = []
for label, center in enumerate(km.cluster_centers_):
    members = np.flatnonzero(km.labels_ == label)
    offsets = XY[members] - center
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    site_idx.append(members[np.argmin(distances)])
# Map the rows of the de-duplicated array back to rows of site_XY.
selected_sites = np.sort(XY_index[site_idx])

Plot the results of the clustering with each cluster's center and selected site shown with a black dot and red cross, respectively:

In [None]:
fig,ax = plt.subplots(1, 1,  figsize=(3,4.5))
# Use a qualitative colormap with 20 entries: 'Paired' only has 12, and an
# integer index past the end of a colormap is clamped to its last color, so
# with 15 clusters the last four would all be drawn in the same color.
cmap = plt.get_cmap('tab20')
for i in range(N_clusters):
    idx, = np.where(km.labels_ == i)
    # Wrap the index so that colors cycle (rather than saturate) should the
    # number of clusters ever exceed the number of colors available.
    cluster_color = cmap(i % cmap.N)
    # Second column on the x axis, first column on the y axis.
    ax.plot(XY[idx,1], XY[idx,0], '.', color=cluster_color, ms=4)
    # Red cross: the site selected for this cluster.
    ax.plot(XY[site_idx[i],1], XY[site_idx[i],0], 'x', color='tab:red', ms=7, lw=1)
# White-filled black circles: the cluster centers.
ax.plot(km.cluster_centers_[:,1], km.cluster_centers_[:,0], 'ko',
        markerfacecolor='w', markeredgewidth=1.5, ms=5)
ax.axis('equal')
ax.axis('off')
fig.tight_layout()

Pick `N_buses` terminals that have coordinates:

### Map of Sardinia

First define the bounding box:

In [None]:
# Two opposite corners of the bounding box, given as Point(x, y) in the
# source reference system ('WS' and 'EN' are used as row labels).
limits = {'WS': Point(8, 38.75), 'EN': Point(10, 41)}
corner_names = pd.Index(data=limits.keys(), name='name')
bbox = gpd.GeoDataFrame(data=limits.values(),
                        index=corner_names,
                        columns=['geometry'])
# Declare the reference system of the corners, then reproject them to the
# one used for the maps.
bbox.crs = CRS.from_user_input(source_coord_ref)
bbox = bbox.to_crs(epsg=coord_ref)

Then load the geo data of Europe and keep only those coordinates that fall within the bounding box:

In [None]:
scale = 1 # 1 : 1,000,000
year = 2021
europe_folder = f'geography/ref-nuts-{year}-{scale:02d}m'
N_levels = 4
# BN: boundary, LB: label, RG: region (only boundaries are loaded; add 'LB'
# to the tuple to load the labels as well)
map_types = ('BN',)

# Corners of the bounding box, used to crop every layer.
corner_WS = bbox.loc['WS','geometry']
corner_EN = bbox.loc['EN','geometry']
x_range = slice(corner_WS.x, corner_EN.x)
y_range = slice(corner_WS.y, corner_EN.y)

# europe[map_type][level] holds the cropped layer of that type and level.
europe = {}
for map_type in map_types:
    europe[map_type] = {}
    for level in range(N_levels):
        # The file names of the label layers do not contain the scale.
        if map_type != 'LB':
            europe_file = f'{europe_folder}/NUTS_{map_type}_{scale:02d}M_{year}_{coord_ref}_LEVL_{level}.json'
        else:
            europe_file = f'{europe_folder}/NUTS_{map_type}_{year}_{coord_ref}_LEVL_{level}.json'
        layer = gpd.read_file(europe_file)
        layer.crs = CRS.from_user_input(coord_ref)
        europe[map_type][level] = layer.cx[x_range, y_range]

In [None]:
ms = 8
width,height = 3.5,2.75
light_gray = np.full(3, .8)
dark_gray = np.full(3, .2)

fig, ax = plt.subplots(figsize=(width, height))
# Level-0 boundaries in thick dark gray, level-3 boundaries in thin light gray.
for nuts_level, line_width, shade in ((0, 1, dark_gray), (3, 0.5, light_gray)):
    europe['BN'][nuts_level].plot(ax=ax, lw=line_width, color=shade)
# Selected sites first, then all sites as much smaller markers.
site_gdf.iloc[selected_sites].plot(marker='o', ax=ax, markersize=ms,
                                   color=[1,0,.5], label='Selected site')
site_gdf.plot(marker='o', ax=ax, markersize=ms/10, color=[.2,.2,.2], label='Site')
# Optional layers, currently disabled:
# terminal_gdf.plot(marker='o', ax=ax, markersize=ms/4, color=light_gray-0.3, label='Terminal')
# terminal_gdf.iloc[terminals_idx,:].plot(marker='o', ax=ax, markersize=ms*2, color='tab:red',
#                                         label='Selected terminal')
# SM_gdf.plot(marker='s', ax=ax, markersize=ms, color='k', facecolor='k',
#             lw=1, label='Synch. generator')
ax.legend(loc='lower left', bbox_to_anchor=(-0.85, 0.5, 0.5, 0.3), fontsize=8, frameon=False)
ax.axis('off')
scale_bar = ScaleBar(dx=1, fixed_value=50, fixed_units='km', location='lower right')
ax.add_artist(scale_bar)
fig.tight_layout()
# plt.savefig(f'Sardinia_geo_with_selected_terminals_{seed}.pdf')