In [1]:
# Import libraries
import networkx as nx
import numpy as np
import pandas as pd
import warnings
import igraph as ig
from ast import literal_eval
import pickle as pkl
import os
import re
import matplotlib.pyplot as plt


# Suppress specific warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Useful functions

def make_attr_dict(*args, **kwargs): 
    
    argCount = len(kwargs)
    
    if argCount > 0:
        attributes = {}
        for kwarg in kwargs:
            attributes[kwarg] = kwargs.get(kwarg, None)
        return attributes
    else:
        return None
    
# Load data
nodes_path = "../data/Montereale_Valcellina_gdf_nodes.csv"
edges_path = "../data/Montereale_Valcellina_all_lts.csv"
nodes_data = pd.read_csv(nodes_path)
nodes_data = nodes_data[nodes_data['lts'].isin([1, 2, 3, 4])].copy()
nodes_data['lts'] = nodes_data['lts'].astype(int)
edges_data = pd.read_csv(edges_path)

edges_data = edges_data[edges_data['lts'].isin([1, 2, 3, 4])].copy()
edges_data['lts'] = edges_data['lts'].astype(int)

# Adding info on the type
nodes_data['type_stress'] = np.where(nodes_data['lts'].isin([1, 2]), 'low', 
                                     np.where(nodes_data['lts'].isin([3, 4]), 'high', np.nan))
nodes_data = nodes_data.sort_values(by = "osmid").reset_index(drop = True)
nodes_data["attr_dict"] = nodes_data.apply(lambda x: make_attr_dict(category_node = x.type_stress, coord = x.geometry), axis = 1)

# Add edge ids (strings with "id1, id2" sorted (id1 <id2))
edges_data["edge_id"] = edges_data.apply(lambda x: str(sorted([x["u"], x["v"]])), axis = 1)

# Adding info on the type
edges_data['type_stress'] = np.where(edges_data['lts'].isin([1, 2]), 'low', 
                                     np.where(edges_data['lts'].isin([3, 4]), 'high', np.nan))

# Finding duplicates by ["u","v","osmid", "oneway", "edge_id", "length", "type_stress"]
# Simplifying network into undirected 
# Removing all parallel edges

edges_data = edges_data.drop_duplicates(subset = ["u", "v", "osmid", "length", "edge_id", "type_stress"],
                  keep = "first",
                  inplace = False,
                  ignore_index = True).copy()

# Add attribute dictionary (for nx)
edges_data["attr_dict"] = edges_data.apply(lambda x: make_attr_dict(length = x.length, 
                                                    category_edge = x.type_stress,
                                                    edge_id = x.edge_id,
                                                    coord = x.geometry,
                                                    intnodes = []), # intnodes attribute: for storing simplification info on interstitial nodes 
                             axis = 1)

# sort by "left" node (id1 < id2 - to control order of tuple keys in nx)
edges_data["order"] = edges_data.apply(lambda x: np.min([x["u"], x["v"]]), axis = 1)
edges_data = edges_data.sort_values(by = "order").reset_index(drop = True)
edges_data["orig"] = edges_data.apply(lambda x: np.min([x["u"], x["v"]]), axis = 1)
edges_data["dest"] = edges_data.apply(lambda x: np.max([x["u"], x["v"]]), axis = 1)
edges_data = edges_data.drop(columns = ["order", "u", "v", "osmid"]) # instead of "u" and "v",
# we will use "origin" and "destination" where osmid(origin) < osmid (destination)!
print("these is edges data:", edges_data.head())



these is edges data:    key  lanes                                    name       highway  maxspeed  \
0    0    NaN          ['Largo Duomo', 'Piazza Roma']     secondary       NaN   
1    0    NaN          ['Largo Duomo', 'Piazza Roma']     secondary       NaN   
2    0    NaN  ['Via Marziano Ciotti', 'Piazza Roma']     secondary       NaN   
3    0    NaN                         ['Piazza Roma']  unclassified       NaN   
4    0    NaN                         ['Piazza Roma']  unclassified       NaN   

                                            geometry  length rule  lts  group  \
0  LINESTRING (782693.903763619 5118386.029298064...  50.625  m12    4      1   
1  LINESTRING (782685.2558167536 5118435.77659598...  50.625  m12    4      1   
2  LINESTRING (782695.9299623996 5118354.87035418...  31.325  m12    4      1   
3  LINESTRING (782713.8450469453 5118390.13402393...  20.291  m10    3      1   
4  LINESTRING (782693.903763619 5118386.029298064...  20.291  m10    3      1   

   ..

In [2]:
# CREATE NX OBJECTS

# make multinetwork containing ALL edges
mnw = nx.Graph()
mnw.add_nodes_from(nodes_data.loc[:,["osmid", "attr_dict"]].itertuples(index = False))
mnw.add_edges_from(edges_data.loc[:,["orig", "dest", "attr_dict"]].itertuples(index = False))

# save to pickle ("original" nw = non-simplified, with disconnected components)
nx.write_gpickle(mnw, "../data/mnw.gpickle")

# Sample Nodes and Edges
print("\nFirst few nodes and their attributes:")
for node, data in list(mnw.nodes(data=True))[:5]:
    print(node, data)

print("\nFirst few edges and their attributes:")
for edge in list(mnw.edges(data=True))[:5]:
    print(edge)

# KEEP ONLY LARGEST CONNECTED COMPONENT

# make list of connected components
cd_nodeset = []

for comp in nx.connected_components(mnw):
    
    cd_nodeset = cd_nodeset + [comp]
    
n = len(cd_nodeset)
    
print("number of disconnected components on mnw: " + str(n))

cd_size = [None]*n
cd_network = [None]*n
cd_coord_dict = [None]*n
cd_coord_list = [None]*n
cd_type_stress = [None]*n

for i in range(n):
    cd_size[i] = len(cd_nodeset[i])
    cd_network[i] = nx.subgraph(mnw, cd_nodeset[i])
    cd_coord_dict[i] = nx.get_edge_attributes(cd_network[i], "coord")
    cd_coord_list[i] = [cd_coord_dict[i][key] for key in cd_coord_dict[i].keys()]
    cd_type_stress[i] = nx.get_edge_attributes(cd_network[i], "category_edge")

# make df with info on connected components
comps = pd.DataFrame({
    'nodeset': cd_nodeset, 
    'size': cd_size,
    'network': cd_network,
    'coord': cd_coord_list,
    'type_stress': cd_type_stress})

del(cd_nodeset, cd_size, cd_network, cd_coord_list, cd_type_stress, cd_coord_dict)

# lcc is the size of the largest connected component
lcc = np.max(comps["size"])

print("size of lcc: " + str(lcc))

comps = comps.sort_values(by = "size", ascending = False).reset_index(drop = True)

# DEFINE MNWL as largest connected component
mnwl_nodes = comps["nodeset"][0]
mnwl_edges = edges_data.loc[edges_data.apply(lambda x: x.orig in mnwl_nodes, axis = 1),:].copy().reset_index(drop = True)
mnwl = nx.subgraph(mnw, mnwl_nodes)

# save as pickle ("original" nw = non-simplified, but only LCC)
nx.write_gpickle(mnwl, "../data/mnwl.gpickle")



First few nodes and their attributes:
275047641 {'category_node': 'high', 'coord': 'POINT (12.6615432 46.1603739)'}
275047646 {'category_node': 'high', 'coord': 'POINT (12.6603584 46.1576454)'}
275047789 {'category_node': 'high', 'coord': 'POINT (12.6516924 46.1014144)'}
275047807 {'category_node': 'high', 'coord': 'POINT (12.6684513 46.1461234)'}
275048618 {'category_node': 'high', 'coord': 'POINT (12.6611966 46.1630372)'}

First few edges and their attributes:
(275047641, 912303265, {'length': 50.625, 'category_edge': 'high', 'edge_id': '[275047641, 912303265]', 'coord': 'LINESTRING (782685.2558167536 5118435.776595989, 782686.9408203791 5118417.20532012, 782691.20776897 5118394.488988528, 782693.903763619 5118386.029298064)', 'intnodes': []})
(275047641, 6532857567, {'length': 31.325, 'category_edge': 'high', 'edge_id': '[275047641, 6532857567]', 'coord': 'LINESTRING (782693.903763619 5118386.029298064, 782695.5478483913 5118378.066608992, 782695.7932620606 5118371.408813569, 78269

In [3]:
import itertools
import time
# make a copy of mnwl - H will be simplified and manipulated throughout while loop
H = mnwl.copy()

# set parameters for the while loop
simplify_further = True
run = 0

# make dictionary of edge attributes of mnwl
mnwl_typedict = nx.get_edge_attributes(mnwl, "category_edge")

# loop runs while there are interstitial nodes on the nw
while simplify_further:
    
    run += 1
    print("Run " + str(run) + ", " + time.ctime())
    
    # get all nodes from nw
    points_all_list = sorted(list(H.nodes))

    # get all node degrees
    degrees_all_list = [None]*len(points_all_list)
    for i in range(len(points_all_list)):
        degrees_all_list[i] = H.degree(points_all_list[i])

    # make df with node + degree info + remove (T/F) + types (of incident edges)
    pointsall = pd.DataFrame({
        "osmid": points_all_list, 
        "d": degrees_all_list, 
        "remove": None, 
        "types": None})
    
    # get edge attributes (of CURRENT nw) as dict
    catdict = nx.get_edge_attributes(H, "category_edge")
    # get edge type information (car/bike/multi) from attribute dictionary
    pointsall["types"] = pointsall.apply(lambda x: 
                                     [ catdict[tuple(sorted(edge))] for edge in H.edges(x.osmid) if tuple(sorted(edge)) in catdict], 
                                     axis = 1)


    # split df in "endpoints" and d2 nodes
    pointsend = pointsall[pointsall["d"]!=2].copy().reset_index(drop = True)
    pointsd2 = pointsall[pointsall["d"]==2].copy().reset_index(drop = True)

    # non-d2 nodes: all of them are remove=False (to keep)
    pointsend["remove"] = False
    # d2 nodes: the ones that have same 2 edge types incident are remove=True
    pointsd2["remove"] = pointsd2.apply(lambda x: x.types[0] == x.types[1] if len(x.types) > 1 else False, axis=1)

    # final result: 2 dfs - nodes_final and nodes_interstitial

    # nodes_final = nodes to keep (either they have d!=2 or they have d==2 but 2 different edge types)
    nodes_final = pd.concat([pointsend, pointsd2[pointsd2["remove"]==False].copy()]).reset_index(drop = True)

    # nodes_interstitial = nodes to remove (d2 nodes with same 2 edge types incident)
    nodes_interstitial = pointsd2[pointsd2["remove"]==True].copy().reset_index(drop = True)
    nodes_interstitial["types"] = nodes_interstitial.apply(lambda x: x.types[0], axis = 1) # remove second-edge info (is same as first)

    del(pointsall, catdict, degrees_all_list, points_all_list, pointsend, pointsd2)

    # save info about endpoint/interstitial to node attributes on mnwl
    for i in range(len(nodes_interstitial)):
        H.nodes[nodes_interstitial.loc[i, "osmid"]]["category_point"] = "int"
    for i in range(len(nodes_final)):
        H.nodes[nodes_final.loc[i, "osmid"]]["category_point"] = "end"

    # make df with interstitial edges
    eint = nodes_interstitial.copy() 
    eint["orig"] = eint.apply(lambda x: sorted([n for n in H.neighbors(x.osmid)])[0], axis = 1)
    eint["dest"] = eint.apply(lambda x: sorted([n for n in H.neighbors(x.osmid)])[1], axis = 1)

    # add info on edge lengths
    lendict = nx.get_edge_attributes(H, "length")
    eint["length_new"] = eint.apply(lambda x: 
                                    np.sum(
                                        [lendict[tuple(sorted(edge))] for edge in H.edges(x.osmid)]
                                    ), 
                                    axis = 1)

    stack = list(np.unique(eint["osmid"]))
    
    Hprior = H.copy() # make a copy of the nw in each simplification step
    # to use for checking for neighbours for removing from stack
    
    # interstitial nodes dictionary - to keep track of nodes that are removed by "while stack"
    intnodesdict = nx.get_edge_attributes(H, "intnodes")
    # edge coordinate dictionary - to merge linestrings of aggregated edges
    edgecoorddict = nx.get_edge_attributes(H, "coord")
    
    while stack:

        mynode = stack.pop()
        
        for n in nx.neighbors(Hprior, mynode): # remove neighbors from ORIGINAL nw
            if n in stack:
                stack.remove(n)
                #print("removed "+ str(n))
                
        # u and v are the neighbors of "mynode"
        u = eint.loc[eint["osmid"]==mynode]["orig"].values[0]
        v = eint.loc[eint["osmid"]==mynode]["dest"].values[0]
        
        # counter (to break out of loop if it is not increased)
        nodes_removed = 0
        
        if (u,v) not in H.edges: # only if neighbors are not neighbors themselves - 
            # to avoid roundabouts from disappearing
            
            # get info on interstitional nodes (for deriving edge coordinates later on)
            myintnodes = [intnodesdict[tuple(sorted(edge))] for edge in H.edges(mynode)]
            myintnodes.append([mynode])
            myintnodes = [x for x in list(itertools.chain.from_iterable(myintnodes)) if x]
            
            H.add_edge(u_of_edge = u,
                        v_of_edge = v,
                        length = eint.loc[eint["osmid"]==mynode]["length_new"].values[0],
                        category_edge = eint.loc[eint["osmid"]==mynode]["types"].values[0],
                        intnodes = myintnodes,
                        edge_id = str(sorted([u, v])),
                        coord=edgecoorddict.get(tuple(sorted([u, mynode])), []) + edgecoorddict.get(tuple(sorted([v, mynode])), []))

            H.remove_node(mynode)
            nodes_removed += 1
    
    if nodes_removed == 0:
        
        simplify_further = False # to break out of loop
                
        # save simplified network to H gpickle
        nx.write_gpickle(H, "../data/H.gpickle") 
        
        print("Done")

print("First few nodes:", list(H.nodes())[:5])
print("First few edges:", list(H.edges(data=True))[:5])

Run 1, Mon Oct 30 23:22:15 2023
Run 2, Mon Oct 30 23:22:15 2023
Run 3, Mon Oct 30 23:22:15 2023
Run 4, Mon Oct 30 23:22:15 2023
Done
First few nodes: [275047641, 275047646, 275047789, 275047807, 275048618]
First few edges: [(275047641, 912303265, {'length': 50.625, 'category_edge': 'high', 'edge_id': '[275047641, 912303265]', 'coord': 'LINESTRING (782685.2558167536 5118435.776595989, 782686.9408203791 5118417.20532012, 782691.20776897 5118394.488988528, 782693.903763619 5118386.029298064)', 'intnodes': []}), (275047641, 6532857567, {'length': 31.325, 'category_edge': 'high', 'edge_id': '[275047641, 6532857567]', 'coord': 'LINESTRING (782693.903763619 5118386.029298064, 782695.5478483913 5118378.066608992, 782695.7932620606 5118371.408813569, 782695.8524641837 5118359.231211402, 782695.9299623996 5118354.870354182)', 'intnodes': []}), (275047641, 912285611, {'length': 90.04799999999999, 'category_edge': 'high', 'intnodes': [912304324, 912303407], 'edge_id': '[275047641, 912285611]', 'co

In [20]:
# make "bikeable" network from H (excluding high stress edges)
bikeable_nodes = [node for node in H.nodes if "category_node" in H.nodes[node] and H.nodes[node]["category_node"] != "high"]
H_lowlts_induced = H.subgraph(bikeable_nodes).copy() 

# induced subgraph - still contains the highstress edges that lie between multi nodes; - exclude them:
banw = H_lowlts_induced.copy()
banw.remove_edges_from([edge for edge in banw.edges if banw.edges[edge]["category_edge"]=="high"])

nx.write_gpickle(banw, "../data/B.gpickle") 

# conversion to igraph
h = ig.Graph.from_networkx(H)
h.write_pickle("../data/h.pickle")
b = ig.Graph.from_networkx(banw)
b.write_pickle("../data/b.pickle")

# eids: "conversion table" for edge ids from igraph to nx 
eids_nx = [tuple(sorted(literal_eval(h.es(i)["edge_id"][0]))) for i in range(len(h.es))]
eids_ig = [i for i in range(len(h.es))]
eids_conv = pd.DataFrame({"nx": eids_nx, "ig": eids_ig})

# nids: "conversion table" for node ids from igraph to nx
nids_nx = [h.vs(i)["_nx_name"][0] for i in range(len(h.vs))]
nids_ig = [i for i in range(len(h.vs))]
nids_conv = pd.DataFrame({"nx": nids_nx, "ig": nids_ig})

eids_conv.to_pickle("../data/eids_conv.pickle")
nids_conv.to_pickle("../data/nids_conv.pickle")

In [21]:
# extract edge and node attributes as dictionaries

tnd = nx.get_node_attributes(H, "category_node") # type of nodes dictionary tnd
ted = nx.get_edge_attributes(H, "category_edge") # type of edges dictionary tnd
led = nx.get_edge_attributes(H, "length") # length of edges dictionary led
cnd = nx.get_node_attributes(H, "coord") # coordinates of nodes dictionary cnd
ced = nx.get_edge_attributes(H, "coord") # coordinates of edges dictionary ced

# make data frame of ebc with:
ebc = pd.DataFrame({"edge_ig": [e.index for e in h.es]}) # igraph edge ID
ebc["edge_nx"] = ebc.apply(lambda x: tuple(literal_eval(h.es[x.edge_ig]["edge_id"])), axis = 1) # nx edge ID
ebc["length"] = ebc.apply(lambda x: h.es[x.edge_ig]["length"], axis = 1) # length in meters

# compute ebcs:
ebc["ebc_inf"] = h.edge_betweenness(directed = False, cutoff = None, weights = "length") # "standard" ebc
ebc["ebc_lambda"] = h.edge_betweenness(directed = False, cutoff = 2500, weights = "length") # ebc only including *paths* below 2500m
print(ebc.head())

ebc.to_pickle("../data/ebc.pickle")

   edge_ig                 edge_nx   length    ebc_inf  ebc_lambda
0        0   (33344145, 245980310)  106.472   734114.0      9521.0
1        1  (33344145, 3325048776)  127.559   733786.0      9833.0
2        2   (33344145, 245980471)  211.667     2293.0       974.0
3        3  (33344145, 2925389440)  154.501     2329.0      1076.0
4        4  (82550591, 8477370231)   19.042  1198831.0     25497.0


In [23]:
#Identify and Prioritize
import math

# Custom Functions
# computes pathlength by nx - handling error message if nodes are not connected/not part of the network
def pathlength_if_connected(my_nw, my_o, my_d):
    try:
        return(nx.dijkstra_path_length(my_nw, my_o, my_d, weight = "length"))
    except:
        return(math.inf)

def extract_coords_from_linestring(linestring):
    # Extract coordinates using regex
    matches = re.findall(r'(\d+\.\d+) (\d+\.\d+)', linestring)
    return [(float(y), float(x)) for x, y in matches]

def get_path_coords(my_path, my_coorddict):
    pathcoords = []
    for edge_id in my_path:
        sorted_edge_id = tuple(sorted(edge_id))
        if sorted_edge_id in my_coorddict:
            linestring = my_coorddict[sorted_edge_id]
            edge_coords = extract_coords_from_linestring(linestring)
            pathcoords.append(edge_coords)
        else:
            print(f"Key {sorted_edge_id} not found in my_coorddict!")
    return pathcoords

# Identify all the gaps:

# shortest_path_list = list of shortest paths for all possible contact-to-contact node combinations

shortest_path_list = []

if not os.path.exists("../data/chunks"):
    os.mkdir("../data/chunks")

# ALL CONTACT NODES FROM THE NETWORK
nodestack = [node.index for node in h.vs()]

count = 0

while nodestack:
    
    node = nodestack.pop()
    
    # ADDING SHORTEST PATHS FROM CURRENT NODE TO ALL OTHER NODES REMAINING IN THE STACK 
    shortest_path_list = shortest_path_list + h.get_shortest_paths(node, to=nodestack, weights="length", mode="out", output = "epath")
    
    # CHUNKWISE SAVING OF RESULTS (TO BE READ IN LATER)
    if len(shortest_path_list) >= 2*10**5:
        with open("../data/chunks/c" + str(count) + ".pickle", 'wb') as handle:
            pkl.dump(shortest_path_list, handle, protocol=pkl.HIGHEST_PROTOCOL)
        del(shortest_path_list)
        count += 1
        shortest_path_list = []

# SAVING LAST CHUNK (WITH LEN < 2*10**5)
with open("../data/c" + str(count) + ".pickle", 'wb') as handle:
    pkl.dump(shortest_path_list, handle, protocol=pkl.HIGHEST_PROTOCOL)

del(shortest_path_list)

### LOOP THROUGH ALL SHORTEST PATHS; KEEP ONLY THE PATHS THAT CONSIST ONLY OF HIGH STRESS LINKS

# cs: set of car edges
cs = set()
for edge in eids_conv["ig"]:
    if h.es[edge]["category_edge"] == "high":
        cs.add(edge)

mygaps = []
    
# CHUNKWISE:

mychunks = ["../data/chunks/" + filename for filename in os.listdir("../data/chunks/")]

for chunk in mychunks:
    
    with open(chunk, 'rb') as f:
        pathlist = pkl.load(f)

    # adding the item to the gaplist only if it consists of only-highstress-edges
    gaplist = [item for item in pathlist if set(item).issubset(cs)]

    mygaps = mygaps + gaplist
    
    del(gaplist, pathlist)
    
print(len(mygaps), "gaps found")

# remove chunks (not needed anymore)
for chunk in mychunks:
    os.remove(chunk)
os.rmdir("../data/chunks")

# CONVERT GAPS LIST TO DF AND ADD LENGTH, ORIGIN, DESTINATION

# to df
mygaps = pd.DataFrame({"path": mygaps})

# add length
mygaps["length"] = mygaps.apply(lambda x: np.sum([h.es[e]["length"] for e in x.path]), axis = 1)

# add path in nx edge id
mygaps["path_nx"] = mygaps.apply(lambda x: 
                                 [tuple(sorted(literal_eval(h.es[edge]["edge_id"]))) for edge in x.path], 
                                 axis = 1)


# add origin and destination nodes
# (separate procedure for gaps with edgenumber (enr) == 1 vs. gaps with enr > 1)
mygaps["enr"] = mygaps.apply(lambda x: len(x.path), axis = 1)
mygaps["o_nx"] = None
mygaps["d_nx"] = None
mygaps.loc[mygaps["enr"]==1, "o_nx"] = mygaps[mygaps["enr"] == 1].apply(lambda x: x.path_nx[0][0], axis = 1)
mygaps.loc[mygaps["enr"]==1, "d_nx"] = mygaps[mygaps["enr"] == 1].apply(lambda x: x.path_nx[0][1], axis = 1)
mygaps.loc[mygaps["enr"]!=1, "o_nx"] = mygaps[mygaps["enr"]!=1].apply(lambda x: set(x.path_nx[0]).difference(x.path_nx[1]).pop(), axis = 1)
mygaps.loc[mygaps["enr"]!=1, "d_nx"] = mygaps[mygaps["enr"]!=1].apply(lambda x: set(x.path_nx[-1]).difference(x.path_nx[-2]).pop(), axis = 1)
mygaps.drop(columns = "enr", inplace = True)

# add coordinates for  plotting
mygaps["gapcoord"] = mygaps.apply(lambda x: get_path_coords(x.path_nx, ced), axis = 1)


1079958 gaps found


KeyboardInterrupt: 

In [25]:
#Discard "parallel paths" (gaps connected on low stress network with a detour factor below d_min)

D_min = 1.5 # set minimum detour factor for path to count as gap

# compute detour factor on bike network
mygaps["length_b"] = mygaps.apply(lambda x: pathlength_if_connected(banw, x.o_nx, x.d_nx), axis = 1)
mygaps["detour"] = mygaps["length_b"]/mygaps["length"]
mygaps = mygaps[mygaps["detour"]>=D_min].reset_index(drop = True)
print(mygaps.describe())

             length  length_b   detour
count  14437.000000   14437.0  14437.0
mean    3420.104838       inf      inf
std     2394.433034       NaN      NaN
min        4.631000       inf      inf
25%     1469.910000       NaN      NaN
50%     2888.456000       NaN      NaN
75%     5115.456000       NaN      NaN
max    12763.125000       inf      inf


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


In [235]:
#Prioritize
# compute benefit metric B_star(g)
mygaps["B_star"] = mygaps.apply(lambda x: 
                                        np.sum([ebc.loc[ebc["edge_ig"]==i, "ebc_lambda"] * \
                                                h.es[i]["length"] \
                                                for i in x.path]), 
                                        axis = 1)
mygaps["B"] = mygaps["B_star"] / mygaps["length"] # B(g) normed to length

# sort gaps by descending benefit metric
mygaps = mygaps.sort_values(by = "B", ascending = False).reset_index(drop = True)

mygaps.to_pickle("../data/mygaps.pickle") 
