### Import

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import shapefile
import re
import pickle
import time
import networkx as nx
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
from tqdm import tqdm
from collections import Counter
from scipy.spatial.distance import cdist
from scipy.spatial.distance import pdist

### Load Tract Data

In [None]:
# Census-tract table; later cells read its `node_name` column to locate each
# tract in the graph's node list.
tract_csv_path = '../data/acs/nyc_tracts/nyc_tracts_census_geo_node.csv'
tractdata = pd.read_csv(tract_csv_path)

### Load Chain Data

In [None]:
# Chain-location table; later cells read its `nodename` column to locate each
# chain in the graph's node list.
chain_csv_path = '../data/tables/chains_with_nodes.csv'
chaindata = pd.read_csv(chain_csv_path)

### Load Graph

In [None]:
# Load the pickled graph bundle and unpack its members into module-level names.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load graph files that come from a trusted source.
with open("../data/graphs/nyc.p", "rb") as graph_file:
    X = pickle.load(graph_file)
O = X['O']
N = X['N']
edges = X['edges']
nodes = X['nodes']
nodenames = X['nodenames']
edgedatabase = X['edgedatabase']
G = X['G']
# Plain Python list of node names; a node's position in this list is the
# integer index used for it in the cells below.
Nlist = X['Nlist'].tolist()
# Write the bundle back to the same path. The context manager guarantees the
# file is flushed and closed (the previous bare open() never closed it).
# NOTE(review): X is not modified between load and dump, so this rewrite looks
# redundant and risks truncating the input if interrupted -- consider removing.
with open('../data/graphs/nyc.p', "wb") as graph_file:
    pickle.dump(X, graph_file)

### Create Tract Index List

In [None]:
# Map every tract row to the position of its node in Nlist.
#
# Build a name -> position lookup once instead of calling Nlist.index() per
# row (which rescans the whole list each time). setdefault keeps the FIRST
# position of a duplicated name, matching what list.index() returns.
# A tract whose node name is not in Nlist now raises KeyError (list.index()
# raised ValueError).
tract_node_position = {}
for position, name in enumerate(Nlist):
    tract_node_position.setdefault(name, position)

tractlist = []
for raw_node_name in tractdata['node_name']:
    # The tract's node_name carries one extra leading ' | '-separated field;
    # drop it so the remainder matches the naming used in Nlist.
    tract_node_name = ' | '.join(raw_node_name.split(' | ')[1:])
    tractlist.append(tract_node_position[tract_node_name])
tractlist = np.array(tractlist)
tractdata['node_index'] = tractlist

### Create Chain Index List

In [None]:
# Map every chain row to the position of its node in Nlist.
#
# Build a name -> position lookup once instead of calling Nlist.index() per
# row (which rescans the whole list each time). setdefault keeps the FIRST
# position of a duplicated name, matching what list.index() returns.
# A chain whose node name is not in Nlist now raises KeyError (list.index()
# raised ValueError).
chain_node_position = {}
for position, name in enumerate(Nlist):
    chain_node_position.setdefault(name, position)

chainlist = np.array(
    [chain_node_position[chain_node_name] for chain_node_name in chaindata['nodename']]
)
chaindata['node_index'] = chainlist

### Find Isolated Nodes

In [None]:
#print(list(nx.isolates(G)))

### Random Tract -> Chain Distance Computing

In [None]:
# Smoke test: try 100 random chain -> tract routes and print the node pairs
# for which the graph has no route.
for i in tqdm(range(100)):
    # Draw a random ROW of each list, then translate it to the graph node
    # index stored there. Previously the row number itself was passed to
    # networkx as if it were a node id, so arbitrary nodes were being tested.
    chainIndex = int(chainlist[np.random.randint(len(chainlist))])
    tractIndex = int(tractlist[np.random.randint(len(tractlist))])
    try:
        path = nx.shortest_path(G, source=chainIndex, target=tractIndex)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Only routing failures are reported; anything else (including
        # KeyboardInterrupt) is no longer swallowed by a bare except.
        print(chainIndex, tractIndex)

### Compute Length from Path

In [None]:
def computeLength(path,G,N):
    """Return the summed 'weight' of the edges traversed by `path`.

    path: sequence of node keys; each consecutive pair (u, v) must be
          addressable as G[u][v]['weight'].
    G:    graph (or nested mapping) holding the edge weights.
    N:    unused; kept so existing callers do not need to change.

    A path with fewer than two nodes has no edges and yields 0.
    """
    total = 0
    # Walk consecutive node pairs: (path[0], path[1]), (path[1], path[2]), ...
    for u, v in zip(path, path[1:]):
        total = total + G[u][v]['weight']
    return total

### Test Reachability of chain nodes

In [None]:
# Build D, where D[i, j] is the summed edge weight along the path networkx
# returns from tract i's node to chain j's node.
# NOTE(review): entries for chain nodes that are never reached stay at 0, which
# is indistinguishable from a genuine zero-length path -- confirm downstream
# code expects that.
D = np.zeros((len(tractlist),len(chainlist)))
P = {}

# node index -> every column of D (row of chainlist) located at that node.
# Several chains can share a node, hence a list. Built once so the inner loop
# does a dict lookup instead of scanning chainlist for every reachable node.
chain_columns = {}
for column, node in enumerate(chainlist.tolist()):
    chain_columns.setdefault(node, []).append(column)

starttime = time.time()
for i in tqdm(range(len(tractlist))):

    # All paths from this tract's node: {target node: [node, ..., target]}.
    # NOTE(review): no `weight=` argument is given, so networkx picks paths
    # with the fewest edges, yet the length below sums edge weights. If D is
    # meant to be the weighted shortest distance, pass weight='weight'.
    sourceIndex = tractlist[i]
    paths = nx.shortest_path(G, source=sourceIndex)

    # Keep only paths that end on a chain-occupied node
    for targetIndex, path in paths.items():
        columns = chain_columns.get(targetIndex)
        if columns is None:
            continue
        # One length per target, written to every chain sharing that node.
        D[i, columns] = computeLength(path,G,N)

endtime = time.time()
elapsed = endtime - starttime
print(elapsed)

### Store Intermediate

In [None]:
# Persist the tract x chain distance matrix. Note that X is rebound here and no
# longer refers to the graph bundle loaded earlier.
X = {}
# X['P'] = P
X['D'] = D
# Context manager guarantees the pickle is flushed and the handle closed.
with open('../data/paths/nyc_path.p', "wb") as path_file:
    pickle.dump(X, path_file)

In [None]:
# Time an all-pairs shortest-path computation over the whole graph.
# The start stamp was previously assigned to a misspelled name (`startime`),
# so `elapsed` was measured from the `starttime` left over from an earlier
# cell and the printed duration was wrong.
starttime = time.time()
paths = nx.shortest_path(G)
endtime = time.time()
elapsed = endtime - starttime
print(elapsed)