In [1]:
import pandas as pd
import numpy as np
import json

import re

import utilities 

import networkx as nx

from itertools import combinations, product


In [2]:
# Empty directed multigraph; the following cells add neighborhood nodes and
# NEXT_TO edges to it.
G = nx.MultiDiGraph()

In [3]:
# Load the inputs for the neighborhood layer: the polygon dictionary (JSON),
# the aggregated property values and the aggregated crime counts (CSV).

with open('dicts/neighborhood_polys.json', 'r') as f:
    neighborhoods = json.loads(f.read())

properties_neighborhood_aggregated = pd.read_csv('../data/properties_neighborhood_aggregated.csv')

# The crime CSV is read with header=None, so its positional columns 0, 1, 2
# are given names right after loading.
crime_neighborhood_aggregated = pd.read_csv(
    '../data/crime_neighborhood_aggregated.csv', header=None
).rename(columns={0: "neighborhood", 1: "crime_type", 2: "count"})

# One frame per crime category, selected on the 'crime_type' column.
property_crime = crime_neighborhood_aggregated.loc[
    crime_neighborhood_aggregated['crime_type'] == "PROPERTY_CRIME"
]
violent_crime = crime_neighborhood_aggregated.loc[
    crime_neighborhood_aggregated['crime_type'] == "VIOLENT_CRIME"
]

In [4]:
# Build the neighborhood layer: one node per key of `neighborhoods`.
# Every node carries a single 'neighborhood' attribute dict, seeded with the
# node's name and filled in with property-value and crime figures below.
for hood_name in neighborhoods:
    G.add_node(hood_name, neighborhood={'name': hood_name})

# Attach the aggregated property value to each neighborhood listed in the CSV.
# A CSV name that is not a node of G raises KeyError.
for row in range(len(properties_neighborhood_aggregated)):
    hood_name = properties_neighborhood_aggregated['neighborhood'].iloc[row]
    G.nodes[hood_name]['neighborhood']['avg_property_value'] = (
        properties_neighborhood_aggregated['unit_zestimate'].iloc[row]
    )

# Nodes that have no row in the property CSV get the string 'nan' instead.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
property_val_not_in = list(set(G.nodes()) - set(properties_neighborhood_aggregated['neighborhood']))

for hood_name in property_val_not_in:
    G.nodes[hood_name]['neighborhood']['avg_property_value'] = str(np.nan)

# Attach the property-crime count to each neighborhood that has one.
for row in range(len(property_crime)):
    hood_name = property_crime['neighborhood'].iloc[row]
    G.nodes[hood_name]['neighborhood']['n_property_crimes'] = property_crime['count'].iloc[row]

# Nodes without a property-crime row also get the string 'nan'.
property_crime_not_in = list(set(G.nodes()) - set(property_crime['neighborhood']))

for hood_name in property_crime_not_in:
    G.nodes[hood_name]['neighborhood']['n_property_crimes'] = str(np.nan)

In [5]:
# NEXT_TO edges between neighborhoods: every unordered pair of nodes is tested
# with utilities.intersection, and a hit adds one edge in each direction.
# NOTE(review): the recorded run printed "TopologyException: side location
# conflict" messages during this loop; presumably some polygons are invalid —
# confirm how utilities.intersection handles them.

hoods = list(G.nodes())
hood_combs = list(combinations(hoods, 2))

for first_hood, second_hood in hood_combs:
    if not utilities.intersection(neighborhoods, first_hood, neighborhoods, second_hood):
        continue
    # The relationship is symmetric, so it is stored as two directed edges.
    for source, target in ((first_hood, second_hood), (second_hood, first_hood)):
        G.add_edge(source, target, NEXT_TO={})
            

TopologyException: side location conflict at -87.836580878737252 41.986396112425794
TopologyException: side location conflict at -87.664020791475323 41.947275771403895
TopologyException: side location conflict at -87.648784708051664 41.939987536474959
TopologyException: side location conflict at -87.624576496994393 41.896975362507106
TopologyException: side location conflict at -87.780022286293374 41.997413553897694
TopologyException: side location conflict at -87.780022286293374 41.997413553897694


In [6]:
# Convert the neighborhood graph via utilities.nx_to_neo_nodes, requesting both
# nodes and edges, and keep the result for the final export.
# NOTE(review): presumably the result is a list of Neo4j statements — confirm
# against utilities.nx_to_neo_nodes.

neo_neighborhoods = utilities.nx_to_neo_nodes(G, return_nodes=True, return_edges=True)

In [7]:
# Reset the graph: rebind G to a fresh, empty MultiDiGraph so the next cells
# can build the tract layer from scratch.

G = nx.MultiDiGraph()


In [8]:
# Load the inputs for the census-tract layer: the tract polygon dictionary
# (JSON) and the per-tract aggregated property values (CSV).

with open('dicts/tract_polys.json', 'r') as f:
    tracts = json.loads(f.read())

properties_tract_aggregated = pd.read_csv(
    '../data/properties_tract_aggregated.csv'
)


In [9]:
# Build the tract layer: one node per key of `tracts`.
# Every node carries a single 'tract' attribute dict, seeded with the node's
# name and filled in with the property value below.
for tract_name in tracts:
    G.add_node(tract_name, tract={'name': tract_name})

# Attach the aggregated property value to each tract listed in the CSV.
# A CSV tract that is not a node of G raises KeyError.
for row in range(len(properties_tract_aggregated)):
    tract_name = properties_tract_aggregated['tracts'].iloc[row]
    G.nodes[tract_name]['tract']['avg_property_value'] = (
        properties_tract_aggregated['unit_zestimate'].iloc[row]
    )

# Nodes that have no row in the property CSV get the string 'nan' instead.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
property_val_not_in = list(set(G.nodes()) - set(properties_tract_aggregated['tracts']))

for tract_name in property_val_not_in:
    G.nodes[tract_name]['tract']['avg_property_value'] = str(np.nan)


In [10]:
# NEXT_TO edges between tracts: every unordered pair of tract nodes is tested
# with utilities.intersection, and a hit adds one edge in each direction.

tract_names = list(G.nodes())
tract_combs = list(combinations(tract_names, 2))

for first_tract, second_tract in tract_combs:
    if not utilities.intersection(tracts, first_tract, tracts, second_tract):
        continue
    # The relationship is symmetric, so it is stored as two directed edges.
    for source, target in ((first_tract, second_tract), (second_tract, first_tract)):
        G.add_edge(source, target, NEXT_TO={})
            

In [11]:
# Convert the tract graph (nodes and NEXT_TO edges) via
# utilities.nx_to_neo_nodes and keep the result for the final export.
neo_tracts = utilities.nx_to_neo_nodes(G, return_nodes=True, return_edges=True)

In [12]:
# Reset the graph: rebind G to a fresh, empty MultiDiGraph and create a second
# empty graph H. The following cells fill G with neighborhood nodes and H with
# tract nodes, then copy H's nodes into G.

G = nx.MultiDiGraph()
H = nx.MultiDiGraph()

In [13]:
# Rebuild the neighborhood nodes in the fresh G: one node per key of
# `neighborhoods`, each with a 'neighborhood' attribute dict seeded with its name.
for hood_name in neighborhoods:
    G.add_node(hood_name, neighborhood={'name': hood_name})

# Attach the aggregated property value to each neighborhood listed in the CSV.
# A CSV name that is not a node of G raises KeyError.
for row in range(len(properties_neighborhood_aggregated)):
    hood_name = properties_neighborhood_aggregated['neighborhood'].iloc[row]
    G.nodes[hood_name]['neighborhood']['avg_property_value'] = (
        properties_neighborhood_aggregated['unit_zestimate'].iloc[row]
    )

# Neighborhoods without a property value; this list is kept in a global because
# the containment-edge cell excludes these names.
not_in_neighborhoods = list(set(G.nodes()) - set(properties_neighborhood_aggregated['neighborhood']))

# Mark the missing values with the string 'nan'.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
for hood_name in not_in_neighborhoods:
    G.nodes[hood_name]['neighborhood']['avg_property_value'] = str(np.nan)
              


In [14]:
# Build the tract nodes in the scratch graph H: one node per key of `tracts`,
# each with a 'tract' attribute dict seeded with its name.
for tract_name in tracts:
    H.add_node(tract_name, tract={'name': tract_name})

# Attach the aggregated property value to each tract listed in the CSV.
# A CSV tract that is not a node of H raises KeyError.
for row in range(len(properties_tract_aggregated)):
    tract_name = properties_tract_aggregated['tracts'].iloc[row]
    H.nodes[tract_name]['tract']['avg_property_value'] = (
        properties_tract_aggregated['unit_zestimate'].iloc[row]
    )

# Tracts without a property value; this list is kept in a global because the
# containment-edge cell excludes these names.
not_in_tracts = list(set(H.nodes()) - set(properties_tract_aggregated['tracts']))

# Mark the missing values with the string 'nan'.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
for tract_name in not_in_tracts:
    H.nodes[tract_name]['tract']['avg_property_value'] = str(np.nan)

# add resulting H graph nodes (with their attribute dicts) to G graph

G.add_nodes_from(H.nodes(data=True))


In [15]:
# CONTAINS / IS_WITHIN edges between neighborhoods and tracts.
# Only names that have a property value take part: the not_in_* lists computed
# in the previous cells are subtracted from the candidate names first.
# NOTE(review): the test used is utilities.intersection, the same one as for
# NEXT_TO — confirm it expresses containment rather than mere overlap.

hood_names = list(set(neighborhoods.keys()) - set(not_in_neighborhoods))
tract_names = list(set(tracts.keys()) - set(not_in_tracts))

combs = list(product(hood_names, tract_names))

for hood, tract in combs:
    if not utilities.intersection(neighborhoods, hood, tracts, tract):
        continue
    G.add_edge(hood, tract, CONTAINS={})   # neighborhood -> tract
    G.add_edge(tract, hood, IS_WITHIN={})  # tract -> neighborhood
            

In [16]:
# Convert only the edges of G (return_nodes=False); the result is appended to
# neo_neighborhoods and neo_tracts in the next cell.
neo_contain_relationships = utilities.nx_to_neo_nodes(G, return_nodes=False, return_edges=True)

In [17]:
# Combine the three conversion results with `+`, in order: neighborhoods,
# tracts, then the containment relationships.
# NOTE(review): assumes each operand is a list — confirm against
# utilities.nx_to_neo_nodes.
to_neo = neo_neighborhoods + neo_tracts + neo_contain_relationships


In [18]:
# Write every entry of to_neo to neo.txt, one entry per line.

with open('neo.txt', 'w') as neo_text:
    neo_text.writelines('%s\n' % listitem for listitem in to_neo)