In [1]:
import sys
# Extend the module search path; presumably this is what lets the local modules imported
# below (utilities, pynx_to_neo4j, create_graph_model) resolve.
# NOTE(review): this is an absolute, user-specific directory, so the imports will likely fail
# on any other machine or checkout location. Consider deriving the path relative to the
# notebook (or installing the project as a package) once the repo layout is confirmed.
sys.path.append('/Users/chrisolen/Documents/uchicago_courses/optimization/project/urban-demand-allocation')

import pandas as pd
import numpy as np
import json
import re
import networkx as nx
from itertools import combinations, product

import utilities
import pynx_to_neo4j
from create_graph_model import aggregate_features 

In [2]:
# Read the two reformatted geo-shape JSON files: one for neighborhoods, one for tracts.
# Each file is parsed in full and its handle is closed before the next one is opened.
with open('../../data/geo_shape_files/neighborhood_reformatted.json', 'r') as neighborhood_file:
    neighborhoods = json.load(neighborhood_file)

with open('../../data/geo_shape_files/tract_reformatted.json', 'r') as tract_file:
    tracts = json.load(tract_file)

In [3]:
# Load the standardized residential records and aggregate them.
# The call requests the "mean" aggregation for the "zestimate" and "lotSize" columns and
# returns two results, unpacked here as the neighborhood-level and tract-level tables.
properties = pd.read_csv("../../data/residential_standardized.csv")
(
    properties_neighborhood_aggregated,
    properties_tract_aggregated,
) = aggregate_features.aggregate_features(
    properties,
    "mean",
    "../../data/geo_shape_files",
    "zestimate",
    "lotSize",
)


In [4]:
# Load the standardized crime records and aggregate them.
# The call requests the "count" aggregation for the "primary_type" column and returns two
# results, unpacked here as the neighborhood-level and tract-level tables.
crime = pd.read_csv("../../data/crime_standardized.csv")
(
    crime_neighborhood_aggregated,
    crime_tract_aggregated,
) = aggregate_features.aggregate_features(
    crime,
    "count",
    "../../data/geo_shape_files",
    "primary_type",
)


In [5]:
# Neighborhood layer of the graph, built in three steps:
#   1. start a new graph whose 'neighborhood' nodes carry every aggregated property column;
#   2. pass that graph back in (existing_graph=G) with every aggregated crime column;
#   3. add bidirectional "NEXT_TO" edges, using utilities.intersection as the relationship
#      test and the neighborhood polygons on both sides.
# NOTE(review): the recorded output of this cell prints several
# "TopologyException: side location conflict" messages during step 3. Neighborhood pairs
# involved in those messages may be missing NEXT_TO edges -- verify the polygon validity.
G = pynx_to_neo4j.create_pynx_nodes(
    properties_neighborhood_aggregated,
    node_category='neighborhood',
    attribute_columns=list(properties_neighborhood_aggregated.columns),
)
G = pynx_to_neo4j.create_pynx_nodes(
    crime_neighborhood_aggregated,
    node_category='neighborhood',
    attribute_columns=list(crime_neighborhood_aggregated.columns),
    existing_graph=G,
)
G = pynx_to_neo4j.add_edges_to_pynx(
    G,
    "NEXT_TO",
    utilities.intersection,
    ["polygon_name_1", "polygon_name_2"],
    "neighborhood",
    bidirectional=True,
    polygon_dict_1=neighborhoods,
    polygon_dict_2=neighborhoods,
)


100%|██████████| 98/98 [00:00<00:00, 252637.86it/s]
100%|██████████| 2/2 [00:00<00:00, 977.81it/s]
100%|██████████| 98/98 [00:00<00:00, 372658.02it/s]
100%|██████████| 1/1 [00:00<00:00, 817.76it/s]
 10%|█         | 480/4753 [00:00<00:00, 4775.41it/s]

creating nodes
creating attributes
creating nodes
creating attributes
iterating through all possible edge relationships


TopologyException: side location conflict at -87.648784708051664 41.939987536474959
 61%|██████    | 2885/4753 [00:00<00:00, 5591.88it/s]TopologyException: side location conflict at -87.780022286293374 41.997413553897694
 71%|███████   | 3371/4753 [00:00<00:00, 5153.54it/s]TopologyException: side location conflict at -87.664020791475323 41.947275771403895
TopologyException: side location conflict at -87.624576496994393 41.896975362507106
 81%|████████  | 3842/4753 [00:00<00:00, 4997.56it/s]TopologyException: side location conflict at -87.836580878737252 41.986396112425794
TopologyException: side location conflict at -87.780022286293374 41.997413553897694
100%|██████████| 4753/4753 [00:00<00:00, 5228.90it/s]


In [6]:
# Tract layer of the graph, added onto the existing graph G:
#   1. 'tract' nodes carrying every aggregated property column;
#   2. bidirectional "NEXT_TO" edges between tracts, using utilities.intersection as the
#      relationship test and the tract polygons on both sides.
# Only the property aggregation is attached here; crime_tract_aggregated is not used
# in this cell.
G = pynx_to_neo4j.create_pynx_nodes(
    properties_tract_aggregated,
    node_category='tract',
    attribute_columns=list(properties_tract_aggregated.columns),
    existing_graph=G,
)
G = pynx_to_neo4j.add_edges_to_pynx(
    G,
    "NEXT_TO",
    utilities.intersection,
    ["polygon_name_1", "polygon_name_2"],
    "tract",
    bidirectional=True,
    polygon_dict_1=tracts,
    polygon_dict_2=tracts,
)





100%|██████████| 801/801 [00:00<00:00, 387099.61it/s]
100%|██████████| 2/2 [00:00<00:00, 146.31it/s]
  1%|          | 1703/320400 [00:00<00:18, 17029.37it/s]

creating nodes
creating attributes
iterating through all possible edge relationships


100%|██████████| 320400/320400 [00:15<00:00, 21126.45it/s]


In [7]:
# Cross-level edges, each added in one direction only (bidirectional=False):
#   - "CONTAINS":  neighborhood -> tract
#   - "IS_WITHIN": tract -> neighborhood
# NOTE(review): both relationships are tested with utilities.intersection, so a tract that
# merely overlaps or touches a neighborhood presumably also gets these edges -- confirm
# that this is the intended meaning of CONTAINS / IS_WITHIN.
G = pynx_to_neo4j.add_edges_to_pynx(
    G,
    "CONTAINS",
    utilities.intersection,
    ["polygon_name_1", "polygon_name_2"],
    "neighborhood",
    "tract",
    bidirectional=False,
    polygon_dict_1=neighborhoods,
    polygon_dict_2=tracts,
)
G = pynx_to_neo4j.add_edges_to_pynx(
    G,
    "IS_WITHIN",
    utilities.intersection,
    ["polygon_name_1", "polygon_name_2"],
    "tract",
    "neighborhood",
    bidirectional=False,
    polygon_dict_1=tracts,
    polygon_dict_2=neighborhoods,
)




  4%|▎         | 2807/78498 [00:00<00:06, 12402.91it/s]

iterating through all possible edge relationships


100%|██████████| 78498/78498 [00:06<00:00, 11524.67it/s]
  3%|▎         | 2091/78498 [00:00<00:07, 10264.52it/s]

iterating through all possible edge relationships


100%|██████████| 78498/78498 [00:06<00:00, 11326.40it/s]


In [8]:
# Translate the assembled graph into Neo4j queries, requesting both the node queries
# and the edge queries.
neo = pynx_to_neo4j.pynx_to_neo4j_queries(
    G,
    return_nodes=True,
    return_edges=True,
)

100%|██████████| 899/899 [00:00<00:00, 106231.28it/s]
100%|██████████| 9332/9332 [00:00<00:00, 222832.77it/s]

creating node queries
creating edge queries





In [12]:
# Save the generated queries to a text file, one entry of `neo` per line.
# NOTE(review): this cell's recorded execution count (12) does not directly follow the
# previous cell's (8); re-run the notebook from a fresh kernel to confirm that `neo` is
# the value produced by the conversion cell.
with open('graph_models/neo.txt', 'w') as neo_text:
    for listitem in neo:
        # Wrap the entry in a 1-tuple: with a bare `'%s\n' % listitem`, an entry that is
        # itself a tuple would be unpacked as the format arguments (raising TypeError, or
        # silently dropping the tuple wrapper for a 1-tuple). Non-tuple entries are
        # written exactly as before.
        neo_text.write('%s\n' % (listitem,))