In [64]:
import json
import csv
import numpy as np
import pandas as pd
import os
import pickle

## Load GeoJSON

In [9]:
#load json file
#The encoding is passed explicitly: JSON/GeoJSON text is UTF-8, while open() would
#otherwise fall back to the platform's locale encoding.
with open("../Snowplow_data_orig/20170316.geojson", "r", encoding="utf-8") as f:
    data = json.load(f)

In [12]:
#number of data points (entries in the GeoJSON 'features' list) in the loaded file
len(data['features'])

44768

In [13]:
#sample data point: display one arbitrary feature (index 3456) to inspect its structure
data['features'][3456]

{'geometry': {'coordinates': [-76.11051293100526, 43.054771610537394],
  'type': 'Point'},
 'properties': {'FID': 9457,
  'activity_type': '    Stop Moving',
  'address': '1244 CANAL ST SYRACUSE 13210                      ',
  'date_fixed': '2017-03-16T07:25:18.000Z',
  'date_fixed1': '2017-03-16T07:25:19.000Z',
  'latitude': 43.0547716105374,
  'longitude': -76.1105129310053,
  'repair_type': 'SNOW & ICE',
  'truck_name': '285             '},
 'type': 'Feature'}

## Paths to files

In [63]:
#Each entry below is a [csv_path, geojson_path] pair for one day of snowplow data.
def _csv_geojson_pairs(days):
    """Return a [csv_path, geojson_path] pair for every YYYYMMDD day string in days."""
    return [['../Snowplow/' + day + '.csv', '../Snowplow_data_orig/' + day + '.geojson']
            for day in days]

march_files = _csv_geojson_pairs(['20170313', '20170314', '20170315', '20170316'])

jan_files_1_4 = _csv_geojson_pairs(['20180101', '20180102', '20180103', '20180104'])

jan_files_6_9 = _csv_geojson_pairs(['20180106', '20180107', '20180108', '20180109'])


## Create CSVs (note: only one file path list is processed per run; change the list manually to convert the others)

In [62]:
def write_snowplow_csvs(file_pairs):
    """Convert snowplow GeoJSON files into CSV files.

    file_pairs: iterable of [csv_path, geojson_path] pairs (e.g. march_files).

    For every pair the GeoJSON is loaded and one CSV row is written per feature,
    using values from the feature's 'properties'. Values are written as-is
    (no stripping of padding). An existing CSV at csv_path is overwritten.
    """
    header = ["truck_name", "date_fixed", "date_fixed2", "address", "longitude", "latitude"]
    #property read for each header column, in order; the 'date_fixed2' column holds 'date_fixed1'
    property_keys = ["truck_name", "date_fixed", "date_fixed1", "address", "longitude", "latitude"]
    for csv_path, geojson_path in file_pairs:
        #JSON/GeoJSON text is UTF-8, so do not depend on the platform's default encoding
        with open(geojson_path, "r", encoding="utf-8") as geojson_file:
            features = json.load(geojson_file)['features']
        #'with' closes the CSV even if a row fails; newline='' lets the csv module
        #control line endings itself
        with open(csv_path, 'w', newline='') as csv_file:
            csvwriter = csv.writer(csv_file)
            csvwriter.writerow(header)
            for feature in features:
                properties = feature["properties"]
                csvwriter.writerow([properties[key] for key in property_keys])


#Only the March files are converted here; call again with another path list for the other days.
write_snowplow_csvs(march_files)
    

## Convert any multiple coordinate features to a bounding box (2 coords)
#### In order to map points to a corresponding line segment, we must reduce every feature of the City Streets GeoJSON file to a bounding box of just two coords (i.e. the upper-right and lower-left corners). There are 1691 features that have more than 2 coords (i.e. they are multi-point paths rather than a single straight line).

In [65]:
#Load the city street segments. The encoding is passed explicitly: JSON/GeoJSON text is
#UTF-8, while open() would otherwise fall back to the platform's locale encoding.
with open('City_Streets_2011.geojson', encoding='utf-8') as f_in:
    data = json.load(f_in)

In [66]:
#How many features (i.e. road segments) the loaded 'features' list contains
len(data['features'])

5650

In [67]:
#Count the features whose geometry holds exactly 2 coords
cnt = sum(1 for feature in data['features']
          if len(feature['geometry']['coordinates']) == 2)
print(cnt)

3959


In [68]:
#Count the features whose geometry holds more than 2 coords (e.g. circular road segments)
cnt = sum(1 for feature in data['features']
          if len(feature['geometry']['coordinates']) > 2)
print(cnt)

1691


In [188]:
#Sample path of four [lon, lat] coords used to try out the bounding-box function
test_list = [
    [-76.1505499459102, 43.0820257566861],
    [-76.1493841368104, 43.0827927099946],
    [-76.148718240551, 43.0833314101819],
    [-76.148097930336, 43.0838404394666],
]

In [69]:
def create_bound_box(arr):
    """Reduce a path of coordinates to the two corners of its bounding box.

    arr: iterable of coordinates; element [0] of each is treated as the longitude
         and element [1] as the latitude.
    Returns [[max_lon, max_lat], [min_lon, min_lat]].
    Raises ValueError (from max) when arr is empty.
    """
    #read the input exactly once so any iterable works, not only lists
    points = [(point[0], point[1]) for point in arr]
    longitudes = [lon for lon, _ in points]
    latitudes = [lat for _, lat in points]
    upper_right = [max(longitudes), max(latitudes)]
    lower_left = [min(longitudes), min(latitudes)]
    return [upper_right, lower_left]
        
        

In [190]:
#test the function on the sample path test_list; the result is displayed as the cell output
create_bound_box(test_list)

[[-76.148097930336, 43.0838404394666], [-76.1505499459102, 43.0820257566861]]

In [191]:
#making sure 'OBJECTID' is unique, so it can be used as a key for the features.
#Counting the IDs above 5000 only shows how far the ID range extends; it cannot detect
#duplicates, so uniqueness is asserted explicitly before the count is printed.
object_ids = [feature['properties']['OBJECTID'] for feature in data['features']]
assert len(set(object_ids)) == len(object_ids), "duplicate OBJECTID values found"
cnt = sum(1 for object_id in object_ids if object_id > 5000)
print(cnt)

650


In [70]:
#NOTE: NOT USED! Initial implementation as a list. Dictionary below is better.
#for each feature (road segment), if a path of multiple coords, convert to a bounding box of 2 coords. Store results in a
#list of format: [ [OBJECTID, [ [lon1,lat1],[lon2,lat2] ] ] ]
#This is a mapping from the OBJECTID in the GeoJSON file to its bounding box

#objID_to_bnd_box = []
#for each in data['features']:
#   if len(each['geometry']['coordinates']) == 2:
#        objID_to_bnd_box.append([each['properties']['OBJECTID'],each['geometry']['coordinates']])
#    else:
#        objID_to_bnd_box.append([each['properties']['OBJECTID'],create_bound_box(each['geometry']['coordinates'])])


In [82]:
#USED
#For each feature (road segment), reduce its coordinates to a bounding box of 2 coords.
#This is a mapping from the OBJECTID in the GeoJSON file to its bounding box.
#Every feature goes through create_bound_box, including those that already have exactly
#2 coords: their raw endpoints are stored in drawing order, so they are not guaranteed
#to be the [[max_lon, max_lat], [min_lon, min_lat]] corners that all other entries use.
objID_to_bnd_box = {}
for feature in data['features']:
    object_id = feature['properties']['OBJECTID']
    objID_to_bnd_box[object_id] = create_bound_box(feature['geometry']['coordinates'])


In [84]:
#spot-check: display the bounding box stored for OBJECTID 5002
objID_to_bnd_box[5002]

[[-76.1162203864598, 43.0189868728735], [-76.1163481970452, 43.0179750500172]]

In [89]:
#check that every bounding box in the dictionary holds exactly two coordinates.
#Prints 'T' only when all values pass and 'F' otherwise (previously 'T' was printed
#unconditionally, even right after an 'F').
if all(len(v) == 2 for v in objID_to_bnd_box.values()):
    print('T')
else:
    print('F')

T


In [91]:
#number of entries in the mapping (one per distinct OBJECTID)
len(objID_to_bnd_box)

5650

## Save ID--->Bound Box dictionary as pickle

In [92]:
#Write inside a 'with' block so the file is flushed and closed as soon as the dump finishes.
with open("ID_to_bound_box", "wb") as pickle_file:
    pickle.dump(objID_to_bnd_box, pickle_file)

## Load ID--->Bound Box dictionary from pickle

In [93]:
#NOTE: pickle.load can execute arbitrary code; only load pickle files that you created yourself.
#The 'with' block closes the file as soon as the load finishes.
with open("ID_to_bound_box", "rb") as pickle_file:
    objID_to_bnd_box = pickle.load(pickle_file)

In [None]:
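#INPUT: 1) DataFrame that includes coordinates at certain times
#       2) file containing each OBJECTID and its corresponding bounding box info
#OUTPUT: Pairs of the form [time_plowed,OBJECTID], e.g. [[time_plowed,OBJECTID],[time_plowed,OBJECTID],......]
#        (TODO: confirm the container type; this note originally called it a dictionary)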


In [None]:
#Pickle template
def pickle_to_file(object_to_pickle, filename):
    """Pickle object_to_pickle into the file named filename.

    The file is opened in binary write mode (an existing file is overwritten) and is
    closed as soon as the dump finishes. Defining the template as a function keeps
    this cell runnable; as a bare statement it referenced an undefined name.

    Usage: pickle_to_file(objID_to_bnd_box, "ID_to_bound_box")
    """
    with open(filename, "wb") as pickle_file:
        pickle.dump(object_to_pickle, pickle_file)