In [15]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas
import pysal as ps
import os
import json

In [16]:
# Make sure the output directory tree exists before anything is written.
# os.makedirs creates missing parent directories too and, with
# exist_ok=True, is a no-op when the directory is already there, so this
# cell can be re-run safely and also works from a clean checkout.
# The loop variable is named `path` so that, as before, it ends up bound to
# the Results directory once the cell has run.
for path in (
    'ProgressDataOutputArchive/AMOEBA',
    'ProgressDataOutputArchive/AMOEBA/DataPrepare',
    'ProgressDataOutputArchive/AMOEBA/Results',
):
    os.makedirs(path, exist_ok=True)

In [17]:
# Read the cleaned dataset and discard its 'X' column in one expression
# (a KeyError is still raised if the column is absent), then preview it.
data = pd.read_csv('ProgressDataOutputArchive/data_clean.csv').drop(columns='X')
data.head()

Unnamed: 0,InterIdNew,Weekend,TimeSlot,Left,Right,Straight,Latitude,Longitude
0,Atlanta_0,0,EveningBusy(16:00-20:00),0.0,5.202937,0.0,33.79166,-84.43003
1,Atlanta_0,0,Midnight(22:00-07:00),0.824444,0.0,0.0,33.79166,-84.43003
2,Atlanta_0,0,MorningBusy(07:00-10:00),9.147466,0.0,0.0,33.79166,-84.43003
3,Atlanta_0,0,NormalDay(10:00-16:00),41.956263,0.0,0.0,33.79166,-84.43003
4,Atlanta_0,0,NormalNight(20:00-22:00),0.0,0.0,0.0,33.79166,-84.43003


In [18]:
# Keep only the rows whose intersection id contains 'Atlanta'; work on a copy
# so later edits do not touch `data`. Then show 10 random rows as a spot check.
is_atlanta = data['InterIdNew'].str.contains('Atlanta')
data_atl = data.loc[is_atlanta].copy()
data_atl.sample(10)

Unnamed: 0,InterIdNew,Weekend,TimeSlot,Left,Right,Straight,Latitude,Longitude
292,Atlanta_132,1,EveningBusy(16:00-20:00),13.117755,1.445392,0.0,33.77952,-84.38263
3431,Atlanta_6,0,NormalNight(20:00-22:00),34.0,4.0,0.0,33.75712,-84.38424
918,Atlanta_211,1,NormalDay(10:00-16:00),63.0,0.0,0.0,33.65309,-84.36753
102,Atlanta_11,0,NormalDay(10:00-16:00),79.0,74.0,82.0,33.74027,-84.34924
1101,Atlanta_231,1,NormalDay(10:00-16:00),60.0,0.0,14.0,33.78644,-84.49231
480,Atlanta_155,0,Midnight(22:00-07:00),0.0,0.0,0.0,33.74457,-84.39448
1429,Atlanta_278,0,NormalDay(10:00-16:00),33.5,27.0,0.0,33.77129,-84.39072
1560,Atlanta_294,1,EveningBusy(16:00-20:00),37.0,0.0,30.0,33.75766,-84.37211
3044,Atlanta_487,1,EveningBusy(16:00-20:00),36.0,35.0,42.0,33.74005,-84.30968
1661,Atlanta_308,0,MorningBusy(07:00-10:00),26.0,29.5,19.0,33.73684,-84.3946


In [19]:
# TimeSlot labels, spelled exactly as they are compared against the
# 'TimeSlot' column when filtering.
SLOT_EVENING_BUSY = 'EveningBusy(16:00-20:00)'
SLOT_MIDNIGHT = 'Midnight(22:00-07:00)'
SLOT_MORNING_BUSY = 'MorningBusy(07:00-10:00)'
SLOT_NORMAL_DAY = 'NormalDay(10:00-16:00)'
SLOT_NORMAL_NIGHT = 'NormalNight(20:00-22:00)'

# Scenario name -> [Weekend flag (0 = weekday, 1 = weekend), TimeSlot label].
# The scenario names double as output folder / file names, so their exact
# spelling (including the mixed capitalisation) is kept as-is.
timewindow_map = {
    'Weekday_EveningBusy': [0, SLOT_EVENING_BUSY],
    'Weekday_Midnight': [0, SLOT_MIDNIGHT],
    'Weekday_Morningbusy': [0, SLOT_MORNING_BUSY],
    'Weekday_Normalday': [0, SLOT_NORMAL_DAY],
    'Weekday_Normalnight': [0, SLOT_NORMAL_NIGHT],
    'Weekend_Eveningbusy': [1, SLOT_EVENING_BUSY],
    'Weekend_Midnight': [1, SLOT_MIDNIGHT],
    'Weekend_Morningbusy': [1, SLOT_MORNING_BUSY],
    'Weekend_Normalday': [1, SLOT_NORMAL_DAY],
    'Weekend_Normalnight': [1, SLOT_NORMAL_NIGHT],
}

In [20]:
def prepare_data(address, data, timewindow_map):
    """Prepare and save the Voronoi-based input files for one time window.

    The rows of ``data`` matching the scenario's weekend flag and time slot
    are selected, a Voronoi polygon is built around each remaining point, and
    four files are written to
    ``ProgressDataOutputArchive/AMOEBA/DataPrepare/<address>/``:

    * ``<address>.csv``          - the filtered rows plus an ``Ave`` column
      (mean of ``Left``, ``Right`` and ``Straight``).
    * ``<address>_neighbor.csv`` - neighbours of each polygon, one row per
      polygon.
    * ``<address>.shp``          - the polygons as a shapefile.
    * ``<address>_json.json``    - the polygons as GeoJSON with the two
      components of every exterior-ring coordinate swapped.

    Parameters
    ----------
    address : str
        Scenario name; must be a key of ``timewindow_map``. Also used as the
        output folder name and file-name stem.
    data : pandas.DataFrame
        Must contain the columns ``Weekend``, ``TimeSlot``, ``Left``,
        ``Right``, ``Straight``, ``Latitude`` and ``Longitude``.
    timewindow_map : dict
        Maps scenario name -> ``[weekend_flag, timeslot_label]``.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``address`` is not a key of ``timewindow_map`` or a required
        column is missing from ``data``.
    """
    # create folder (parents included; no error if it already exists)
    path = 'ProgressDataOutputArchive/AMOEBA/DataPrepare/' + address
    os.makedirs(path, exist_ok=True)

    # filter data
    weekend = timewindow_map[address][0]
    timeslot = timewindow_map[address][1]
    selected = (data['Weekend'] == weekend) & (data['TimeSlot'] == timeslot)
    data_new = data.loc[selected].copy()

    # create polygon based on each intersection
    # Coordinates are picked by column name rather than by position so the
    # result does not depend on the column order of `data`. Points are
    # therefore (Latitude, Longitude) pairs.
    points = data_new[['Latitude', 'Longitude']].to_numpy()
    # NOTE(review): 0.5 is passed through as the second positional argument
    # of voronoi_frames - presumably its radius; confirm against the
    # installed pysal/libpysal version.
    region_df, _ = ps.lib.cg.voronoi_frames(points, 0.5)

    # calculate and save average waiting time
    data_new['Ave'] = (data_new['Left'] + data_new['Right'] + data_new['Straight']) / 3
    data_new.to_csv(path + '/' + address + '.csv')

    # get and save the neighbors to each polygon
    w = ps.lib.weights.Voronoi(points)
    neighbor_df = pd.DataFrame.from_dict(w.neighbors, orient='index')
    # Rows with fewer neighbours than the widest row are left as missing
    # values, i.e. empty cells in the CSV.
    neighbor_df.to_csv(path + '/' + address + '_neighbor.csv')

    # output shapefile of the polygons
    region_df.to_file(path + '/' + address + '.shp')

    # output json file of the polygons
    json_path = path + '/' + address + '_json.json'
    json_dic = json.loads(region_df.to_json())
    for feature in json_dic['features']:
        # Only the first (exterior) ring of each polygon is processed.
        exterior_ring = feature['geometry']['coordinates'][0]
        for cor in exterior_ring:
            # Vertices come out in the same (Latitude, Longitude) order as
            # the input points; swap them in place to (Longitude, Latitude).
            cor[0], cor[1] = cor[1], cor[0]
    with open(json_path, 'w') as file:
        json.dump(json_dic, file)

In [21]:
# Run the preparation step once per scenario (every weekday/weekend x time
# slot combination), always on the Atlanta subset.
for timewindow in timewindow_map.keys():
    prepare_data(address=timewindow, data=data_atl, timewindow_map=timewindow_map)