In [1]:
import pandas as pd
import geopy.distance
import numpy as np

# City attributes; the area, Wikipedia-URL and county columns are discarded
# right after loading ('Unnamed: 0' is presumably a saved index -- TODO confirm).
cityDf = pd.read_csv("city-basics_iowa.csv").drop(
    columns=['Unnamed: 0', 'Area (sqmi)', 'Area (sqkm)', 'WikiURL', 'Counties'])

# Child-care and eviction tables, loaded without their own 'Lat'/'Lon' columns.
childCare = pd.read_csv('iowa-childcare.csv').drop(columns=['Unnamed: 0', 'Lat', 'Lon'])

evictions = pd.read_csv('iowa-evictions.csv').drop(columns=['Unnamed: 0', 'Lat', 'Lon'])

# Join the three tables on 'City'. The default join type is inner, so only
# cities present in all three tables remain.
scalarDf = cityDf.merge(childCare, on="City").merge(evictions, on="City")

def getDistance(row):
    """Return the distance in kilometres between a row's home and work points.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'lat home', 'lon home', 'lat work' and 'lon work'.

    Returns
    -------
    The ``.km`` value of the geopy distance between the two
    (latitude, longitude) pairs.
    """
    home = (row['lat home'], row['lon home'])
    work = (row['lat work'], row['lon work'])
    # geopy 2.0 removed `vincenty`; `geodesic` is its replacement. Fall back to
    # `vincenty` only on old geopy releases that do not offer `geodesic` yet.
    measure = getattr(geopy.distance, 'geodesic', None) or geopy.distance.vincenty
    return measure(home, work).km

# Commuting flows, one row per (home, work) pair; each row gets the distance
# between its two coordinate pairs as computed by getDistance.
jobsIn = pd.read_csv('iowa-work-flow.csv')
jobsIn = jobsIn.drop(columns=['Unnamed: 0'])
jobsIn['Distance'] = jobsIn.apply(getDistance, axis=1)

def getPop(city):
    """Look up a city's population in the module-level ``cityDf`` table.

    Parameters
    ----------
    city : value compared against ``cityDf['City']``.

    Returns
    -------
    float
        The 'Population' of the first matching row, or NaN when no row
        matches (instead of failing with an opaque TypeError).
    """
    matches = cityDf.loc[cityDf['City'] == city, 'Population']
    if matches.empty:
        return float('nan')
    # Positional scalar access: calling float() on a whole Series is deprecated
    # in recent pandas and fails outright unless exactly one row matched.
    return float(matches.iloc[0])

# Population of each flow's work city, looked up one value at a time via getPop.
jobsIn['Population'] = jobsIn['work city'].map(getPop)

def calcMagnitude(row):
    """Return the row's 'total jobs' divided by its 'City Jobs'."""
    flowJobs, cityJobs = row['total jobs'], row['City Jobs']
    return flowJobs / cityJobs

# Sum 'total jobs' over all flows that end in each work city.
cities = jobsIn['work city'].unique().tolist()
totalJobs = {}

for city in cities:
    totalJobs[city] = jobsIn.loc[jobsIn['work city'] == city, 'total jobs'].sum()

totalJobsDF = pd.DataFrame(list(totalJobs.items()), columns=["work city", "City Jobs"])

# Attach the per-city total to every flow; the totals frame is the left side
# of the merge, so its columns come first in the result.
jobsIn = totalJobsDF.merge(jobsIn, on=["work city"])

# Each flow's share of its work city's jobs, and the distance scaled by that share.
jobsIn['normalized jobs'] = jobsIn.apply(calcMagnitude, axis=1)
jobsIn['normalized distance'] = jobsIn['Distance'].mul(jobsIn['normalized jobs'])

# Outbound flows: same preparation as for jobsIn, but keyed on the home city.
jobsOut = pd.read_csv('iowa-job-out.csv')
jobsOut = jobsOut.drop(columns=["Unnamed: 0"])
jobsOut['Distance'] = jobsOut.apply(getDistance, axis=1)
jobsOut['Population'] = jobsOut['home city'].map(getPop)

# Sum 'total jobs' over all flows that start in each home city.
cities = jobsOut['home city'].unique().tolist()
totalJobs = {}

for city in cities:
    totalJobs[city] = jobsOut.loc[jobsOut['home city'] == city, 'total jobs'].sum()

totalJobsDF = pd.DataFrame(list(totalJobs.items()), columns=["home city", "City Jobs"])

# Attach the per-city total to every flow (totals frame on the left).
jobsOut = totalJobsDF.merge(jobsOut, on=["home city"])

# Each flow's share of its home city's jobs, and the distance scaled by that share.
jobsOut['normalized jobs'] = jobsOut.apply(calcMagnitude, axis=1)
jobsOut['normalized distance'] = jobsOut["Distance"].mul(jobsOut["normalized jobs"])

def _distanceQuantiles(flows, cityCol, suffix):
    """Build a per-city table of 'normalized distance' quantiles.

    Parameters
    ----------
    flows : DataFrame with a 'normalized distance' column and the column
        named by ``cityCol``.
    cityCol : name of the column whose values identify the city of each row.
    suffix : text appended to the quantile column names ("In" or "Out").

    Returns
    -------
    DataFrame with columns "City", "Q50 <suffix>", "Q90 <suffix>",
    "Q95 <suffix>" and "Q99 <suffix>", one row per distinct city in
    ``flows[cityCol]``, in order of first appearance.
    """
    levels = [0.5, 0.9, 0.95, 0.99]
    labels = ["Q50", "Q90", "Q95", "Q99"]
    records = []
    # sort=False keeps the cities in the order they first appear in the table.
    for name, group in flows.groupby(cityCol, sort=False):
        quantiles = np.quantile(group['normalized distance'].to_numpy(), levels)
        records.append([name] + list(quantiles))
    columns = ["City"] + ["%s %s" % (label, suffix) for label in labels]
    return pd.DataFrame(records, columns=columns)


# Each table is grouped by its OWN city column. Previously the "In" quantiles
# looped over the home cities of jobsOut while filtering jobsIn by work city,
# so a city that appears only as a home city was handed an empty sample and a
# city that appears only as a work city was never visited. Cities absent from
# either table are dropped by the inner merges below.
scalarNew = pd.merge(scalarDf, _distanceQuantiles(jobsIn, 'work city', 'In'), on="City")

scalarNew = pd.merge(scalarNew, _distanceQuantiles(jobsOut, 'home city', 'Out'), on="City")

# Flows whose home and work city coincide: their 'normalized jobs' value is
# taken from jobsOut for one column and from jobsIn for the other.
propLocalsWorking = jobsOut.loc[
    jobsOut["home city"] == jobsOut["work city"], ['home city', 'normalized jobs']
].rename(columns={'home city': 'City', 'normalized jobs': 'Prop Locals Working'})
propWorkersLocal = jobsIn.loc[
    jobsIn["home city"] == jobsIn["work city"], ['home city', 'normalized jobs']
].rename(columns={'home city': 'City', 'normalized jobs': 'Prop Workers Local'})

# Put both proportions in front of the per-city table built so far.
temp = propLocalsWorking.merge(propWorkersLocal, on="City")
scalarNew = temp.merge(scalarNew, on="City")

# Give 'Capacity' a descriptive name and express it relative to population.
scalarNew = scalarNew.rename(columns={"Capacity" : "Child Care Capacity"})
scalarNew['Scaled Child Care'] = scalarNew['Child Care Capacity'].div(scalarNew['Population'])

In [5]:
import holoviews as hv
from bokeh.sampledata.airport_routes import routes, airports

hv.extension('bokeh', 'matplotlib')

city = "Le Mars, IA"
threshold = 0.05
dataSet = jobsIn

inChords = dataSet[["work city", "home city", "total jobs", "normalized jobs"]]
inChords = inChords[inChords["home city"] == city]
inChords = inChords[inChords["normalized jobs"] > threshold]
values = list(range(0, len(inChords["work city"].unique())))
keys = inChords["work city"].unique()
cityToId = dict(zip(keys, values))
idToCity = dict(zip(values, keys))
inChords["work id"] = inChords["work city"].apply(lambda x: int(cityToId[x]))
inChords["home id"] = inChords["home city"].apply(lambda x: int(cityToId[x]))
inChords["total jobs"] = inChords["total jobs"].apply(lambda x: int(x))
inChords = inChords[["work id", "home id", "total jobs"]]
citiesDF = pd.DataFrame.from_dict(cityToId, orient="index").reset_index()
citiesDF.columns = ["Cities", "ID"]
citiesDF["ID"] = citiesDF["ID"].apply(lambda x: int(x))
citiesDF["Cities"] = citiesDF["Cities"].astype('|S')
nodes = hv.Dataset(citiesDF, "ID", "Cities")
chords = hv.Chord((inChords, nodes), ["home id", "work id"], ["total jobs"])

%opts Chord [edge_color_index='work id' label_index='Cities' color_index='home id' width=1000 height=1000]
%opts Chord (cmap='Category20' edge_cmap='Category20')
%output filename="le_mars-plot-0.05" fig="png"
chords

  zip(columns, data)])
