In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

# Load the Postgres connection settings (username, password, host, port, dbname).
with open('../../config/postgres.yaml') as f:
    # safe_load builds only plain Python types, which is all a settings file needs.
    engine_configs = yaml.safe_load(f)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() is lazy and never contacts the database, so open (and
    # immediately release) one connection to surface bad credentials here.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Every later cell needs a working `engine`; stop here instead of failing
    # further down with a confusing NameError or connection error.
    raise

In [3]:
def process_geometry_SQL_insert(gdf):
    """Prepare a GeoDataFrame's geometries for insertion through GeoAlchemy2.

    Each value of the ``geometry`` column is converted to a ``WKTElement``
    with SRID 4326; plain ``Polygon`` values are first wrapped in a
    single-member ``MultiPolygon``. The converted values are stored in a new
    ``geom`` column and the ``geometry`` column is dropped from the result.

    Note: the ``geom`` column is added to the frame passed in (the argument
    is modified in place); only the dropping of ``geometry`` happens on the
    returned copy.

    Parameters
    ----------
    gdf : frame with a ``geometry`` column of shapely geometries.

    Returns
    -------
    A new frame with ``geom`` in place of ``geometry``.
    """
    def _to_wkt_element(shape):
        # Wrap single polygons so every row carries the same geometry type.
        multi = MultiPolygon([shape]) if shape.geom_type == 'Polygon' else shape
        return WKTElement(multi.wkt, srid=4326)

    gdf['geom'] = gdf['geometry'].apply(_to_wkt_element)
    # The positional axis form drop('geometry', 1) was removed in pandas 2.0;
    # the keyword form works on old and new versions alike.
    return gdf.drop(columns='geometry')

In [4]:
# City key used to filter the database queries below, to tag the inserted rows
# and to name the generated OD file.
# NOTE(review): the recorded output of the blocks_group query shows the city
# value 'boston1m', not 'boston' — confirm which value this notebook should use.
CITY = 'boston'

In [5]:
# Read the zipped GeoJSON of blocks, keeping only the geometry and its GEOID.
tracts_gdf = gpd.read_file(
    'zip://../../data/boston/mobile-phone/Boston_blocks.geojson.zip'
)[['geometry', 'GEOID']]
tracts_gdf.head()

Unnamed: 0,geometry,GEOID
0,"MULTIPOLYGON (((-70.75132 42.22292, -70.74878 ...",250235051014
1,"MULTIPOLYGON (((-71.13758 42.34865, -71.13638 ...",250250007012
2,"MULTIPOLYGON (((-71.17155 42.25029, -71.17166 ...",250214024001
3,"MULTIPOLYGON (((-71.02917 42.56981, -71.02408 ...",250092101001
4,"MULTIPOLYGON (((-70.89446 42.48588, -70.89021 ...",250092031007


In [6]:
# Fetch every block group stored for the selected city, geometry included.
sql = """
SELECT b.*
FROM blocks_group b
WHERE b.city='{city}'
""".format(city=CITY)

blocks_gdf = gpd.GeoDataFrame.from_postgis(sql, con=engine, geom_col='geom')
# Expose the source identifier under the column name used by the data files.
blocks_gdf['GEOID'] = blocks_gdf.original_id
blocks_gdf.head()

Unnamed: 0,bid,original_id,geom,city,GEOID
0,501727,250250102042,"MULTIPOLYGON (((-71.10680 42.34857, -71.10600 ...",boston1m,250250102042
1,501726,250250811002,"MULTIPOLYGON (((-71.11230 42.33200, -71.11024 ...",boston1m,250250811002
2,501725,250250809001,"MULTIPOLYGON (((-71.10243 42.33515, -71.10031 ...",boston1m,250250809001
3,501724,250250811001,"MULTIPOLYGON (((-71.10850 42.33060, -71.10646 ...",boston1m,250250811001
4,501723,250250812002,"MULTIPOLYGON (((-71.10955 42.32337, -71.10654 ...",boston1m,250250812002


## ODs

In [7]:
# Open the zipped travel-demand archive; the handle is reused by the
# read_csv call in the next cell.
zip_file = ZipFile('../../data/boston/mobile-phone/travel_demand_Boston_blocks.zip')
# List the archive members (the recorded output shows a single CSV of
# about 1.9 GB uncompressed).
zip_file.infolist()

[<ZipInfo filename='travel_demand_Boston_blocks.csv' compress_type=deflate filemode='-rw-rw-r--' file_size=1909433090 compress_size=344647481>]

In [8]:
# Column dtypes: the 24 hourly columns, the trip-purpose columns and the
# coordinates are read as float32; the block identifiers stay strings.
hour_cols = [str(hour) for hour in range(24)]
types = dict.fromkeys(
    hour_cols + ['HBW', 'HBO', 'NHB', 'lon1', 'lat1', 'lon2', 'lat2'],
    np.float32,
)
types.update({'O_Block': str, 'D_Block': str})

travel_df = pd.read_csv(
    zip_file.open('travel_demand_Boston_blocks.csv'), dtype=types
).drop(columns=['lon1', 'lat1', 'lon2', 'lat2'])
# Total trips per origin/destination pair = sum over the 24 hourly columns.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)
travel_df.head()

Unnamed: 0,O_Block,D_Block,HBW,HBO,NHB,0,1,2,3,4,...,15,16,17,18,19,20,21,22,23,tot
0,250250504001,250173335013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,250173504001,250173835022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,250173746003,250092047022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,250173419023,250214225022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,250235307002,250092056002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Keep the block ids, the three trip-purpose columns and the total; the
# hourly columns are no longer needed.
travel_df = travel_df.loc[:, ['O_Block', 'D_Block', 'HBW', 'HBO', 'NHB', 'tot']]

In [10]:
# One row per (block, spatial group) membership: a block is listed once for
# every spatial group whose lower_ids array contains its bid.
sql = """
SELECT b.bid, b.original_id as "GEOID", s.sp_id 
FROM blocks_group b
INNER JOIN spatial_groups s ON b.bid = ANY(s.lower_ids) AND b.city = s.city
WHERE b.city='{city}'
""".format(city=CITY)

blocks2spid_df = pd.read_sql(sql, con=engine)
blocks2spid_df.head()

Unnamed: 0,bid,GEOID,sp_id
0,501173,250250005023,349935
1,501175,250250007015,349935
2,501176,250250007011,349935
3,501178,250250005024,349935
4,501179,250250003022,349935


In [11]:
# Trip-count columns to aggregate. They are listed explicitly so the string
# block-id columns never reach .sum(): pandas >= 2.0 no longer drops
# non-numeric columns silently and would concatenate the ids instead.
trip_cols = ['HBW', 'HBO', 'NHB', 'tot']

# Step 1: attach the origin block's spatial group(s) and sum per
# (origin group, destination block).
origin_groups = blocks2spid_df[['sp_id', 'GEOID']].rename(columns={'sp_id': 'o_sp_id'})
od_sp_groups_df = pd.merge(travel_df, origin_groups, left_on='O_Block', right_on='GEOID').drop(['GEOID'], axis=1)
od_sp_groups_df = od_sp_groups_df.groupby(['o_sp_id', 'D_Block'], as_index=False)[trip_cols].sum()

# Step 2: do the same for the destination block and sum per
# (origin group, destination group).
dest_groups = blocks2spid_df[['sp_id', 'GEOID']].rename(columns={'sp_id': 'd_sp_id'})
od_sp_groups_df = pd.merge(od_sp_groups_df, dest_groups, left_on='D_Block', right_on='GEOID').drop(['GEOID'], axis=1)
od_sp_groups_df = od_sp_groups_df.groupby(['o_sp_id', 'd_sp_id'], as_index=False)[trip_cols].sum()
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,349935,349935,13920.0,55700.0,46600.0,116220.0
1,349935,349936,9720.0,29640.0,27480.0,66840.0
2,349935,349937,24500.0,76680.0,64080.0,165260.0
3,349935,349938,24320.0,70100.0,54460.0,148880.0
4,349935,349939,25020.0,63420.0,52960.0,141400.0


In [12]:
# Distinct spatial-group ids as strings, sorted lexicographically.
all_sp_ids = sorted(map(str, set(blocks2spid_df.sp_id.values)))

### Fix missing links

In [13]:
import itertools

# Every ordered (origin, destination) pair of spatial groups, self-pairs included.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Ids become strings so they match the entries of `tuples`; pairs with no
# recorded trips are then added with all counts set to 0.
od_sp_groups_df = od_sp_groups_df.astype({'o_sp_id': str, 'd_sp_id': str})
od_sp_groups_df = (
    od_sp_groups_df
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,349935,349935,13920.0,55700.0,46600.0,116220.0
1,349935,349936,9720.0,29640.0,27480.0,66840.0
2,349935,349937,24500.0,76680.0,64080.0,165260.0
3,349935,349938,24320.0,70100.0,54460.0,148880.0
4,349935,349939,25020.0,63420.0,52960.0,141400.0


In [14]:
# Are there any origin/destination pairs with no trips at all?
od_sp_groups_df.loc[od_sp_groups_df['tot'] == 0].head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot


In [15]:
# Re-display the completed OD table (not modified since the reindex step).
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,349935,349935,13920.0,55700.0,46600.0,116220.0
1,349935,349936,9720.0,29640.0,27480.0,66840.0
2,349935,349937,24500.0,76680.0,64080.0,165260.0
3,349935,349938,24320.0,70100.0,54460.0,148880.0
4,349935,349939,25020.0,63420.0,52960.0,141400.0


## Blocks_attract

In [16]:
# One row per block: blocks2spid_df repeats a block for every spatial group
# it belongs to, so keep only the first occurrence of each bid.
blocks2bid_unique_df = blocks2spid_df[['bid', 'GEOID']].drop_duplicates(subset=['bid'])
blocks2bid_unique_df.head()

Unnamed: 0,bid,GEOID
0,501173,250250005023
1,501175,250250007015
2,501176,250250007011
3,501178,250250005024
4,501179,250250003022


In [17]:
# Trip-count columns to aggregate. They are listed explicitly so the string
# block-id columns never reach .sum(): pandas >= 2.0 no longer drops
# non-numeric columns silently and would concatenate the ids instead.
bid_trip_cols = ['HBO', 'NHB', 'tot']

# Step 1: translate the origin GEOID into its database bid and sum per
# (origin bid, destination block).
origin_bids = blocks2bid_unique_df.rename(columns={'bid': 'o_bid'})
od_bid_groups_df = pd.merge(travel_df[['O_Block', 'D_Block'] + bid_trip_cols], origin_bids, left_on='O_Block', right_on='GEOID').drop(['GEOID'], axis=1)
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'D_Block'], as_index=False)[bid_trip_cols].sum()

# Step 2: translate the destination GEOID as well and sum per (origin bid, destination bid).
dest_bids = blocks2bid_unique_df.rename(columns={'bid': 'd_bid'})
od_bid_groups_df = pd.merge(od_bid_groups_df, dest_bids, left_on='D_Block', right_on='GEOID').drop(['GEOID'], axis=1)
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'd_bid'], as_index=False)[bid_trip_cols].sum()

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,501173,501173,0.0,0.0,0.0
1,501173,501174,0.0,0.0,0.0
2,501173,501175,0.0,0.0,0.0
3,501173,501176,0.0,20.0,20.0
4,501173,501177,0.0,20.0,20.0


In [18]:
# Flatten spatial_groups.lower_ids: one (sp_id, bid) row per member block,
# with both ids returned as text.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}'
""".format(city=CITY)

blocks_spatial_df = pd.read_sql(sql, con=engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,349935,501202
1,349935,501199
2,349935,501185
3,349935,501192
4,349935,501173


In [19]:
# For every spatial group, "attract" is the sum of NHB trips that end in one
# of the group's blocks and start in a block outside the group.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')
attract_df['attract'] = 0.

# blocks_spatial_df.bid is text (cast in the SQL), while the bids in
# od_bid_groups_df come from a query without that cast. Compare both sides as
# strings: isin() does not coerce between numbers and strings in recent pandas,
# so a dtype mismatch would make every membership test False and every sum 0.
o_bid_str = od_bid_groups_df['o_bid'].astype(str)
d_bid_str = od_bid_groups_df['d_bid'].astype(str)
nhb_trips = od_bid_groups_df['NHB']
member_bid_str = blocks_spatial_df['bid'].astype(str)

for spid in attract_df.index.values:
    bids = member_bid_str[blocks_spatial_df.sp_id == spid].values

    # Destination inside the group, origin outside of it.
    inbound = d_bid_str.isin(bids) & ~o_bid_str.isin(bids)
    attract_df.loc[spid, 'attract'] = nhb_trips[inbound].sum()

attract_df = attract_df.reset_index()
attract_df.head()

Unnamed: 0,o_sp_id,attract
0,349935,210260.0
1,349936,121500.0
2,349937,330520.0
3,349938,321720.0
4,349939,313120.0


### Save "other" trips: outgoing (`nout`) and internal (`nin`)

In [20]:
# Per-pair trip counts. `ntrips` is the plain total; NHB is deliberately not
# added on top of it (original note: excluded "because they do not live here").
trips_other = od_sp_groups_df[['o_sp_id', 'd_sp_id', 'NHB', 'tot']].assign(
    ntrips=lambda pairs: pairs['tot']
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,NHB,tot,ntrips
0,349935,349935,46600.0,116220.0,116220.0
1,349935,349936,27480.0,66840.0,66840.0
2,349935,349937,64080.0,165260.0,165260.0
3,349935,349938,54460.0,148880.0,148880.0
4,349935,349939,52960.0,141400.0,141400.0


In [21]:
# Group-level attraction: NHB trips between different spatial groups, summed
# per destination group (the destination id is then exposed as `o_sp_id`).
# NOTE(review): the next cell reassigns `trips_attract` from attract_df, so
# this result is only displayed here and is not what gets saved.
inter_group_trips = trips_other[trips_other.o_sp_id != trips_other.d_sp_id]
trips_attract = (
    inter_group_trips
    .assign(attract=inter_group_trips['NHB'])
    # Numeric columns are selected explicitly: o_sp_id is a string column and
    # pandas >= 2.0 would concatenate it in .sum() instead of dropping it.
    .groupby('d_sp_id', as_index=False)[['NHB', 'ntrips', 'attract']]
    .sum()
    .rename(columns={'d_sp_id': 'o_sp_id'})
)
trips_attract.head()

Unnamed: 0,o_sp_id,NHB,ntrips,attract
0,349935,13290600.0,26726500.0,13290600.0
1,349936,7220640.0,14215140.0,7220640.0
2,349937,23219220.0,47492660.0,23219220.0
3,349938,22123100.0,45582780.0,22123100.0
4,349939,22059140.0,46116500.0,22059140.0


In [22]:
# Use the block-level attraction values as the `attract` figure to save.
trips_attract = attract_df.loc[:, ['o_sp_id', 'attract']]

In [23]:
# Outgoing trips: pairs whose destination group differs from the origin group,
# summed per origin group.
leaving_group = trips_other.o_sp_id != trips_other.d_sp_id
trips_out = (
    trips_other[leaving_group]
    # Numeric columns are selected explicitly: d_sp_id is a string column and
    # pandas >= 2.0 would concatenate it in .sum() instead of dropping it.
    .groupby('o_sp_id', as_index=False)[['NHB', 'ntrips']]
    .sum()
    .rename(columns={'ntrips': 'nout'})
)
trips_out.head()

Unnamed: 0,o_sp_id,NHB,nout
0,349935,8018260.0,28171820.0
1,349936,4004400.0,15046480.0
2,349937,15562860.0,49375120.0
3,349938,15170840.0,47325740.0
4,349939,16184900.0,47383780.0


In [24]:
# Internal trips: pairs that start and end in the same spatial group.
within_group = trips_other.o_sp_id == trips_other.d_sp_id
trips_in = (
    trips_other[within_group]
    # Numeric columns are selected explicitly: d_sp_id is a string column and
    # pandas >= 2.0 would concatenate it in .sum() instead of dropping it.
    .groupby('o_sp_id', as_index=False)[['NHB', 'ntrips']]
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,NHB,nin
0,349935,46600.0,116220.0
1,349936,17320.0,42620.0
2,349937,125400.0,299540.0
3,349938,112880.0,268220.0
4,349939,119540.0,276500.0


In [25]:
# Combine internal, outgoing and attraction figures per spatial group.
# Both trip frames carry an NHB column, so the merge suffixes them:
# NHB_x comes from trips_in and NHB_y from trips_out.
in_and_out = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_and_out, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,NHB_x,nin,NHB_y,nout
0,349935,210260.0,46600.0,116220.0,8018260.0,28171820.0
1,349936,121500.0,17320.0,42620.0,4004400.0,15046480.0
2,349937,330520.0,125400.0,299540.0,15562860.0,49375120.0
3,349938,321720.0,112880.0,268220.0,15170840.0,47325740.0
4,349939,313120.0,119540.0,276500.0,16184900.0,47383780.0


In [26]:
# Stage the per-group figures in the scratch table `temptable3`, replacing
# the table if it already exists; the next cell copies them from there.
df_all.to_sql('temptable3', engine, if_exists='replace', index=False)

In [27]:
# Copy the staged figures from temptable3 into spatial_groups_trips, tagging
# each row with the city and the 'ego' spatial name. The city is passed as a
# bound parameter rather than formatted into the statement.
# NOTE(review): this is a plain INSERT, so re-running the cell issues it
# again — confirm the target table guards against duplicate rows.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, :city, 'ego', c.nin, c.nout, c.attract
FROM temptable3 c 
"""

# Engine.execute() was removed in SQLAlchemy 2.0. engine.begin() is available
# on 1.x and 2.x, and commits when the block exits without an error.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})

### Save OD

In [28]:
# Reshape the long OD table into a matrix: one row per origin group, one
# column per destination group, cells holding the total trips.
ODs_matrix_df = od_sp_groups_df.pivot(index='o_sp_id', columns='d_sp_id', values='tot')
ODs_matrix_df.head()

d_sp_id,349935,349936,349937,349938,349939,349940,349941,349942,349943,349944,...,350480,350481,350482,350483,350484,350485,350486,350487,350488,350489
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
349935,116220.0,66840.0,165260.0,148880.0,141400.0,64160.0,110520.0,123020.0,149500.0,154880.0,...,40220.0,75100.0,85980.0,46040.0,90060.0,51800.0,63760.0,81660.0,50620.0,77560.0
349936,67800.0,42620.0,92640.0,81820.0,78080.0,35820.0,66520.0,73900.0,81480.0,86360.0,...,23960.0,37700.0,36040.0,28240.0,45340.0,29720.0,34840.0,42200.0,25840.0,35020.0
349937,161340.0,90600.0,299540.0,283760.0,284960.0,78140.0,147960.0,171500.0,240120.0,280320.0,...,67340.0,125060.0,185840.0,77960.0,170000.0,84760.0,104500.0,148760.0,84840.0,155520.0
349938,145040.0,79800.0,283080.0,268220.0,271920.0,70080.0,131240.0,153540.0,223480.0,264220.0,...,64100.0,120200.0,189820.0,73140.0,168000.0,80160.0,99040.0,145320.0,81620.0,157920.0
349939,136040.0,76140.0,282860.0,270840.0,276500.0,62080.0,121500.0,142840.0,217240.0,264420.0,...,65080.0,120520.0,199800.0,75440.0,174020.0,80860.0,100200.0,148940.0,80720.0,165280.0


In [29]:
# Append a `city` column after the destination-group columns.
ODs_matrix_df = ODs_matrix_df.assign(city=CITY)

In [30]:
# Write the OD matrix (with its o_sp_id index) to a per-city CSV file.
ods_csv_path = '../../data/generated_files/{city}_ODs.csv'.format(city=CITY)
ODs_matrix_df.to_csv(ods_csv_path)

In [31]:
# Load the header-less spatial distance matrix, then keep the 'ego' rows of
# the selected city ordered by origin group.
weights_df = pd.read_csv(
    '../../data/generated_files/spatial_dmatrix.csv',
    names=['o_sp_id', 'd_sp_id', 'city', 'spatial_name', 'w'],
    dtype={'o_sp_id': str, 'd_sp_id': str, 'w': np.float32},
)

is_ego_for_city = (weights_df['spatial_name'] == 'ego') & (weights_df['city'] == CITY)
weights_df = (
    weights_df[is_ego_for_city]
    .sort_values(['city', 'o_sp_id'])
    .reset_index(drop=True)
)
weights_df.head()

Unnamed: 0,o_sp_id,d_sp_id,city,spatial_name,w


### Ambient population

In [32]:
# Hourly stay counts per block; the `tract` identifier is read as a string.
ambient_df = pd.read_csv(
    '../../data/boston/mobile-phone/hourly_stay_Boston_blocks.csv',
    dtype={'tract': str},
)
ambient_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,16,17,18,19,20,21,22,23,lon,lat
0,250251101032,526.0,528.0,522.0,516.0,520.0,514.0,506.0,440.0,366.0,...,230.0,260.0,314.0,390.0,466.0,468.0,472.0,462.0,-71.122904,42.292395
1,250251101033,1248.0,1280.0,1288.0,1292.0,1292.0,1284.0,1266.0,1100.0,960.0,...,1150.0,1270.0,1322.0,1326.0,1296.0,1322.0,1350.0,1342.0,-71.117419,42.298185
2,250251101031,302.0,308.0,302.0,310.0,312.0,350.0,316.0,254.0,258.0,...,338.0,344.0,376.0,278.0,278.0,272.0,276.0,266.0,-71.109236,42.299882
3,250251101036,2182.0,2164.0,2160.0,2152.0,2146.0,2138.0,2038.0,1918.0,1706.0,...,1572.0,1834.0,1892.0,1906.0,1966.0,1966.0,2040.0,2034.0,-71.115564,42.291985
4,250251101037,1826.0,1814.0,1802.0,1796.0,1798.0,1726.0,1634.0,1360.0,816.0,...,1176.0,1356.0,1550.0,1580.0,1626.0,1600.0,1632.0,1622.0,-71.120349,42.29203


In [33]:
# Attach the database bid to each ambient row by matching `tract` against
# the blocks' GEOID.
bid_by_tract = blocks_gdf[['bid', 'GEOID']].rename(columns={'GEOID': 'tract'})
ambient_sp_id_df = ambient_df.merge(bid_by_tract, on='tract')
ambient_sp_id_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,17,18,19,20,21,22,23,lon,lat,bid
0,250251101032,526.0,528.0,522.0,516.0,520.0,514.0,506.0,440.0,366.0,...,260.0,314.0,390.0,466.0,468.0,472.0,462.0,-71.122904,42.292395,501569
1,250251101033,1248.0,1280.0,1288.0,1292.0,1292.0,1284.0,1266.0,1100.0,960.0,...,1270.0,1322.0,1326.0,1296.0,1322.0,1350.0,1342.0,-71.117419,42.298185,501666
2,250251101031,302.0,308.0,302.0,310.0,312.0,350.0,316.0,254.0,258.0,...,344.0,376.0,278.0,278.0,272.0,276.0,266.0,-71.109236,42.299882,501578
3,250251101036,2182.0,2164.0,2160.0,2152.0,2146.0,2138.0,2038.0,1918.0,1706.0,...,1834.0,1892.0,1906.0,1966.0,1966.0,2040.0,2034.0,-71.115564,42.291985,501560
4,250251101037,1826.0,1814.0,1802.0,1796.0,1798.0,1726.0,1634.0,1360.0,816.0,...,1356.0,1550.0,1580.0,1626.0,1600.0,1632.0,1622.0,-71.120349,42.29203,501682


In [34]:
# Average of the 24 hourly counts for each block.
hourly_columns = [str(hour) for hour in range(24)]
ambient_sp_id_df['ambient_avg'] = ambient_sp_id_df.loc[:, hourly_columns].mean(axis=1)
ambient_sp_id_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,bid,ambient_avg
0,250251101032,526.0,528.0,522.0,516.0,520.0,514.0,506.0,440.0,366.0,...,314.0,390.0,466.0,468.0,472.0,462.0,-71.122904,42.292395,501569,390.083333
1,250251101033,1248.0,1280.0,1288.0,1292.0,1292.0,1284.0,1266.0,1100.0,960.0,...,1322.0,1326.0,1296.0,1322.0,1350.0,1342.0,-71.117419,42.298185,501666,1168.083333
2,250251101031,302.0,308.0,302.0,310.0,312.0,350.0,316.0,254.0,258.0,...,376.0,278.0,278.0,272.0,276.0,266.0,-71.109236,42.299882,501578,303.666667
3,250251101036,2182.0,2164.0,2160.0,2152.0,2146.0,2138.0,2038.0,1918.0,1706.0,...,1892.0,1906.0,1966.0,1966.0,2040.0,2034.0,-71.115564,42.291985,501560,1810.75
4,250251101037,1826.0,1814.0,1802.0,1796.0,1798.0,1726.0,1634.0,1360.0,816.0,...,1550.0,1580.0,1626.0,1600.0,1632.0,1622.0,-71.120349,42.29203,501682,1358.5


In [35]:
# Stage (bid, ambient_avg) in the scratch table `temptable3`, replacing the
# table if it already exists; the next cell copies the rows from there.
ambient_sp_id_df[['bid', 'ambient_avg']].to_sql('temptable3', engine, if_exists='replace', index=False)

In [36]:
# Copy the staged averages from temptable3 into ambient_population, tagging
# each row with the city. The city is passed as a bound parameter rather than
# formatted into the statement.
# NOTE(review): this is a plain INSERT, so re-running the cell issues it
# again — confirm the target table guards against duplicate rows.
sql = """
INSERT INTO ambient_population (bid, city, num_people) 
SELECT c.bid, :city, c.ambient_avg
FROM temptable3 c 
"""

# Engine.execute() was removed in SQLAlchemy 2.0. engine.begin() is available
# on 1.x and 2.x, and commits when the block exits without an error.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})