In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [3]:
import yaml

# Load the PostgreSQL connection settings; the URL template below needs the keys
# username, password, host, port and dbname.
with open('../../config/postgres.yaml') as f:
    # NOTE(review): FullLoader is only appropriate for a trusted, local config file.
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() is lazy and does not talk to the server, so open (and immediately
    # release) one connection to surface wrong credentials or an unreachable host here.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Every later cell needs a working engine; stop here instead of failing later
    # with an unrelated NameError or connection error.
    raise

In [4]:
def process_geometry_SQL_insert(gdf):
    """Return a copy of ``gdf`` whose ``geometry`` column is replaced by a ``geom`` column.

    Each geometry is converted to a ``WKTElement`` tagged with SRID 4326. Plain
    ``Polygon`` values are wrapped in a single-member ``MultiPolygon`` first; any
    other geometry type is passed through unchanged. Coordinates are not
    reprojected (assumes the input is already in EPSG:4326 - TODO confirm).

    Parameters
    ----------
    gdf : DataFrame-like with a ``geometry`` column whose values expose
        ``geom_type`` and ``wkt``.

    Returns
    -------
    A new frame without ``geometry`` and with ``geom`` appended as the last
    column. The input frame is left untouched.
    """
    def _to_wkt_element(shape):
        # Promote single polygons so every row has the same geometry type.
        if shape.geom_type == 'Polygon':
            shape = MultiPolygon([shape])
        return WKTElement(shape.wkt, srid=4326)

    geom = gdf['geometry'].apply(_to_wkt_element)
    # `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
    return gdf.drop(columns='geometry').assign(geom=geom)

In [5]:
# City key: filters the `city` column in the SQL queries and is part of the output CSV name.
CITY = 'boston'
# Spatial grouping: matched against `spatial_groups.spatial_name` and part of the output CSV name.
SPATIAL_NAME = 'core'

In [8]:
# Read the zipped Boston blocks GeoJSON and keep just the geometry and the GEOID key.
tracts_gdf = gpd.read_file(
    'zip://../../data/boston/mobile-phone/Boston_blocks.geojson.zip'
)[['geometry', 'GEOID']]
tracts_gdf.head()

Unnamed: 0,geometry,GEOID
0,"MULTIPOLYGON (((-70.75132 42.22292, -70.74878 ...",250235051014
1,"MULTIPOLYGON (((-71.13758 42.34865, -71.13638 ...",250250007012
2,"MULTIPOLYGON (((-71.17155 42.25029, -71.17166 ...",250214024001
3,"MULTIPOLYGON (((-71.02917 42.56981, -71.02408 ...",250092101001
4,"MULTIPOLYGON (((-70.89446 42.48588, -70.89021 ...",250092031007


In [9]:
# Pull every block group stored for CITY, geometry included.
sql = """
SELECT b.*
FROM blocks_group b
WHERE b.city='{city}'
""".format(city=CITY)

blocks_gdf = (
    gpd.GeoDataFrame
    .from_postgis(sql, engine, geom_col='geom')
    # Expose original_id under the GEOID name used by the mobile-phone files.
    .assign(GEOID=lambda frame: frame['original_id'])
)
blocks_gdf.head()

Unnamed: 0,bid,original_id,geom,city,GEOID
0,423768,250250005023,"MULTIPOLYGON (((-71.15753 42.33978, -71.15529 ...",boston,250250005023
1,423769,250250003013,"MULTIPOLYGON (((-71.17239 42.35436, -71.17029 ...",boston,250250003013
2,423770,250250007015,"MULTIPOLYGON (((-71.14152 42.34629, -71.14037 ...",boston,250250007015
3,423771,250250007011,"MULTIPOLYGON (((-71.13778 42.34834, -71.13596 ...",boston,250250007011
4,423772,250250008022,"MULTIPOLYGON (((-71.13820 42.35709, -71.13140 ...",boston,250250008022


## ODs

In [10]:
# Open the travel-demand archive. The handle is kept open on purpose: the CSV member
# is read from `zip_file` in a later cell.
zip_file = ZipFile('../../data/boston/mobile-phone/travel_demand_Boston_blocks.zip')
# List the archive members to check the CSV name and its uncompressed size.
zip_file.infolist()

[<ZipInfo filename='travel_demand_Boston_blocks.csv' compress_type=deflate filemode='-rw-rw-r--' file_size=1909433090 compress_size=344647481>]

In [11]:
# dtype map for the CSV: block ids are read as strings, every other listed column as float32.
hour_cols = [str(hour) for hour in range(24)]
coord_cols = ['lon1', 'lat1', 'lon2', 'lat2']
types = {col: np.float32 for col in hour_cols + ['HBW', 'HBO', 'NHB'] + coord_cols}
types.update({'O_Block': str, 'D_Block': str})

# Read the origin/destination table straight from the archive; coordinates are not needed downstream.
travel_df = (
    pd.read_csv(zip_file.open('travel_demand_Boston_blocks.csv'), dtype=types)
    .drop(columns=coord_cols)
)
# Total trips per OD pair = sum over the 24 hourly columns.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)
travel_df.head()

Unnamed: 0,O_Block,D_Block,HBW,HBO,NHB,0,1,2,3,4,...,15,16,17,18,19,20,21,22,23,tot
0,250250504001,250173335013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,250173504001,250173835022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,250173746003,250092047022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,250173419023,250214225022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,250235307002,250092056002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Drop the hourly columns now that `tot` exists; keep the ids, HBW/HBO/NHB and the total.
travel_df = travel_df.loc[:, ['O_Block', 'D_Block', 'HBW', 'HBO', 'NHB', 'tot']]

In [17]:
# Map each block group of CITY to the SPATIAL_NAME spatial group(s) whose lower_ids contain its bid.
sql = """
SELECT b.bid, b.original_id as "GEOID", s.sp_id 
FROM blocks_group b
INNER JOIN spatial_groups s ON b.bid = ANY(s.lower_ids) AND b.city = s.city
WHERE b.city='{city}' and s.spatial_name = '{spname}'
""".format(city=CITY, spname=SPATIAL_NAME)

blocks2spid_df = pd.read_sql(sql=sql, con=engine)
blocks2spid_df.head()

Unnamed: 0,bid,GEOID,sp_id
0,424192,250251201031,357031
1,424016,250250904003,357032
2,424183,250251002001,357012
3,424291,250251401022,357013
4,424179,250251304042,357014


In [18]:
# Aggregate block-level OD flows to spatial-group level in two passes:
# origin blocks -> o_sp_id first, then destination blocks -> d_sp_id.
# The summed columns are listed explicitly: the string id columns (O_Block / D_Block)
# are no longer dropped silently by groupby().sum() in pandas 2.x, where they would be
# concatenated instead.
sp_value_cols = ['HBW', 'HBO', 'NHB', 'tot']
geoid2spid_df = blocks2spid_df[['sp_id', 'GEOID']]

od_by_origin_df = (
    pd.merge(
        travel_df,
        geoid2spid_df.rename(columns={'sp_id': 'o_sp_id'}),
        left_on='O_Block',
        right_on='GEOID',
    )
    .groupby(['o_sp_id', 'D_Block'], as_index=False)[sp_value_cols]
    .sum()
)

od_sp_groups_df = (
    pd.merge(
        od_by_origin_df,
        geoid2spid_df.rename(columns={'sp_id': 'd_sp_id'}),
        left_on='D_Block',
        right_on='GEOID',
    )
    .groupby(['o_sp_id', 'd_sp_id'], as_index=False)[sp_value_cols]
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,357009,357009,0.0,60.0,100.0,160.0
1,357009,357010,0.0,20.0,0.0,20.0
2,357009,357011,0.0,20.0,0.0,20.0
3,357009,357012,0.0,100.0,0.0,100.0
4,357009,357013,0.0,60.0,180.0,240.0


In [19]:
# Distinct spatial-group ids as strings, in lexicographic order.
all_sp_ids = sorted({str(sp_id) for sp_id in blocks2spid_df.sp_id.values})

### Fix missing links

In [20]:
import itertools

# Every ordered (origin, destination) pair of spatial groups, observed or not.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# all_sp_ids holds strings, so the id columns are cast to str before being used as the index.
# Pairs missing from the data are added by reindex() and get 0 in every value column.
od_sp_groups_df = (
    od_sp_groups_df
    .astype({'o_sp_id': str, 'd_sp_id': str})
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,357009,357009,0.0,60.0,100.0,160.0
1,357009,357010,0.0,20.0,0.0,20.0
2,357009,357011,0.0,20.0,0.0,20.0
3,357009,357012,0.0,100.0,0.0,100.0
4,357009,357013,0.0,60.0,180.0,240.0


In [21]:
# Sanity check: which OD pairs have a total of zero trips?
od_sp_groups_df.loc[od_sp_groups_df['tot'].eq(0)].head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
6,357009,357015,0.0,0.0,0.0,0.0
7,357009,357016,0.0,0.0,0.0,0.0
8,357009,357017,0.0,0.0,0.0,0.0
12,357009,357021,0.0,0.0,0.0,0.0
13,357009,357022,0.0,0.0,0.0,0.0


In [22]:
# Preview the completed spatial-group OD table (one row per origin/destination pair).
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBW,HBO,NHB,tot
0,357009,357009,0.0,60.0,100.0,160.0
1,357009,357010,0.0,20.0,0.0,20.0
2,357009,357011,0.0,20.0,0.0,20.0
3,357009,357012,0.0,100.0,0.0,100.0
4,357009,357013,0.0,60.0,180.0,240.0


## Blocks_attract

In [23]:
# One row per bid (first occurrence wins), keeping only the bid <-> GEOID mapping.
first_seen = ~blocks2spid_df['bid'].duplicated()
blocks2bid_unique_df = blocks2spid_df.loc[first_seen, ['bid', 'GEOID']]
blocks2bid_unique_df.head()

Unnamed: 0,bid,GEOID
0,424192,250251201031
1,424016,250250904003
2,424183,250251002001
3,424291,250251401022
4,424179,250251304042


In [24]:
# Re-key the block-level OD flows from GEOID strings to database bids:
# origin GEOID -> o_bid first, then destination GEOID -> d_bid.
# The summed columns are listed explicitly: the string id columns (O_Block / D_Block)
# are no longer dropped silently by groupby().sum() in pandas 2.x, where they would be
# concatenated instead.
bid_value_cols = ['HBO', 'NHB', 'tot']

od_by_origin_bid_df = (
    pd.merge(
        travel_df[['O_Block', 'D_Block'] + bid_value_cols],
        blocks2bid_unique_df.rename(columns={'bid': 'o_bid'}),
        left_on='O_Block',
        right_on='GEOID',
    )
    .groupby(['o_bid', 'D_Block'], as_index=False)[bid_value_cols]
    .sum()
)

od_bid_groups_df = (
    pd.merge(
        od_by_origin_bid_df,
        blocks2bid_unique_df.rename(columns={'bid': 'd_bid'}),
        left_on='D_Block',
        right_on='GEOID',
    )
    .groupby(['o_bid', 'd_bid'], as_index=False)[bid_value_cols]
    .sum()
)

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,423768,423768,0.0,0.0,0.0
1,423768,423769,0.0,0.0,0.0
2,423768,423770,0.0,0.0,0.0
3,423768,423771,0.0,20.0,20.0
4,423768,423772,0.0,20.0,20.0


In [27]:
# Expand each spatial group's lower_ids array into one (sp_id, bid) row per member; both ids come back as text.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}' and spatial_name = '{spname}'
""".format(city=CITY, spname=SPATIAL_NAME)

blocks_spatial_df = pd.read_sql(sql=sql, con=engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,357031,424192
1,357032,424016
2,357012,424183
3,357013,424291
4,357014,424179


In [28]:
# One row per spatial group; `attract` is the NHB trip count that ends in one of the
# group's blocks but starts in a block outside the group.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')

# blocks_spatial_df.bid is selected as text, while o_bid / d_bid come from the uncast
# blocks_group.bid column. Cast the member ids to the OD table's dtype so that isin()
# compares like with like regardless of the pandas version.
bid_dtype = od_bid_groups_df['d_bid'].dtype
bids_by_spid = {
    spid: member_bids.astype(bid_dtype).values
    for spid, member_bids in blocks_spatial_df.groupby('sp_id')['bid']
}

attract_values = []
for spid in attract_df.index:
    bids = bids_by_spid.get(spid, [])  # a group without members attracts nothing
    ends_inside = od_bid_groups_df['d_bid'].isin(bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(bids)
    attract_values.append(float(od_bid_groups_df.loc[ends_inside & ~starts_inside, 'NHB'].sum()))

attract_df['attract'] = attract_values
attract_df = attract_df.reset_index()
attract_df.head()

Unnamed: 0,o_sp_id,attract
0,357009,21020.0
1,357010,2280.0
2,357011,4100.0
3,357012,8520.0
4,357013,15700.0


### Save "other" trips: outbound ("out") and internal ("in") counts per spatial group

In [29]:
# Work on all OD pairs (no origin == destination filter here).
# ntrips is just the total: NHB is deliberately not added on top, because they do not live here.
trips_other = (
    od_sp_groups_df[['o_sp_id', 'd_sp_id', 'NHB', 'tot']]
    .assign(ntrips=lambda frame: frame['tot'])
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,NHB,tot,ntrips
0,357009,357009,100.0,160.0,160.0
1,357009,357010,0.0,20.0,20.0
2,357009,357011,0.0,20.0,20.0
3,357009,357012,0.0,100.0,100.0
4,357009,357013,180.0,240.0,240.0


In [30]:
# Cross-check of the attraction figures at spatial-group level: NHB trips arriving in each
# destination group from a different origin group.
# NOTE: trips_attract is reassigned from attract_df in the next cell, so this result is
# only displayed here.
inbound_df = trips_other[trips_other.o_sp_id != trips_other.d_sp_id].copy()
inbound_df['attract'] = inbound_df['NHB']

# Sum only the numeric columns. Summing the string o_sp_id column as well would, on
# pandas 2.x, concatenate ids and clash with the rename of d_sp_id below.
trips_attract = (
    inbound_df
    .groupby('d_sp_id', as_index=False)[['NHB', 'ntrips', 'attract']]
    .sum()
    .rename(columns={'d_sp_id': 'o_sp_id'})
)
trips_attract.head()

Unnamed: 0,o_sp_id,NHB,ntrips,attract
0,357009,21020.0,57460.0,21020.0
1,357010,2280.0,8460.0,2280.0
2,357011,4100.0,8200.0,4100.0
3,357012,8520.0,17820.0,8520.0
4,357013,15700.0,37300.0,15700.0


In [31]:
# Use the block-level attraction figures from attract_df as the final trips_attract.
trips_attract = attract_df.loc[:, ['o_sp_id', 'attract']]

In [32]:
# Trips leaving each spatial group: pairs whose destination differs from the origin,
# summed per origin. Only the numeric columns are summed; the string d_sp_id column
# would be concatenated by groupby().sum() on pandas 2.x.
trips_out = (
    trips_other[trips_other.o_sp_id != trips_other.d_sp_id]
    .groupby('o_sp_id', as_index=False)[['NHB', 'ntrips']]
    .sum()
    .rename(columns={'ntrips': 'nout'})
)
trips_out.head()

Unnamed: 0,o_sp_id,NHB,nout
0,357009,32820.0,54460.0
1,357010,440.0,9340.0
2,357011,2280.0,8160.0
3,357012,6420.0,16580.0
4,357013,13540.0,39660.0


In [33]:
# Trips staying inside each spatial group: pairs whose origin equals the destination.
# Only the numeric columns are summed; the string d_sp_id column would be concatenated
# by groupby().sum() on pandas 2.x.
trips_in = (
    trips_other[trips_other.o_sp_id == trips_other.d_sp_id]
    .groupby('o_sp_id', as_index=False)[['NHB', 'ntrips']]
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,NHB,nin
0,357009,100.0,160.0
1,357010,0.0,0.0
2,357011,0.0,0.0
3,357012,0.0,60.0
4,357013,120.0,220.0


In [34]:
# Combine internal, outbound and attraction figures into one row per spatial group.
# Columns shared by trips_in and trips_out get the default _x / _y suffixes.
in_out_df = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_out_df, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,NHB_x,nin,NHB_y,nout
0,357009,21020.0,100.0,160.0,32820.0,54460.0
1,357010,2280.0,0.0,0.0,440.0,9340.0
2,357011,4100.0,0.0,0.0,2280.0,8160.0
3,357012,8520.0,0.0,60.0,6420.0,16580.0
4,357013,15700.0,120.0,220.0,13540.0,39660.0


In [35]:
# Stage the per-group figures in the scratch table `temptable3` (dropped and recreated
# on every run) so the following INSERT ... SELECT can read them.
df_all.to_sql('temptable3', engine, if_exists='replace', index=False)

In [36]:
# Copy the staged figures from temptable3 into spatial_groups_trips, tagging each row
# with the city and spatial name.
# NOTE(review): this is a plain INSERT - re-running the cell adds the rows again unless
# the table has a constraint preventing it; confirm before re-executing.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, '{city}', '{spname}', c.nin, c.nout, c.attract
FROM temptable3 c 
""".format(city=CITY, spname=SPATIAL_NAME)

# Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin() opens a connection,
# commits on success and rolls back on error.
with engine.begin() as conn:
    result = conn.execute(text(sql))

### Save OD

In [37]:
# Reshape the long OD table into a square matrix: origins as rows, destinations as
# columns, total trips as values.
ODs_matrix_df = od_sp_groups_df.pivot(
    index='o_sp_id',
    columns='d_sp_id',
    values='tot',
)
ODs_matrix_df.head()

d_sp_id,357009,357010,357011,357012,357013,357014,357015,357016,357017,357018,...,357554,357555,357556,357557,357558,357559,357560,357561,357562,357563
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
357009,160.0,20.0,20.0,100.0,240.0,20.0,0.0,0.0,0.0,40.0,...,0.0,60.0,60.0,0.0,20.0,20.0,0.0,220.0,160.0,220.0
357010,20.0,0.0,0.0,0.0,60.0,0.0,0.0,80.0,0.0,0.0,...,0.0,40.0,0.0,0.0,20.0,100.0,0.0,60.0,0.0,20.0
357011,60.0,0.0,0.0,20.0,60.0,0.0,0.0,0.0,0.0,0.0,...,320.0,140.0,20.0,0.0,0.0,20.0,0.0,20.0,0.0,0.0
357012,100.0,0.0,0.0,60.0,60.0,0.0,0.0,20.0,0.0,60.0,...,20.0,0.0,380.0,0.0,80.0,80.0,0.0,20.0,20.0,60.0
357013,240.0,40.0,40.0,120.0,220.0,20.0,0.0,0.0,0.0,60.0,...,40.0,40.0,180.0,20.0,80.0,240.0,0.0,240.0,0.0,220.0


In [38]:
# Append a constant `city` column after the destination-id columns, so every exported
# row carries the city. Note this makes the frame non-square and mixes a string column
# into the numeric matrix.
ODs_matrix_df['city'] = CITY

In [39]:
# Write the OD matrix (index = o_sp_id) to data/generated_files/<city>_<spname>_ODs.csv.
ODs_matrix_df.to_csv('../../data/generated_files/{city}_{spname}_ODs.csv'.format(city=CITY, spname=SPATIAL_NAME))