In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

# Read the Postgres connection settings (username, password, host, port, dbname).
with open('../../config/postgres.yaml') as f:
    # safe_load builds plain Python types only, which is all a settings file needs.
    engine_configs = yaml.safe_load(f)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() does not connect by itself; open and release one
    # connection so that bad credentials are reported here, not in a later cell.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)

In [3]:
def process_geometry_SQL_insert(gdf):
    """Return a copy of ``gdf`` whose ``geometry`` column is replaced by ``geom``.

    Each geometry is converted to a ``WKTElement`` with SRID 4326. Plain
    ``Polygon`` values are wrapped in a single-member ``MultiPolygon`` first;
    every other geometry type is passed through as-is.

    Parameters
    ----------
    gdf : frame with a ``geometry`` column of shapely geometries.

    Returns
    -------
    A new frame without ``geometry`` and with a ``geom`` column. The input
    frame is left untouched.
    """
    def _to_wkt_element(shape):
        # Promote single polygons so that all polygonal rows share one type.
        if shape.geom_type == 'Polygon':
            shape = MultiPolygon([shape])
        return WKTElement(shape.wkt, srid=4326)

    geom_values = gdf['geometry'].apply(_to_wkt_element)
    # Keyword form: the positional axis argument of drop() is gone in pandas 2.0.
    out = gdf.drop(columns='geometry')
    out['geom'] = geom_values
    return out

In [5]:
# City and spatial grouping scheme; both are interpolated into the SQL filters
# and into the output file name further down.
CITY, SPATIAL_NAME = 'LA', 'core'

In [6]:
# Read the census-tract layer straight from the zip archive and keep only the
# geometry and the tract identifier.
tracts_gdf = gpd.read_file('zip://../../data/LA/mobile-phone/census_tracts.zip')[['geometry', 'GEOID']]
tracts_gdf.head()

Unnamed: 0,geometry,GEOID
0,"POLYGON ((-118.30229 34.25870, -118.30079 34.2...",6037101110
1,"POLYGON ((-118.30333 34.27354, -118.30318 34.2...",6037101122
2,"POLYGON ((-118.29945 34.25598, -118.28592 34.2...",6037101210
3,"POLYGON ((-118.28592 34.24896, -118.28592 34.2...",6037101220
4,"POLYGON ((-118.27247 34.23253, -118.27194 34.2...",6037101300


In [8]:
# One row per (block group, spatial group) pair for the selected city/scheme.
sql_template = """
SELECT b.original_id, bid, sp_id
FROM blocks_group b
INNER JOIN spatial_groups as sp on b.bid = ANY(sp.lower_ids)
WHERE b.city='{city}' and sp.spatial_name = '{spname}'
"""
sql = sql_template.format(city=CITY, spname=SPATIAL_NAME)

blocks2spid_df = pd.read_sql(sql, con=engine)
# The first 11 characters of original_id are used as the tract GEOID.
blocks2spid_df['GEOID'] = blocks2spid_df['original_id'].str.slice(0, 11)
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID
0,60372093002,462007,357692,6037209300
1,60372396011,459767,357577,6037239601
2,60372118041,452337,357693,6037211804
3,60371832212,461687,357578,6037183221
4,60371920022,472405,357730,6037192002


In [10]:
# Number of distinct bid values that share each tract GEOID.
njoins_df = (
    blocks2spid_df[['bid', 'GEOID']]
    .drop_duplicates()
    .groupby('GEOID')
    .size()
    .reset_index(name='count')
)
njoins_df.head()

Unnamed: 0,GEOID,count
0,6037101110,3
1,6037101122,2
2,6037101210,2
3,6037101220,2
4,6037101300,4


In [11]:
# Attach the per-tract bid count to every row.
# Dropping a pre-existing 'count' first keeps the cell re-runnable: without it
# a second run would produce count_x / count_y and break later ['count'] lookups.
blocks2spid_df = (
    blocks2spid_df
    .drop(columns='count', errors='ignore')
    .merge(njoins_df, on='GEOID')
)
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID,count
0,60372093002,462007,357692,6037209300,2
1,60372093001,472523,357923,6037209300,2
2,60372396011,459767,357577,6037239601,2
3,60372396012,465222,357899,6037239601,2
4,60372118041,452337,357693,6037211804,2


In [12]:
# Spot-check a single tract.
blocks2spid_df.loc[blocks2spid_df['GEOID'] == '06037980028']

Unnamed: 0,original_id,bid,sp_id,GEOID,count
2173,60379800281,472486,358967,6037980028,1


In [13]:
# One row per (tract, spatial group) pair, carrying the tract's bid count along.
blocks2spid_unique_df = (
    blocks2spid_df[['GEOID', 'sp_id', 'count']]
    .drop_duplicates(subset=['GEOID', 'sp_id'])
)
blocks2spid_unique_df.head()

Unnamed: 0,GEOID,sp_id,count
0,6037209300,357692,2
1,6037209300,357923,2
2,6037239601,357577,2
3,6037239601,357899,2
4,6037211804,357693,2


In [14]:
# Spot-check a tract that maps to more than one spatial group.
blocks2spid_unique_df.loc[blocks2spid_unique_df['GEOID'] == '06037262302']

Unnamed: 0,GEOID,sp_id,count
1152,6037262302,358084,2
1153,6037262302,359396,2


## ODs

In [15]:
# Open the travel-demand archive read-only and list its members.
zip_file = ZipFile('../../data/LA/mobile-phone/travel_demand_LA.zip', mode='r')
zip_file.infolist()

[<ZipInfo filename='travel_demand_LA.csv' compress_type=deflate filemode='-rw-rw-r--' file_size=991736528 compress_size=149183206>]

In [16]:
# Columns literally named '0' ... '23' (presumably one per hour of day — TODO confirm);
# their row-wise sum becomes 'tot'.
hour_cols = [str(x) for x in range(0, 24)]
# Coordinate columns are never used downstream, so they are skipped at parse time.
coord_cols = ['lon1', 'lat1', 'lon2', 'lat2']

types = {col: np.float32 for col in hour_cols}
# Tract ids must stay strings so that leading zeros survive.
types['O_Tract'] = str
types['D_Tract'] = str
for col in ['HBW', 'HBO', 'NHB'] + coord_cols:
    types[col] = np.float32

# Context manager closes the archive member handle once the CSV is parsed.
with zip_file.open('travel_demand_LA.csv') as csv_file:
    travel_df = pd.read_csv(csv_file, dtype=types, usecols=lambda col: col not in coord_cols)

travel_df['tot'] = travel_df[hour_cols].sum(axis=1)
travel_df = travel_df[['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
0,6037406200,6037264102,0.0,0.0,0.0,0.0
1,6037535101,6037570702,0.0,0.0,0.0,0.0
2,6037141202,6037139401,0.0,0.0,0.0,0.0
3,6037500600,6037601802,0.0,0.0,0.0,0.0
4,6037195100,6037405701,0.0,0.0,0.0,0.0


In [17]:
# Keep only the tract pair and the trip-count columns.
travel_df = travel_df.loc[:, ['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]

In [18]:
# Drop tract pairs for which every trip-count column is zero.
has_trips = (travel_df[['HBW', 'HBO', 'NHB', 'tot']] != 0).any(axis=1)
travel_df = travel_df[has_trips]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
14,6037703100,6037651304,0.0,20.0,0.0,20.0
30,6037141400,6037800327,0.0,0.0,20.0,20.0
42,6037206200,6037212203,0.0,20.0,40.0,60.0
46,6037534501,6037534900,0.0,20.0,0.0,20.0
50,6037402600,6037407802,0.0,20.0,0.0,20.0


In [19]:
# Re-key the origin side from census tract to spatial group (o_sp_id).
origin_groups = blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'})
od_sp_groups_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(origin_groups, left_on='O_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
# NOTE(review): only 'tot' is divided by the tract's bid count; HBO and NHB are
# summed undivided — confirm this is intended.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'O_Tract'])
    .groupby(['o_sp_id', 'D_Tract'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,D_Tract,HBO,NHB,tot
0,357564,6037104310,0.0,20.0,5.0
1,357564,6037111301,20.0,0.0,5.0
2,357564,6037115101,0.0,40.0,10.0
3,357564,6037115201,20.0,0.0,5.0
4,357564,6037124700,0.0,40.0,10.0


In [20]:
# Re-key the destination side from census tract to spatial group (d_sp_id).
destination_groups = blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'})
od_sp_groups_df = (
    od_sp_groups_df
    .merge(destination_groups, left_on='D_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
# As on the origin side, only 'tot' is divided by the tract's bid count.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'D_Tract'])
    .groupby(['o_sp_id', 'd_sp_id'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,357564,357564,40.0,0.0,2.5
1,357564,357574,40.0,0.0,5.0
2,357564,357587,20.0,0.0,1.25
3,357564,357593,20.0,0.0,2.5
4,357564,357599,20.0,0.0,2.5


In [21]:
# Distinct spatial-group ids as strings, in lexicographic order.
all_sp_ids = sorted(str(sp_id) for sp_id in set(blocks2spid_df['sp_id'].values))

### Fix missing links

In [22]:
import itertools
# Every ordered (origin, destination) pair of spatial groups.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Reindex onto the full pair grid; pairs with no rows get 0 in every column.
od_sp_groups_df = (
    od_sp_groups_df
    .astype({'o_sp_id': str, 'd_sp_id': str})
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,357564,357564,40.0,0.0,2.5
1,357564,357565,0.0,0.0,0.0
2,357564,357566,0.0,0.0,0.0
3,357564,357567,0.0,0.0,0.0
4,357564,357568,0.0,0.0,0.0


In [23]:
# Sanity check: which pairs have a zero total?
od_sp_groups_df.loc[od_sp_groups_df['tot'] == 0].head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
1,357564,357565,0.0,0.0,0.0
2,357564,357566,0.0,0.0,0.0
3,357564,357567,0.0,0.0,0.0
4,357564,357568,0.0,0.0,0.0
5,357564,357569,0.0,0.0,0.0


## Extras: trips with exactly one endpoint outside the spatial groups

In [24]:
# Left-join both endpoints to spatial groups so that tracts without a group
# stay in the frame with NaN ids. The two 'count' columns come out as
# count_x (origin side) and count_y (destination side).
od_extra_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'}),
        left_on='O_Tract', right_on='GEOID', how='left',
    )
    .drop(columns=['GEOID'])
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'}),
        left_on='D_Tract', right_on='GEOID', how='left',
    )
    .drop(columns=['GEOID'])
)
od_extra_df.head()

Unnamed: 0,O_Tract,D_Tract,HBO,NHB,tot,o_sp_id,count_x,d_sp_id,count_y
0,6037703100,6037651304,20.0,0.0,20.0,,,,
1,6037141400,6037800327,0.0,20.0,20.0,358256.0,3.0,,
2,6037141400,6037800327,0.0,20.0,20.0,359817.0,3.0,,
3,6037141400,6037800327,0.0,20.0,20.0,359981.0,3.0,,
4,6037206200,6037212203,20.0,40.0,60.0,358540.0,2.0,359833.0,2.0


In [25]:
# Keep only trips where exactly one endpoint has no spatial group.
o_missing = od_extra_df['o_sp_id'].isnull()
d_missing = od_extra_df['d_sp_id'].isnull()
od_extra_df = od_extra_df[o_missing ^ d_missing]

# from out to LA
# Sum only the numeric trip columns: a blanket .sum() would also try to
# aggregate the string tract ids, which pandas >= 2.0 concatenates.
in_extra_df = (
    od_extra_df[od_extra_df['o_sp_id'].isnull()]
    .groupby('d_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left join turned the ids into floats; go through int to get clean strings.
in_extra_df['d_sp_id'] = in_extra_df['d_sp_id'].astype(int).astype(str)
# NOTE(review): unlike the in-area OD cells, 'tot' is not divided by the
# tract's bid count here — confirm this is intended.
in_extra_df['ntrips'] = in_extra_df['tot']
in_extra_df.head()

Unnamed: 0,d_sp_id,HBO,NHB,tot,ntrips
0,357564,3200.0,760.0,4200.0,4200.0
1,357565,1300.0,1140.0,2680.0,2680.0
2,357566,620.0,620.0,1320.0,1320.0
3,357567,1180.0,860.0,2140.0,2140.0
4,357568,560.0,340.0,1860.0,1860.0


In [26]:
# from LA to out
# Sum only the numeric trip columns: a blanket .sum() would also try to
# aggregate the string tract ids, which pandas >= 2.0 concatenates.
out_extra_df = (
    od_extra_df[od_extra_df['d_sp_id'].isnull()]
    .groupby('o_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left join turned the ids into floats; go through int to get clean strings.
out_extra_df['o_sp_id'] = out_extra_df['o_sp_id'].astype(int).astype(str)
out_extra_df['ntrips'] = out_extra_df['tot']
out_extra_df.head()

Unnamed: 0,o_sp_id,HBO,NHB,tot,ntrips
0,357564,3320.0,1080.0,4400.0,4400.0
1,357565,1220.0,480.0,2780.0,2780.0
2,357566,820.0,180.0,1600.0,1600.0
3,357567,1160.0,460.0,2060.0,2060.0
4,357568,580.0,1120.0,1860.0,1860.0


## Blocks_attract

In [27]:
# One row per bid with its tract GEOID and the tract's bid count.
blocks2bid_unique_df = (
    blocks2spid_df[['bid', 'GEOID', 'count']]
    .drop_duplicates(subset=['bid'])
)
blocks2bid_unique_df.head()

Unnamed: 0,bid,GEOID,count
0,462007,6037209300,2
1,472523,6037209300,2
2,459767,6037239601,2
3,465222,6037239601,2
4,452337,6037211804,2


In [28]:
# Origin side: tract -> bid, with 'tot' divided by the tract's bid count.
origin_bids = blocks2bid_unique_df.rename(columns={'bid': 'o_bid'})
od_bid_groups_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(origin_bids, left_on='O_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
od_bid_groups_df.loc[:, 'tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = (
    od_bid_groups_df
    .drop(columns=['count', 'O_Tract'])
    .groupby(['o_bid', 'D_Tract'], as_index=False)
    .sum()
)

# Destination side: same treatment, keyed on d_bid.
destination_bids = blocks2bid_unique_df.rename(columns={'bid': 'd_bid'})
od_bid_groups_df = (
    od_bid_groups_df
    .merge(destination_bids, left_on='D_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
od_bid_groups_df.loc[:, 'tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = (
    od_bid_groups_df
    .drop(columns=['count', 'D_Tract'])
    .groupby(['o_bid', 'd_bid'], as_index=False)
    .sum()
)

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,452040,452040,40.0,20.0,6.666667
1,452040,452062,40.0,0.0,4.444444
2,452040,452130,20.0,0.0,3.333333
3,452040,452172,0.0,20.0,2.222222
4,452040,452185,0.0,0.0,6.666667


In [29]:
# Expand each spatial group's lower_ids array into one (sp_id, bid) row, both as text.
sql_template = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}' and spatial_name = '{spname}'
"""
sql = sql_template.format(city=CITY, spname=SPATIAL_NAME)

blocks_spatial_df = pd.read_sql(sql, con=engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,357577,459767
1,357692,462007
2,357693,452337
3,357730,472405
4,357731,454419


In [30]:
# One row per spatial group, indexed by id, with the attraction initialised to 0.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')
attract_df['attract'] = 0.

for spid in attract_df.index.values:
    # bids that belong to this spatial group
    member_bids = blocks_spatial_df.loc[blocks_spatial_df['sp_id'] == spid, 'bid'].values
    ends_inside = od_bid_groups_df['d_bid'].isin(member_bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(member_bids)
    # NHB total over rows that end inside the group but start outside it.
    # NOTE(review): member_bids are text (cast in SQL); confirm d_bid/o_bid
    # have a matching dtype, otherwise isin() may not match as expected.
    attract_df.loc[spid, 'attract'] = od_bid_groups_df.loc[ends_inside & ~starts_inside, 'NHB'].sum()

attract_df = attract_df.reset_index()
attract_df.head()

Unnamed: 0,o_sp_id,attract
0,357564,1620.0
1,357565,2440.0
2,357566,3260.0
3,357567,5100.0
4,357568,4120.0


### Save per-group trip totals: within-group ("in"), leaving the group ("out"), and attraction

In [31]:
# Same OD rows, with 'tot' carried under the name 'ntrips'.
trips_other = (
    od_sp_groups_df[['o_sp_id', 'd_sp_id', 'NHB', 'tot']]
    .rename(columns={'tot': 'ntrips'})
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,NHB,ntrips
0,357564,357564,0.0,2.5
1,357564,357565,0.0,0.0
2,357564,357566,0.0,0.0
3,357564,357567,0.0,0.0
4,357564,357568,0.0,0.0


In [32]:
# NHB total per destination group, excluding rows whose origin is that same group.
# The destination id is relabelled 'o_sp_id' so it can be joined on later.
trips_attract = (
    trips_other[trips_other['o_sp_id'] != trips_other['d_sp_id']]
    .rename(columns={'NHB': 'attract'})
    .drop(columns='o_sp_id')
    .groupby('d_sp_id', as_index=False)
    .sum()
    .rename(columns={'d_sp_id': 'o_sp_id'})
    .drop(columns='ntrips')
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,357564,1620.0
1,357565,2440.0
2,357566,3260.0
3,357567,5100.0
4,357568,4120.0


In [33]:
# Add the NHB totals of trips arriving from tracts without a spatial group.
inbound_attract = in_extra_df.rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})[['o_sp_id', 'attract']]
trips_attract = (
    pd.concat([trips_attract, inbound_attract])
    .groupby('o_sp_id', as_index=False)
    .sum()
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,357564,2380.0
1,357565,3580.0
2,357566,3880.0
3,357567,5960.0
4,357568,4460.0


In [34]:
# NOTE(review): this replaces the trips_attract built in the two previous cells
# with the bid-level attract_df values — confirm the override is intended.
trips_attract = attract_df.reset_index().loc[:, ['o_sp_id', 'attract']]

In [35]:
# Trips leaving each group: rows to a different group, plus rows whose
# destination tract has no spatial group.
leaves_group = trips_other['o_sp_id'] != trips_other['d_sp_id']
trips_out = pd.concat([
    trips_other.loc[leaves_group, ['o_sp_id', 'NHB', 'ntrips']],
    out_extra_df[['o_sp_id', 'NHB', 'ntrips']],
])
trips_out = (
    trips_out
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nout'})
    .drop(columns=['NHB'])
)
trips_out.head()

Unnamed: 0,o_sp_id,nout
0,357564,5107.5
1,357565,3563.75
2,357566,5480.0
3,357567,3575.0
4,357568,3920.0


In [36]:
# Trips that start and end in the same spatial group.
same_group = trips_other['o_sp_id'] == trips_other['d_sp_id']
# Select the key and the value up front: a blanket .sum() would also aggregate
# the string d_sp_id column, and whether that column survives depends on the
# pandas version.
trips_in = (
    trips_other.loc[same_group, ['o_sp_id', 'ntrips']]
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,nin
0,357564,2.5
1,357565,6.25
2,357566,0.0
3,357567,0.0
4,357568,40.0


In [37]:
# One row per spatial group: attract, nin, nout.
in_and_out = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_and_out, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,357564,1620.0,2.5,5107.5
1,357565,2440.0,6.25,3563.75
2,357566,3260.0,0.0,5480.0
3,357567,5100.0,0.0,3575.0
4,357568,4120.0,40.0,3920.0


In [38]:
# Rebuilds df_all from the same three inputs as the previous cell.
df_all = trips_attract.merge(
    trips_in.merge(trips_out, on='o_sp_id'),
    on='o_sp_id',
)
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,357564,1620.0,2.5,5107.5
1,357565,2440.0,6.25,3563.75
2,357566,3260.0,0.0,5480.0
3,357567,5100.0,0.0,3575.0
4,357568,4120.0,40.0,3920.0


In [39]:
# Stage the per-group totals in a scratch table, overwriting any earlier copy.
df_all.to_sql(name='temptable3', con=engine, if_exists='replace', index=False)

In [40]:
# Copy the staged totals into spatial_groups_trips.
# City and scheme travel as bound parameters instead of being spliced into the
# SQL text; the '::int' cast is left alone by text().
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, :city, :spname, c.nin, c.nout, c.attract
FROM temptable3 c 
"""

# Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin() works on
# both 1.x and 2.x and commits on successful exit.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY, 'spname': SPATIAL_NAME})

### Save OD

In [41]:
# Wide OD matrix: one row per origin group, one column per destination group,
# cell value = 'tot'.
ODs_matrix_df = (
    od_sp_groups_df
    .copy()
    .pivot(index='o_sp_id', columns='d_sp_id', values='tot')
)
ODs_matrix_df.head()

d_sp_id,357564,357565,357566,357567,357568,357569,357570,357571,357572,357573,...,360061,360062,360063,360064,360065,360066,360067,360068,360069,360070
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
357564,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
357565,0.0,6.25,0.0,1.25,0.0,0.0,30.0,2.5,0.0,0.0,...,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
357566,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
357567,0.0,1.25,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,...,0.0,0.0,3.75,0.0,2.5,0.0,0.0,0.0,0.0,0.0
357568,0.0,0.0,0.0,0.0,40.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
# Tag every row with the city. Uses the CITY constant rather than a literal so
# the column stays in step with the SQL filters and the output file name.
ODs_matrix_df['city'] = CITY

In [43]:
# Write the OD matrix to a CSV named after the city and the spatial scheme.
ods_csv_path = '../../data/generated_files/{city}_{spname}_ODs.csv'.format(city=CITY, spname=SPATIAL_NAME)
ODs_matrix_df.to_csv(ods_csv_path)