In [52]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [53]:
import yaml

# Load the PostgreSQL connection settings. The YAML document must provide the
# keys used by the URL template below: username, password, host, port, dbname.
# NOTE(review): yaml.safe_load would be sufficient for a plain key/value config.
with open('../../config/postgres.yaml') as f:
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() is lazy and does not open a connection, so check one out
    # (and release it straight away) to surface bad credentials in this cell.
    engine.connect().close()
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Re-raise instead of continuing: every later cell needs a working engine,
    # and swallowing the error here only postpones the failure.
    raise

In [54]:
def process_geometry_SQL_insert(gdf):
    """Prepare a frame with a ``geometry`` column for insertion into PostGIS.

    Each geometry is serialised to WKT and wrapped in a ``WKTElement`` with
    SRID 4326. Values whose ``geom_type`` is ``'Polygon'`` are first wrapped in
    a single-part ``MultiPolygon``; every other geometry is passed through
    unchanged. The wrapped values are stored in a new ``geom`` column and the
    original ``geometry`` column is removed.

    Parameters
    ----------
    gdf : DataFrame-like
        Must contain a ``geometry`` column whose values expose ``geom_type``
        and ``wkt`` attributes.

    Returns
    -------
    DataFrame-like
        A new frame with ``geom`` in place of ``geometry``. The input frame is
        left untouched.
    """
    def _to_wkt_element(shape):
        # Promote plain polygons so that all rows share one geometry type.
        if shape.geom_type == 'Polygon':
            shape = MultiPolygon([shape])
        return WKTElement(shape.wkt, srid=4326)

    # Work on a copy: the caller's frame (often a column slice of another
    # frame) must not gain a 'geom' column as a side effect.
    out = gdf.copy()
    out['geom'] = out['geometry'].apply(_to_wkt_element)
    # Keyword form: the positional ``axis`` argument of drop() is no longer
    # accepted by recent pandas releases.
    return out.drop(columns='geometry')

In [55]:
# City key: substituted into the SQL filters and INSERT below, and used as the
# prefix of the exported OD CSV file name.
CITY = 'bogota'
# Value matched against spatial_groups.spatial_name in the SQL queries below;
# also part of the exported OD CSV file name.
SPATIAL_NAME = 'core'

## ODs

In [56]:
# Read the barrio GeoJSON and keep only the shape and its OBJECTID key.
tracts_gdf = gpd.read_file(
    '../../data/bogota/mobile-phone/bogota_barrios_cadastrales.geojson'
)[['geometry', 'OBJECTID']]
tracts_gdf.head()

Unnamed: 0,geometry,OBJECTID
0,"POLYGON ((-74.07088 4.83141, -74.07087 4.83138...",1
1,"POLYGON ((-74.05806 4.82926, -74.05781 4.82925...",2
2,"POLYGON ((-74.03441 4.82510, -74.03432 4.82544...",3
3,"POLYGON ((-74.09394 4.79092, -74.09387 4.79088...",4
4,"POLYGON ((-74.04113 4.78365, -74.04077 4.78343...",5


In [57]:
# Convert the shapes to WKT elements, then stage them in `temptable`
# (replaced on every run) with a typed MultiPolygon geometry column.
ins_gdf = process_geometry_SQL_insert(tracts_gdf)
ins_gdf.to_sql(
    'temptable',
    engine,
    if_exists='replace',
    index=False,
    dtype={'geom': Geometry('MultiPolygon', srid=4326)},
)

In [58]:
# Build `temptable2`: for every (bid, sp_id) pair keep the single staged
# polygon (OBJECTID, exposed as `oid`) with the largest intersection area.
# Polygons that merely touch a block are excluded by the join condition.
sql = """
DROP TABLE IF EXISTS temptable2;
CREATE TABLE temptable2 AS 
SELECT oid, bid, sp_id
FROM (
    SELECT oid, bid, sp_id, ROW_NUMBER() OVER (PARTITION BY bid, sp_id ORDER BY area DESC) AS r
    from (
        SELECT ST_Area(ST_Intersection(s.geom, b.geom)::geography) as area, b.\"OBJECTID\" as oid, s.bid, sp.sp_id
        FROM blocks_group as s
        INNER JOIN temptable as b on ST_Intersects(s.geom, b.geom) AND NOT ST_Touches(s.geom, b.geom)
        INNER JOIN spatial_groups as sp on s.bid = ANY(sp.lower_ids)
        WHERE s.city = '{city}' and sp.spatial_name = '{spname}'
        ) as dtable
    order by area
) x
WHERE x.r = 1;
""".format(city=CITY, spname=SPATIAL_NAME)

# Run the DDL inside an explicit transaction that commits on success.
# Engine.execute() (connectionless execution with implicit autocommit) is
# deprecated and no longer exists in SQLAlchemy 2.0.
with engine.begin() as conn:
    result = conn.execute(text(sql))

In [59]:
# Pull the oid -> (bid, sp_id) assignment back as text columns.
sql = """
SELECT oid::text, bid::text, sp_id::text FROM temptable2
"""

blocks2spid_df = pd.read_sql(sql, engine)

# `count` = number of distinct blocks (bid) assigned to each oid; it is used
# further down as the divisor when trips are split across those blocks.
njoins_df = (
    blocks2spid_df[['bid', 'oid']]
    .drop_duplicates()
    .groupby('oid')
    .size()
    .reset_index(name='count')
)
blocks2spid_df = blocks2spid_df.merge(njoins_df, on='oid')
blocks2spid_df.head()

Unnamed: 0,oid,bid,sp_id,count
0,868,499995,356787,1
1,873,499998,356966,2
2,873,499534,356389,2
3,879,499358,356304,1
4,922,499811,356771,2


In [60]:
# One row per (oid, sp_id) pair, carrying the per-oid block count along.
# NOTE(review): `count` counts distinct bids per oid, not distinct sp_ids; if
# two bids of one oid ever share an sp_id, this de-duplication keeps a single
# row while the divisor still reflects both - confirm this cannot happen.
blocks2spid_unique_df = blocks2spid_df[['oid', 'sp_id', 'count']].drop_duplicates(
    subset=['oid', 'sp_id']
)
blocks2spid_unique_df.head()

Unnamed: 0,oid,sp_id,count
0,868,356787,1
1,873,356966,2
2,873,356389,2
3,879,356304,1
4,922,356771,2


In [61]:
# Number of distinct block ids present in the mapping.
len(blocks2spid_df['bid'].unique())

916

In [62]:
# Open the zipped travel-demand CSV; the handle is reused by the next cell to
# read the archive member. Listing the members shows what the archive contains.
zip_file = ZipFile('../../data/bogota/mobile-phone/travel_demand_Bogota.csv.zip')
zip_file.infolist()

[<ZipInfo filename='travel_demand_Bogota.csv' compress_type=deflate filemode='-rw-r--r--' external_attr=0x4000 file_size=150133682 compress_size=23376029>]

In [63]:
# The 24 hourly columns are named '0'..'23' and are read as float32; the block
# identifiers are kept as strings so they can be joined on the text `oid` key.
hour_cols = [str(x) for x in range(0, 24)]
types = {col: np.float32 for col in hour_cols}
types['O_Block'] = str
types['D_Block'] = str

# Read the archive member through a context manager so its handle is closed
# once parsing is done (it was previously left open).
with zip_file.open('travel_demand_Bogota.csv') as csv_file:
    travel_df = pd.read_csv(csv_file, dtype=types)

# The endpoint coordinates are not used downstream.
travel_df = travel_df.drop(columns=['lon1', 'lat1', 'lon2', 'lat2'])
# Daily total = sum over the 24 hourly columns.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)

travel_df.head()

Unnamed: 0,O_Block,D_Block,HBW,HBO,NHB,0,1,2,3,4,...,15,16,17,18,19,20,21,22,23,tot
0,103,290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,253,415,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,461,224,0.0,0.0,3.075779,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.53789,0.0,0.0,0.0,0.0,3.075779
3,268,494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,788,695,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
# Row count of the de-duplicated (oid, sp_id) mapping.
blocks2spid_unique_df.shape[0]

916

In [65]:
# Barrios to blockgroup, origin side: attach the origin zone (o_sp_id) to every
# OD row by joining O_Block on oid.
od_sp_groups_df = (
    travel_df[['O_Block', 'D_Block', 'HBO', 'NHB', 'tot']]
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'}),
        left_on='O_Block',
        right_on='oid',
    )
    .drop(columns=['oid'])
)
# Split the total evenly across the blocks matched to the origin.
# NOTE(review): only `tot` is divided by `count`; HBO and NHB are carried over
# undivided - confirm that this is intended.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'O_Block'])
    .groupby(['o_sp_id', 'D_Block'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,D_Block,HBO,NHB,tot
0,356093,1,0.0,0.0,0.0
1,356093,10,0.0,0.0,0.0
2,356093,100,0.0,0.0,0.0
3,356093,101,0.0,0.0,0.0
4,356093,102,0.0,0.0,0.0


In [66]:
# Compare the aggregated row count with the raw OD row count.
(od_sp_groups_df.shape[0], travel_df.shape[0])

(725040, 731025)

In [67]:
# Destination side: attach the destination zone (d_sp_id) by joining D_Block on
# oid, split `tot` by the per-oid block count, then sum per zone pair.
od_sp_groups_df = (
    od_sp_groups_df
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'}),
        left_on='D_Block',
        right_on='oid',
    )
    .drop(columns=['oid'])
)
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'D_Block'])
    .groupby(['o_sp_id', 'd_sp_id'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,356093,356093,0.0,0.0,0.0
1,356093,356094,0.0,0.0,0.0
2,356093,356095,0.0,0.0,0.0
3,356093,356096,0.0,0.0,0.0
4,356093,356097,0.0,0.0,0.0


In [68]:
# Sorted list of the distinct zone ids (as strings) found in the mapping.
all_sp_ids = sorted(map(str, set(blocks2spid_df['sp_id'].values)))

### Fix missing links

In [69]:
import itertools

# Every ordered (origin, destination) pair of zones, including the diagonal.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Re-index onto the full grid of pairs; pairs missing from the data get 0.
od_sp_groups_df = (
    od_sp_groups_df
    .astype({'o_sp_id': str, 'd_sp_id': str})
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,356093,356093,0.0,0.0,0.0
1,356093,356094,0.0,0.0,0.0
2,356093,356095,0.0,0.0,0.0
3,356093,356096,0.0,0.0,0.0
4,356093,356097,0.0,0.0,0.0


In [70]:
# Sanity check: peek at the zone pairs whose total is exactly 0.
od_sp_groups_df.loc[od_sp_groups_df['tot'] == 0].head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,356093,356093,0.0,0.0,0.0
1,356093,356094,0.0,0.0,0.0
2,356093,356095,0.0,0.0,0.0
3,356093,356096,0.0,0.0,0.0
4,356093,356097,0.0,0.0,0.0


## Blocks_attract

In [71]:
# One row per (oid, bid) pair, carrying the per-oid block count along.
blocks2bid_unique_df = blocks2spid_df[['oid', 'bid', 'count']].drop_duplicates(
    subset=['oid', 'bid']
)
blocks2bid_unique_df.head()

Unnamed: 0,oid,bid,count
0,868,499995,1
1,873,499998,2
2,873,499534,2
3,879,499358,1
4,922,499811,2


In [72]:
def _attach_block_side(od_df, block_col, bid_col):
    """Join `block_col` of `od_df` on oid, add the block id as `bid_col`, and
    divide `tot` by the per-oid block count. `block_col` and the helper columns
    are dropped from the result."""
    lookup = blocks2bid_unique_df.rename(columns={'bid': bid_col})
    joined = pd.merge(od_df, lookup, left_on=block_col, right_on='oid').drop(['oid'], axis=1)
    joined.loc[:, 'tot'] = joined['tot'] / joined['count']
    return joined.drop(['count', block_col], axis=1)


# Origin side first, aggregated per (origin block, destination barrio) ...
od_bid_groups_df = _attach_block_side(
    travel_df[['O_Block', 'D_Block', 'HBO', 'NHB', 'tot']], 'O_Block', 'o_bid'
)
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'D_Block'], as_index=False).sum()

# ... then the destination side, aggregated per (origin block, destination block).
od_bid_groups_df = _attach_block_side(od_bid_groups_df, 'D_Block', 'd_bid')
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'd_bid'], as_index=False).sum()

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,499323,499323,0.0,0.0,0.0
1,499323,499324,1.53789,0.0,1.53789
2,499323,499325,3.075779,1.53789,4.613669
3,499323,499326,0.0,0.0,0.0
4,499323,499327,1.53789,0.0,1.53789


In [73]:
# Expand spatial_groups.lower_ids so that each row pairs one zone (sp_id) with
# one of its member ids (exposed as `bid`), both cast to text, restricted to the
# configured city and spatial name.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}' and spatial_name='{spname}'
""".format(city=CITY, spname=SPATIAL_NAME)

blocks_spatial_df = pd.read_sql(sql, engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,357008,500186
1,357007,499554
2,357006,499474
3,357005,499872
4,357004,500011


In [74]:
# One row per zone, indexed by its id.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')

# For each zone, sum the NHB trips that end in one of the zone's blocks but
# start in a block outside the zone.
inbound_nhb = []
for spid in attract_df.index.values:
    member_bids = blocks_spatial_df.loc[blocks_spatial_df['sp_id'] == spid, 'bid'].values
    ends_inside = od_bid_groups_df['d_bid'].isin(member_bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(member_bids)
    inbound_nhb.append(od_bid_groups_df.loc[ends_inside & ~starts_inside, 'NHB'].sum())

attract_df['attract'] = pd.Series(inbound_nhb, index=attract_df.index, dtype='float64')

attract_df.head()

Unnamed: 0_level_0,attract
o_sp_id,Unnamed: 1_level_1
356093,5028.899424
356094,4373.758398
356095,1434.85112
356096,213.766673
356097,0.0


### Save "other" trips: trips leaving each zone ("out") and trips staying within it ("in")

In [75]:
# Working copy of the zone-level OD table; `ntrips` mirrors the `tot` column.
trips_other = od_sp_groups_df[['o_sp_id', 'd_sp_id', 'tot', 'NHB']].assign(
    ntrips=od_sp_groups_df['tot']
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,tot,NHB,ntrips
0,356093,356093,0.0,0.0,0.0
1,356093,356094,0.0,0.0,0.0
2,356093,356095,0.0,0.0,0.0
3,356093,356096,0.0,0.0,0.0
4,356093,356097,0.0,0.0,0.0


In [76]:
# Zone-level attraction: NHB trips summed per destination zone, ignoring trips
# that start and end in the same zone. The destination id is exposed as
# `o_sp_id` so the frame can be joined with the per-origin tables.
# Only the key and the summed column are selected before grouping: summing a
# frame that still holds the string id column is silently dropped by older
# pandas but concatenated by pandas >= 2.0, which then yields a duplicate
# `o_sp_id` column after the rename.
# NOTE: the next cell reassigns `trips_attract` from `attract_df`.
inter_zone = trips_other['o_sp_id'] != trips_other['d_sp_id']
trips_attract = (
    trips_other.loc[inter_zone, ['d_sp_id', 'NHB']]
    .rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})
    .groupby('o_sp_id', as_index=False)['attract']
    .sum()
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,356093,5028.899424
1,356094,4373.758398
2,356095,1434.85112
3,356096,213.766673
4,356097,0.0


In [77]:
# Replace the zone-level estimate from the previous cell with the block-level
# attraction computed in `attract_df`.
trips_attract = attract_df.reset_index()[['o_sp_id', 'attract']]

In [78]:
# Trips leaving each zone: rows whose destination differs from the origin,
# summed per origin zone. `ntrips` is exposed as `nout`.
# The numeric columns are selected explicitly before grouping so that the
# string `d_sp_id` column never enters sum() (older pandas dropped it silently,
# pandas >= 2.0 would concatenate the ids into one long string).
leaves_zone = trips_other['o_sp_id'] != trips_other['d_sp_id']
trips_out = (
    trips_other.loc[leaves_zone, ['o_sp_id', 'tot', 'ntrips']]
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nout'})
)
trips_out.head()

Unnamed: 0,o_sp_id,tot,nout
0,356093,7358.802408,7358.802408
1,356094,13507.285571,13507.285571
2,356095,2563.662185,2563.662185
3,356096,327.570521,327.570521
4,356097,0.0,0.0


In [79]:
# Trips staying within each zone: rows whose origin and destination coincide,
# summed per zone. `ntrips` is exposed as `nin`.
# The numeric columns are selected explicitly before grouping so that the
# string `d_sp_id` column never enters sum() (older pandas dropped it silently,
# pandas >= 2.0 would concatenate the ids into one long string).
stays_in_zone = trips_other['o_sp_id'] == trips_other['d_sp_id']
trips_in = (
    trips_other.loc[stays_in_zone, ['o_sp_id', 'tot', 'ntrips']]
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,tot,nin
0,356093,0.0,0.0
1,356094,0.0,0.0
2,356095,0.0,0.0
3,356096,0.0,0.0
4,356097,0.0,0.0


In [80]:
# Combine the per-zone tables on `o_sp_id`: attraction first, then the
# within-zone (`tot_x`, `nin`) and leaving (`tot_y`, `nout`) totals.
in_and_out = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_and_out, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,tot_x,nin,tot_y,nout
0,356093,5028.899424,0.0,0.0,7358.802408,7358.802408
1,356094,4373.758398,0.0,0.0,13507.285571,13507.285571
2,356095,1434.85112,0.0,0.0,2563.662185,2563.662185
3,356096,213.766673,0.0,0.0,327.570521,327.570521
4,356097,0.0,0.0,0.0,0.0,0.0


In [81]:
# Stage the per-zone totals in `temptable3` (replaced if it already exists,
# written without the DataFrame index) for the INSERT ... SELECT that follows.
df_all.to_sql('temptable3', engine, if_exists='replace', index=False)

In [82]:
# Copy the staged per-zone totals into spatial_groups_trips, tagging each row
# with the configured city and spatial name.
# NOTE(review): this is a plain INSERT, so re-running the cell appends the same
# rows again unless the table enforces uniqueness - confirm before re-running.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, '{city}', '{spname}', c.nin, c.nout, c.attract
FROM temptable3 c 
""".format(city=CITY, spname=SPATIAL_NAME)

# Run the INSERT inside an explicit transaction that commits on success.
# Engine.execute() (connectionless execution with implicit autocommit) is
# deprecated and no longer exists in SQLAlchemy 2.0.
with engine.begin() as conn:
    result = conn.execute(text(sql))

### Save OD

In [83]:
# Reshape the long OD table into a square matrix: one row per origin zone, one
# column per destination zone, cells holding `tot`.
ODs_matrix_df = od_sp_groups_df.pivot(index='o_sp_id', columns='d_sp_id', values='tot')
ODs_matrix_df.head()

d_sp_id,356093,356094,356095,356096,356097,356098,356099,356100,356101,356102,...,356999,357000,357001,357002,357003,357004,357005,357006,357007,357008
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
356093,0.0,0.0,0.0,0.0,0.0,4.613669,1.53789,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.53789,0.0,0.0,0.0
356094,0.0,0.0,4.613669,0.0,0.0,4.613669,0.0,0.0,0.0,43.060913,...,3.075779,0.0,0.0,0.0,1.53789,0.0,0.0,13.841007,0.0,0.0
356095,0.0,4.613669,0.0,0.0,0.0,0.0,1.53789,0.0,81.508156,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
356096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.909355,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
356097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
# Tag every origin row with the city; this appends a `city` column after the
# destination-zone columns of the matrix.
ODs_matrix_df['city'] = CITY

In [86]:
# Export the OD matrix (origin-zone index included) to a CSV named after the
# configured city and spatial name.
ODs_matrix_df.to_csv('../../data/generated_files/{city}_{spname}_ODs.csv'.format(city=CITY, spname=SPATIAL_NAME))