In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

# Database settings are read from a YAML file that must provide the keys
# username, password, host, port and dbname (the placeholders used below).
# NOTE(review): yaml.load with FullLoader is more permissive than
# yaml.safe_load; prefer safe_load if this file could ever be untrusted.
with open('../../config/postgres.yaml') as f:
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() is lazy and does not contact the server, so open (and
    # immediately release) one connection to actually validate the settings.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Every later cell needs a working `engine`; stop here instead of failing
    # further down with a less helpful error.
    raise

In [3]:
def process_geometry_SQL_insert(gdf):
    """Return a copy of ``gdf`` whose geometries are ready for a PostGIS insert.

    The ``geometry`` column is replaced by a ``geom`` column of ``WKTElement``
    values (SRID 4326). Plain ``Polygon`` geometries are wrapped in a
    single-member ``MultiPolygon``; every other geometry type is passed
    through unchanged.

    Parameters
    ----------
    gdf : frame with a ``geometry`` column of shapely geometries.

    Returns
    -------
    A new frame without ``geometry`` and with ``geom``; ``gdf`` itself is
    left untouched.
    """
    def _as_wkt_element(geom):
        # Wrap bare polygons so they share one geometry type with multipolygons.
        shape = MultiPolygon([geom]) if geom.geom_type == 'Polygon' else geom
        return WKTElement(shape.wkt, srid=4326)

    # drop() returns a new frame, so adding 'geom' to it does not mutate the
    # caller's object; the keyword form also works where the positional axis
    # argument is no longer accepted.
    out = gdf.drop(columns='geometry')
    out['geom'] = gdf['geometry'].apply(_as_wkt_element)
    return out

In [4]:
# City key: used to filter the database queries, to tag the rows inserted
# below, and to name the generated OD file.
CITY = 'LA'

In [5]:
# Load the census-tract layer from the zipped archive, keeping only the
# geometry and the tract identifier.
tracts_gdf = gpd.read_file(
    'zip://../../data/LA/mobile-phone/census_tracts.zip'
)[['geometry', 'GEOID']]
tracts_gdf.head()

Unnamed: 0,geometry,GEOID
0,"POLYGON ((-118.30229 34.25870, -118.30079 34.2...",6037101110
1,"POLYGON ((-118.30333 34.27354, -118.30318 34.2...",6037101122
2,"POLYGON ((-118.29945 34.25598, -118.28592 34.2...",6037101210
3,"POLYGON ((-118.28592 34.24896, -118.28592 34.2...",6037101220
4,"POLYGON ((-118.27247 34.23253, -118.27194 34.2...",6037101300


In [6]:
# One row per (block group, spatial group) pair for the city: a block group is
# matched to every spatial group whose lower_ids array contains its bid.
sql = """
SELECT b.original_id, bid, sp_id
FROM blocks_group b
INNER JOIN spatial_groups as sp on b.bid = ANY(sp.lower_ids)
WHERE b.city='{city}'
""".format(city=CITY)

# GEOID = first 11 characters of original_id (joined against tract ids later).
blocks2spid_df = pd.read_sql(sql, engine).assign(
    GEOID=lambda frame: frame['original_id'].str[0:11]
)
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID
0,60372071021,518291,354294,6037207102
1,60372071021,518291,354305,6037207102
2,60372071021,518291,354326,6037207102
3,60372071021,518291,354331,6037207102
4,60372071021,518291,354399,6037207102


In [7]:
# Number of distinct block groups (bid) present in each tract (GEOID).
njoins_df = (
    blocks2spid_df[['bid', 'GEOID']]
    .drop_duplicates()
    .groupby('GEOID')
    .size()
    .rename('count')
    .reset_index()
)
njoins_df.head()

Unnamed: 0,GEOID,count
0,6037101110,3
1,6037101122,2
2,6037101210,2
3,6037101220,2
4,6037101300,4


In [8]:
# Attach the per-tract block-group count to every row.
# A 'count' column left over from a previous run of this cell is dropped
# first; otherwise re-running would produce count_x / count_y and break the
# later selections of 'count'.
blocks2spid_df = pd.merge(
    blocks2spid_df.drop(columns='count', errors='ignore'),
    njoins_df,
    on='GEOID',
)
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID,count
0,60372071021,518291,354294,6037207102,1
1,60372071021,518291,354305,6037207102,1
2,60372071021,518291,354326,6037207102,1
3,60372071021,518291,354331,6037207102,1
4,60372071021,518291,354399,6037207102,1


In [9]:
# Spot check: rows for a single tract.
blocks2spid_df.loc[blocks2spid_df['GEOID'] == '06037980028']

Unnamed: 0,original_id,bid,sp_id,GEOID,count
20569,60379800281,517865,354636,6037980028,1
20570,60379800281,517865,354668,6037980028,1
20571,60379800281,517865,354686,6037980028,1
20572,60379800281,517865,354692,6037980028,1
20573,60379800281,517865,354734,6037980028,1
20574,60379800281,517865,354735,6037980028,1
20575,60379800281,517865,354747,6037980028,1
20576,60379800281,517865,352849,6037980028,1
20577,60379800281,517865,352850,6037980028,1
20578,60379800281,517865,352884,6037980028,1


In [10]:
# One row per (tract, spatial group) pair, carrying the tract's block-group count.
blocks2spid_unique_df = (
    blocks2spid_df[['GEOID', 'sp_id', 'count']]
    .drop_duplicates(subset=['GEOID', 'sp_id'])
)
blocks2spid_unique_df.head()

Unnamed: 0,GEOID,sp_id,count
0,6037207102,354294,1
1,6037207102,354305,1
2,6037207102,354326,1
3,6037207102,354331,1
4,6037207102,354399,1


In [11]:
# Spot check: a tract whose block-group count is greater than one.
blocks2spid_unique_df.loc[blocks2spid_unique_df['GEOID'] == '06037262302']

Unnamed: 0,GEOID,sp_id,count
73795,6037262302,352806,2
73796,6037262302,352813,2
73797,6037262302,352842,2
73798,6037262302,352863,2
73799,6037262302,352908,2
73800,6037262302,352915,2
73801,6037262302,352938,2
73802,6037262302,352948,2
73803,6037262302,352980,2
73804,6037262302,352990,2


## Origin–destination (OD) flows

In [12]:
# Open the travel-demand archive and list its members.
# The ZipFile handle stays open because the next cell reads the CSV member from it.
zip_file = ZipFile('../../data/LA/mobile-phone/travel_demand_LA.zip')
zip_file.infolist()

[<ZipInfo filename='travel_demand_LA.csv' compress_type=deflate filemode='-rw-rw-r--' file_size=991736528 compress_size=149183206>]

In [13]:
# Hourly columns are named '0'..'23'; HBW/HBO/NHB are the per-purpose columns.
hour_cols = [str(h) for h in range(24)]
purpose_cols = ['HBW', 'HBO', 'NHB']

# float32 halves the memory of the numeric columns; tract ids stay strings so
# they can be compared with the GEOID strings used elsewhere.
types = {col: np.float32 for col in hour_cols + purpose_cols}
types['O_Tract'] = str
types['D_Tract'] = str

# Read through a context manager so the archive member is closed afterwards,
# and skip the lon/lat columns at parse time instead of loading then dropping them.
with zip_file.open('travel_demand_LA.csv') as csv_member:
    travel_df = pd.read_csv(
        csv_member,
        dtype=types,
        usecols=['O_Tract', 'D_Tract'] + purpose_cols + hour_cols,
    )

# Total over the 24 hourly columns.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)
travel_df = travel_df[['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
0,6037406200,6037264102,0.0,0.0,0.0,0.0
1,6037535101,6037570702,0.0,0.0,0.0,0.0
2,6037141202,6037139401,0.0,0.0,0.0,0.0
3,6037500600,6037601802,0.0,0.0,0.0,0.0
4,6037195100,6037405701,0.0,0.0,0.0,0.0


In [14]:
# Re-selects the same six columns the previous cell already kept, so this
# leaves travel_df's contents unchanged.
travel_df = travel_df[['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]

In [15]:
# Keep only OD pairs with a non-zero value in at least one of the measures.
has_any_trip = (travel_df[['HBW', 'HBO', 'NHB', 'tot']] != 0).any(axis=1)
travel_df = travel_df[has_any_trip]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
14,6037703100,6037651304,0.0,20.0,0.0,20.0
30,6037141400,6037800327,0.0,0.0,20.0,20.0
42,6037206200,6037212203,0.0,20.0,40.0,60.0
46,6037534501,6037534900,0.0,20.0,0.0,20.0
50,6037402600,6037407802,0.0,20.0,0.0,20.0


In [16]:
# Origin side: map each origin tract to the spatial groups it belongs to.
od_sp_groups_df = pd.merge(
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']],
    blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'}),
    left_on='O_Tract',
    right_on='GEOID',
).drop(['GEOID'], axis=1)

# Share the tract total across the tract's block groups.
# Plain column assignment replaces the column outright; `.loc[:, 'tot'] = ...`
# tries to write into the existing float32 column on recent pandas, which makes
# the resulting dtype depend on the values.
# NOTE(review): only 'tot' is divided by 'count'; HBO and NHB are carried
# through undivided - confirm this is intended.
od_sp_groups_df['tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = od_sp_groups_df.drop(['count', 'O_Tract'], axis=1)
od_sp_groups_df = od_sp_groups_df.groupby(['o_sp_id', 'D_Tract'], as_index=False).sum()
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,D_Tract,HBO,NHB,tot
0,352667,6037103300,20.0,0.0,6.666667
1,352667,6037104124,20.0,0.0,6.666667
2,352667,6037104821,20.0,0.0,6.666667
3,352667,6037113212,0.0,0.0,6.666667
4,352667,6037113301,60.0,0.0,20.0


In [17]:
# Destination side: map each destination tract to its spatial groups.
# This cell consumes the D_Tract column produced by the previous cell, so it
# can only be run once per run of that cell.
od_sp_groups_df = pd.merge(
    od_sp_groups_df,
    blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'}),
    left_on='D_Tract',
    right_on='GEOID',
).drop(['GEOID'], axis=1)

# Share the total across the destination tract's block groups. Plain column
# assignment is used instead of `.loc[:, 'tot'] = ...` so the column is
# replaced rather than written into in place.
od_sp_groups_df['tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = od_sp_groups_df.drop(['count', 'D_Tract'], axis=1)
od_sp_groups_df = od_sp_groups_df.groupby(['o_sp_id', 'd_sp_id'], as_index=False).sum()
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,352667,352667,2860.0,1400.0,693.888889
1,352667,352668,600.0,140.0,113.333333
2,352667,352669,1540.0,540.0,467.222222
3,352667,352670,1100.0,340.0,450.555556
4,352667,352671,2940.0,1560.0,823.888889


In [18]:
# Every distinct spatial-group id, as strings in lexicographic order.
all_sp_ids = sorted(str(sp_id) for sp_id in set(blocks2spid_df.sp_id.values))

### Fix missing links

In [19]:
import itertools

# Every ordered (origin, destination) pair of spatial groups.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Cast the ids to str so they match all_sp_ids, then expand to the full grid;
# pairs with no observed flow get 0 in every column.
od_sp_groups_df = (
    od_sp_groups_df
    .astype({'o_sp_id': str, 'd_sp_id': str})
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,352667,352667,2860.0,1400.0,693.888889
1,352667,352668,600.0,140.0,113.333333
2,352667,352669,1540.0,540.0,467.222222
3,352667,352670,1100.0,340.0,450.555556
4,352667,352671,2940.0,1560.0,823.888889


In [20]:
# Are there pairs whose total is exactly 0?
od_sp_groups_df.loc[od_sp_groups_df['tot'].eq(0)].head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
126,352667,352793,0.0,0.0,0.0
129,352667,352796,0.0,0.0,0.0
131,352667,352798,0.0,0.0,0.0
139,352667,352806,0.0,0.0,0.0
146,352667,352813,0.0,0.0,0.0


## Extras

In [21]:
# Left-join both ends of every OD row to the spatial groups; an end whose tract
# has no match keeps NaN in its *_sp_id column.
origin_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'})
destination_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'})

od_extra_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(origin_lookup, left_on='O_Tract', right_on='GEOID', how='left')
    .drop(['GEOID'], axis=1)
    .merge(destination_lookup, left_on='D_Tract', right_on='GEOID', how='left')
    .drop(['GEOID'], axis=1)
)
od_extra_df.head()

Unnamed: 0,O_Tract,D_Tract,HBO,NHB,tot,o_sp_id,count_x,d_sp_id,count_y
0,6037703100,6037651304,20.0,0.0,20.0,,,,
1,6037141400,6037800327,0.0,20.0,20.0,354875.0,3.0,,
2,6037141400,6037800327,0.0,20.0,20.0,354884.0,3.0,,
3,6037141400,6037800327,0.0,20.0,20.0,354888.0,3.0,,
4,6037141400,6037800327,0.0,20.0,20.0,354895.0,3.0,,


In [22]:
# Keep rows where exactly one end is unmatched (origin XOR destination has no
# spatial group).
origin_unmatched = od_extra_df['o_sp_id'].isnull()
destination_unmatched = od_extra_df['d_sp_id'].isnull()
od_extra_df = od_extra_df[origin_unmatched ^ destination_unmatched]

# from out to LA
# Only the numeric trip columns are aggregated: a bare .sum() would also try
# to aggregate the string tract columns on recent pandas.
in_extra_df = (
    od_extra_df[od_extra_df['o_sp_id'].isnull()]
    .groupby('d_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left merge made the ids float (NaN-capable); go back to integer text.
in_extra_df['d_sp_id'] = in_extra_df['d_sp_id'].astype(int).astype(str)
in_extra_df = in_extra_df[['d_sp_id', 'HBO', 'NHB', 'tot']]
in_extra_df['ntrips'] = in_extra_df['tot']
in_extra_df.head()

Unnamed: 0,d_sp_id,HBO,NHB,tot,ntrips
0,352667,18480.0,9880.0,33260.0,33260.0
1,352668,16260.0,8160.0,31260.0,31260.0
2,352669,17660.0,9900.0,33220.0,33220.0
3,352670,16560.0,10100.0,31540.0,31540.0
4,352671,20700.0,12400.0,41320.0,41320.0


In [23]:
# from LA to out
# Rows whose destination has no spatial group, summed per origin group. Only
# the numeric trip columns are aggregated: a bare .sum() would also try to
# aggregate the string tract columns on recent pandas.
out_extra_df = (
    od_extra_df[od_extra_df['d_sp_id'].isnull()]
    .groupby('o_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left merge made the ids float (NaN-capable); go back to integer text.
out_extra_df['o_sp_id'] = out_extra_df['o_sp_id'].astype(int).astype(str)
out_extra_df = out_extra_df[['o_sp_id', 'HBO', 'NHB', 'tot']]
out_extra_df['ntrips'] = out_extra_df['tot']
out_extra_df.head()

Unnamed: 0,o_sp_id,HBO,NHB,tot,ntrips
0,352667,19620.0,10040.0,34600.0,34600.0
1,352668,15760.0,13360.0,30320.0,30320.0
2,352669,18120.0,11500.0,33480.0,33480.0
3,352670,16580.0,11040.0,31480.0,31480.0
4,352671,21500.0,15320.0,42200.0,42200.0


## Blocks_attract

In [24]:
# One row per block group (bid) with its tract and the tract's block-group count.
blocks2bid_unique_df = (
    blocks2spid_df[['bid', 'GEOID', 'count']]
    .drop_duplicates(subset=['bid'])
)
blocks2bid_unique_df.head()

Unnamed: 0,bid,GEOID,count
0,518291,6037207102,1
29,518290,6037224700,3
76,518266,6037224700,3
127,518225,6037224700,3
175,518289,6037209104,1


In [25]:
# Tract-level OD flows redistributed to block groups (bid): origin side first,
# then destination side. At each step 'tot' is divided by the number of block
# groups in the tract; plain column assignment is used instead of
# `.loc[:, 'tot'] = ...` so the column is replaced rather than written in place.
# NOTE(review): HBO and NHB are not divided by 'count' - confirm intended.
od_bid_groups_df = pd.merge(
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']],
    blocks2bid_unique_df.rename(columns={'bid': 'o_bid'}),
    left_on='O_Tract',
    right_on='GEOID',
).drop(['GEOID'], axis=1)
od_bid_groups_df['tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = od_bid_groups_df.drop(['count', 'O_Tract'], axis=1)
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'D_Tract'], as_index=False).sum()

od_bid_groups_df = pd.merge(
    od_bid_groups_df,
    blocks2bid_unique_df.rename(columns={'bid': 'd_bid'}),
    left_on='D_Tract',
    right_on='GEOID',
).drop(['GEOID'], axis=1)
od_bid_groups_df['tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = od_bid_groups_df.drop(['count', 'D_Tract'], axis=1)
od_bid_groups_df = od_bid_groups_df.groupby(['o_bid', 'd_bid'], as_index=False).sum()

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,515785,515785,20.0,0.0,2.222222
1,515785,515786,20.0,20.0,3.333333
2,515785,515787,20.0,20.0,6.666667
3,515785,515788,40.0,20.0,10.0
4,515785,515789,20.0,0.0,2.222222


In [26]:
# Membership table: one row per (spatial group, member block group), obtained
# by unnesting each group's lower_ids array. Both ids are cast to text in SQL.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}'
""".format(city=CITY)

blocks_spatial_df = pd.read_sql(sql, engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,354204,517170
1,354204,517178
2,354204,517184
3,354204,517185
4,354204,517191


In [27]:
# For every spatial group: sum of NHB over block-group OD pairs whose
# destination lies inside the group and whose origin lies outside it.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')

# blocks_spatial_df holds bids as text (cast in its SQL). Cast them to the
# dtype of the OD frame's bid columns so isin() compares like with like rather
# than relying on implicit string/number coercion.
# NOTE(review): assumes o_bid and d_bid share one dtype - confirm.
member_bids_df = blocks_spatial_df[['sp_id', 'bid']].copy()
member_bids_df['bid'] = member_bids_df['bid'].astype(od_bid_groups_df['d_bid'].dtype)

attract_df['attract'] = 0.
for spid in attract_df.index.values:
    bids = member_bids_df.loc[member_bids_df['sp_id'] == spid, 'bid'].values
    arrives_inside = od_bid_groups_df['d_bid'].isin(bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(bids)
    attract_df.loc[spid, 'attract'] = od_bid_groups_df.loc[arrives_inside & ~starts_inside, 'NHB'].sum()

attract_df = attract_df.reset_index()
attract_df.head()

Unnamed: 0,o_sp_id,attract
0,352667,22920.0
1,352668,28900.0
2,352669,17220.0
3,352670,15940.0
4,352671,26800.0


### Save "other" trips: leaving each group (out) and staying within it (in)

In [28]:
# Working copy of the OD table with 'tot' exposed under the name 'ntrips'.
trips_other = (
    od_sp_groups_df[['o_sp_id', 'd_sp_id', 'NHB', 'tot']]
    .rename(columns={'tot': 'ntrips'})
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,NHB,ntrips
0,352667,352667,1400.0,693.888889
1,352667,352668,140.0,113.333333
2,352667,352669,540.0,467.222222
3,352667,352670,340.0,450.555556
4,352667,352671,1560.0,823.888889


In [29]:
# Attractiveness of a group: NHB summed over flows arriving from other groups.
# The result is keyed by destination but stored under 'o_sp_id' so it can be
# merged with the per-origin tables below.
between_groups = trips_other['o_sp_id'] != trips_other['d_sp_id']
trips_attract = (
    trips_other.loc[between_groups, ['d_sp_id', 'NHB']]
    .groupby('d_sp_id', as_index=False)
    .sum()
    .rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,352667,160320.0
1,352668,125800.0
2,352669,141360.0
3,352670,147400.0
4,352671,200760.0


In [30]:
# Add the NHB of flows whose origin has no spatial group (in_extra_df), then
# re-total per group.
incoming_attract = (
    in_extra_df[['d_sp_id', 'NHB']]
    .rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})
)
trips_attract = (
    pd.concat([trips_attract, incoming_attract])
    .groupby('o_sp_id', as_index=False)
    .sum()
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,352667,170200.0
1,352668,133960.0
2,352669,151260.0
3,352670,157500.0
4,352671,213160.0


In [31]:
# Trips leaving each group: flows to other groups plus flows to destinations
# with no spatial group (out_extra_df), totalled per origin group.
leaving_group = trips_other.loc[
    trips_other['o_sp_id'] != trips_other['d_sp_id'],
    ['o_sp_id', 'ntrips'],
]
trips_out = (
    pd.concat([leaving_group, out_extra_df[['o_sp_id', 'ntrips']]])
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nout'})
)
trips_out.head()

Unnamed: 0,o_sp_id,nout
0,352667,136616.055556
1,352668,90006.555556
2,352669,151796.222222
3,352670,183610.055556
4,352671,167804.444444


In [32]:
# Trips that stay inside a group (origin == destination), totalled per group.
# Only 'ntrips' is aggregated: a bare .sum() would keep the string d_sp_id
# column on recent pandas, and it would then leak into df_all and the staged
# SQL table.
within_group = trips_other['o_sp_id'] == trips_other['d_sp_id']
trips_in = (
    trips_other.loc[within_group, ['o_sp_id', 'ntrips']]
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,nin
0,352667,693.888889
1,352668,137.361111
2,352669,906.111111
3,352670,1481.111111
4,352671,1152.222222


In [33]:
# One row per spatial group with attract, nin and nout. Inner joins: a group
# must appear in all three tables to be kept.
in_and_out = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_and_out, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,352667,170200.0,693.888889,136616.055556
1,352668,133960.0,137.361111,90006.555556
2,352669,151260.0,906.111111,151796.222222
3,352670,157500.0,1481.111111,183610.055556
4,352671,213160.0,1152.222222,167804.444444


In [34]:
# Same statements as the preceding cell: rebuilds df_all from trips_in,
# trips_out and trips_attract and yields the same frame.
df_all = pd.merge(trips_in, trips_out, on='o_sp_id')
df_all = pd.merge(trips_attract, df_all, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,352667,170200.0,693.888889,136616.055556
1,352668,133960.0,137.361111,90006.555556
2,352669,151260.0,906.111111,151796.222222
3,352670,157500.0,1481.111111,183610.055556
4,352671,213160.0,1152.222222,167804.444444


In [35]:
# Stage df_all in the database as table 'temptable3', replacing any existing
# table of that name; the INSERT in the next cell reads from it.
df_all.to_sql('temptable3', engine, if_exists='replace', index=False)

In [36]:
# Copy the staged per-group totals from temptable3 into spatial_groups_trips,
# tagging each row with the city and the spatial_name 'ego'.
# Note: this is a plain INSERT, so running the cell again inserts the rows again.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, '{city}', 'ego', c.nin, c.nout, c.attract
FROM temptable3 c 
""".format(city=CITY)

# Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin() opens a
# transaction that commits when the block exits and works on 1.x and 2.x alike.
with engine.begin() as conn:
    result = conn.execute(text(sql))

### Save OD

In [37]:
# Wide OD matrix: origins as rows, destinations as columns, 'tot' as values.
ODs_matrix_df = od_sp_groups_df.pivot(
    index='o_sp_id',
    columns='d_sp_id',
    values='tot',
)
ODs_matrix_df.head()

d_sp_id,352667,352668,352669,352670,352671,352672,352673,352674,352675,352676,...,355164,355165,355166,355167,355168,355169,355170,355171,355172,355173
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
352667,693.888889,113.333333,467.222222,450.555556,823.888889,208.888889,130.0,611.111111,1221.666667,40.0,...,65.555556,82.222222,68.333333,49.444444,102.222222,105.555556,133.333333,78.333333,55.0,91.666667
352668,100.555556,137.361111,151.944444,147.777778,100.0,61.722222,285.694444,50.0,192.5,9.777778,...,37.222222,46.666667,47.5,60.555556,79.444444,63.888889,95.0,65.833333,52.083333,53.333333
352669,451.111111,164.722222,906.111111,1161.111111,416.666667,296.5,284.722222,281.666667,1066.666667,71.777778,...,43.333333,70.0,33.333333,70.0,135.0,96.666667,163.333333,56.666667,110.0,98.333333
352670,464.444444,168.055556,1066.111111,1481.111111,483.333333,541.666667,278.055556,273.333333,1310.0,164.444444,...,23.333333,38.333333,35.0,61.666667,95.0,63.333333,156.666667,58.333333,101.666667,63.333333
352671,771.666667,113.055556,461.111111,472.777778,1152.222222,188.277778,129.722222,908.888889,1441.111111,63.555556,...,57.222222,70.555556,96.666667,52.777778,88.888889,102.222222,140.0,101.666667,66.666667,80.0


In [38]:
# Adds a constant 'city' column next to the destination-id columns; it is
# written out together with the matrix by the to_csv call that follows.
ODs_matrix_df['city'] = CITY

In [39]:
# Write the OD matrix (including its o_sp_id index) to a per-city CSV file.
ODs_matrix_df.to_csv('../../data/generated_files/{city}_ODs.csv'.format(city=CITY))

### Ambient population

In [40]:
# Block groups joined to the spatial group whose core_id equals their bid.
sql = """
SELECT b.original_id, bid, sp_id
FROM blocks_group b
INNER JOIN spatial_groups as sp on b.bid = sp.core_id
WHERE b.city='{city}'
""".format(city=CITY)

# GEOID = first 11 characters of original_id (matched against tract ids below).
blocks2coreid_df = pd.read_sql(sql, engine).assign(
    GEOID=lambda frame: frame['original_id'].str[0:11]
)
blocks2coreid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID
0,60372071021,518291,355173,6037207102
1,60372247002,518290,355172,6037224700
2,60372091041,518289,355171,6037209104
3,60372079002,518288,355170,6037207900
4,60372071011,518287,355169,6037207101


In [41]:
# Number of distinct block groups (bid) present in each tract (GEOID).
njoins_coreid_df = (
    blocks2coreid_df[['bid', 'GEOID']]
    .drop_duplicates()
    .groupby('GEOID')
    .size()
    .rename('count')
    .reset_index()
)
njoins_coreid_df.head()

Unnamed: 0,GEOID,count
0,6037101110,3
1,6037101122,2
2,6037101210,2
3,6037101220,2
4,6037101300,4


In [42]:
# Attach the per-tract block-group count to every row.
# A 'count' column left over from a previous run of this cell is dropped
# first; otherwise re-running would produce count_x / count_y and break the
# later selections of 'count'.
blocks2coreid_df = pd.merge(
    blocks2coreid_df.drop(columns='count', errors='ignore'),
    njoins_coreid_df,
    on='GEOID',
)
blocks2coreid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID,count
0,60372071021,518291,355173,6037207102,1
1,60372247002,518290,355172,6037224700,3
2,60372247003,518266,355148,6037224700,3
3,60372247001,518225,355107,6037224700,3
4,60372091041,518289,355171,6037209104,1


In [43]:
# Hourly stay counts per tract; tract ids are read as strings so they can be
# compared with the GEOID strings.
ambient_df = pd.read_csv(
    '../../data/LA/mobile-phone/hourly_stay_LA.csv',
    dtype={'tract': str},
)
ambient_df.loc[ambient_df['tract'] == '06037101110'].head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,16,17,18,19,20,21,22,23,lon,lat
869,6037101110,5002.0,4954.0,4924.0,4900.0,4874.0,4834.0,4716.0,4330.0,3606.0,...,3612.0,3886.0,4212.0,4314.0,4454.0,4420.0,4414.0,4410.0,-118.292987,34.259474


In [44]:
# One row per (tract, spatial group) pair, carrying the tract's block-group count.
blocks2coreid_unique_df = (
    blocks2coreid_df[['GEOID', 'sp_id', 'count']]
    .drop_duplicates(subset=['GEOID', 'sp_id'])
)
blocks2coreid_unique_df.head()

Unnamed: 0,GEOID,sp_id,count
0,6037207102,355173,1
1,6037224700,355172,3
2,6037224700,355148,3
3,6037224700,355107,3
4,6037209104,355171,1


In [45]:
# Pair every tract's hourly series with each of its block groups.
tract_block_groups = (
    blocks2coreid_df[['GEOID', 'count', 'bid']]
    .drop_duplicates(subset=['GEOID', 'bid', 'count'])
    .rename(columns={'GEOID': 'tract'})
)
ambient_sp_id_df = pd.merge(ambient_df, tract_block_groups, on='tract')

# Divide each hourly value by the number of block groups in the tract, so the
# tract's count is split evenly between them.
columns = [str(x) for x in range(0,24)]
ambient_sp_id_df[columns] = ambient_sp_id_df[columns].div(ambient_sp_id_df['count'], axis=0)

ambient_sp_id_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,count,bid
0,6037183810,1175.0,1169.0,1151.0,1150.0,1146.0,1137.0,1148.0,1209.0,1213.0,...,1153.0,1173.0,1165.0,1160.0,1152.0,1139.0,-118.19785,34.107365,2,517435
1,6037183810,1175.0,1169.0,1151.0,1150.0,1146.0,1137.0,1148.0,1209.0,1213.0,...,1153.0,1173.0,1165.0,1160.0,1152.0,1139.0,-118.19785,34.107365,2,517366
2,6037137301,2138.0,2126.0,2104.0,2106.0,2096.0,2098.0,2100.0,2094.0,2084.0,...,1894.0,1880.0,1874.0,1930.0,1960.0,1936.0,-118.634042,34.176253,1,517296
3,6037137302,2289.0,2265.0,2241.0,2234.0,2229.0,2227.0,2206.0,2003.0,1912.0,...,1868.0,1883.0,1875.0,1860.0,1999.0,1985.0,-118.654196,34.177348,2,517343
4,6037137302,2289.0,2265.0,2241.0,2234.0,2229.0,2227.0,2206.0,2003.0,1912.0,...,1868.0,1883.0,1875.0,1860.0,1999.0,1985.0,-118.654196,34.177348,2,517237


In [46]:
# Total the hourly shares per block group. numeric_only=True keeps the string
# 'tract' column out of the aggregation (a bare .sum() would try to aggregate
# it on recent pandas).
# Note: the summed lon / lat / count columns that remain have no meaning after
# this step; only 'bid' and 'ambient_avg' are used afterwards.
ambient_sp_id_df = ambient_sp_id_df.groupby('bid', as_index=False).sum(numeric_only=True)

# Average over the 24 hourly columns.
ambient_sp_id_df['ambient_avg'] = ambient_sp_id_df[[str(x) for x in range(0,24)]].mean(axis=1)
ambient_sp_id_df.head()

Unnamed: 0,bid,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,count,ambient_avg
0,515785,989.333333,982.666667,978.666667,969.333333,967.333333,966.666667,984.666667,995.333333,1044.666667,...,1003.333333,986.0,966.0,959.333333,948.0,940.666667,-118.260391,33.797993,3,1009.055556
1,515786,983.0,983.0,986.0,984.0,987.0,995.5,1080.5,1270.5,1629.0,...,1274.5,1194.5,1183.0,1170.0,1144.5,1100.0,-118.302154,33.848473,4,1430.916667
2,515787,2747.0,2713.0,2679.0,2671.0,2652.0,2631.0,2589.0,2450.0,2144.0,...,2457.0,2482.0,2452.0,2450.0,2449.0,2442.0,-118.289893,33.794128,2,2304.291667
3,515788,1256.0,1251.0,1247.0,1237.0,1234.0,1237.0,1258.0,1424.0,1587.0,...,1513.0,1323.0,1310.0,1279.0,1273.0,1254.0,-118.304315,33.792617,2,1435.541667
4,515789,796.666667,794.666667,792.666667,790.0,790.0,816.0,864.0,1079.333333,1218.0,...,823.333333,803.333333,798.666667,780.666667,786.0,789.333333,-118.247927,33.78747,3,991.25


In [47]:
# Stage bid / ambient_avg as table 'temptable3'. if_exists='replace' overwrites
# any existing table of that name, including the one staged earlier in this
# notebook.
ambient_sp_id_df[['bid', 'ambient_avg']].to_sql('temptable3', engine, if_exists='replace', index=False)

In [48]:
# Copy the staged averages from temptable3 into ambient_population, tagging
# each row with the city.
# Note: this is a plain INSERT, so running the cell again inserts the rows again.
sql = """
INSERT INTO ambient_population (bid, city, num_people) 
SELECT c.bid, '{city}', c.ambient_avg
FROM temptable3 c 
""".format(city=CITY)

# Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin() opens a
# transaction that commits when the block exits and works on 1.x and 2.x alike.
with engine.begin() as conn:
    result = conn.execute(text(sql))