In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

# Database credentials are read from a YAML file kept outside the notebook.
with open('../../config/postgres.yaml') as f:
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() is lazy and does not contact the server, so open (and
    # immediately release) one connection to surface bad settings right here.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Re-raise: every later cell needs a working `engine`, so continuing would
    # only postpone the failure to a less obvious place.
    raise

In [3]:
def process_geometry_SQL_insert(gdf):
    """Replace the ``geometry`` column with a ``geom`` column of ``WKTElement`` values.

    Each shape in ``gdf['geometry']`` is converted to WKT and wrapped in a
    ``WKTElement`` tagged with SRID 4326. A plain ``Polygon`` is first wrapped
    in a single-member ``MultiPolygon``; any other geometry type is passed
    through unchanged. Judging by the function name the result is meant for a
    SQL insert.

    Parameters
    ----------
    gdf : DataFrame-like
        Must contain a ``geometry`` column of shapely geometries.

    Returns
    -------
    DataFrame-like
        A new frame with ``geom`` added and ``geometry`` removed. The input
        frame is left untouched.
    """
    def _to_wkt_element(shape):
        # Promote single polygons so the column holds one geometry type.
        promoted = MultiPolygon([shape]) if shape.geom_type == 'Polygon' else shape
        return WKTElement(promoted.wkt, srid=4326)

    # assign() returns a copy, so the caller's frame is not mutated.
    with_geom = gdf.assign(geom=gdf['geometry'].apply(_to_wkt_element))
    # Keyword form: the positional `axis` argument of drop() was removed in pandas 2.0.
    return with_geom.drop(columns='geometry')

In [4]:
# Read the census-tract layer straight from the zip archive and keep only the
# geometry and the GEOID identifier.
tracts_gdf = gpd.read_file('zip://../../data/LA/mobile-phone/census_tracts.zip')[['geometry', 'GEOID']]
tracts_gdf.head()

Unnamed: 0,geometry,GEOID
0,"POLYGON ((-118.30229 34.25870, -118.30079 34.2...",6037101110
1,"POLYGON ((-118.30333 34.27354, -118.30318 34.2...",6037101122
2,"POLYGON ((-118.29945 34.25598, -118.28592 34.2...",6037101210
3,"POLYGON ((-118.28592 34.24896, -118.28592 34.2...",6037101220
4,"POLYGON ((-118.27247 34.23253, -118.27194 34.2...",6037101300


In [5]:
# One row per (block group, spatial group) membership in LA: a block group is
# paired with every spatial group whose lower_ids array contains its bid.
sql = """
SELECT b.original_id, bid, sp_id
FROM blocks_group b
INNER JOIN spatial_groups as sp on b.bid = ANY(sp.lower_ids)
WHERE b.city='LA'
"""

# GEOID is the first 11 characters of original_id; later cells join it against
# the O_Tract / D_Tract columns of the travel-demand table.
blocks2spid_df = pd.read_sql(sql, engine).assign(
    GEOID=lambda frame: frame['original_id'].str[0:11]
)
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID
0,60371393012,452172,343088,6037139301
1,60371375012,452185,343088,6037137501
2,60372403002,452080,343080,6037240300
3,60371320011,452040,343071,6037132001
4,60371394021,456434,343088,6037139402


In [6]:
# Number of distinct block groups (bid) found under each GEOID.
njoins_df = (
    blocks2spid_df[['bid', 'GEOID']]
    .drop_duplicates()
    .groupby('GEOID')
    .size()
    .reset_index(name='count')
)
njoins_df.head()

Unnamed: 0,GEOID,count
0,6037101110,3
1,6037101122,2
2,6037101210,2
3,6037101220,2
4,6037101300,4


In [7]:
# Attach the per-GEOID block-group count to every membership row.
# Re-running this cell alone would merge a second 'count' column
# (count_x / count_y), so rebuild blocks2spid_df first.
blocks2spid_df = blocks2spid_df.merge(njoins_df, on='GEOID')
blocks2spid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID,count
0,60371393012,452172,343088,6037139301,3
1,60371393013,459744,343088,6037139301,3
2,60371393011,465885,343088,6037139301,3
3,60371393013,459744,343073,6037139301,3
4,60371393012,452172,343091,6037139301,3


In [8]:
# Spot-check: all membership rows recorded for a single GEOID.
blocks2spid_df.loc[blocks2spid_df['GEOID'].eq('06037980028')]

Unnamed: 0,original_id,bid,sp_id,GEOID,count
11656,60379800281,472486,343109,6037980028,1
11657,60379800281,472486,343455,6037980028,1
11658,60379800281,472486,343737,6037980028,1
11659,60379800281,472486,343749,6037980028,1
11660,60379800281,472486,343928,6037980028,1
11661,60379800281,472486,344042,6037980028,1
11662,60379800281,472486,344151,6037980028,1
11663,60379800281,472486,344326,6037980028,1
11664,60379800281,472486,344998,6037980028,1
11665,60379800281,472486,345036,6037980028,1


In [9]:
# Collapse to one row per (GEOID, spatial group) pair, keeping the first
# occurrence and its block-group count.
blocks2spid_unique_df = (
    blocks2spid_df
    .loc[:, ['GEOID', 'sp_id', 'count']]
    .drop_duplicates(subset=['GEOID', 'sp_id'])
)
blocks2spid_unique_df.head()

Unnamed: 0,GEOID,sp_id,count
0,6037139301,343088,3
3,6037139301,343073,3
4,6037139301,343091,3
6,6037139301,343153,3
7,6037139301,343258,3


In [10]:
# Spot-check: spatial groups linked to a single GEOID after de-duplication.
blocks2spid_unique_df.loc[blocks2spid_unique_df['GEOID'].eq('06037262302')]

Unnamed: 0,GEOID,sp_id,count
32276,6037262302,343315,2
32277,6037262302,343631,2
32278,6037262302,343853,2
32280,6037262302,344492,2
32282,6037262302,344544,2
32284,6037262302,344702,2
32285,6037262302,344765,2
32286,6037262302,345125,2
32287,6037262302,345282,2
32288,6037262302,345445,2


## ODs

In [11]:
# Open the travel-demand archive and list its members. The handle is kept open
# because a later cell reads the CSV member through `zip_file`.
zip_file = ZipFile('../../data/LA/mobile-phone/travel_demand_LA.zip')
zip_file.infolist()

[<ZipInfo filename='travel_demand_LA.csv' compress_type=deflate filemode='-rw-rw-r--' file_size=991736528 compress_size=149183206>]

In [12]:
# dtype map for the travel-demand CSV: tract ids are read as strings, every
# other listed column (24 hourly columns, trip purposes, coordinates) as float32.
hour_cols = [str(x) for x in range(0, 24)]
types = {col: np.float32 for col in hour_cols + ['HBW', 'HBO', 'NHB', 'lon1', 'lat1', 'lon2', 'lat2']}
types['O_Tract'] = str
types['D_Tract'] = str

# Read the member inside a `with` block so its file handle is closed once the
# frame is loaded (the original left it open).
with zip_file.open('travel_demand_LA.csv') as csv_member:
    travel_df = pd.read_csv(csv_member, dtype=types)
travel_df = travel_df.drop(['lon1', 'lat1', 'lon2', 'lat2'], axis=1)
# 'tot' is the sum of the 24 hourly columns for each origin/destination pair.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)
travel_df = travel_df[['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
0,6037406200,6037264102,0.0,0.0,0.0,0.0
1,6037535101,6037570702,0.0,0.0,0.0,0.0
2,6037141202,6037139401,0.0,0.0,0.0,0.0
3,6037500600,6037601802,0.0,0.0,0.0,0.0
4,6037195100,6037405701,0.0,0.0,0.0,0.0


In [13]:
# NOTE(review): the previous cell already reduces travel_df to exactly these
# six columns, so this re-selection changes nothing; candidate for removal.
travel_df = travel_df[['O_Tract', 'D_Tract', 'HBW', 'HBO', 'NHB', 'tot']]

In [14]:
# Drop origin/destination pairs where every trip measure is zero.
travel_df = travel_df[travel_df[['HBW', 'HBO', 'NHB', 'tot']].ne(0).any(axis=1)]
travel_df.head()

Unnamed: 0,O_Tract,D_Tract,HBW,HBO,NHB,tot
14,6037703100,6037651304,0.0,20.0,0.0,20.0
30,6037141400,6037800327,0.0,0.0,20.0,20.0
42,6037206200,6037212203,0.0,20.0,40.0,60.0
46,6037534501,6037534900,0.0,20.0,0.0,20.0
50,6037402600,6037407802,0.0,20.0,0.0,20.0


In [15]:
# Barrios to blockgroup
# Step 1 of 2: replace the origin tract by every spatial group linked to it.
origin_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'})
od_sp_groups_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(origin_lookup, left_on='O_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
# NOTE(review): only 'tot' is divided by the tract's block-group count; HBO and
# NHB are carried at full value into every matching group. Confirm this is intended.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'O_Tract'])
    .groupby(['o_sp_id', 'D_Tract'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,D_Tract,HBO,NHB,tot
0,343071,6037103200,20.0,0.0,6.666667
1,343071,6037103300,20.0,0.0,6.666667
2,343071,6037104105,40.0,0.0,13.333333
3,343071,6037104108,20.0,20.0,11.666667
4,343071,6037104124,20.0,20.0,11.666667


In [16]:
# Step 2 of 2: replace the destination tract by every spatial group linked to
# it, again dividing 'tot' (and only 'tot') by the tract's block-group count.
destination_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'})
od_sp_groups_df = (
    od_sp_groups_df
    .merge(destination_lookup, left_on='D_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'D_Tract'])
    .groupby(['o_sp_id', 'd_sp_id'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,343071,343071,900.0,640.0,159.722222
1,343071,343073,820.0,280.0,148.333333
2,343071,343075,40.0,0.0,10.833333
3,343071,343076,100.0,0.0,18.055556
4,343071,343077,20.0,0.0,3.333333


In [17]:
# Distinct spatial-group ids as strings; the sort is therefore lexicographic.
all_sp_ids = sorted(str(sp) for sp in set(blocks2spid_df['sp_id'].values))

### Fix missing links

In [18]:
import itertools

# Ids become strings so they line up with all_sp_ids (a list of strings).
od_sp_groups_df = od_sp_groups_df.astype({'o_sp_id': str, 'd_sp_id': str})

# Every ordered (origin, destination) pair, including origin == destination.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Reindex onto the full grid; pairs with no observed trips get zeros.
od_sp_groups_df = (
    od_sp_groups_df
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,343071,343071,900.0,640.0,159.722222
1,343071,343072,0.0,0.0,0.0
2,343071,343073,820.0,280.0,148.333333
3,343071,343074,0.0,0.0,0.0
4,343071,343075,40.0,0.0,10.833333


In [19]:
# Which pairs ended up with a total of zero?
od_sp_groups_df.loc[od_sp_groups_df['tot'].eq(0)].head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
1,343071,343072,0.0,0.0,0.0
3,343071,343074,0.0,0.0,0.0
10,343071,343081,0.0,0.0,0.0
15,343071,343086,0.0,0.0,0.0
23,343071,343094,0.0,0.0,0.0


## Extras

In [20]:
# Left-join both endpoints to their spatial groups. A tract with no spatial
# group keeps its row and gets NaN in the matching *_sp_id / count_* columns.
od_extra_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'}),
        left_on='O_Tract', right_on='GEOID', how='left',
    )
    .drop(columns=['GEOID'])
    .merge(
        blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'}),
        left_on='D_Tract', right_on='GEOID', how='left',
    )
    .drop(columns=['GEOID'])
)
od_extra_df.head()

Unnamed: 0,O_Tract,D_Tract,HBO,NHB,tot,o_sp_id,count_x,d_sp_id,count_y
0,6037703100,6037651304,20.0,0.0,20.0,343242.0,6.0,,
1,6037703100,6037651304,20.0,0.0,20.0,343456.0,6.0,,
2,6037703100,6037651304,20.0,0.0,20.0,343817.0,6.0,,
3,6037703100,6037651304,20.0,0.0,20.0,343879.0,6.0,,
4,6037703100,6037651304,20.0,0.0,20.0,343914.0,6.0,,


In [21]:
# Keep rows where exactly one endpoint has no spatial group (exclusive or).
o_missing = od_extra_df['o_sp_id'].isnull()
d_missing = od_extra_df['d_sp_id'].isnull()
od_extra_df = od_extra_df[o_missing ^ d_missing]

# from out to LA
# Aggregate only the trip columns. A bare groupby(...).sum() would also sweep
# in the text columns O_Tract / D_Tract, which pandas >= 2.0 concatenates
# instead of silently dropping.
# NOTE(review): 'tot' is not divided by 'count' here, unlike the group-to-group
# table built earlier; confirm that is intended.
in_extra_df = (
    od_extra_df[od_extra_df['o_sp_id'].isnull()]
    .groupby('d_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left merge turned the ids into floats (NaN-capable); restore '343071'-style strings.
in_extra_df['d_sp_id'] = in_extra_df['d_sp_id'].astype(int).astype(str)
in_extra_df['ntrips'] = in_extra_df['tot']
in_extra_df.head()

Unnamed: 0,d_sp_id,HBO,NHB,tot,ntrips
0,343071,1840.0,1220.0,3560.0,3560.0
1,343072,10980.0,6840.0,21820.0,21820.0
2,343073,3400.0,2260.0,6460.0,6460.0
3,343074,7240.0,4580.0,13040.0,13040.0
4,343075,8300.0,5680.0,16080.0,16080.0


In [22]:
# from LA to out
# Aggregate only the trip columns. A bare groupby(...).sum() would also sweep
# in the text columns O_Tract / D_Tract, which pandas >= 2.0 concatenates
# instead of silently dropping.
out_extra_df = (
    od_extra_df[od_extra_df['d_sp_id'].isnull()]
    .groupby('o_sp_id', as_index=False)[['HBO', 'NHB', 'tot']]
    .sum()
)
# The left merge turned the ids into floats (NaN-capable); restore '343071'-style strings.
out_extra_df['o_sp_id'] = out_extra_df['o_sp_id'].astype(int).astype(str)
out_extra_df['ntrips'] = out_extra_df['tot']
out_extra_df.head()

Unnamed: 0,o_sp_id,HBO,NHB,tot,ntrips
0,343071,1760.0,1100.0,3420.0,3420.0
1,343072,11540.0,8160.0,22320.0,22320.0
2,343073,3220.0,2360.0,6280.0,6280.0
3,343074,8400.0,3000.0,14140.0,14140.0
4,343075,8460.0,4080.0,16360.0,16360.0


## Blocks_attract

In [23]:
# One row per block group (bid) with its GEOID and the GEOID's block-group count.
blocks2bid_unique_df = (
    blocks2spid_df
    .loc[:, ['bid', 'GEOID', 'count']]
    .drop_duplicates(subset=['bid'])
)
blocks2bid_unique_df.head()

Unnamed: 0,bid,GEOID,count
0,452172,6037139301,3
1,459744,6037139301,3
2,465885,6037139301,3
32,452185,6037137501,2
34,466081,6037137501,2


In [24]:
# Same two-step mapping as for spatial groups, but onto block groups (bid).
# In both steps only 'tot' is divided by the GEOID's block-group count;
# HBO and NHB are carried at full value.

# Origin side.
od_bid_groups_df = (
    travel_df[['O_Tract', 'D_Tract', 'HBO', 'NHB', 'tot']]
    .merge(blocks2bid_unique_df.rename(columns={'bid': 'o_bid'}), left_on='O_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
od_bid_groups_df.loc[:, 'tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = (
    od_bid_groups_df
    .drop(columns=['count', 'O_Tract'])
    .groupby(['o_bid', 'D_Tract'], as_index=False)
    .sum()
)

# Destination side.
od_bid_groups_df = (
    od_bid_groups_df
    .merge(blocks2bid_unique_df.rename(columns={'bid': 'd_bid'}), left_on='D_Tract', right_on='GEOID')
    .drop(columns=['GEOID'])
)
od_bid_groups_df.loc[:, 'tot'] = od_bid_groups_df['tot'] / od_bid_groups_df['count']
od_bid_groups_df = (
    od_bid_groups_df
    .drop(columns=['count', 'D_Tract'])
    .groupby(['o_bid', 'd_bid'], as_index=False)
    .sum()
)

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,452040,452040,40.0,20.0,6.666667
1,452040,452062,40.0,0.0,4.444444
2,452040,452072,0.0,20.0,1.111111
3,452040,452130,20.0,0.0,3.333333
4,452040,452172,0.0,20.0,2.222222


In [25]:
# Expand each LA spatial group into one row per member of its lower_ids array.
# Both sp_id and the unnested member id are cast to text in SQL, so both
# columns arrive in pandas as strings.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='LA'
"""

blocks_spatial_df = pd.read_sql(sql, engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,343071,453306
1,343071,467996
2,343071,468050
3,343071,458067
4,343071,469727


In [26]:
# For every spatial group, sum the NHB values of block-group pairs that end
# inside the group but start outside it.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')

attract_df['attract'] = 0.
for spid in attract_df.index.values:
    member_bids = blocks_spatial_df.loc[blocks_spatial_df['sp_id'] == spid, 'bid'].values
    # NOTE(review): member_bids come from a ::text cast while o_bid / d_bid come
    # from blocks2spid_df.bid; isin() only matches when both sides share a
    # dtype. Confirm the bid column is text on both sides.
    ends_inside = od_bid_groups_df['d_bid'].isin(member_bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(member_bids)
    attract_df.loc[spid, 'attract'] = od_bid_groups_df.loc[ends_inside & ~starts_inside, 'NHB'].sum()

attract_df = attract_df.reset_index()
attract_df.head()

Unnamed: 0,o_sp_id,attract
0,343071,53640.0
1,343072,91300.0
2,343073,80340.0
3,343074,41720.0
4,343075,106140.0


### Save "other" trips: outgoing and incoming

In [27]:
# Group-to-group table reduced to NHB plus the total, with 'tot' renamed to 'ntrips'.
trips_other = (
    od_sp_groups_df[['o_sp_id', 'd_sp_id', 'NHB']]
    .assign(ntrips=od_sp_groups_df['tot'])
)
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,NHB,ntrips
0,343071,343071,640.0,159.722222
1,343071,343072,0.0,0.0
2,343071,343073,280.0,148.333333
3,343071,343074,0.0,0.0
4,343071,343075,0.0,10.833333


In [28]:
# NHB arriving at each group from a *different* group, keyed by the destination
# id (renamed to o_sp_id so it can be merged with the other per-group tables).
cross_group_trips = trips_other[trips_other['o_sp_id'] != trips_other['d_sp_id']]
trips_attract = (
    cross_group_trips
    .groupby('d_sp_id', as_index=False)['NHB']
    .sum()
    .rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,343071,151760.0
1,343072,322880.0
2,343073,273160.0
3,343074,167040.0
4,343075,437240.0


In [29]:
# Add the NHB values of the "from out to LA" rows to each group's attraction.
extra_attract = in_extra_df.rename(columns={'d_sp_id': 'o_sp_id', 'NHB': 'attract'})[['o_sp_id', 'attract']]
trips_attract = (
    pd.concat([trips_attract, extra_attract])
    .groupby('o_sp_id', as_index=False)
    .sum()
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,343071,152980.0
1,343072,329720.0
2,343073,275420.0
3,343074,171620.0
4,343075,442920.0


In [30]:
# NOTE(review): this replaces the trips_attract built in the two preceding
# cells with the block-group-based attract_df, so that earlier result is
# discarded. Confirm which definition of 'attract' should be saved.
trips_attract = attract_df.reset_index()[['o_sp_id', 'attract']]

In [31]:
# nout = trips leaving each group: to other LA groups plus the "from LA to out" rows.
leaving_to_other_groups = trips_other.loc[
    trips_other['o_sp_id'] != trips_other['d_sp_id'], ['o_sp_id', 'ntrips']
]
trips_out = (
    pd.concat([leaving_to_other_groups, out_extra_df[['o_sp_id', 'ntrips']]])
    .groupby('o_sp_id', as_index=False)
    .sum()
    .rename(columns={'ntrips': 'nout'})
)
trips_out.head()

Unnamed: 0,o_sp_id,nout
0,343071,71788.345238
1,343072,188961.0
2,343073,159490.055556
3,343074,98340.833333
4,343075,320766.5


In [32]:
# nin = trips whose origin and destination are the same spatial group.
# Only 'ntrips' is aggregated: a bare groupby(...).sum() would also sweep in
# the text column d_sp_id, which pandas >= 2.0 keeps (as concatenated strings)
# and which would then leak into df_all and the SQL temp table.
trips_in = (
    trips_other[trips_other['o_sp_id'] == trips_other['d_sp_id']]
    .groupby('o_sp_id', as_index=False)['ntrips']
    .sum()
    .rename(columns={'ntrips': 'nin'})
)
trips_in.head()

Unnamed: 0,o_sp_id,nin
0,343071,159.722222
1,343072,566.111111
2,343073,525.0
3,343074,344.444444
4,343075,1162.777778


In [33]:
# Combine the three per-group measures (attract, nin, nout) on the group id.
# Inner joins: a group missing from any of the three tables is dropped.
df_all = trips_attract.merge(
    trips_in.merge(trips_out, on='o_sp_id'),
    on='o_sp_id',
)
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,343071,53640.0,159.722222,71788.345238
1,343072,91300.0,566.111111,188961.0
2,343073,80340.0,525.0,159490.055556
3,343074,41720.0,344.444444,98340.833333
4,343075,106140.0,1162.777778,320766.5


In [34]:
# NOTE(review): identical to the previous cell; it rebuilds the same df_all
# from the same inputs and is a candidate for removal.
df_all = pd.merge(trips_in, trips_out, on='o_sp_id')
df_all = pd.merge(trips_attract, df_all, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,nin,nout
0,343071,53640.0,159.722222,71788.345238
1,343072,91300.0,566.111111,188961.0
2,343073,80340.0,525.0,159490.055556
3,343074,41720.0,344.444444,98340.833333
4,343075,106140.0,1162.777778,320766.5


In [35]:
# Stage the per-group totals in the database table 'temptable3', replacing any
# existing table of that name.
df_all.to_sql('temptable3', engine, if_exists='replace', index=False)

In [38]:
# Copy the staged rows into spatial_groups_trips, tagging them as city 'LA' /
# spatial_name 'ego'.
# NOTE(review): this is a plain INSERT, so re-running the cell appends the same
# rows again; clear earlier LA/'ego' rows first if the cell must be repeatable.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, 'LA', 'ego', c.nin, c.nout, c.attract
FROM temptable3 c 
"""

# Engine.execute() was removed in SQLAlchemy 2.0. engine.begin() runs the
# statement in a transaction that commits on success and rolls back on error.
with engine.begin() as conn:
    result = conn.execute(text(sql))

### Save OD

In [23]:
# Reshape the long OD table into a matrix: one row per origin group, one
# column per destination group, cells holding 'tot'.
ODs_matrix_df = od_sp_groups_df.pivot(index='o_sp_id', columns='d_sp_id', values='tot')
ODs_matrix_df.head()

d_sp_id,181963,181964,181965,181966,181967,181968,181969,181970,181971,181972,...,184460,184461,184462,184463,184464,184465,184466,184467,184468,184469
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
181963,77.777778,13.888889,72.222222,68.888889,102.222222,31.333333,11.111111,60.0,146.666667,5.777778,...,11.111111,6.666667,18.888889,6.666667,6.666667,11.111111,20.0,30.0,13.333333,6.666667
181964,13.888889,84.027778,29.722222,48.888889,17.777778,16.666667,60.0,7.222222,58.333333,3.888889,...,5.0,15.0,17.222222,18.055556,11.666667,5.0,20.0,15.833333,18.333333,15.0
181965,54.444444,30.0,297.777778,394.444444,70.555556,75.0,42.222222,40.0,213.333333,17.777778,...,0.0,20.0,0.0,10.0,23.333333,10.0,33.333333,0.0,18.333333,30.0
181966,57.777778,69.166667,437.777778,544.444444,72.222222,81.333333,102.222222,30.0,331.666667,19.111111,...,3.333333,13.333333,0.0,15.0,25.0,13.333333,58.333333,0.0,33.333333,23.333333
181967,120.0,16.944444,62.777778,62.777778,222.222222,48.888889,27.777778,131.111111,290.0,3.333333,...,4.444444,0.0,18.333333,10.0,3.333333,11.111111,13.333333,22.777778,10.0,6.666667


In [24]:
# Append a constant 'city' column next to the destination-group columns.
ODs_matrix_df['city'] = 'LA'

In [25]:
# Write the OD matrix (index o_sp_id included) to the generated-files folder.
ODs_matrix_df.to_csv('../../data/generated_files/LA_ODs.csv')

### Ambient population

In [5]:
# One row per LA block group that is the core (core_id) of a spatial group.
sql = """
SELECT b.original_id, bid, sp_id
FROM blocks_group b
INNER JOIN spatial_groups as sp on b.bid = sp.core_id
WHERE b.city='LA'
"""

# GEOID is the first 11 characters of original_id; it is joined against the
# 'tract' column of the hourly-stay table further down.
blocks2coreid_df = pd.read_sql(sql, engine).assign(
    GEOID=lambda frame: frame['original_id'].str[0:11]
)
blocks2coreid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID
0,60372753113,235767,182084,6037275311
1,60372270202,238114,184431,6037227020
2,60372932022,235652,181969,6037293202
3,60372941203,235653,181970,6037294120
4,60371892012,236061,182378,6037189201


In [9]:
# Number of distinct block groups (bid) found under each GEOID.
njoins_coreid_df = (
    blocks2coreid_df[['bid', 'GEOID']]
    .drop_duplicates()
    .groupby('GEOID')
    .size()
    .reset_index(name='count')
)
njoins_coreid_df.head()

Unnamed: 0,GEOID,count
0,6037101110,3
1,6037101122,2
2,6037101210,2
3,6037101220,2
4,6037101300,4


In [10]:
# Attach the per-GEOID block-group count to every row.
# Re-running this cell alone would merge a second 'count' column
# (count_x / count_y), so rebuild blocks2coreid_df first.
blocks2coreid_df = blocks2coreid_df.merge(njoins_coreid_df, on='GEOID')
blocks2coreid_df.head()

Unnamed: 0,original_id,bid,sp_id,GEOID,count
0,60372753113,235767,182084,6037275311,4
1,60372753112,235780,182097,6037275311,4
2,60372753114,235827,182144,6037275311,4
3,60372753111,235915,182232,6037275311,4
4,60372270202,238114,184431,6037227020,2


In [11]:
# Hourly-stay table; 'tract' is read as a string so it can be matched against GEOID.
ambient_df = pd.read_csv('../../data/LA/mobile-phone/hourly_stay_LA.csv', dtype={'tract': str})
# Spot-check one tract.
ambient_df.loc[ambient_df['tract'].eq('06037101110')].head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,16,17,18,19,20,21,22,23,lon,lat
869,6037101110,5002.0,4954.0,4924.0,4900.0,4874.0,4834.0,4716.0,4330.0,3606.0,...,3612.0,3886.0,4212.0,4314.0,4454.0,4420.0,4414.0,4410.0,-118.292987,34.259474


In [12]:
# One row per (GEOID, spatial group) pair with the GEOID's block-group count.
blocks2coreid_unique_df = (
    blocks2coreid_df
    .loc[:, ['GEOID', 'sp_id', 'count']]
    .drop_duplicates(subset=['GEOID', 'sp_id'])
)
blocks2coreid_unique_df.head()

Unnamed: 0,GEOID,sp_id,count
0,6037275311,182084,4
1,6037275311,182097,4
2,6037275311,182144,4
3,6037275311,182232,4
4,6037227020,184431,2


In [13]:
# Give every block group of a tract a copy of the tract's hourly values...
tract_to_bid = (
    blocks2coreid_df[['GEOID', 'count', 'bid']]
    .drop_duplicates(subset=['GEOID', 'bid', 'count'])
    .rename(columns={'GEOID': 'tract'})
)
ambient_sp_id_df = ambient_df.merge(tract_to_bid, on='tract')

# ...then divide each of the 24 hourly columns by the tract's block-group
# count, so the tract total is split evenly among its block groups.
columns = [str(x) for x in range(0,24)]
ambient_sp_id_df[columns] = ambient_sp_id_df[columns].div(ambient_sp_id_df['count'], axis=0)

ambient_sp_id_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,count,bid
0,6037183810,1175.0,1169.0,1151.0,1150.0,1146.0,1137.0,1148.0,1209.0,1213.0,...,1153.0,1173.0,1165.0,1160.0,1152.0,1139.0,-118.19785,34.107365,2,237227
1,6037183810,1175.0,1169.0,1151.0,1150.0,1146.0,1137.0,1148.0,1209.0,1213.0,...,1153.0,1173.0,1165.0,1160.0,1152.0,1139.0,-118.19785,34.107365,2,237296
2,6037137301,2138.0,2126.0,2104.0,2106.0,2096.0,2098.0,2100.0,2094.0,2084.0,...,1894.0,1880.0,1874.0,1930.0,1960.0,1936.0,-118.634042,34.176253,1,237157
3,6037137302,2289.0,2265.0,2241.0,2234.0,2229.0,2227.0,2206.0,2003.0,1912.0,...,1868.0,1883.0,1875.0,1860.0,1999.0,1985.0,-118.654196,34.177348,2,237098
4,6037137302,2289.0,2265.0,2241.0,2234.0,2229.0,2227.0,2206.0,2003.0,1912.0,...,1868.0,1883.0,1875.0,1860.0,1999.0,1985.0,-118.654196,34.177348,2,237204


In [14]:
# Sum the per-block-group shares. numeric_only=True keeps the text column
# 'tract' out of the aggregation: pandas >= 2.0 no longer drops it silently
# and would concatenate the strings instead.
# The summed lon / lat / count columns carry no meaning after this step; only
# the hourly columns are used below.
ambient_sp_id_df = ambient_sp_id_df.groupby('bid', as_index=False).sum(numeric_only=True)
# Mean of the 24 hourly values for each block group.
ambient_sp_id_df['ambient_avg'] = ambient_sp_id_df[[str(x) for x in range(0,24)]].mean(axis=1)
ambient_sp_id_df.head()

Unnamed: 0,bid,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,count,ambient_avg
0,235646,989.333333,982.666667,978.666667,969.333333,967.333333,966.666667,984.666667,995.333333,1044.666667,...,1003.333333,986.0,966.0,959.333333,948.0,940.666667,-118.260391,33.797993,3,1009.055556
1,235647,983.0,983.0,986.0,984.0,987.0,995.5,1080.5,1270.5,1629.0,...,1274.5,1194.5,1183.0,1170.0,1144.5,1100.0,-118.302154,33.848473,4,1430.916667
2,235648,2747.0,2713.0,2679.0,2671.0,2652.0,2631.0,2589.0,2450.0,2144.0,...,2457.0,2482.0,2452.0,2450.0,2449.0,2442.0,-118.289893,33.794128,2,2304.291667
3,235649,1256.0,1251.0,1247.0,1237.0,1234.0,1237.0,1258.0,1424.0,1587.0,...,1513.0,1323.0,1310.0,1279.0,1273.0,1254.0,-118.304315,33.792617,2,1435.541667
4,235650,796.666667,794.666667,792.666667,790.0,790.0,816.0,864.0,1079.333333,1218.0,...,823.333333,803.333333,798.666667,780.666667,786.0,789.333333,-118.247927,33.78747,3,991.25


In [15]:
# Stage (bid, ambient_avg) in the database table 'temptable3', replacing any
# existing table of that name (the trips section above stages into the same name).
ambient_sp_id_df[['bid', 'ambient_avg']].to_sql('temptable3', engine, if_exists='replace', index=False)

In [16]:
# Copy the staged averages into ambient_population, tagged as city 'LA'.
# NOTE(review): this is a plain INSERT, so re-running the cell appends the same
# rows again; clear earlier LA rows first if the cell must be repeatable.
sql = """
INSERT INTO ambient_population (bid, city, num_people) 
SELECT c.bid, 'LA', c.ambient_avg
FROM temptable3 c 
"""

# Engine.execute() was removed in SQLAlchemy 2.0. engine.begin() runs the
# statement in a transaction that commits on success and rolls back on error.
with engine.begin() as conn:
    result = conn.execute(text(sql))