In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys

In [2]:
import yaml

# Database settings are read from a YAML file kept outside the notebook.
# The URL template below reads the keys: username, password, host, port, dbname.
# NOTE(review): yaml.safe_load would be sufficient if the file only holds plain
# key/value settings — confirm before switching loaders.
with open('../../config/postgres.yaml') as f:
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

# create_engine() is lazy: it only builds the Engine object and does not contact
# the server. Open (and immediately release) one connection so that bad
# credentials or an unreachable host are reported here rather than in a later cell.
try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Re-raise instead of continuing: every later cell needs a usable `engine`,
    # and swallowing the error would only surface as a NameError further down.
    raise

In [3]:
def process_geometry_SQL_insert(gdf):
    """Return a copy of ``gdf`` prepared for insertion into a PostGIS table.

    Each value of the ``geometry`` column is converted to a ``WKTElement``
    with SRID 4326 and stored in a new ``geom`` column; plain ``Polygon``
    geometries are first wrapped in a single-member ``MultiPolygon`` so the
    column holds one geometry type. The original ``geometry`` column is
    dropped from the result.

    Parameters
    ----------
    gdf : DataFrame-like with a ``geometry`` column whose values expose
        ``geom_type`` and ``wkt``.

    Returns
    -------
    A new frame with ``geom`` added and ``geometry`` removed. The input
    frame is left untouched.
    """
    def _as_multipolygon_wkt(geom):
        # Promote single polygons; anything else is passed through unchanged.
        promoted = MultiPolygon([geom]) if geom.geom_type == 'Polygon' else geom
        return WKTElement(promoted.wkt, srid=4326)

    # Work on a copy: callers often pass a column slice of another frame, and
    # writing into it would mutate their data (and trigger SettingWithCopyWarning).
    prepared = gdf.copy()
    prepared['geom'] = prepared['geometry'].apply(_as_multipolygon_wkt)
    # Keyword form: the positional `axis` argument of drop() was removed in pandas 2.0.
    return prepared.drop(columns='geometry')

In [4]:
# Run parameters: these two values are substituted into the SQL filters
# (city / spatial_name) and into output file names throughout the notebook.
CITY = "bogota"
SPATIAL_NAME = "ego"

## Origin–destination (OD) flows

In [5]:
# Load the barrio polygons and keep only the geometry and its OBJECTID key.
barrios_path = '../../data/bogota/mobile-phone/bogota_barrios_cadastrales.geojson'
tracts_gdf = gpd.read_file(barrios_path)[['geometry', 'OBJECTID']]
tracts_gdf.head()

Unnamed: 0,geometry,OBJECTID
0,"POLYGON ((-74.07088 4.83141, -74.07087 4.83138...",1
1,"POLYGON ((-74.05806 4.82926, -74.05781 4.82925...",2
2,"POLYGON ((-74.03441 4.82510, -74.03432 4.82544...",3
3,"POLYGON ((-74.09394 4.79092, -74.09387 4.79088...",4
4,"POLYGON ((-74.04113 4.78365, -74.04077 4.78343...",5


In [6]:
# Stage the polygons in the database as `temptable` (replaced on every run) so
# they can be intersected with the block groups in SQL.
ins_gdf = process_geometry_SQL_insert(tracts_gdf)
ins_gdf.to_sql(
    'temptable',
    engine,
    if_exists='replace',
    index=False,
    dtype={'geom': Geometry('MultiPolygon', srid=4326)},
)

In [7]:
# Build temptable2: for every (block group `bid`, spatial group `sp_id`) pair keep
# the single barrio (`oid`) whose intersection with the block group has the
# largest area. Polygons that merely touch along a border are excluded.
# City and spatial name are passed as bound parameters (:city, :spname) rather
# than being formatted into the SQL string.
sql = """
DROP TABLE IF EXISTS temptable2;
CREATE TABLE temptable2 AS 
SELECT oid, bid, sp_id
FROM (
    SELECT oid, bid, sp_id, ROW_NUMBER() OVER (PARTITION BY bid, sp_id ORDER BY area DESC) AS r
    from (
        SELECT ST_Area(ST_Intersection(s.geom, b.geom)::geography) as area, b.\"OBJECTID\" as oid, s.bid, sp.sp_id
        FROM blocks_group as s
        INNER JOIN temptable as b on ST_Intersects(s.geom, b.geom) AND NOT ST_Touches(s.geom, b.geom)
        INNER JOIN spatial_groups as sp on s.bid = ANY(sp.lower_ids)
        WHERE s.city = :city and sp.spatial_name = :spname
        ) as dtable
    order by area
) x
WHERE x.r = 1;
"""

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; run the
# DDL inside an explicit transaction that commits when the block exits.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY, 'spname': SPATIAL_NAME})

In [8]:
sql = """
SELECT oid::text, bid::text, sp_id::text FROM temptable2
"""

blocks2spid_df = pd.read_sql(sql, engine)

# `count` = number of distinct block groups (bid) matched to each barrio (oid).
distinct_bid_oid = blocks2spid_df[['bid', 'oid']].drop_duplicates()
njoins_df = distinct_bid_oid.groupby('oid').size().to_frame('count').reset_index()
blocks2spid_df = blocks2spid_df.merge(njoins_df, on='oid')
blocks2spid_df.head()

Unnamed: 0,oid,bid,sp_id,count
0,868,500920,349139,1
1,868,500920,349113,1
2,868,500920,349093,1
3,868,500920,349058,1
4,868,500920,349490,1


In [9]:
# One row per (barrio, spatial group) pair, keeping that barrio's `count`.
unique_oid_sp = blocks2spid_df.drop_duplicates(subset=['oid', 'sp_id'])
blocks2spid_unique_df = unique_oid_sp[['oid', 'sp_id', 'count']]
blocks2spid_unique_df.head()

Unnamed: 0,oid,sp_id,count
0,868,349139,1
1,868,349113,1
2,868,349093,1
3,868,349058,1
4,868,349490,1


In [10]:
# Number of distinct block groups present in the mapping.
len(set(blocks2spid_df['bid'].values))

918

In [11]:
# Open the zipped travel-demand archive and list its members. The handle is
# kept open because a later cell reads the CSV through `zip_file`.
travel_zip_path = '../../data/bogota/mobile-phone/travel_demand_Bogota.csv.zip'
zip_file = ZipFile(travel_zip_path)
zip_file.infolist()

[<ZipInfo filename='travel_demand_Bogota.csv' compress_type=deflate filemode='-rw-r--r--' external_attr=0x4000 file_size=150133682 compress_size=23376029>]

In [12]:
# Hourly columns are named '0'..'23' and read as float32; block ids stay strings.
hour_cols = [str(h) for h in range(0, 24)]
types = dict.fromkeys(hour_cols, np.float32)
types.update({'O_Block': str, 'D_Block': str})

travel_df = pd.read_csv(zip_file.open('travel_demand_Bogota.csv'), dtype=types)
travel_df = travel_df.drop(columns=['lon1', 'lat1', 'lon2', 'lat2'])
# `tot` = trips summed over the 24 hourly columns.
travel_df['tot'] = travel_df[hour_cols].sum(axis=1)

travel_df.head()

Unnamed: 0,O_Block,D_Block,HBW,HBO,NHB,0,1,2,3,4,...,15,16,17,18,19,20,21,22,23,tot
0,103,290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,253,415,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,461,224,0.0,0.0,3.075779,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.53789,0.0,0.0,0.0,0.0,3.075779
3,268,494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,788,695,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Row count of the (barrio, spatial group) lookup.
blocks2spid_unique_df.shape[0]

26738

In [14]:
# Barrios to blockgroup: re-key each trip's origin barrio (O_Block) to the
# spatial groups (o_sp_id) it is mapped to.
origin_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'o_sp_id'})
od_sp_groups_df = (
    travel_df[['O_Block', 'D_Block', 'HBO', 'NHB', 'tot']]
    .merge(origin_lookup, left_on='O_Block', right_on='oid')
    .drop(columns=['oid'])
)
# Scale `tot` down by the barrio's `count`.
# NOTE(review): HBO and NHB are not divided by `count` here — confirm this is intended.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'O_Block'])
    .groupby(['o_sp_id', 'D_Block'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,D_Block,HBO,NHB,tot
0,349010,1,172.24365,1.53789,173.781544
1,349010,10,1.53789,0.0,1.53789
2,349010,100,0.0,0.0,0.0
3,349010,101,0.0,0.0,38.447243
4,349010,102,50.750361,69.205038,1018.083009


In [15]:
# Compare row counts after origin re-keying against the raw trip table.
od_sp_groups_df.shape[0], travel_df.shape[0]

(784890, 731025)

In [16]:
# Re-key the destination barrio (D_Block) to spatial groups (d_sp_id), then
# aggregate to one row per (origin group, destination group).
dest_lookup = blocks2spid_unique_df.rename(columns={'sp_id': 'd_sp_id'})
od_sp_groups_df = (
    od_sp_groups_df
    .merge(dest_lookup, left_on='D_Block', right_on='oid')
    .drop(columns=['oid'])
)
# NOTE(review): as on the origin side, only `tot` is divided by `count` — confirm.
od_sp_groups_df.loc[:, 'tot'] = od_sp_groups_df['tot'] / od_sp_groups_df['count']
od_sp_groups_df = (
    od_sp_groups_df
    .drop(columns=['count', 'D_Block'])
    .groupby(['o_sp_id', 'd_sp_id'], as_index=False)
    .sum()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,349010,349010,22626.971628,39785.20737,97611.399663
1,349010,349011,10494.559532,24137.179345,50356.661885
2,349010,349012,15737.225629,28990.75934,65604.069871
3,349010,349013,17442.745342,32463.314355,77386.611961
4,349010,349014,14982.12177,30344.102304,65764.779286


In [17]:
# Sorted list of every spatial-group id (as strings) present in the mapping.
all_sp_ids = sorted(str(sp) for sp in set(blocks2spid_df['sp_id'].values))

### Fix missing links

In [18]:
import itertools

# Every ordered (origin, destination) pair of spatial groups, self-pairs included.
tuples = list(itertools.product(all_sp_ids, repeat=2))

# Ids must be strings so they match the entries of `tuples`.
od_sp_groups_df = od_sp_groups_df.astype({'o_sp_id': str, 'd_sp_id': str})
# Reindex onto the full pair grid; pairs absent from the data become rows of zeros.
od_sp_groups_df = (
    od_sp_groups_df
    .set_index(['o_sp_id', 'd_sp_id'])
    .reindex(tuples)
    .fillna(0)
    .reset_index()
)
od_sp_groups_df.head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
0,349010,349010,22626.971628,39785.20737,97611.399663
1,349010,349011,10494.559532,24137.179345,50356.661885
2,349010,349012,15737.225629,28990.75934,65604.069871
3,349010,349013,17442.745342,32463.314355,77386.611961
4,349010,349014,14982.12177,30344.102304,65764.779286


In [19]:
# Sanity check: which OD pairs have no trips at all (tot == 0)?
od_sp_groups_df.loc[od_sp_groups_df['tot'] == 0].head()

Unnamed: 0,o_sp_id,d_sp_id,HBO,NHB,tot
272,349010,349284,0.0,0.0,0.0
600,349010,349615,0.0,0.0,0.0
601,349010,349616,0.0,0.0,0.0
1190,349011,349284,0.0,0.0,0.0
1518,349011,349615,0.0,0.0,0.0


## Blocks_attract

In [20]:
# One row per (barrio, block group) pair, keeping that barrio's `count`.
unique_oid_bid = blocks2spid_df.drop_duplicates(subset=['oid', 'bid'])
blocks2bid_unique_df = unique_oid_bid[['oid', 'bid', 'count']]
blocks2bid_unique_df.head()

Unnamed: 0,oid,bid,count
0,868,500920,1
29,873,500923,2
48,873,500459,2
65,879,500283,1
95,922,500736,2


In [21]:
def _rekey_block_to_bid(trips, block_col, bid_col):
    """Replace the barrio column `block_col` of `trips` by block-group ids in `bid_col`.

    Joins `trips` with `blocks2bid_unique_df` on the barrio id, divides `tot`
    by the barrio's `count`, and drops the helper columns.
    """
    lookup = blocks2bid_unique_df.rename(columns={'bid': bid_col})
    rekeyed = pd.merge(trips, lookup, left_on=block_col, right_on='oid').drop(columns=['oid'])
    rekeyed.loc[:, 'tot'] = rekeyed['tot'] / rekeyed['count']
    return rekeyed.drop(columns=['count', block_col])


# Origin side first (O_Block -> o_bid), aggregated per (o_bid, D_Block) ...
od_bid_groups_df = (
    _rekey_block_to_bid(travel_df[['O_Block', 'D_Block', 'HBO', 'NHB', 'tot']], 'O_Block', 'o_bid')
    .groupby(['o_bid', 'D_Block'], as_index=False)
    .sum()
)

# ... then destination side (D_Block -> d_bid), aggregated per (o_bid, d_bid).
od_bid_groups_df = (
    _rekey_block_to_bid(od_bid_groups_df, 'D_Block', 'd_bid')
    .groupby(['o_bid', 'd_bid'], as_index=False)
    .sum()
)

od_bid_groups_df.head()

Unnamed: 0,o_bid,d_bid,HBO,NHB,tot
0,500248,500248,0.0,0.0,0.0
1,500248,500249,1.53789,0.0,1.53789
2,500248,500250,3.075779,1.53789,4.613669
3,500248,500251,0.0,0.0,0.0
4,500248,500252,1.53789,0.0,1.53789


In [22]:
# Expand each spatial group's `lower_ids` array into one (sp_id, bid) row per member.
sql = """
SELECT sp_id::text, unnest(lower_ids)::text as bid FROM spatial_groups where city='{city}' and spatial_name='{spname}'
""".format(spname=SPATIAL_NAME, city=CITY)

blocks_spatial_df = pd.read_sql(sql, con=engine)
blocks_spatial_df.head()

Unnamed: 0,sp_id,bid
0,349010,500664
1,349010,500565
2,349010,500268
3,349010,501110
4,349010,500256


In [23]:
# One row per spatial group, indexed by its id, with `attract` initialised to 0.
attract_df = od_sp_groups_df[['o_sp_id']].drop_duplicates().set_index('o_sp_id')
attract_df['attract'] = 0.

for spid in attract_df.index.values:
    # Block groups that belong to this spatial group.
    bids = blocks_spatial_df.loc[blocks_spatial_df['sp_id'] == spid, 'bid'].values

    ends_inside = od_bid_groups_df['d_bid'].isin(bids)
    starts_inside = od_bid_groups_df['o_bid'].isin(bids)
    # Sum of NHB over flows that end inside the group but start outside it.
    attract_df.loc[spid, 'attract'] = od_bid_groups_df.loc[ends_inside & ~starts_inside, 'NHB'].sum()

attract_df.head()

Unnamed: 0_level_0,attract
o_sp_id,Unnamed: 1_level_1
349010,309486.467579
349011,144034.13864
349012,194429.247271
349013,224396.566591
349014,259563.491095


### Save "other" trips: outgoing (between groups) and internal (within a group)

In [24]:
# Working copy of the OD table; `ntrips` currently mirrors `tot`.
trips_other = od_sp_groups_df.loc[:, ['o_sp_id', 'd_sp_id', 'tot', 'NHB']].copy()
trips_other['ntrips'] = trips_other['tot']
trips_other.head()

Unnamed: 0,o_sp_id,d_sp_id,tot,NHB,ntrips
0,349010,349010,97611.399663,39785.20737,97611.399663
1,349010,349011,50356.661885,24137.179345,50356.661885
2,349010,349012,65604.069871,28990.75934,65604.069871
3,349010,349013,77386.611961,32463.314355,77386.611961
4,349010,349014,65764.779286,30344.102304,65764.779286


In [25]:
# NHB received by each spatial group from *other* groups, keyed by destination.
# NOTE(review): `trips_attract` is reassigned from `attract_df` in a later cell,
# so the value computed here appears to be unused — confirm whether this cell
# is still needed.
inter_group = trips_other[trips_other.o_sp_id != trips_other.d_sp_id].copy()
inter_group['attract'] = inter_group['NHB']
trips_attract = (
    inter_group
    # numeric_only=True: `o_sp_id` is a string column. Older pandas dropped it
    # silently; pandas 2 would concatenate the strings and leave a second
    # `o_sp_id` column after the rename below.
    .groupby('d_sp_id', as_index=False)
    .sum(numeric_only=True)
    .drop(columns=['tot'])
    .rename(columns={'d_sp_id': 'o_sp_id'})[['o_sp_id', 'attract']]
)
trips_attract.head()

Unnamed: 0,o_sp_id,attract
0,349010,10496600.0
1,349011,4722819.0
2,349012,6464138.0
3,349013,7592519.0
4,349014,8437372.0


In [26]:
# Use the block-group based attract values as the final `trips_attract`.
trips_attract = attract_df.reset_index().loc[:, ['o_sp_id', 'attract']]

In [27]:
# Outgoing trips: flows whose origin and destination groups differ, summed per origin.
is_inter_group = trips_other['o_sp_id'] != trips_other['d_sp_id']
trips_out = (
    trips_other[is_inter_group]
    # numeric_only=True keeps the string column `d_sp_id` out of the sum; older
    # pandas dropped it silently, pandas 2 would concatenate the strings instead.
    .groupby('o_sp_id', as_index=False)
    .sum(numeric_only=True)
    .rename(columns={'ntrips': 'nout'})
    .drop(columns=['NHB'])
)
trips_out.head()

Unnamed: 0,o_sp_id,tot,nout
0,349010,16594870.0,16594870.0
1,349011,7284610.0,7284610.0
2,349012,10143020.0,10143020.0
3,349013,11989160.0,11989160.0
4,349014,11953470.0,11953470.0


In [28]:
# Internal trips: flows that start and end in the same spatial group.
is_intra_group = trips_other['o_sp_id'] == trips_other['d_sp_id']
trips_in = (
    trips_other[is_intra_group]
    # numeric_only=True keeps the string column `d_sp_id` out of the sum; older
    # pandas dropped it silently, pandas 2 would concatenate the strings instead.
    .groupby('o_sp_id', as_index=False)
    .sum(numeric_only=True)
    .rename(columns={'ntrips': 'nin'})
    .drop(columns=['NHB'])
)
trips_in.head()

Unnamed: 0,o_sp_id,tot,nin
0,349010,97611.399663,97611.399663
1,349011,30362.557537,30362.557537
2,349012,49884.529948,49884.529948
3,349013,64933.549923,64933.549923
4,349014,54908.046788,54908.046788


In [29]:
# Combine internal, outgoing and attract figures per spatial group. Both trip
# tables carry a `tot` column, so the merge suffixes them as tot_x (internal)
# and tot_y (outgoing).
in_and_out = trips_in.merge(trips_out, on='o_sp_id')
df_all = trips_attract.merge(in_and_out, on='o_sp_id')
df_all.head()

Unnamed: 0,o_sp_id,attract,tot_x,nin,tot_y,nout
0,349010,309486.467579,97611.399663,97611.399663,16594870.0,16594870.0
1,349011,144034.13864,30362.557537,30362.557537,7284610.0,7284610.0
2,349012,194429.247271,49884.529948,49884.529948,10143020.0,10143020.0
3,349013,224396.566591,64933.549923,64933.549923,11989160.0,11989160.0
4,349014,259563.491095,54908.046788,54908.046788,11953470.0,11953470.0


In [30]:
# Stage the per-group figures as `temptable3` (replaced on every run).
df_all.to_sql(name='temptable3', con=engine, if_exists='replace', index=False)

In [31]:
# Copy the staged figures from temptable3 into spatial_groups_trips, tagging each
# row with the city and spatial name (passed as bound parameters).
# NOTE(review): this is a plain INSERT, so re-running the cell will attempt to
# insert the same rows again — confirm how reruns should be handled.
sql = """
INSERT INTO spatial_groups_trips (sp_id, city, spatial_name, num_Otrips_in, num_Otrips_out, attract) 
SELECT c.o_sp_id::int, :city, :spname, c.nin, c.nout, c.attract
FROM temptable3 c 
"""

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; run the
# INSERT inside an explicit transaction that commits when the block exits.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY, 'spname': SPATIAL_NAME})

### Save OD

In [32]:
# Reshape the long OD table into a matrix: rows = origin group, columns =
# destination group, cells = `tot`.
ODs_matrix_df = od_sp_groups_df.copy().pivot(index='o_sp_id', columns='d_sp_id', values='tot')
ODs_matrix_df.head()

d_sp_id,349010,349011,349012,349013,349014,349015,349016,349017,349018,349019,...,349925,349926,349927,349928,349929,349930,349931,349932,349933,349934
o_sp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
349010,97611.399663,50356.661885,65604.069871,77386.611961,65764.779286,83241.358116,94618.666418,85522.048486,79560.41877,85642.004187,...,2036.934958,2401.41481,4552.922555,2409.488734,8.458394,19793.025327,1666.303531,4602.134979,71265.041822,6076.202304
349011,51926.078269,30362.557537,39714.465127,45436.183843,40067.4108,49801.483782,54496.661036,45776.057261,38573.350608,46945.622591,...,1165.720428,1099.591156,1354.111914,878.135016,1.53789,6942.034305,486.742113,2545.207471,39216.957644,1257.993804
349012,65888.579172,38690.230424,49884.529948,56933.447505,51671.55782,62286.07266,69770.981919,61337.194548,53117.173762,61601.711804,...,1322.585176,1590.177967,2076.920081,2051.544902,4.613669,10212.356816,718.963457,3271.091432,54247.523005,2181.496549
349013,77621.14001,44194.33779,56895.000358,64933.549923,57875.405013,70606.056153,79218.238749,70852.887469,62301.4514,70730.625451,...,1352.574027,1908.521143,2800.49719,2303.374349,4.613669,12150.482329,1145.727853,4103.858724,62795.883191,2976.585553
349014,67105.434493,39872.098793,52738.853337,59019.979475,54908.046788,64492.175555,72558.407095,64466.031212,57180.278369,64792.064087,...,1537.889761,1951.582044,2278.383643,1782.798674,6.536031,13716.43866,1048.071862,3805.50813,61197.24674,2335.66999


In [33]:
# Append a constant `city` column after the destination columns.
ODs_matrix_df = ODs_matrix_df.assign(city=CITY)

In [34]:
# Write the OD matrix to the generated-files folder, named after the city.
ods_csv_path = '../../data/generated_files/{city}_ODs.csv'.format(city=CITY)
ODs_matrix_df.to_csv(ods_csv_path)

In [35]:
# Load the spatial weights and keep only the rows for this spatial name and city.
# The file is read with explicit column names (no header row is assumed).
weights_df = pd.read_csv(
    '../../data/generated_files/spatial_dmatrix.csv',
    names=['o_sp_id', 'd_sp_id', 'city', 'spatial_name', 'w'],
    dtype={'w': np.float32, 'o_sp_id': str, 'd_sp_id': str},
)

# Filter first, then sort once: the original sorted the full table before
# filtering and then sorted again on the same keys.
is_selected = (weights_df['spatial_name'] == SPATIAL_NAME) & (weights_df['city'] == CITY)
weights_df = (
    weights_df[is_selected]
    .sort_values(['city', 'o_sp_id'])
    .reset_index(drop=True)
)
# NOTE(review): the recorded output of this cell shows no rows — confirm that the
# file actually contains entries for this city / spatial name.
weights_df.head()

Unnamed: 0,o_sp_id,d_sp_id,city,spatial_name,w


### Ambient population

In [36]:
# Rebuild temptable2 at block-group level: for every block group (`bid`) keep the
# single barrio (`oid`) whose intersection with it has the largest area.
# Polygons that merely touch along a border are excluded. The city is passed
# as a bound parameter (:city).
sql = """
DROP TABLE IF EXISTS temptable2;
CREATE TABLE temptable2 AS 
SELECT oid, bid
FROM (
    SELECT oid, bid, ROW_NUMBER() OVER (PARTITION BY bid ORDER BY area DESC) AS r
    from (
        SELECT ST_Area(ST_Intersection(s.geom, b.geom)::geography) as area, b.\"OBJECTID\" as oid, s.bid
        FROM blocks_group as s
        INNER JOIN temptable as b on ST_Intersects(s.geom, b.geom) AND NOT ST_Touches(s.geom, b.geom)
        WHERE s.city = :city
        ) as dtable
    order by area
) x
WHERE x.r = 1;
"""

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; run the
# DDL inside an explicit transaction that commits when the block exits.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})

In [37]:
sql = """
SELECT oid::text, bid::text FROM temptable2
"""

blocks2coreid_df = pd.read_sql(sql, engine)

# `count` = number of distinct block groups (bid) matched to each barrio (oid).
distinct_core_pairs = blocks2coreid_df[['bid', 'oid']].drop_duplicates()
njoins_coreid_df = distinct_core_pairs.groupby('oid').size().to_frame('count').reset_index()
blocks2coreid_df = blocks2coreid_df.merge(njoins_coreid_df, on='oid')
blocks2coreid_df.head()

Unnamed: 0,oid,bid,count
0,868,500920,1
1,873,500923,2
2,873,500459,2
3,879,500283,1
4,922,500736,2


In [38]:
# One row per (barrio, block group) pair, keeping that barrio's `count`.
unique_core_pairs = blocks2coreid_df.drop_duplicates(subset=['oid', 'bid'])
blocks2coreid_unique_df = unique_core_pairs[['oid', 'bid', 'count']]
blocks2coreid_unique_df.head()

Unnamed: 0,oid,bid,count
0,868,500920,1
1,873,500923,2
2,873,500459,2
3,879,500283,1
4,922,500736,2


In [39]:
# Hourly stay counts per tract; the tract id is read as a string so it can be
# joined with the text ids coming from the database.
ambient_csv_path = '../../data/bogota/mobile-phone/hourly_stay_Bogota_blocks.csv'
ambient_df = pd.read_csv(ambient_csv_path, dtype={'tract': str})
ambient_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,16,17,18,19,20,21,22,23,lon,lat
0,344,7198.861836,7191.172387,7194.248167,7198.861836,7249.612197,7558.728033,9394.968373,15838.726351,21147.521706,...,21132.142809,13851.772817,11349.626223,9251.944628,8873.623754,8064.693755,7835.548185,7623.319402,-74.06162,4.65065
1,345,1333.350398,1336.426177,1334.888287,1333.350398,1348.729295,1730.125949,1928.513724,2460.623571,3277.243019,...,3041.94589,2326.827165,1979.264085,1879.301253,1719.36072,1707.057603,1607.09477,1391.790208,-74.051592,4.650857
2,346,3992.361745,4179.984292,4303.01547,4378.372067,4472.183341,5194.991515,8150.81558,14756.05198,30490.201829,...,29930.409966,17642.671007,13395.019567,11694.113523,11557.241337,11421.90704,11288.110634,11005.138923,-74.10067,4.645555
3,340,3952.376612,3952.376612,3927.770376,3880.095794,3946.225053,4239.961991,7128.118908,8785.96404,11215.829816,...,8636.788736,7311.127786,5278.037561,4815.132751,4592.13874,4239.961991,4212.279976,3680.170129,-74.055013,4.652804
4,341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.53789,1.53789,1.53789,1.53789,0.0,0.0,0.0,0.0,-74.190215,4.649933


In [40]:
# Attach a block group (bid) to every tract, then divide each hourly value by
# the tract's `count`.
tract_lookup = blocks2coreid_df.rename(columns={'oid': 'tract'})
ambient_sp_id_df = ambient_df.merge(tract_lookup, on='tract')

columns = [str(x) for x in range(0, 24)]
ambient_sp_id_df[columns] = ambient_sp_id_df[columns].div(ambient_sp_id_df['count'], axis=0)

ambient_sp_id_df.head()

Unnamed: 0,tract,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,bid,count
0,344,7198.861836,7191.172387,7194.248167,7198.861836,7249.612197,7558.728033,9394.968373,15838.726351,21147.521706,...,11349.626223,9251.944628,8873.623754,8064.693755,7835.548185,7623.319402,-74.06162,4.65065,500877,1
1,345,1333.350398,1336.426177,1334.888287,1333.350398,1348.729295,1730.125949,1928.513724,2460.623571,3277.243019,...,1979.264085,1879.301253,1719.36072,1707.057603,1607.09477,1391.790208,-74.051592,4.650857,500627,1
2,346,3992.361745,4179.984292,4303.01547,4378.372067,4472.183341,5194.991515,8150.81558,14756.05198,30490.201829,...,13395.019567,11694.113523,11557.241337,11421.90704,11288.110634,11005.138923,-74.10067,4.645555,500950,1
3,340,3952.376612,3952.376612,3927.770376,3880.095794,3946.225053,4239.961991,7128.118908,8785.96404,11215.829816,...,5278.037561,4815.132751,4592.13874,4239.961991,4212.279976,3680.170129,-74.055013,4.652804,501094,1
4,342,7834.010295,7778.646265,7723.282235,7690.98655,7675.607653,7643.311969,7597.175277,7497.212444,7321.893015,...,6566.789156,6532.955582,6499.122008,6448.371647,6419.151742,6413.000183,-74.068835,4.652221,500954,1


In [41]:
# Aggregate to block-group level and average over the 24 hourly columns.
# numeric_only=True keeps the string column `tract` out of the sum: older pandas
# dropped it silently, pandas 2 would concatenate the ids instead.
# lon, lat and count are summed along with the hourly columns; only the hourly
# columns feed `ambient_avg`.
hour_cols = [str(h) for h in range(0, 24)]
ambient_sp_id_df = ambient_sp_id_df.groupby('bid', as_index=False).sum(numeric_only=True)
ambient_sp_id_df['ambient_avg'] = ambient_sp_id_df[hour_cols].mean(axis=1)
ambient_sp_id_df.head()

Unnamed: 0,bid,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,lon,lat,count,ambient_avg
0,500248,11797.152135,11735.636546,11687.961964,11655.66628,11626.446375,11604.915918,11558.779227,8332.286569,1925.437945,...,10433.043943,10445.34706,10283.868639,10160.83746,10099.321871,10036.268392,-74.175662,4.63164,1,7176.498356
1,500249,31992.720097,31755.885078,31605.171885,31539.042626,31482.140706,31120.736619,28872.341831,24466.287748,19983.339179,...,27637.416376,27988.055235,28609.362686,28580.142781,28495.558846,28640.120481,-74.175715,4.646673,1,24042.919521
2,500250,64969.689623,64503.709034,64200.744757,64036.190555,63908.545707,63624.036107,61906.213276,52242.1142,44546.51398,...,49566.186066,51733.072698,52440.501975,53449.357639,54422.84184,54225.991954,-74.172303,4.641123,1,48861.960726
3,500251,0.0,1.53789,1.53789,1.53789,1.53789,1.53789,1.53789,3.075779,3.075779,...,9.227338,7.689449,7.689449,7.689449,7.689449,7.689449,-74.175329,4.640527,1,36.07633
4,500252,3970.831288,3961.60395,3953.914501,3953.914501,3947.762942,3964.679729,3949.300832,3515.615928,2862.012791,...,3118.840377,3375.667962,3417.190985,3515.615928,3618.65454,3609.427201,-74.16724,4.643535,1,3107.690676


In [42]:
# Stage (bid, ambient_avg) as `temptable3`, replacing any previous contents.
ambient_upload = ambient_sp_id_df.loc[:, ['bid', 'ambient_avg']]
ambient_upload.to_sql('temptable3', engine, if_exists='replace', index=False)

In [43]:
# Copy the staged averages from temptable3 into ambient_population, tagging each
# row with the city (passed as a bound parameter).
# NOTE(review): this is a plain INSERT, so re-running the cell will attempt to
# insert the same rows again — confirm how reruns should be handled.
sql = """
INSERT INTO ambient_population (bid, city, num_people) 
SELECT c.bid::int, :city, c.ambient_avg
FROM temptable3 c 
"""

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; run the
# INSERT inside an explicit transaction that commits when the block exits.
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})