In [1]:
import geopandas as gpd
import pandas as pd
from h3 import h3
from sqlalchemy import create_engine
from shapely.geometry import Polygon, Point
import numpy as np
import psycopg2

In [2]:
def h3_from_row(row,res,x,y):
    '''
    Esta funcion toma una fila, un nivel de resolucion de h3
    y los nombres que contienen las coordenadas xy
    y devuelve un id de indice h3
    '''
    return h3.geo_to_h3(row[y],row[x],res = res)

def h3_indexing(df,res_list,lat='LATITUDE',lon='LONGITUDE'):
    """
    Esta funcion toma una tabla con dos pares de coordenadas para origen y destino
    Un nivel de resolucion h3
    y devuelve la tabla con los ids de h3
    """
    
    if len(res_list) == 1:
        res_list.append(res_list[0])
        
    for res in range(res_list[0],res_list[1] + 1):
        df['h3_res_' + str(res)] = df.apply(h3_from_row, axis=1, args=[res,lon,lat])

    return df

def vertices_cada_Xmetros(geom,metros):
    n_puntos = int((geom.length/metros)+1)
    percentiles = np.linspace(0,geom.length,n_puntos)
    return [geom.interpolate(percentil,normalized=False) for percentil in percentiles]

def convertir_recorridos_buses_paradas(fila,metros=400):
    paradas = vertices_cada_Xmetros(fila.geometry,metros = metros)
    paradas = gpd.GeoSeries(paradas).map(lambda g: Point(g.coords[0][0:2]))
    crs = 'EPSG:3857'
    gdf = gpd.GeoDataFrame(np.repeat(fila.LINEA,len(paradas)),crs=crs,geometry=paradas)
    gdf.columns=['LINEA','geometry']
    return gdf

In [3]:
DB_USERNAME = 'sube_user'
DB_PASSWORD = 'sube_pass'
DB_HOST = 'localhost'
DB_PORT = '5432'
DB_NAME = 'sube'
DB_SCHEMA = 'public'

In [4]:
# Conectar a la db
conn = psycopg2.connect(user = DB_USERNAME,
                                      password = DB_PASSWORD,
                                      host = DB_HOST,
                                      port = DB_PORT,
                                      database = DB_NAME)

In [5]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'SUB'
"""
lineas_subte_trx = pd.read_sql(sql, conn)

In [6]:
lineas_subte_trx

Unnamed: 0,LINEA
0,LINEA A
1,LINEA E
2,LINEA C
3,SUBTE
4,LINEA D
5,LINEA H
6,LINEA PREMETRO
7,LINEA B


In [7]:
subte = gpd.read_file('../carto/insumos/subterraneo-estaciones')
subte = subte.reindex(columns = ['LINEA','geometry'])

premetro = pd.read_csv('../carto/insumos/estaciones-premetro.csv')
premetro = gpd.GeoDataFrame(
    premetro, geometry=gpd.points_from_xy(premetro['long'], premetro.lat),crs='EPSG:4326')
premetro = premetro.reindex(columns = ['linea','geometry'])
premetro.columns = ['LINEA','geometry']

subte = pd.concat([subte,premetro])
subte['MEDIO'] = 'SUB'
#todas las lineas de subte se van a llamar subte porque puedo tener cualquier estacion como destino
#, sin importar la linea. A los efectos del algoritmo, el SUBTE es una unica linea
subte['LINEA'] = 'SUBTE'

In [10]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'TRE'
"""
lineas_ffcc_trx = pd.read_sql(sql, conn)
lineas_ffcc_trx

Unnamed: 0,LINEA
0,FERROVIAS S.A.
1,SOFSE- SARMIENTO
2,SOFSE - Mitre
3,SOFSE - San Martin
4,METROVIAS S.A. (URQUIZA)
5,SOFSE - Roca
6,SOFSE - Belgrano Sur


In [11]:
ffcc = gpd.read_file('../carto/insumos/rmba-ferrocarril-estaciones/')
ffcc = ffcc.loc[ffcc.Tipo=='Estación',['Línea','geometry']]
ffcc.columns = ['LINEA','geometry']
ffcc['MEDIO'] = 'TRE'

In [12]:
ffcc.LINEA.unique()

array(['Mitre', 'Sarmiento', 'Roca', 'Belgrano Sur', 'San Martín',
       'Belgrano Norte', 'Urquiza'], dtype=object)

In [13]:
ffcc_equivalencias = {'Mitre':'SOFSE - Mitre',
                      'Sarmiento':'SOFSE- SARMIENTO',
                      'Roca':'SOFSE - Roca',
                      'Belgrano Sur':'SOFSE - Belgrano Sur',
                      'San Martín':'SOFSE - San Martin',
                      'Belgrano Norte':'FERROVIAS S.A.',
                      'Urquiza':'METROVIAS S.A. (URQUIZA)'}

In [14]:
ffcc.LINEA = ffcc.LINEA.replace(ffcc_equivalencias)

In [15]:
lineas_ffcc_trx.isin(ffcc.LINEA.unique())

Unnamed: 0,LINEA
0,True
1,True
2,True
3,True
4,True
5,True
6,True


## Buses

In [16]:
# nacionales
bus_nac = gpd.read_file('../carto/insumos/lineas-nacionales/')
bus_nac = bus_nac.reindex(columns = ['LINEA','geometry'])
bus_nac = bus_nac.to_crs('EPSG:3857')

paradas_bus_nac = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_nac.iterrows()])
paradas_bus_nac.crs = 'EPSG:3857'
paradas_bus_nac = paradas_bus_nac.to_crs('EPSG:4326')
paradas_bus_nac['MEDIO'] = 'COL'

# provinciales
bus_prov = gpd.read_file('../carto/insumos/lineas-provinciales/')
bus_prov = bus_prov.reindex(columns = ['LINEA','geometry'])
bus_prov = bus_prov.to_crs('EPSG:3857')

paradas_bus_prov = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_prov.iterrows()])
paradas_bus_prov.crs = 'EPSG:3857'
paradas_bus_prov = paradas_bus_prov.to_crs('EPSG:4326')
paradas_bus_prov['MEDIO'] = 'COL'
paradas_bus_prov.head()

# municipales
bus_muni = gpd.read_file('../carto/insumos/lineas-municipales/')
bus_muni = bus_muni.reindex(columns = ['LINEA','geometry'])
bus_muni = bus_muni.to_crs('EPSG:3857')

paradas_bus_muni = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_muni.iterrows()])
paradas_bus_muni.crs = 'EPSG:3857'
paradas_bus_muni = paradas_bus_muni.to_crs('EPSG:4326')
paradas_bus_muni['MEDIO'] = 'COL'

In [17]:
bus = pd.concat([paradas_bus_muni,paradas_bus_prov,paradas_bus_nac])

In [18]:
bus['LINEA'] = 'LINEA '+bus['LINEA'].map(str)

In [19]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'COL'
"""
lineas_bus_trx = pd.read_sql(sql, conn)
lineas_bus_trx

Unnamed: 0,LINEA
0,LINEA 56
1,LINEA 34
2,LINEA 532
3,LINEA 511B
4,LINEA 26
...,...
378,LINEA 175
379,LINEA 84
380,LINEA 278
381,LINEA_NORTE MUNICIPAL


In [20]:
#faltan 1/3
lineas_faltantes = lineas_bus_trx[~lineas_bus_trx.isin(bus.LINEA.unique()).values]
len(lineas_faltantes)

111

In [21]:
#separar las municipales y ver si se puede usar alguna forma de detectar a que linea pertenecen

In [22]:
no_500 = lineas_faltantes.LINEA.map(lambda s: s[:7] != 'LINEA 5')
si_500 = ~no_500

In [23]:
lineas_faltantes[si_500].head()

Unnamed: 0,LINEA
2,LINEA 532
3,LINEA 511B
7,LINEA 505A
15,LINEA 561B
27,LINEA 503A BS AS


In [24]:
lineas_faltantes[no_500].head()

Unnamed: 0,LINEA
16,LINEA 202
19,LINEA 174 BS AS
23,LINEA 355
28,LINEA 288
43,LINEA 237A


In [25]:
paradas = pd.concat([bus,ffcc,subte])
paradas.head()

Unnamed: 0,LINEA,geometry,MEDIO
0,LINEA 501,POINT (-58.38407 -34.88737),COL
1,LINEA 501,POINT (-58.38771 -34.88767),COL
2,LINEA 501,POINT (-58.39012 -34.88754),COL
3,LINEA 501,POINT (-58.39024 -34.89055),COL
4,LINEA 501,POINT (-58.39197 -34.89216),COL


In [26]:
paradas.shape

(178178, 3)

In [30]:
paradas.to_file('../carto/paradas.geojson',driver='GeoJSON')

In [31]:
paradas['LATITUDE'] = paradas.geometry.y
paradas['LONGITUDE'] = paradas.geometry.x
paradas.drop('geometry',axis=1,inplace=True)

In [32]:
conn.close()

In [33]:
engine = create_engine('postgresql://{}:{}@{}:{}/{}'
    .format(DB_USERNAME, DB_PASSWORD, DB_HOST,
            DB_PORT, DB_NAME))

In [34]:
%time h3_paradas = h3_indexing(paradas.copy(),res_list = [5,12])

CPU times: user 37.9 s, sys: 144 ms, total: 38 s
Wall time: 38 s


In [35]:
h3_paradas.head()

Unnamed: 0,LINEA,MEDIO,LATITUDE,LONGITUDE,h3_res_5,h3_res_6,h3_res_7,h3_res_8,h3_res_9,h3_res_10,h3_res_11,h3_res_12
0,LINEA 501,COL,-34.887373,-58.384068,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39945fffff,89c2e399453ffff,8ac2e39945affff,8bc2e39945adfff,8cc2e39945ad1ff
1,LINEA 501,COL,-34.88767,-58.387713,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39945fffff,89c2e3994cfffff,8ac2e3994cdffff,8bc2e3994cdbfff,8cc2e3994cdb1ff
2,LINEA 501,COL,-34.887542,-58.390125,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39941fffff,89c2e39941bffff,8ac2e3994197fff,8bc2e39941b3fff,8cc2e3994194bff
3,LINEA 501,COL,-34.890546,-58.390244,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e3994dfffff,89c2e3994c7ffff,8ac2e3994c77fff,8bc2e3994c76fff,8cc2e3994c763ff
4,LINEA 501,COL,-34.892161,-58.39197,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e3994dfffff,89c2e3994d7ffff,8ac2e3994d47fff,8bc2e3994d45fff,8cc2e3994d45bff


In [38]:
h3_paradas.to_csv('../data/h3_paradas.csv',index=False)

In [42]:
h3_paradas.to_sql('paradas', engine, schema=DB_SCHEMA,method='multi')

In [43]:
h3_paradas = h3_paradas.reindex(columns = ['LINEA','MEDIO','h3_res_11'])
h3_paradas = h3_paradas.drop_duplicates()

In [44]:
# plotear el mapa de hexagrillas de paradas
lista_indices_global = h3_paradas.h3_res_11.unique()
geo_df = gpd.GeoDataFrame(lista_indices_global,
                          geometry = [Polygon(h3.h3_to_geo_boundary(h3_address=h, geo_json=True)
                                                                   ) for h in lista_indices_global],
                          crs = 'EPSG:4326')
geo_df.columns=['h3_index','geometry']
geo_df = geo_df.merge(h3_paradas,left_on = 'h3_index',right_on = 'h3_res_11',how='inner')
geo_df.to_file('../carto/carto_paradas_hex_res_11.geojson',driver='GeoJSON')

geo_df.head()

Unnamed: 0,h3_index,geometry,LINEA,MEDIO,h3_res_11
0,8bc2e39945adfff,"POLYGON ((-58.38390 -34.88754, -58.38375 -34.8...",LINEA 501,COL,8bc2e39945adfff
1,8bc2e39945adfff,"POLYGON ((-58.38390 -34.88754, -58.38375 -34.8...",LINEA 506,COL,8bc2e39945adfff
2,8bc2e3994cdbfff,"POLYGON ((-58.38767 -34.88785, -58.38752 -34.8...",LINEA 501,COL,8bc2e3994cdbfff
3,8bc2e3994cdbfff,"POLYGON ((-58.38767 -34.88785, -58.38752 -34.8...",LINEA 506,COL,8bc2e3994cdbfff
4,8bc2e39941b3fff,"POLYGON ((-58.39017 -34.88783, -58.39002 -34.8...",LINEA 501,COL,8bc2e39941b3fff
