In [1]:
import geopandas as gpd
import pandas as pd
from h3 import h3
from sqlalchemy import create_engine
from shapely.geometry import Polygon, Point
import numpy as np
import psycopg2

In [2]:
def h3_from_row(row,res,x,y):
    '''
    Esta funcion toma una fila, un nivel de resolucion de h3
    y los nombres que contienen las coordenadas xy
    y devuelve un id de indice h3
    '''
    return h3.geo_to_h3(row[y],row[x],res = res)

def h3_indexing(df,res_list,lat='latitude',lon='longitude'):
    """
    Esta funcion toma una tabla con dos pares de coordenadas para origen y destino
    Un nivel de resolucion h3
    y devuelve la tabla con los ids de h3
    """
    
    if len(res_list) == 1:
        res_list.append(res_list[0])
        
    for res in range(res_list[0],res_list[1] + 1):
        df['h3_res_' + str(res)] = df.apply(h3_from_row, axis=1, args=[res,lon,lat])

    return df

def vertices_cada_Xmetros(geom,metros):
    n_puntos = int((geom.length/metros)+1)
    percentiles = np.linspace(0,geom.length,n_puntos)
    return [geom.interpolate(percentil,normalized=False) for percentil in percentiles]

def convertir_recorridos_buses_paradas(fila,metros=400):
    paradas = vertices_cada_Xmetros(fila.geometry,metros = metros)
    paradas = gpd.GeoSeries(paradas).map(lambda g: Point(g.coords[0][0:2]))
    crs = 'EPSG:3857'
    gdf = gpd.GeoDataFrame(np.repeat(fila.linea,len(paradas)),crs=crs,geometry=paradas)
    gdf.columns=['linea','geometry']
    return gdf

In [3]:
DB_USERNAME = 'sube_user'
DB_PASSWORD = 'sube_pass'
DB_HOST = 'localhost'
DB_PORT = '5432'
DB_NAME = 'sube'
DB_SCHEMA = 'public'

In [4]:
# Conectar a la db
conn = psycopg2.connect(user = DB_USERNAME,
                                      password = DB_PASSWORD,
                                      host = DB_HOST,
                                      port = DB_PORT,
                                      database = DB_NAME)

In [5]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'SUB'
"""
lineas_subte_trx = pd.read_sql(sql, conn)

In [6]:
lineas_subte_trx

Unnamed: 0,LINEA
0,LINEA A
1,LINEA E
2,LINEA C
3,SUBTE
4,LINEA D
5,LINEA H
6,LINEA PREMETRO
7,LINEA B


In [7]:
premetro = pd.read_csv('../carto/insumos/estaciones-premetro.csv')
premetro.head()

Unnamed: 0,long,lat,id,linea,nombre
0,-58.461491,-34.643757,1,PREMETRO,INTENDENTE SAGUIER
1,-58.456415,-34.648601,2,PREMETRO,BALBASTRO (Cementerio de Flores)
2,-58.450047,-34.659499,3,PREMETRO,ANA MARÍA JANER
3,-58.446607,-34.662225,4,PREMETRO,FERNANDEZ DE LA CRUZ
4,-58.448513,-34.665368,5,PREMETRO,PTE. ILLIA (Lacarra)


In [8]:
subte = gpd.read_file('../carto/insumos/subterraneo-estaciones')
subte['nombre'] = subte['ESTACION'] + ' | Linea ' + subte.LINEA
subte = subte.reindex(columns = ['LINEA','nombre','geometry'])
subte.columns = ['linea','nombre','geometry']

premetro = pd.read_csv('../carto/insumos/estaciones-premetro.csv')
premetro = gpd.GeoDataFrame(
    premetro, geometry=gpd.points_from_xy(premetro['long'], premetro.lat),crs='EPSG:4326')
premetro['nombre'] = premetro['nombre'] + ' | Linea ' + premetro.linea
premetro = premetro.reindex(columns = ['linea','nombre','geometry'])
premetro.columns = ['linea','nombre','geometry']

subte = pd.concat([subte,premetro])
subte['modo'] = 'SUB'
#todas las lineas de subte se van a llamar subte porque puedo tener cualquier estacion como destino
#, sin importar la linea. A los efectos del algoritmo, el SUBTE es una unica linea
subte['linea'] = 'SUBTE'

In [9]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'TRE'
"""
lineas_ffcc_trx = pd.read_sql(sql, conn)
lineas_ffcc_trx

Unnamed: 0,LINEA
0,FERROVIAS S.A.
1,SOFSE- SARMIENTO
2,SOFSE - Mitre
3,SOFSE - San Martin
4,METROVIAS S.A. (URQUIZA)
5,SOFSE - Roca
6,SOFSE - Belgrano Sur


In [10]:
ffcc = gpd.read_file('../carto/insumos/rmba-ferrocarril-estaciones/')
ffcc['nombre'] = ffcc.ETIQUETA + ' | Linea ' + ffcc['Línea']

ffcc = ffcc.loc[ffcc.Tipo=='Estación',['Línea','nombre','geometry']]
ffcc.columns = ['linea','nombre','geometry']
ffcc['modo'] = 'TRE'

In [11]:
ffcc.linea.unique()

array(['Mitre', 'Sarmiento', 'Roca', 'Belgrano Sur', 'San Martín',
       'Belgrano Norte', 'Urquiza'], dtype=object)

In [12]:
ffcc_equivalencias = {'Mitre':'SOFSE - Mitre',
                      'Sarmiento':'SOFSE- SARMIENTO',
                      'Roca':'SOFSE - Roca',
                      'Belgrano Sur':'SOFSE - Belgrano Sur',
                      'San Martín':'SOFSE - San Martin',
                      'Belgrano Norte':'FERROVIAS S.A.',
                      'Urquiza':'METROVIAS S.A. (URQUIZA)'}

In [13]:
ffcc.linea = ffcc.linea.replace(ffcc_equivalencias)

In [14]:
lineas_ffcc_trx.isin(ffcc.linea.unique())

Unnamed: 0,LINEA
0,True
1,True
2,True
3,True
4,True
5,True
6,True


In [15]:
ffcc.head()

Unnamed: 0,linea,nombre,geometry,modo
0,SOFSE - Mitre,San Martin | Linea Mitre,POINT (-58.53165 -34.57368),TRE
1,SOFSE - Mitre,San Andres | Linea Mitre,POINT (-58.54055 -34.56368),TRE
2,SOFSE - Mitre,Villa Ballester | Linea Mitre,POINT (-58.55608 -34.55091),TRE
5,SOFSE - Mitre,José Leon Suarez | Linea Mitre,POINT (-58.57567 -34.53500),TRE
7,SOFSE - Mitre,Urquiza | Linea Mitre,POINT (-58.48792 -34.57472),TRE


## Buses

In [16]:
# nacionales
bus_nac = gpd.read_file('../carto/insumos/lineas-nacionales/')
bus_nac.rename(columns = {'LINEA':'linea'},inplace=True)
bus_nac = bus_nac.reindex(columns = ['linea','geometry'])
bus_nac = bus_nac.to_crs('EPSG:3857')

paradas_bus_nac = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_nac.iterrows()])
paradas_bus_nac.crs = 'EPSG:3857'
paradas_bus_nac = paradas_bus_nac.to_crs('EPSG:4326')
paradas_bus_nac['modo'] = 'COL'

# provinciales
bus_prov = gpd.read_file('../carto/insumos/lineas-provinciales/')
bus_prov.rename(columns = {'LINEA':'linea'},inplace=True)

bus_prov = bus_prov.reindex(columns = ['linea','geometry'])
bus_prov = bus_prov.to_crs('EPSG:3857')

paradas_bus_prov = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_prov.iterrows()])
paradas_bus_prov.crs = 'EPSG:3857'
paradas_bus_prov = paradas_bus_prov.to_crs('EPSG:4326')
paradas_bus_prov['modo'] = 'COL'
paradas_bus_prov.head()

# municipales
bus_muni = gpd.read_file('../carto/insumos/lineas-municipales/')
bus_muni.rename(columns = {'LINEA':'linea'},inplace=True)

bus_muni = bus_muni.reindex(columns = ['linea','geometry'])
bus_muni = bus_muni.to_crs('EPSG:3857')

paradas_bus_muni = pd.concat([convertir_recorridos_buses_paradas(fila) for i,fila in bus_muni.iterrows()])
paradas_bus_muni.crs = 'EPSG:3857'
paradas_bus_muni = paradas_bus_muni.to_crs('EPSG:4326')
paradas_bus_muni['modo'] = 'COL'

In [17]:
bus = pd.concat([paradas_bus_muni,paradas_bus_prov,paradas_bus_nac])

In [18]:
bus['linea'] = 'Linea '+bus['linea'].map(str)

In [19]:
bus['nombre'] = np.nan

In [20]:
bus.head()

Unnamed: 0,linea,geometry,modo,nombre
0,Linea 501,POINT (-58.38407 -34.88737),COL,
1,Linea 501,POINT (-58.38771 -34.88767),COL,
2,Linea 501,POINT (-58.39012 -34.88754),COL,
3,Linea 501,POINT (-58.39024 -34.89055),COL,
4,Linea 501,POINT (-58.39197 -34.89216),COL,


In [21]:
# traer modos
sql = """
select distinct t."LINEA"
from public.trx t
where t."MEDIO" = 'COL'
"""
lineas_bus_trx = pd.read_sql(sql, conn)
lineas_bus_trx

Unnamed: 0,LINEA
0,LINEA 56
1,LINEA 34
2,LINEA 532
3,LINEA 511B
4,LINEA 26
...,...
378,LINEA 175
379,LINEA 84
380,LINEA 278
381,LINEA_NORTE MUNICIPAL


In [None]:
#faltan 1/3
lineas_faltantes = lineas_bus_trx[~lineas_bus_trx.isin(bus.linea.unique()).values]
len(lineas_faltantes)

In [None]:
#separar las municipales y ver si se puede usar alguna forma de detectar a que linea pertenecen

In [None]:
no_500 = lineas_faltantes.LINEA.map(lambda s: s[:7] != 'LINEA 5')
si_500 = ~no_500

In [None]:
lineas_faltantes[si_500].head()

In [None]:
lineas_faltantes[no_500].head()

In [22]:
paradas = pd.concat([bus,ffcc,subte])
paradas.head()

Unnamed: 0,linea,geometry,modo,nombre
0,Linea 501,POINT (-58.38407 -34.88737),COL,
1,Linea 501,POINT (-58.38771 -34.88767),COL,
2,Linea 501,POINT (-58.39012 -34.88754),COL,
3,Linea 501,POINT (-58.39024 -34.89055),COL,
4,Linea 501,POINT (-58.39197 -34.89216),COL,


In [23]:
paradas.shape

(178178, 4)

In [24]:
paradas.to_file('../carto/paradas.geojson',driver='GeoJSON')

In [25]:
paradas['latitude'] = paradas.geometry.y
paradas['longitude'] = paradas.geometry.x
paradas.drop('geometry',axis=1,inplace=True)

In [26]:
engine = create_engine('postgresql://{}:{}@{}:{}/{}'
    .format(DB_USERNAME, DB_PASSWORD, DB_HOST,
            DB_PORT, DB_NAME))

In [27]:
%time h3_paradas = h3_indexing(paradas.copy(),res_list = [5,12])

CPU times: user 38.4 s, sys: 192 ms, total: 38.6 s
Wall time: 38.6 s


In [28]:
h3_paradas.head()

Unnamed: 0,linea,modo,nombre,latitude,longitude,h3_res_5,h3_res_6,h3_res_7,h3_res_8,h3_res_9,h3_res_10,h3_res_11,h3_res_12
0,Linea 501,COL,,-34.887373,-58.384068,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39945fffff,89c2e399453ffff,8ac2e39945affff,8bc2e39945adfff,8cc2e39945ad1ff
1,Linea 501,COL,,-34.88767,-58.387713,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39945fffff,89c2e3994cfffff,8ac2e3994cdffff,8bc2e3994cdbfff,8cc2e3994cdb1ff
2,Linea 501,COL,,-34.887542,-58.390125,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e39941fffff,89c2e39941bffff,8ac2e3994197fff,8bc2e39941b3fff,8cc2e3994194bff
3,Linea 501,COL,,-34.890546,-58.390244,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e3994dfffff,89c2e3994c7ffff,8ac2e3994c77fff,8bc2e3994c76fff,8cc2e3994c763ff
4,Linea 501,COL,,-34.892161,-58.39197,85c2e39bfffffff,86c2e3997ffffff,87c2e3994ffffff,88c2e3994dfffff,89c2e3994d7ffff,8ac2e3994d47fff,8bc2e3994d45fff,8cc2e3994d45bff


In [29]:
h3_paradas.to_csv('../data/h3_paradas.csv',index=False)

In [30]:
h3_paradas.to_sql('paradas', engine, schema=DB_SCHEMA,method='multi')

In [None]:
h3_paradas = h3_paradas.reindex(columns = ['LINEA','MEDIO','h3_res_11'])
h3_paradas = h3_paradas.drop_duplicates()

In [None]:
# plotear el mapa de hexagrillas de paradas
lista_indices_global = h3_paradas.h3_res_11.unique()
geo_df = gpd.GeoDataFrame(lista_indices_global,
                          geometry = [Polygon(h3.h3_to_geo_boundary(h3_address=h, geo_json=True)
                                                                   ) for h in lista_indices_global],
                          crs = 'EPSG:4326')
geo_df.columns=['h3_index','geometry']
geo_df = geo_df.merge(h3_paradas,left_on = 'h3_index',right_on = 'h3_res_11',how='inner')
geo_df.to_file('../carto/carto_paradas_hex_res_11.geojson',driver='GeoJSON')

geo_df.head()