# Procesando grandes volumenes de pings de dispositivos mobiles via Athena para generar matrices O-D de alta resolución espacial y temporal


Librerías a utilizar:

In [2]:
import sys
!{sys.executable} -m pip install pyathena
!{sys.executable} -m pip install h3
!{sys.executable} -m pip install awswrangler

Collecting pyathena
  Downloading PyAthena-2.1.2-py3-none-any.whl (37 kB)
Collecting tenacity>=4.1.0
  Downloading tenacity-7.0.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: tenacity, pyathena
Successfully installed pyathena-2.1.2 tenacity-7.0.0
Collecting h3
  Downloading h3-3.7.2-cp36-cp36m-manylinux2010_x86_64.whl (795 kB)
[K     |████████████████████████████████| 795 kB 18.6 MB/s eta 0:00:01
[?25hInstalling collected packages: h3
Successfully installed h3-3.7.2
Collecting awswrangler
  Downloading awswrangler-2.5.0-py3-none-any.whl (172 kB)
[K     |████████████████████████████████| 172 kB 18.7 MB/s eta 0:00:01
[?25hCollecting redshift-connector~=2.0.0
  Downloading redshift_connector-2.0.876-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 8.8 MB/s  eta 0:00:01
[?25hCollecting pg8000<1.18.0,>=1.16.0
  Downloading pg8000-1.17.0-py3-none-any.whl (34 kB)
Collecting pymysql<1.1.0,>=0.9.0
  Downloading PyMySQL-1.0.2-py3-none-any.whl (43 kB)


In [3]:
import pandas as pd
import boto3

from pyathena.pandas.cursor import PandasCursor
from pyathena import connect
import h3
import awswrangler as wr

Parámetros de la conexión a recursos en AWS:

In [4]:
s3 = boto3.resource('s3')
s3_staging = 's3://iadbprod-csd-hub-analyticaldata/graphdata-mobility-temporal/athena-results/'
region = 'us-east-1'
schema = 'graphdata'
pings_table = 'historico_pings'

# el bucket donde guardaremos tablas nuevas
s3_bucket = 's3://iadbprod-csd-hub-analyticaldata'

Conectamos a la base:

In [5]:
cursor = connect(s3_staging_dir = s3_staging, region_name = region, schema_name = schema, cursor_class=PandasCursor).cursor()

Verificando que la conexión funcione bien:

In [6]:
query_test = f'''SELECT * FROM {pings_table} LIMIT 10'''

cursor.execute(query_test).as_pandas()


Unnamed: 0,caid,utc_timestamp,latitude,longitude,id_type,hour_of_day,year,month,day,iso_country_code
0,a809e975716146d237ba56a8abc45897303c79a5ce59ca...,1580945569,14.617996,-90.540871,ANDROID,17,2020,2,5,GT
1,276fac1556e49347643820be9c28b5e697f1112476d8dd...,1580927112,14.504391,-90.574762,ANDROID,12,2020,2,5,GT
2,b89af31530ba821de762604dae0a3976f7e5196c7302ae...,1580910176,14.391015,-91.189755,IOS,7,2020,2,5,GT
3,b89af31530ba821de762604dae0a3976f7e5196c7302ae...,1580930508,14.516926,-90.778477,IOS,13,2020,2,5,GT
4,b89af31530ba821de762604dae0a3976f7e5196c7302ae...,1580934392,14.595722,-90.708721,IOS,14,2020,2,5,GT
5,24ed856403d83e3413e36d5c8f0f5805ec63b4e3538c7c...,1580922254,14.555545,-90.733471,ANDROID,11,2020,2,5,GT
6,076cefc964726ad1a98ac7556016a3f0bf38214fa42302...,1580884887,15.47384,-88.845203,ANDROID,0,2020,2,5,GT
7,076cefc964726ad1a98ac7556016a3f0bf38214fa42302...,1580946671,15.47384,-88.845203,ANDROID,17,2020,2,5,GT
8,301c7b5346bdb553ae71a22fce693630cf5352f7351793...,1580929034,14.703437,-91.862091,ANDROID,12,2020,2,5,GT
9,301c7b5346bdb553ae71a22fce693630cf5352f7351793...,1580945619,14.70331,-91.862132,ANDROID,17,2020,2,5,GT


Ahora resolveremos la extracción de pings en el área de interés, y la asignación de una celda H3 a cada uno, minimizando el consumo de recursos de la base de datos.

## Recorte de coordenadas

Un [estudio sistemático](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0219890) observó que la precisión de un dispositivo GPS celular, en entorno urbano, varía en el rango de los 7 a 13 metros de error . [La resolución de coordenadas Mercator](https://en.wikipedia.org/wiki/Decimal_degrees) con 4 puntos decimales va de 10,2 metros en el Ecuador a 4,3 metros en las cercanías del círculo polar. Por lo tanto las coordenadas de los pings pueden redondearse a 4 decimales de precisión sin pérdida de resolución, y simplificando su procesamiento en forma considerable.


Definimos el lugar de intéres y los límites de su _bounding box_ de coordenadas:

In [8]:
place_name =  "peru_departamento_lima"

extracted_table_location = f'{s3_bucket}/graphdata-mobility-OD/{place_name}'

country_code = 'PE'


xmin = -77.88659
xmax = -75.5075
ymin = -13.32351
ymax = -10.27419


Ejecutamos una query que realiza el recorte.

In [53]:
query_recortar_por_coordenadas = f'''CREATE TABLE {place_name}_pings
                    WITH (external_location = '{extracted_table_location}/{place_name}_pings', 
                        format = 'PARQUET', 
                        parquet_compression = 'SNAPPY') AS
                    SELECT caid,
                           id_type,
                           ROUND(latitude, 4) AS latitude,
                           ROUND(longitude, 4) AS longitude,
                           year,
                           month,
                           day,
                           hour_of_day
                    FROM {pings_table}
                    WHERE ((longitude > {xmin}) AND (longitude < {xmax}) AND 
                           (latitude > {ymin}) AND (latitude < {ymax}) AND
                           iso_country_code = '{country_code}')'''

In [55]:
cursor.execute(query_recortar_por_coordenadas)

<pyathena.pandas.cursor.PandasCursor at 0x7fa323a050f0>

Verificamos resultados:

In [7]:
query_test = f'''SELECT * FROM {place_name}_pings LIMIT 10'''

cursor.execute(query_test).as_pandas()

Unnamed: 0,caid,id_type,latitude,longitude,year,month,day,hour_of_day
0,bde4dc008380ef792932d1ac685f1245f03ca75f47ab5b...,ANDROID,-12.0449,-77.1262,2020,9,30,15
1,5eb9cf63a6d29187d3312f5dc8850ed84e98c770a7f0ad...,IOS,-12.1089,-77.0262,2020,9,30,16
2,5eb9cf63a6d29187d3312f5dc8850ed84e98c770a7f0ad...,IOS,-12.0736,-76.9559,2020,9,30,16
3,5eb9cf63a6d29187d3312f5dc8850ed84e98c770a7f0ad...,IOS,-12.0792,-76.9532,2020,9,30,16
4,6f0ade2465eef869b2f809f18a2e7e3737dd5e0cad6c95...,IOS,-12.1035,-76.9891,2020,9,30,18
5,52da68f63547d18ac3c8d0d8f4222878b8107ecfdcb7b3...,ANDROID,-12.0464,-77.0428,2020,9,30,0
6,52da68f63547d18ac3c8d0d8f4222878b8107ecfdcb7b3...,ANDROID,-12.0464,-77.0428,2020,9,30,2
7,52da68f63547d18ac3c8d0d8f4222878b8107ecfdcb7b3...,ANDROID,-12.0464,-77.0428,2020,9,30,13
8,22347579ffe24affcfe087fee2434ae3b9e34a9a09f9a5...,ANDROID,-11.9445,-77.0807,2020,9,30,9
9,155a172b40ba49a40df0d2d2e484269f751ca85f10b4fa...,ANDROID,-11.9974,-77.0554,2020,9,30,7


## Identificar la celda H3 de donde proviene cada ping

El [sistema de indexado geoespacial H3](https://eng.uber.com/h3/) fue  diseñado para permitir análisis geográfico por área utilizando escala de resolución arbitraria y uniforme, evitando los problemas asociados al análisis agrupado por áreas administrativas  que varían en tamaño y forma. A pesar de su aparición relativamente reciente, cuenta con casos de aplicación en la literatura de  estudios de movilidad, incluso en ciudades latinoamericanas (véase el proyecto [Acesso a Oportunidades](https://www.ipea.gov.br/acessooportunidades/en/sobre/), que estudia la accesibilidad a oportunidades de empleo y servicios básicos en las 20 principales ciudades de Brasil)

El sistema H3 particiona la superficie de la Tierra en una grilla hexagonal con una escala jerárquica en [16 niveles de resolución](https://github.com/uber/h3/blob/master/docs/core-library/restable.md). Tomaremos como referencia la grilla de nivel 9, resultando en celdas de un promedio aproximado de 0.08 km^2 -unas cuatro manzanas de extensión.

Las celdas H3 serán nuestra unidad de análisis: cuantificaremos cuantos usuarios residen en cada uno, cuantos las visitan a cada hora del dia, de que otra celda provienen los visitantes, etc.



Comenzamos por obtener las instancias distintas de pares de coordenadas -con redondeo a 4 decimales- de la tabla de pings en el área de interés:

In [10]:
query_unique_coords = f'''SELECT DISTINCT latitude, longitude FROM {place_name}_pings'''

In [11]:
unique_coords = cursor.execute(query_unique_coords).as_pandas()

Agregamos a cada par de coordenadas su celda H3 en la escala elegida:



In [13]:
h3_resolution = 9

unique_coords['h3idx'] = unique_coords.apply(lambda x: h3.geo_to_h3(x.latitude, x.longitude, h3_resolution), axis=1)

In [14]:
unique_coords.head()

Unnamed: 0,latitude,longitude,h3idx
0,-11.9598,-77.0722,898e62c2647ffff
1,-11.9162,-77.0603,898e62d1d3bffff
2,-12.0407,-77.1202,898e62c11d7ffff
3,-12.2017,-77.0083,898e62ce91bffff
4,-12.1969,-76.9505,898e62c5953ffff


Ahora guardamos en S3 la tabla de equivalencia entre coordenadas y id H3, como objeto en formato parquet 

In [15]:
wr.s3.to_parquet(  
    df = unique_coords,
    dataset = True,
    mode = "overwrite",
    database = schema,
    table = f"{place_name}_coords_to_h3idx",
    path = f'{extracted_table_location}/{place_name}_coords_to_h3idx'
)

{'paths': ['s3://iadbprod-csd-hub-analyticaldata/graphdata-mobility-OD/peru_departamento_lima/peru_departamento_lima_coords_to_h3idx/146340edfae347d1a7b60d2cb9129e08.snappy.parquet'],
 'partitions_values': {}}

Comprobamos que se haya guardado:

In [16]:
query_test = f'''SELECT * FROM {place_name}_coords_to_h3idx LIMIT 10'''

cursor.execute(query_test).as_pandas()

Unnamed: 0,latitude,longitude,h3idx
0,-11.9598,-77.0722,898e62c2647ffff
1,-11.9162,-77.0603,898e62d1d3bffff
2,-12.0407,-77.1202,898e62c11d7ffff
3,-12.2017,-77.0083,898e62ce91bffff
4,-12.1969,-76.9505,898e62c5953ffff
5,-12.0839,-77.0572,898e62c0a8bffff
6,-12.1027,-76.9973,898e62c548bffff
7,-12.0685,-77.1124,898e62c1e87ffff
8,-11.1196,-77.6113,898e7593337ffff
9,-11.9601,-77.0859,898e62c3583ffff


Ahora podemos realizar un join entre la tabla de pings y la de ids H3, de modo que a cada registro le quede asociada su celda correspondiente.

Aquí las columnas compartidas entre las tablas son de tipo float, latitude y longitude.

Es muy poco aconsejable realizar joins en base a datos de punto flotante, por lo que haremos un ajuste para unir en base a texto: convertiremos las coordenadas a una especie de "geohash", concatenando latitud y longitud como cadena de caracteres, y haremos el join en base a ese identificador.

In [29]:
query_agregar_h3idx = f'''CREATE TABLE {place_name}_pings_h3idx
                          WITH (external_location = '{extracted_table_location}/{place_name}_pings_h3idx', 
                                format = 'PARQUET', 
                                parquet_compression = 'SNAPPY') AS
                          SELECT {place_name}_pings.*, h3idx 
                          FROM {place_name}_pings LEFT JOIN {place_name}_coords_to_h3idx
                          ON CAST({place_name}_pings.longitude AS VARCHAR) || CAST({place_name}_pings.latitude AS VARCHAR) = 
                             CAST({place_name}_coords_to_h3idx.longitude AS VARCHAR) || CAST({place_name}_coords_to_h3idx.latitude AS VARCHAR)'''

In [31]:
cursor.execute(query_agregar_h3idx)

<pyathena.pandas.cursor.PandasCursor at 0x7fee64eef080>

In [9]:
#Verificamos:
query_test = f'''SELECT * FROM {place_name}_pings_h3idx LIMIT 10'''
cursor.execute(query_test).as_pandas()

Unnamed: 0,caid,id_type,latitude,longitude,year,month,day,hour_of_day,h3idx
0,857f4e1c50492fca34aeec3a11eb3551152fe2fa618649...,ANDROID,-11.8767,-77.0724,2020,4,27,9,898e62d02afffff
1,532e67c429b5bf200b7ca49ad9e359e3c50ba48ee7fdf3...,ANDROID,-12.0592,-77.0741,2020,4,27,9,898e62c033bffff
2,9f274c776cb3616d0ec89a9dcb6e5608f802af12b164f2...,ANDROID,-12.088,-77.0083,2020,4,27,9,898e62c724bffff
3,4dbeb881086dc5a08a67e4e44ee8803dbc90a989e451bb...,ANDROID,-12.0177,-77.03,2020,4,27,12,898e62c2e77ffff
4,e56cdea155f6a5953badfdc66bbd587f32f0dcd9b20258...,ANDROID,-12.0137,-77.0962,2020,4,27,12,898e62c061bffff
5,5a914571830d1bd5a35c8fc15374182f98a0736adf7b2c...,ANDROID,-12.0069,-76.9928,2020,4,27,7,898e62c2dc3ffff
6,5a914571830d1bd5a35c8fc15374182f98a0736adf7b2c...,ANDROID,-12.0069,-76.9928,2020,4,27,13,898e62c2dc3ffff
7,4dbeb881086dc5a08a67e4e44ee8803dbc90a989e451bb...,ANDROID,-12.0177,-77.03,2020,4,27,14,898e62c2e77ffff
8,a6a76d2fcc931e59a876d6493010d8ad95efa19a3d77f1...,ANDROID,-12.1857,-76.9906,2020,4,23,14,898e62c5ac3ffff
9,aacb0cfefeee68d97d4bdfe0dea9deefa4e2e02f567045...,ANDROID,-11.7732,-77.1702,2020,4,23,12,898e75a4187ffff


## Detección de _celda hogar_ y de _celda de ocupación_ 

Definidas como:


- Celda hogar: aquella donde un usuario reside. Se infiere en al base al origen mas frecuente de pings desde las 20 y hasta las 8 horas.

- Celda de ocupación: aquella donde un usuario concurre con frecuencia en horario diurno (para trabajar, estudiar). Se infiere en al base al origen mas frecuente de pings entre las 9 y las 17 horas de días hábiles.

Para descartar eventos esporádicos que no representan domicilio ni lugar de actividad, consideramos sólo las celdas donde un usuario registró actividad en al menos dos días distintos. Si un usuario registra actividad en multiples días en más de una celda, se le asigna aquella donde haya aparecido en mayor cantidad de días.

En caso de que encontremos que el sitio hogar y el sitio de ocupación más frecuente son el mismo, tomaremos como sitio de ocupación el siguiente en cantidad de días con presencia.


Calculamos:


In [20]:
query_identificar_sitios = f'''CREATE TABLE {place_name}_user_locations
                          WITH (external_location = '{extracted_table_location}/{place_name}_user_locations', 
                                format = 'PARQUET', 
                                parquet_compression = 'SNAPPY') AS
                            SELECT 
                              caid, 
                              h3idx_home, 
                              distinct_days_home, 
                              total_pings_home, 
                              h3idx_occupation, 
                              distinct_days_occupation, 
                              total_pings_occupation 
                            FROM 
                              (
                                SELECT 
                                  caid, 
                                  h3idx_home, 
                                  distinct_days_home, 
                                  total_pings_home, 
                                  h3idx_occupation, 
                                  distinct_days_occupation, 
                                  total_pings_occupation, 
                                  ROW_NUMBER() OVER (
                                    PARTITION BY caid 
                                    ORDER BY 
                                      distinct_days_occupation DESC
                                  ) AS q01 
                                FROM 
                                  (
                                    SELECT 
                                      LHS.caid AS caid, 
                                      h3idx_home, 
                                      distinct_days_home, 
                                      total_pings_home, 
                                      h3idx_occupation, 
                                      distinct_days_occupation, 
                                      total_pings_occupation 
                                    FROM 
                                      (
                                        SELECT 
                                          caid, 
                                          h3idx_home, 
                                          distinct_days_home, 
                                          total_pings_home 
                                        FROM 
                                          (
                                            SELECT 
                                              caid, 
                                              h3idx_home, 
                                              distinct_days_home, 
                                              total_pings_home, 
                                              ROW_NUMBER() OVER (
                                                PARTITION BY caid 
                                                ORDER BY 
                                                  distinct_days_home DESC
                                              ) AS q01 
                                            FROM 
                                              (
                                                SELECT 
                                                  caid, 
                                                  h3idx_home, 
                                                  COUNT(*) AS distinct_days_home, 
                                                  SUM(freq_home) AS total_pings_home 
                                                FROM 
                                                  (
                                                    SELECT 
                                                      caid, 
                                                      year, 
                                                      month, 
                                                      day, 
                                                      h3idx_home, 
                                                      COUNT(*) AS freq_home 
                                                    FROM 
                                                      (
                                                        SELECT 
                                                          caid, 
                                                          year, 
                                                          month, 
                                                          day, 
                                                          h3idx AS h3idx_home 
                                                        FROM 
                                                          {place_name}_pings_h3idx 
                                                        WHERE 
                                                          (
                                                            (
                                                              hour_of_day IN (20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7)
                                                            ) 
                                                            AND (
                                                              NOT(
                                                                (
                                                                  (h3idx) IS NULL
                                                                )
                                                              )
                                                            )
                                                          )
                                                      ) 
                                                    GROUP BY 
                                                      caid, 
                                                      year, 
                                                      month, 
                                                      day, 
                                                      h3idx_home
                                                  ) 
                                                GROUP BY 
                                                  caid, 
                                                  h3idx_home
                                              ) 
                                            WHERE 
                                              (distinct_days_home > 1.0)
                                          ) 
                                        WHERE 
                                          (q01 <= 1)
                                      ) AS LHS 
                                      LEFT JOIN (
                                        SELECT 
                                          caid, 
                                          h3idx_occupation, 
                                          distinct_days_occupation, 
                                          total_pings_occupation 
                                        FROM 
                                          (
                                            SELECT 
                                              caid, 
                                              h3idx_occupation, 
                                              distinct_days_occupation, 
                                              total_pings_occupation, 
                                              ROW_NUMBER() OVER (
                                                PARTITION BY caid 
                                                ORDER BY 
                                                  distinct_days_occupation DESC
                                              ) AS q01 
                                            FROM 
                                              (
                                                SELECT 
                                                  caid, 
                                                  h3idx_occupation, 
                                                  COUNT(*) AS distinct_days_occupation, 
                                                  SUM(freq_occupation) AS total_pings_occupation 
                                                FROM 
                                                  (
                                                    SELECT 
                                                      caid, 
                                                      year, 
                                                      month, 
                                                      day, 
                                                      h3idx_occupation, 
                                                      COUNT(*) AS freq_occupation 
                                                    FROM 
                                                      (
                                                        SELECT 
                                                          caid, 
                                                          year, 
                                                          month, 
                                                          day, 
                                                          h3idx AS h3idx_occupation 
                                                        FROM 
                                                          {place_name}_pings_h3idx 
                                                        WHERE 
                                                          (
                                                            hour_of_day IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
                                                          )
                                                      ) 
                                                    GROUP BY 
                                                      caid, 
                                                      year, 
                                                      month, 
                                                      day, 
                                                      h3idx_occupation
                                                  ) 
                                                GROUP BY 
                                                  caid, 
                                                  h3idx_occupation
                                              ) 
                                            WHERE 
                                              (distinct_days_occupation > 1.0)
                                          ) 
                                        WHERE 
                                          (q01 <= 1)
                                      ) AS RHS ON (LHS.caid = RHS.caid)
                                  ) 
                                WHERE 
                                  (h3idx_home != h3idx_occupation)
                              ) 
                            WHERE 
                              (q01 <= 1)'''

In [13]:
cursor.execute(query_identificar_sitios)

<pyathena.pandas.cursor.PandasCursor at 0x7f5804471b70>

In [14]:
#Verificamos:
query_test = f'''SELECT * FROM {place_name}_user_locations LIMIT 10'''
cursor.execute(query_test).as_pandas()

Unnamed: 0,caid,h3idx_home,distinct_days_home,total_pings_home,h3idx_occupation,distinct_days_occupation,total_pings_occupation
0,d463275b66ad15721baf92a27b52a68c94ff55c52b3637...,898e62c5e83ffff,72,1823,898e62c706fffff,43,1261
1,9d20c59e320de39a2c6abe430f4ec5a7985ea174e1ef0b...,898e62c09a3ffff,83,2150,898e62c596fffff,59,1928
2,aa5a233b7274689c58324499548db49ca05076e121c36f...,898e62c5963ffff,51,1494,898e62c09b3ffff,24,1082
3,b3648c97a65259dae5c732487e90ba6105f6a22fca7cd2...,898e75a4ccbffff,15,306,898e62c0553ffff,8,243
4,8935f02028e2ebe9796f12895cc03e2e440acc6894de1a...,898e62c043bffff,35,1259,898e62c1ed7ffff,30,1701
5,9229b19876fdb04609c327df0fb66c7775b1c1c6e98161...,898e62c289bffff,27,542,898e62c720fffff,17,167
6,23b83770abb7f4fdb9d452b7dbddad5d7b32b9b657b117...,898e62d54cfffff,49,455,898e62d54cbffff,34,352
7,0572c0c2c067c924442b07a70a439c71724dc48f7499ff...,898e62c0663ffff,5,16,898e62c0e37ffff,4,40
8,49875a8fe8b9331f364aa1a5e1b6cd3f877b300ba22153...,898e62c5097ffff,15,92,898e62c56cbffff,14,99
9,083c5b424316acb7060726e67b11c8bad3431a8798fc6a...,898e66ba407ffff,10,57,898e66ba40bffff,10,212


## Matrices Origen - Destino

Podemos extraer matrices origen destino de muy alta resolución espacial y temporal: a cada hora, para cada celda, podemos contar la cantidad de visitas  de personas de cada una de las otras celdas.


In [16]:
query_matriz_OD = f'''CREATE TABLE {place_name}_OD_matrix
                          WITH (external_location = '{extracted_table_location}/{place_name}_OD_matrix', 
                                format = 'PARQUET', 
                                parquet_compression = 'SNAPPY') AS
                          SELECT year, month, day, hour_of_day, h3idx, h3idx_home, COUNT(*) AS n
                            FROM (SELECT year, month, day, hour_of_day, LHS.caid AS caid, h3idx, n, h3idx_home
                                    FROM (SELECT year, month, day, hour_of_day, caid, h3idx, n
                                            FROM (SELECT year, month, day, hour_of_day, caid, h3idx, n, MAX(n) OVER (PARTITION BY year, month, day, hour_of_day, caid) AS q01
                                                FROM (SELECT year, month, day, hour_of_day, caid, h3idx, COUNT(*) AS n
                                                        FROM {place_name}_pings_h3idx
                                                        GROUP BY year, month, day, hour_of_day, caid, h3idx)
                                                  )
                                            WHERE (n = q01)) AS LHS
                                            LEFT JOIN (SELECT caid, h3idx_home
                                                        FROM {place_name}_user_locations) AS RHS
                                            ON (LHS.caid = RHS.caid)
                                  )
                        GROUP BY year, month, day, hour_of_day, h3idx, h3idx_home
                        ORDER BY year ASC, month ASC, day ASC, hour_of_day ASC, n DESC'''  

In [17]:
cursor.execute(query_matriz_OD)

<pyathena.pandas.cursor.PandasCursor at 0x7f5804471b70>

In [19]:
#Verificamos:
query_test = f'''SELECT * FROM {place_name}_OD_matrix LIMIT 10'''
cursor.execute(query_test).as_pandas()

Unnamed: 0,year,month,day,hour_of_day,h3idx,h3idx_home,n
0,2020,2,28,2,898e62c0e6bffff,898e62c352bffff,1
1,2020,2,28,2,898e62c05c7ffff,898e62c05c7ffff,1
2,2020,2,28,2,898e62c0957ffff,,1
3,2020,2,28,2,898e62c688fffff,898e62c688fffff,1
4,2020,2,28,2,898e62c2e7bffff,898e62c2e7bffff,1
5,2020,2,28,2,898e62c3503ffff,898e62c3503ffff,1
6,2020,2,28,2,898e75a4c8fffff,898e75a4c8fffff,1
7,2020,2,28,2,898e62d2353ffff,898e62d2353ffff,1
8,2020,2,28,2,898e62c296fffff,,1
9,2020,2,28,2,898e62c588bffff,898e62c588bffff,1


__IMPORTANTE__
Para preservar la privacidad, cuando se publican los datos o análisis derivados es importante eliminar la identificación de celdas hogar que representan a pocos visitantes. La [metodología Flowminder](https://covid19.flowminder.org/home) recomienda fijar el umbral en un mínimo de 15 personas.