In [1]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
import datetime as dt

# Leyendo Matrices OD del 2020

In [2]:
# Numeric tag embedded in the input and output file names (looks like YYYYMM — confirm).
prefix = 202003

In [3]:
# Columns to load from the trips CSV: boarding/alighting timestamps and comunas.
cols = [
    'tiemposubida',
    'tiempobajada',
    'comunasubida',
    'comunabajada',
]

In [4]:
# Open the trips CSV for the selected `prefix` as a dask DataFrame
# (semicolon-separated), loading only the columns listed in `cols`.
ruta_viajes = f'input/viajes{prefix}_laboral_transparencia.csv'
tabla = dd.read_csv(ruta_viajes, sep=';', usecols=cols)
tabla.head()

Unnamed: 0,comunasubida,comunabajada,tiemposubida,tiempobajada
0,ESTACION CENTRAL,-,2020-03-08 16:42:46,-
1,PUENTE ALTO,-,2020-03-08 10:47:48,-
2,SAN MIGUEL,-,2020-03-08 12:05:52,-
3,CERRO NAVIA,PUDAHUEL,2020-03-08 09:16:24,2020-03-08 09:39:00
4,MAIPU,CERRO NAVIA,2020-03-08 12:26:12,2020-03-08 12:47:59


In [5]:
# Parse both timestamp columns partition by partition with pandas.
# errors='coerce' turns values that do not match the format (e.g. the "-"
# placeholder shown for trips without alighting data) into NaT.
# `meta` must be a real (name, dtype) tuple so dask knows each result is a
# named datetime64[ns] Series; a parenthesised string alone is not a tuple.
for columna in ("tiemposubida", "tiempobajada"):
    tabla[columna] = tabla[columna].map_partitions(
        pd.to_datetime,
        format='%Y-%m-%d %H:%M:%S',
        errors='coerce',
        meta=(columna, 'datetime64[ns]'),
    )



In [6]:
# Add the time-of-day of each timestamp as a string column ("HH:MM:SS").
# Missing timestamps (NaT) end up as the literal string "NaT", as the preview shows.
for origen, destino in (
    ("tiemposubida", "hora_subida"),
    ("tiempobajada", "hora_bajada"),
):
    tabla[destino] = tabla[origen].dt.time.astype(str)
tabla.head()

Unnamed: 0,comunasubida,comunabajada,tiemposubida,tiempobajada,hora_subida,hora_bajada
0,ESTACION CENTRAL,-,2020-03-08 16:42:46,NaT,16:42:46,NaT
1,PUENTE ALTO,-,2020-03-08 10:47:48,NaT,10:47:48,NaT
2,SAN MIGUEL,-,2020-03-08 12:05:52,NaT,12:05:52,NaT
3,CERRO NAVIA,PUDAHUEL,2020-03-08 09:16:24,2020-03-08 09:39:00,09:16:24,09:39:00
4,MAIPU,CERRO NAVIA,2020-03-08 12:26:12,2020-03-08 12:47:59,12:26:12,12:47:59


### Selecciono el bloque horario que nos interesa. Esto elimina los viajes que no tienen información de la bajada.

In [7]:
# Keep only trips whose boarding AND alighting times fall strictly between
# 06:00:00 and 10:00:00, then materialise the result as a pandas DataFrame.
# The hora_* columns are zero-padded "HH:MM:SS" strings, so plain string
# comparison follows chronological order. Rows whose time is the string "NaT"
# fail the "< 10:00:00" test and are therefore dropped.
inicio, fin = "06:00:00", "10:00:00"
sube_en_bloque = (tabla["hora_subida"] > inicio) & (tabla["hora_subida"] < fin)
baja_en_bloque = (tabla["hora_bajada"] > inicio) & (tabla["hora_bajada"] < fin)
viajes = tabla.loc[sube_en_bloque & baja_en_bloque].compute()
viajes

Unnamed: 0,comunasubida,comunabajada,tiemposubida,tiempobajada,hora_subida,hora_bajada
3,CERRO NAVIA,PUDAHUEL,2020-03-08 09:16:24,2020-03-08 09:39:00,09:16:24,09:39:00
16,LAS CONDES,LA REINA,2020-03-08 08:36:09,2020-03-08 08:57:17,08:36:09,08:57:17
27,MAIPU,PUDAHUEL,2020-03-08 07:04:59,2020-03-08 08:35:01,07:04:59,08:35:01
82,LA REINA,LA REINA,2020-03-08 09:00:03,2020-03-08 09:03:02,09:00:03,09:03:02
103,CONCHALI,CONCHALI,2020-03-08 09:22:57,2020-03-08 09:30:40,09:22:57,09:30:40
...,...,...,...,...,...,...
128680,PROVIDENCIA,LAS CONDES,2020-03-12 07:56:34,2020-03-12 07:58:15,07:56:34,07:58:15
128681,PROVIDENCIA,SANTIAGO,2020-03-12 07:56:43,2020-03-12 08:17:35,07:56:43,08:17:35
128683,LAS CONDES,LAS CONDES,2020-03-12 09:21:41,2020-03-12 09:26:51,09:21:41,09:26:51
128687,QUILICURA,SANTIAGO,2020-03-12 07:33:07,2020-03-12 08:36:27,07:33:07,08:36:27


In [8]:
# Add the day of the month of the boarding timestamp as column 'day'.
viajes = viajes.assign(day=viajes['tiemposubida'].dt.day)

In [9]:
# Display the filtered trips, which now include the 'day' column.
viajes

Unnamed: 0,comunasubida,comunabajada,tiemposubida,tiempobajada,hora_subida,hora_bajada,day
3,CERRO NAVIA,PUDAHUEL,2020-03-08 09:16:24,2020-03-08 09:39:00,09:16:24,09:39:00,8
16,LAS CONDES,LA REINA,2020-03-08 08:36:09,2020-03-08 08:57:17,08:36:09,08:57:17,8
27,MAIPU,PUDAHUEL,2020-03-08 07:04:59,2020-03-08 08:35:01,07:04:59,08:35:01,8
82,LA REINA,LA REINA,2020-03-08 09:00:03,2020-03-08 09:03:02,09:00:03,09:03:02,8
103,CONCHALI,CONCHALI,2020-03-08 09:22:57,2020-03-08 09:30:40,09:22:57,09:30:40,8
...,...,...,...,...,...,...,...
128680,PROVIDENCIA,LAS CONDES,2020-03-12 07:56:34,2020-03-12 07:58:15,07:56:34,07:58:15,12
128681,PROVIDENCIA,SANTIAGO,2020-03-12 07:56:43,2020-03-12 08:17:35,07:56:43,08:17:35,12
128683,LAS CONDES,LAS CONDES,2020-03-12 09:21:41,2020-03-12 09:26:51,09:21:41,09:26:51,12
128687,QUILICURA,SANTIAGO,2020-03-12 07:33:07,2020-03-12 08:36:27,07:33:07,08:36:27,12


# DATOS FINALES

In [10]:
# Origin-destination table: number of trips for every
# (day, boarding comuna, alighting comuna) combination, in column 'viajes'.
claves_od = ['day', 'comunasubida', 'comunabajada']
od = (
    viajes
    .groupby(claves_od)
    .size()
    .reset_index(name='viajes')
)


In [11]:
# Write the OD counts, ordered by day and comuna pair, to a gzip-compressed
# comma-separated file whose name carries `prefix`; the index is not written.
orden = ['day', 'comunasubida', 'comunabajada']
od_ordenado = od.sort_values(by=orden)
od_ordenado.to_csv(
    f'output/od_public_transport{prefix}.csv.gz',
    sep=',',
    index=False,
    compression='gzip',
)