In [1]:
#libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import folium
import os
import sys
import abstract_flows.arrow as arrow
import abstract_flows.grid as grid
import abstract_flows.flows as flows

In [2]:
flag_dir = False

def load_data(name, flag_dir = True):
    ''' 
    Params:
    name: str
        Name of the file to load
    flag_dir: bool, If True, the directory is set as the dir in CIMAT computer
        If False, the directory is set as the dir in Antonio's computer

    Returns:
    data
    '''
    if flag_dir:
        dir = '/home/user/Desktop/Datos/'
        if name[-3:] == 'csv':
            try: 
                data = pd.read_csv(dir + name)
            except:
                print('Error loading csv file')
                return None
        elif name[-3:] == 'npy':
            try:
                data = np.load(dir + name)
            except:
                print('Error loading numpy file')
                return None
        else:
            print('Error: file not found')
            return None
    else:
        dir = '/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/'
        if name[-3:] == 'csv':
            try: 
                data = pd.read_csv(dir + name)
            except:
                print('Error loading csv file')
                return None
        elif name[-3:] == 'npy':
            try:
                data = np.load(dir + name)
            except:
                print('Error loading numpy file')
                return None
        else:
            print('Error: file not found')
            return None
    return data

In [5]:
eco_2018 = load_data('ecobici/ecobici_2018.csv', flag_dir)

In [6]:
eco_2018.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin,Tiempo_Viaje
0,M,53,9940,18,01/01/2018,0:05:48,124,01/01/2018,0:19:04,2018-01-01 00:05:48,2018-01-01 00:19:04,796.0
1,M,48,3759,16,01/01/2018,0:09:17,60,01/01/2018,0:32:31,2018-01-01 00:09:17,2018-01-01 00:32:31,1394.0
2,M,63,10783,16,01/01/2018,0:09:39,60,01/01/2018,0:32:41,2018-01-01 00:09:39,2018-01-01 00:32:41,1382.0
3,M,27,10826,16,01/01/2018,0:10:01,60,01/01/2018,0:32:39,2018-01-01 00:10:01,2018-01-01 00:32:39,1358.0
4,M,28,10643,18,01/01/2018,0:10:02,450,01/01/2018,0:20:07,2018-01-01 00:10:02,2018-01-01 00:20:07,605.0


In [7]:
del eco_2018

In [13]:
eco_2019 = load_data('ecobici/ecobici_2019.csv', flag_dir)
eco_2019.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,M,36,11444,67,2019-01-01,0:08:44,36,2019-01-01,0:14:03,5.316667
1,M,36,9196,55,2019-01-01,0:12:05,136,2019-01-01,0:42:11,30.1
2,M,38,2612,29,2019-01-01,0:13:15,115,2019-01-01,0:37:47,24.533333
3,M,43,7463,29,2019-01-01,0:13:36,115,2019-01-01,0:38:04,24.466667
4,M,37,3987,136,2019-01-01,0:15:30,35,2019-01-01,0:22:47,7.283333


In [14]:
eco_2019['Inicio'] = eco_2019['Fecha_Retiro'] + ' ' + eco_2019['Hora_Retiro']

In [15]:
eco_2019.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio
0,M,36,11444,67,2019-01-01,0:08:44,36,2019-01-01,0:14:03,5.316667,2019-01-01 0:08:44
1,M,36,9196,55,2019-01-01,0:12:05,136,2019-01-01,0:42:11,30.1,2019-01-01 0:12:05
2,M,38,2612,29,2019-01-01,0:13:15,115,2019-01-01,0:37:47,24.533333,2019-01-01 0:13:15
3,M,43,7463,29,2019-01-01,0:13:36,115,2019-01-01,0:38:04,24.466667,2019-01-01 0:13:36
4,M,37,3987,136,2019-01-01,0:15:30,35,2019-01-01,0:22:47,7.283333,2019-01-01 0:15:30


In [16]:
eco_2019['Fin'] = eco_2019['Fecha_Arribo'] + ' ' + eco_2019['Hora_Arribo']
eco_2019.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,36,11444,67,2019-01-01,0:08:44,36,2019-01-01,0:14:03,5.316667,2019-01-01 0:08:44,2019-01-01 0:14:03
1,M,36,9196,55,2019-01-01,0:12:05,136,2019-01-01,0:42:11,30.1,2019-01-01 0:12:05,2019-01-01 0:42:11
2,M,38,2612,29,2019-01-01,0:13:15,115,2019-01-01,0:37:47,24.533333,2019-01-01 0:13:15,2019-01-01 0:37:47
3,M,43,7463,29,2019-01-01,0:13:36,115,2019-01-01,0:38:04,24.466667,2019-01-01 0:13:36,2019-01-01 0:38:04
4,M,37,3987,136,2019-01-01,0:15:30,35,2019-01-01,0:22:47,7.283333,2019-01-01 0:15:30,2019-01-01 0:22:47


In [18]:
eco_2019.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2019.csv', index = False)

In [19]:
eco_2019 = load_data('ecobici/ecobici_2019.csv', flag_dir)
eco_2019.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,36,11444,67,2019-01-01,0:08:44,36,2019-01-01,0:14:03,5.316667,2019-01-01 0:08:44,2019-01-01 0:14:03
1,M,36,9196,55,2019-01-01,0:12:05,136,2019-01-01,0:42:11,30.1,2019-01-01 0:12:05,2019-01-01 0:42:11
2,M,38,2612,29,2019-01-01,0:13:15,115,2019-01-01,0:37:47,24.533333,2019-01-01 0:13:15,2019-01-01 0:37:47
3,M,43,7463,29,2019-01-01,0:13:36,115,2019-01-01,0:38:04,24.466667,2019-01-01 0:13:36,2019-01-01 0:38:04
4,M,37,3987,136,2019-01-01,0:15:30,35,2019-01-01,0:22:47,7.283333,2019-01-01 0:15:30,2019-01-01 0:22:47


In [20]:
del eco_2019

In [22]:
eco_2020 = load_data('ecobici/ecobici_2020.csv', flag_dir)
eco_2020.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,F,33.0,8744,52.0,2020-01-01,0:05:06,3,2020-01-01,0:13:00,7.9
1,M,30.0,7865,31.0,2020-01-01,0:09:50,271,2020-01-01,0:21:30,11.666667
2,F,36.0,7819,251.0,2020-01-01,0:13:25,150,2020-01-01,0:26:10,12.75
3,F,33.0,11258,3.0,2020-01-01,0:15:22,1,2020-01-01,0:25:29,10.116667
4,M,29.0,7722,136.0,2020-01-01,0:18:42,63,2020-01-01,0:24:01,5.316667


In [23]:
eco_2020['Inicio'] = eco_2020['Fecha_Retiro'] + ' ' + eco_2020['Hora_Retiro']
eco_2020.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio
0,F,33.0,8744,52.0,2020-01-01,0:05:06,3,2020-01-01,0:13:00,7.9,2020-01-01 0:05:06
1,M,30.0,7865,31.0,2020-01-01,0:09:50,271,2020-01-01,0:21:30,11.666667,2020-01-01 0:09:50
2,F,36.0,7819,251.0,2020-01-01,0:13:25,150,2020-01-01,0:26:10,12.75,2020-01-01 0:13:25
3,F,33.0,11258,3.0,2020-01-01,0:15:22,1,2020-01-01,0:25:29,10.116667,2020-01-01 0:15:22
4,M,29.0,7722,136.0,2020-01-01,0:18:42,63,2020-01-01,0:24:01,5.316667,2020-01-01 0:18:42


In [24]:
eco_2020['Fin'] = eco_2020['Fecha_Arribo'] + ' ' + eco_2020['Hora_Arribo']
eco_2020.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,F,33.0,8744,52.0,2020-01-01,0:05:06,3,2020-01-01,0:13:00,7.9,2020-01-01 0:05:06,2020-01-01 0:13:00
1,M,30.0,7865,31.0,2020-01-01,0:09:50,271,2020-01-01,0:21:30,11.666667,2020-01-01 0:09:50,2020-01-01 0:21:30
2,F,36.0,7819,251.0,2020-01-01,0:13:25,150,2020-01-01,0:26:10,12.75,2020-01-01 0:13:25,2020-01-01 0:26:10
3,F,33.0,11258,3.0,2020-01-01,0:15:22,1,2020-01-01,0:25:29,10.116667,2020-01-01 0:15:22,2020-01-01 0:25:29
4,M,29.0,7722,136.0,2020-01-01,0:18:42,63,2020-01-01,0:24:01,5.316667,2020-01-01 0:18:42,2020-01-01 0:24:01


In [25]:
eco_2020.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2020.csv', index = False)
eco_2020 = load_data('ecobici/ecobici_2020.csv', flag_dir)
eco_2020.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,F,33.0,8744,52.0,2020-01-01,0:05:06,3,2020-01-01,0:13:00,7.9,2020-01-01 0:05:06,2020-01-01 0:13:00
1,M,30.0,7865,31.0,2020-01-01,0:09:50,271,2020-01-01,0:21:30,11.666667,2020-01-01 0:09:50,2020-01-01 0:21:30
2,F,36.0,7819,251.0,2020-01-01,0:13:25,150,2020-01-01,0:26:10,12.75,2020-01-01 0:13:25,2020-01-01 0:26:10
3,F,33.0,11258,3.0,2020-01-01,0:15:22,1,2020-01-01,0:25:29,10.116667,2020-01-01 0:15:22,2020-01-01 0:25:29
4,M,29.0,7722,136.0,2020-01-01,0:18:42,63,2020-01-01,0:24:01,5.316667,2020-01-01 0:18:42,2020-01-01 0:24:01


In [26]:
del eco_2020

In [27]:
eco_2021 = load_data('ecobici/ecobici_2021.csv', flag_dir)
eco_2021.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,M,32.0,7608,18,2021-01-01,07:05:04,143,2021-01-01,07:13:25,8.35
1,M,24.0,8324,76,2021-01-01,09:03:35,76,2021-01-01,09:08:08,4.55
2,F,35.0,6787,281,2021-01-01,09:22:41,61,2021-01-01,09:50:03,27.366667
3,F,34.0,11875,405,2021-01-01,10:21:21,431,2021-01-01,10:27:16,5.916667
4,M,26.0,8164,30,2021-01-01,11:52:38,126,2021-01-01,12:07:30,14.866667


In [28]:
eco_2021['Inicio'] = eco_2021['Fecha_Retiro'] + ' ' + eco_2021['Hora_Retiro']
eco_2021.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio
0,M,32.0,7608,18,2021-01-01,07:05:04,143,2021-01-01,07:13:25,8.35,2021-01-01 07:05:04
1,M,24.0,8324,76,2021-01-01,09:03:35,76,2021-01-01,09:08:08,4.55,2021-01-01 09:03:35
2,F,35.0,6787,281,2021-01-01,09:22:41,61,2021-01-01,09:50:03,27.366667,2021-01-01 09:22:41
3,F,34.0,11875,405,2021-01-01,10:21:21,431,2021-01-01,10:27:16,5.916667,2021-01-01 10:21:21
4,M,26.0,8164,30,2021-01-01,11:52:38,126,2021-01-01,12:07:30,14.866667,2021-01-01 11:52:38


In [29]:
eco_2021['Fin'] = eco_2021['Fecha_Arribo'] + ' ' + eco_2021['Hora_Arribo']
eco_2021.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,32.0,7608,18,2021-01-01,07:05:04,143,2021-01-01,07:13:25,8.35,2021-01-01 07:05:04,2021-01-01 07:13:25
1,M,24.0,8324,76,2021-01-01,09:03:35,76,2021-01-01,09:08:08,4.55,2021-01-01 09:03:35,2021-01-01 09:08:08
2,F,35.0,6787,281,2021-01-01,09:22:41,61,2021-01-01,09:50:03,27.366667,2021-01-01 09:22:41,2021-01-01 09:50:03
3,F,34.0,11875,405,2021-01-01,10:21:21,431,2021-01-01,10:27:16,5.916667,2021-01-01 10:21:21,2021-01-01 10:27:16
4,M,26.0,8164,30,2021-01-01,11:52:38,126,2021-01-01,12:07:30,14.866667,2021-01-01 11:52:38,2021-01-01 12:07:30


In [30]:
eco_2021.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2021.csv', index = False)
eco_2021 = load_data('ecobici/ecobici_2021.csv', flag_dir)
eco_2021.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,32.0,7608,18,2021-01-01,07:05:04,143,2021-01-01,07:13:25,8.35,2021-01-01 07:05:04,2021-01-01 07:13:25
1,M,24.0,8324,76,2021-01-01,09:03:35,76,2021-01-01,09:08:08,4.55,2021-01-01 09:03:35,2021-01-01 09:08:08
2,F,35.0,6787,281,2021-01-01,09:22:41,61,2021-01-01,09:50:03,27.366667,2021-01-01 09:22:41,2021-01-01 09:50:03
3,F,34.0,11875,405,2021-01-01,10:21:21,431,2021-01-01,10:27:16,5.916667,2021-01-01 10:21:21,2021-01-01 10:27:16
4,M,26.0,8164,30,2021-01-01,11:52:38,126,2021-01-01,12:07:30,14.866667,2021-01-01 11:52:38,2021-01-01 12:07:30


In [31]:
del eco_2021

In [32]:
eco_2022 = load_data('ecobici/ecobici_2022.csv', flag_dir)
eco_2022.head()

  data = pd.read_csv(dir + name)


Unnamed: 0.1,Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95
1,1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333
2,2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55
3,3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667
4,4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3


In [33]:
eco_2022.drop(columns = ['Unnamed: 0'], inplace = True)
eco_2022.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95
1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333
2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55
3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667
4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3


In [34]:
eco_2022['Inicio'] = eco_2022['Fecha_Retiro'] + ' ' + eco_2022['Hora_Retiro']
eco_2022.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio
0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95,2022-01-04 07:49:31
1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333,2022-01-06 07:45:59
2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55,2022-01-11 07:51:56
3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667,2022-01-12 07:43:56
4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3,2022-01-13 07:43:09


In [35]:
eco_2022['Fin'] = eco_2022['Fecha_Arribo'] + ' ' + eco_2022['Hora_Arribo']
eco_2022.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95,2022-01-04 07:49:31,2022-01-04 07:52:28
1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333,2022-01-06 07:45:59,2022-01-06 07:48:55
2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55,2022-01-11 07:51:56,2022-01-11 07:54:29
3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667,2022-01-12 07:43:56,2022-01-12 07:46:54
4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3,2022-01-13 07:43:09,2022-01-13 07:47:27


In [36]:
eco_2022.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2022.csv', index = False)
eco_2022 = load_data('ecobici/ecobici_2022.csv', flag_dir)
eco_2022.head()

  data = pd.read_csv(dir + name)


Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95,2022-01-04 07:49:31,2022-01-04 07:52:28
1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333,2022-01-06 07:45:59,2022-01-06 07:48:55
2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55,2022-01-11 07:51:56,2022-01-11 07:54:29
3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667,2022-01-12 07:43:56,2022-01-12 07:46:54
4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3,2022-01-13 07:43:09,2022-01-13 07:47:27


In [37]:
eco_2022['Genero_Usuario'].unique()

array(['M', 'F', nan, 'O'], dtype=object)

In [41]:
len(eco_2022[eco_2022['Genero_Usuario'] == 'O']), len(eco_2022[eco_2022['Genero_Usuario'] == 'M']), len(eco_2022[eco_2022['Genero_Usuario'] == 'F'])

(42369, 3393328, 1297514)

In [42]:
eco_2022['Genero_Usuario'] = eco_2022['Genero_Usuario'].replace('O', 'F')

In [43]:
eco_2022['Genero_Usuario'].unique()

array(['M', 'F', nan], dtype=object)

In [44]:
eco_2022.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2022.csv', index = False)
eco_2022 = load_data('ecobici/ecobici_2022.csv', flag_dir)
eco_2022.head()

  data = pd.read_csv(dir + name)


Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,40.0,8811,282,2022-01-04,07:49:31,295,2022-01-04,07:52:28,2.95,2022-01-04 07:49:31,2022-01-04 07:52:28
1,M,40.0,8752,282,2022-01-06,07:45:59,295,2022-01-06,07:48:55,2.933333,2022-01-06 07:45:59,2022-01-06 07:48:55
2,M,40.0,12028,282,2022-01-11,07:51:56,295,2022-01-11,07:54:29,2.55,2022-01-11 07:51:56,2022-01-11 07:54:29
3,M,40.0,7102,282,2022-01-12,07:43:56,295,2022-01-12,07:46:54,2.966667,2022-01-12 07:43:56,2022-01-12 07:46:54
4,M,40.0,4261,282,2022-01-13,07:43:09,281,2022-01-13,07:47:27,4.3,2022-01-13 07:43:09,2022-01-13 07:47:27


In [45]:
eco_2019 = load_data('ecobici/ecobici_2019.csv', flag_dir)
eco_2019.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,36,11444,67,2019-01-01,0:08:44,36,2019-01-01,0:14:03,5.316667,2019-01-01 0:08:44,2019-01-01 0:14:03
1,M,36,9196,55,2019-01-01,0:12:05,136,2019-01-01,0:42:11,30.1,2019-01-01 0:12:05,2019-01-01 0:42:11
2,M,38,2612,29,2019-01-01,0:13:15,115,2019-01-01,0:37:47,24.533333,2019-01-01 0:13:15,2019-01-01 0:37:47
3,M,43,7463,29,2019-01-01,0:13:36,115,2019-01-01,0:38:04,24.466667,2019-01-01 0:13:36,2019-01-01 0:38:04
4,M,37,3987,136,2019-01-01,0:15:30,35,2019-01-01,0:22:47,7.283333,2019-01-01 0:15:30,2019-01-01 0:22:47


In [47]:
eco_2019['Genero_Usuario'].unique()

array(['M', 'F'], dtype=object)

In [49]:
del eco_2019
del eco_2022

In [50]:
eco_2023 = load_data('ecobici/ecobici_2023.csv', flag_dir)
eco_2023.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,M,45.0,3297082,150,2022-12-31,1900-01-01 23:58:00,40,2023-01-01,1900-01-01 00:06:24,1431.6
1,F,27.0,8922254,214,2022-12-31,1900-01-01 23:36:34,25,2023-01-01,1900-01-01 00:07:08,1409.433333
2,M,34.0,2320075,214,2022-12-31,1900-01-01 23:36:33,25,2023-01-01,1900-01-01 00:07:12,1409.35
3,M,58.0,2096493,137,2022-12-31,1900-01-01 23:57:19,36,2023-01-01,1900-01-01 00:07:48,1429.516667
4,M,23.0,6294433,260,2022-12-31,1900-01-01 23:57:42,43,2023-01-01,1900-01-01 00:09:33,1428.15


In [51]:
eco_2023['Genero_Usuario'].unique()

array(['M', 'F', 'O', nan, '?'], dtype=object)

In [54]:
len(eco_2023[eco_2023['Genero_Usuario'] == 'O']), len(eco_2023[eco_2023['Genero_Usuario'] == 'M']), len(eco_2023[eco_2023['Genero_Usuario'] == 'F']), len(eco_2023[eco_2023['Genero_Usuario'] == '?'])

(275566, 8541734, 3358828, 48)

In [56]:
eco_2023['Inicio'] = eco_2023['Fecha_Retiro'] + ' ' + eco_2023['Hora_Retiro'].str[-8:]
eco_2023.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio
0,M,45.0,3297082,150,2022-12-31,1900-01-01 23:58:00,40,2023-01-01,1900-01-01 00:06:24,1431.6,2022-12-31 23:58:00
1,F,27.0,8922254,214,2022-12-31,1900-01-01 23:36:34,25,2023-01-01,1900-01-01 00:07:08,1409.433333,2022-12-31 23:36:34
2,M,34.0,2320075,214,2022-12-31,1900-01-01 23:36:33,25,2023-01-01,1900-01-01 00:07:12,1409.35,2022-12-31 23:36:33
3,M,58.0,2096493,137,2022-12-31,1900-01-01 23:57:19,36,2023-01-01,1900-01-01 00:07:48,1429.516667,2022-12-31 23:57:19
4,M,23.0,6294433,260,2022-12-31,1900-01-01 23:57:42,43,2023-01-01,1900-01-01 00:09:33,1428.15,2022-12-31 23:57:42


In [57]:
eco_2023['Fin'] = eco_2023['Fecha_Arribo'] + ' ' + eco_2023['Hora_Arribo'].str[-8:]
eco_2023.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,45.0,3297082,150,2022-12-31,1900-01-01 23:58:00,40,2023-01-01,1900-01-01 00:06:24,1431.6,2022-12-31 23:58:00,2023-01-01 00:06:24
1,F,27.0,8922254,214,2022-12-31,1900-01-01 23:36:34,25,2023-01-01,1900-01-01 00:07:08,1409.433333,2022-12-31 23:36:34,2023-01-01 00:07:08
2,M,34.0,2320075,214,2022-12-31,1900-01-01 23:36:33,25,2023-01-01,1900-01-01 00:07:12,1409.35,2022-12-31 23:36:33,2023-01-01 00:07:12
3,M,58.0,2096493,137,2022-12-31,1900-01-01 23:57:19,36,2023-01-01,1900-01-01 00:07:48,1429.516667,2022-12-31 23:57:19,2023-01-01 00:07:48
4,M,23.0,6294433,260,2022-12-31,1900-01-01 23:57:42,43,2023-01-01,1900-01-01 00:09:33,1428.15,2022-12-31 23:57:42,2023-01-01 00:09:33


In [58]:
eco_2023.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2023.csv', index = False)
eco_2023 = load_data('ecobici/ecobici_2023.csv', flag_dir)
eco_2023.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje,Inicio,Fin
0,M,45.0,3297082,150,2022-12-31,1900-01-01 23:58:00,40,2023-01-01,1900-01-01 00:06:24,1431.6,2022-12-31 23:58:00,2023-01-01 00:06:24
1,F,27.0,8922254,214,2022-12-31,1900-01-01 23:36:34,25,2023-01-01,1900-01-01 00:07:08,1409.433333,2022-12-31 23:36:34,2023-01-01 00:07:08
2,M,34.0,2320075,214,2022-12-31,1900-01-01 23:36:33,25,2023-01-01,1900-01-01 00:07:12,1409.35,2022-12-31 23:36:33,2023-01-01 00:07:12
3,M,58.0,2096493,137,2022-12-31,1900-01-01 23:57:19,36,2023-01-01,1900-01-01 00:07:48,1429.516667,2022-12-31 23:57:19,2023-01-01 00:07:48
4,M,23.0,6294433,260,2022-12-31,1900-01-01 23:57:42,43,2023-01-01,1900-01-01 00:09:33,1428.15,2022-12-31 23:57:42,2023-01-01 00:09:33


In [59]:
del eco_2023

In [3]:
est_2024 = np.load('/Users/antoniomendez/Desktop/Tesis/Datos/Adj_eco/matrices_estaciones/est_2024.npy')

In [4]:
est_2024

array([[  1.        ,  19.4335895 , -99.1678193 ],
       [  2.        ,  19.43062276, -99.17123066],
       [  3.        ,  19.43163   , -99.158547  ],
       ...,
       [709.        ,  19.344271  , -99.149973  ],
       [710.        ,  19.4168415 , -99.1925204 ],
       [711.        ,  19.4136683 , -99.1919528 ]], shape=(676, 3))

In [5]:
eco_2024_1 = pd.read_csv('data_eco/ecobici_2024_enero.csv')
eco_2024_2 = pd.read_csv('data_eco/2024-02.csv')
eco_2024_3 = pd.read_csv('data_eco/datos_abiertos_2024_03-1-1.csv')
eco_2024_4 = pd.read_csv('data_eco/datos_abiertos_2024_04.csv')
eco_2024_5 = pd.read_csv('data_eco/2024-05-1.csv')
eco_2024_6 = pd.read_csv('data_eco/2024-06.csv')
eco_2024_7 = pd.read_csv('data_eco/datos_abiertos_2024_07.csv')
eco_2024_8 = pd.read_csv('data_eco/2024-08.csv')
eco_2024_9 = pd.read_csv('data_eco/2024-09.csv')
eco_2024_10 = pd.read_csv('data_eco/2024-10.csv')
eco_2024_11 = pd.read_csv('data_eco/2024-11.csv')
eco_2024_12 = pd.read_csv('data_eco/2024-12.csv')

In [6]:
eco_2024_list = [eco_2024_1, eco_2024_2, eco_2024_3, eco_2024_4, eco_2024_5, eco_2024_6, eco_2024_7, eco_2024_8, eco_2024_9, eco_2024_10, eco_2024_11, eco_2024_12]

In [7]:
eco_2024_list[0].head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_EstacionArribo,Fecha Arribo,Hora_Arribo
0,M,48.0,3371879,113,31/12/2023,23:21:52,659,01/01/2024,00:00:02
1,F,26.0,7033434,281,31/12/2023,23:46:02,47,01/01/2024,00:00:02
2,F,37.0,7169857,15,31/12/2023,23:51:57,217,01/01/2024,00:00:49
3,F,30.0,6368211,555,31/12/2023,23:53:10,8,01/01/2024,00:02:39
4,M,30.0,5136924,555,31/12/2023,23:52:53,8,01/01/2024,00:02:45


In [8]:
for i in range(len(eco_2024_list)):
    eco_2024_list[i].rename(columns = {'Ciclo_EstacionArribo': 'Ciclo_Estacion_Arribo', 'Fecha Arribo':'Fecha_Arribo'}, inplace = True)
    eco_2024_list[i]['Fecha_Retiro'] = pd.to_datetime(eco_2024_list[i]['Fecha_Retiro'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')
    eco_2024_list[i]['Fecha_Arribo'] = pd.to_datetime(eco_2024_list[i]['Fecha_Arribo'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')
    eco_2024_list[i]['Inicio'] = eco_2024_list[i]['Fecha_Retiro'] + ' ' + eco_2024_list[i]['Hora_Retiro']
    eco_2024_list[i]['Fin'] = eco_2024_list[i]['Fecha_Arribo'] + ' ' + eco_2024_list[i]['Hora_Arribo']

In [9]:
eco_2024 = pd.concat(eco_2024_list, ignore_index=True)
eco_2024.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,2023-12-31 23:21:52,2024-01-01 00:00:02
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,2023-12-31 23:46:02,2024-01-01 00:00:02
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,2023-12-31 23:51:57,2024-01-01 00:00:49
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,2023-12-31 23:53:10,2024-01-01 00:02:39
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,2023-12-31 23:52:53,2024-01-01 00:02:45


In [10]:
eco_2024.tail()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
22242864,M,27.0,2482685,31,2024-12-31,23:48:08,61,2024-12-31,23:59:27,2024-12-31 23:48:08,2024-12-31 23:59:27
22242865,M,30.0,3046505,32,2024-12-31,23:45:58,116,2024-12-31,23:59:44,2024-12-31 23:45:58,2024-12-31 23:59:44
22242866,F,25.0,4281416,32,2024-12-31,23:45:41,116,2024-12-31,23:59:44,2024-12-31 23:45:41,2024-12-31 23:59:44
22242867,F,27.0,8694077,38,2024-12-31,23:52:54,126,2024-12-31,23:59:57,2024-12-31 23:52:54,2024-12-31 23:59:57
22242868,M,23.0,6625883,41,2024-12-31,23:29:17,638,2024-12-31,23:59:59,2024-12-31 23:29:17,2024-12-31 23:59:59


In [11]:
eco_2024['Ciclo_Estacion_Retiro'] = pd.to_numeric(eco_2024['Ciclo_Estacion_Retiro'], errors='coerce').astype('Int64')
eco_2024.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,2023-12-31 23:21:52,2024-01-01 00:00:02
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,2023-12-31 23:46:02,2024-01-01 00:00:02
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,2023-12-31 23:51:57,2024-01-01 00:00:49
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,2023-12-31 23:53:10,2024-01-01 00:02:39
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,2023-12-31 23:52:53,2024-01-01 00:02:45


In [12]:
eco_2024['Ciclo_Estacion_Arribo'] = pd.to_numeric(eco_2024['Ciclo_Estacion_Arribo'], errors='coerce').astype('Int64')
eco_2024.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,2023-12-31 23:21:52,2024-01-01 00:00:02
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,2023-12-31 23:46:02,2024-01-01 00:00:02
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,2023-12-31 23:51:57,2024-01-01 00:00:49
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,2023-12-31 23:53:10,2024-01-01 00:02:39
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,2023-12-31 23:52:53,2024-01-01 00:02:45


In [13]:
stations = est_2024[:, 0]
stations

array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,
        23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,
        34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,
        45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,
        56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,
        67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,
        78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,
        89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99.,
       100., 102., 103., 104., 105., 106., 107., 109., 110., 111., 112.,
       113., 114., 115., 116., 117., 118., 119., 120., 121., 122., 123.,
       124., 125., 126., 127., 128., 129., 130., 131., 132., 133., 134.,
       135., 136., 137., 138., 139., 140., 141., 142., 143., 144., 145.,
       146., 147., 148., 149., 150., 151., 152., 15

In [14]:
eco_2024_filtered = eco_2024[
    eco_2024['Ciclo_Estacion_Retiro'].astype(float).isin(stations) &
    eco_2024['Ciclo_Estacion_Arribo'].astype(float).isin(stations)
]
eco_2024_filtered.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,2023-12-31 23:21:52,2024-01-01 00:00:02
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,2023-12-31 23:46:02,2024-01-01 00:00:02
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,2023-12-31 23:51:57,2024-01-01 00:00:49
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,2023-12-31 23:53:10,2024-01-01 00:02:39
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,2023-12-31 23:52:53,2024-01-01 00:02:45


In [15]:
eco_2024_filtered.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/ecobici/ecobici_2024.csv', index = False)
eco_2024 = load_data('ecobici/ecobici_2024.csv', flag_dir)
eco_2024.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Inicio,Fin
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,2023-12-31 23:21:52,2024-01-01 00:00:02
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,2023-12-31 23:46:02,2024-01-01 00:00:02
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,2023-12-31 23:51:57,2024-01-01 00:00:49
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,2023-12-31 23:53:10,2024-01-01 00:02:39
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,2023-12-31 23:52:53,2024-01-01 00:02:45


Para mibici

In [16]:
mibici_2018 = load_data('mibici/mibici_2018.csv', flag_dir)
mibici_2018.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_de_viaje
0,4783525,251562,M,1982.0,2018-01-01 06:01:03,2018-01-01 06:10:11,59,28,548.0
1,4783526,260233,M,2000.0,2018-01-01 06:03:15,2018-01-01 06:06:42,250,164,207.0
2,4783527,277431,M,1993.0,2018-01-01 06:09:56,2018-01-01 06:15:17,148,238,321.0
3,4783528,157371,M,1991.0,2018-01-01 06:16:50,2018-01-01 06:26:45,202,51,595.0
4,4783529,237066,M,1995.0,2018-01-01 06:19:44,2018-01-01 06:24:59,11,173,315.0


In [17]:
mibici_2018['Inicio'] = mibici_2018['Inicio_del_viaje']
mibici_2018['Fin'] = mibici_2018['Fin_del_viaje']
mibici_2018.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_de_viaje,Inicio,Fin
0,4783525,251562,M,1982.0,2018-01-01 06:01:03,2018-01-01 06:10:11,59,28,548.0,2018-01-01 06:01:03,2018-01-01 06:10:11
1,4783526,260233,M,2000.0,2018-01-01 06:03:15,2018-01-01 06:06:42,250,164,207.0,2018-01-01 06:03:15,2018-01-01 06:06:42
2,4783527,277431,M,1993.0,2018-01-01 06:09:56,2018-01-01 06:15:17,148,238,321.0,2018-01-01 06:09:56,2018-01-01 06:15:17
3,4783528,157371,M,1991.0,2018-01-01 06:16:50,2018-01-01 06:26:45,202,51,595.0,2018-01-01 06:16:50,2018-01-01 06:26:45
4,4783529,237066,M,1995.0,2018-01-01 06:19:44,2018-01-01 06:24:59,11,173,315.0,2018-01-01 06:19:44,2018-01-01 06:24:59


In [18]:
mibici_2018.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2018.csv', index = False)
mibici_2018 = load_data('mibici/mibici_2018.csv', flag_dir)
mibici_2018.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_de_viaje,Inicio,Fin
0,4783525,251562,M,1982.0,2018-01-01 06:01:03,2018-01-01 06:10:11,59,28,548.0,2018-01-01 06:01:03,2018-01-01 06:10:11
1,4783526,260233,M,2000.0,2018-01-01 06:03:15,2018-01-01 06:06:42,250,164,207.0,2018-01-01 06:03:15,2018-01-01 06:06:42
2,4783527,277431,M,1993.0,2018-01-01 06:09:56,2018-01-01 06:15:17,148,238,321.0,2018-01-01 06:09:56,2018-01-01 06:15:17
3,4783528,157371,M,1991.0,2018-01-01 06:16:50,2018-01-01 06:26:45,202,51,595.0,2018-01-01 06:16:50,2018-01-01 06:26:45
4,4783529,237066,M,1995.0,2018-01-01 06:19:44,2018-01-01 06:24:59,11,173,315.0,2018-01-01 06:19:44,2018-01-01 06:24:59


In [20]:
del mibici_2018

In [21]:
mibici_2019 = load_data('mibici/mibici_2019.csv', flag_dir)
mibici_2019.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667


In [23]:
mibici_2019['Inicio'] = mibici_2019['Inicio_del_viaje']
mibici_2019['Fin'] = mibici_2019['Fin_del_viaje']
mibici_2019.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15,2019-01-01 00:00:00,2019-01-01 00:16:09
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667,2019-01-01 06:01:05,2019-01-01 06:16:27
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6,2019-01-01 06:01:19,2019-01-01 06:09:55
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65,2019-01-01 06:01:25,2019-01-01 06:02:04
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667,2019-01-01 06:01:57,2019-01-01 06:05:01


In [24]:
mibici_2019.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2019.csv', index = False)
mibici_2019 = load_data('mibici/mibici_2019.csv', flag_dir)
mibici_2019.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15,2019-01-01 00:00:00,2019-01-01 00:16:09
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667,2019-01-01 06:01:05,2019-01-01 06:16:27
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6,2019-01-01 06:01:19,2019-01-01 06:09:55
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65,2019-01-01 06:01:25,2019-01-01 06:02:04
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667,2019-01-01 06:01:57,2019-01-01 06:05:01


In [28]:
mibici_2020 = load_data('mibici/mibici_2020.csv', flag_dir)
mibici_2020.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,14420217,451617,M,1992.0,2020-01-01 06:02:20,2020-01-01 06:05:38,52,268,3.3
1,14420218,324211,M,1985.0,2020-01-01 06:02:22,2020-01-01 06:07:32,254,180,5.166667
2,14420219,611633,M,1981.0,2020-01-01 06:03:01,2020-01-01 06:21:43,258,278,18.7
3,14420220,521601,M,1989.0,2020-01-01 06:05:48,2020-01-01 06:15:11,200,201,9.383333
4,14420221,631227,M,1979.0,2020-01-01 06:08:15,2020-01-01 06:29:33,151,248,21.3


In [29]:
mibici_2020['Inicio'] = mibici_2020['Inicio_del_viaje']
mibici_2020['Fin'] = mibici_2020['Fin_del_viaje']
mibici_2020.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,14420217,451617,M,1992.0,2020-01-01 06:02:20,2020-01-01 06:05:38,52,268,3.3,2020-01-01 06:02:20,2020-01-01 06:05:38
1,14420218,324211,M,1985.0,2020-01-01 06:02:22,2020-01-01 06:07:32,254,180,5.166667,2020-01-01 06:02:22,2020-01-01 06:07:32
2,14420219,611633,M,1981.0,2020-01-01 06:03:01,2020-01-01 06:21:43,258,278,18.7,2020-01-01 06:03:01,2020-01-01 06:21:43
3,14420220,521601,M,1989.0,2020-01-01 06:05:48,2020-01-01 06:15:11,200,201,9.383333,2020-01-01 06:05:48,2020-01-01 06:15:11
4,14420221,631227,M,1979.0,2020-01-01 06:08:15,2020-01-01 06:29:33,151,248,21.3,2020-01-01 06:08:15,2020-01-01 06:29:33


In [31]:
mibici_2020 = load_data('mibici/mibici_2020.csv', flag_dir)
mibici_2020.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,14420217,451617,M,1992.0,2020-01-01 06:02:20,2020-01-01 06:05:38,52,268,3.3,2020-01-01 06:02:20,2020-01-01 06:05:38
1,14420218,324211,M,1985.0,2020-01-01 06:02:22,2020-01-01 06:07:32,254,180,5.166667,2020-01-01 06:02:22,2020-01-01 06:07:32
2,14420219,611633,M,1981.0,2020-01-01 06:03:01,2020-01-01 06:21:43,258,278,18.7,2020-01-01 06:03:01,2020-01-01 06:21:43
3,14420220,521601,M,1989.0,2020-01-01 06:05:48,2020-01-01 06:15:11,200,201,9.383333,2020-01-01 06:05:48,2020-01-01 06:15:11
4,14420221,631227,M,1979.0,2020-01-01 06:08:15,2020-01-01 06:29:33,151,248,21.3,2020-01-01 06:08:15,2020-01-01 06:29:33


In [32]:
del mibici_2020

In [33]:
mibici_2021 = load_data('mibici/mibici_2021.csv', flag_dir)
mibici_2021.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,18026279,436620,M,1978.0,2021-01-01 00:10:40,2021-01-01 00:17:54,62,77,7.233333
1,18026280,672064,M,1977.0,2021-01-01 05:58:10,2021-01-01 05:58:37,19,19,0.45
2,18026281,653216,M,1976.0,2021-01-01 05:58:17,2021-01-01 06:01:11,11,182,2.9
3,18026282,672064,M,1977.0,2021-01-01 05:58:59,2021-01-01 06:09:26,19,11,10.45
4,18026283,521601,M,1989.0,2021-01-01 06:00:58,2021-01-01 06:08:00,200,198,7.033333


In [34]:
mibici_2021['Inicio'] = mibici_2021['Inicio_del_viaje']
mibici_2021['Fin'] = mibici_2021['Fin_del_viaje']
mibici_2021.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,18026279,436620,M,1978.0,2021-01-01 00:10:40,2021-01-01 00:17:54,62,77,7.233333,2021-01-01 00:10:40,2021-01-01 00:17:54
1,18026280,672064,M,1977.0,2021-01-01 05:58:10,2021-01-01 05:58:37,19,19,0.45,2021-01-01 05:58:10,2021-01-01 05:58:37
2,18026281,653216,M,1976.0,2021-01-01 05:58:17,2021-01-01 06:01:11,11,182,2.9,2021-01-01 05:58:17,2021-01-01 06:01:11
3,18026282,672064,M,1977.0,2021-01-01 05:58:59,2021-01-01 06:09:26,19,11,10.45,2021-01-01 05:58:59,2021-01-01 06:09:26
4,18026283,521601,M,1989.0,2021-01-01 06:00:58,2021-01-01 06:08:00,200,198,7.033333,2021-01-01 06:00:58,2021-01-01 06:08:00


In [35]:
mibici_2021.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2021.csv', index = False)
mibici_2021 = load_data('mibici/mibici_2021.csv', flag_dir)
mibici_2021.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,18026279,436620,M,1978.0,2021-01-01 00:10:40,2021-01-01 00:17:54,62,77,7.233333,2021-01-01 00:10:40,2021-01-01 00:17:54
1,18026280,672064,M,1977.0,2021-01-01 05:58:10,2021-01-01 05:58:37,19,19,0.45,2021-01-01 05:58:10,2021-01-01 05:58:37
2,18026281,653216,M,1976.0,2021-01-01 05:58:17,2021-01-01 06:01:11,11,182,2.9,2021-01-01 05:58:17,2021-01-01 06:01:11
3,18026282,672064,M,1977.0,2021-01-01 05:58:59,2021-01-01 06:09:26,19,11,10.45,2021-01-01 05:58:59,2021-01-01 06:09:26
4,18026283,521601,M,1989.0,2021-01-01 06:00:58,2021-01-01 06:08:00,200,198,7.033333,2021-01-01 06:00:58,2021-01-01 06:08:00


In [36]:
del mibici_2021

In [37]:
mibici_2022 = load_data('mibici/mibici_2022.csv', flag_dir)
mibici_2022.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,21782677,1304044,M,2000.0,2022-01-01 00:05:41,2022-01-01 00:14:37,227,236,8.933333
1,21782678,673514,M,1984.0,2022-01-01 00:09:50,2022-01-01 00:20:35,67,53,10.75
2,21782679,454306,M,1990.0,2022-01-01 00:10:07,2022-01-01 00:20:31,67,53,10.4
3,21782680,372607,M,1977.0,2022-01-01 00:10:45,2022-01-01 00:13:09,30,23,2.4
4,21782681,1277213,F,1973.0,2022-01-01 00:11:22,2022-01-01 00:29:16,86,8,17.9


In [38]:
mibici_2022['Inicio'] = mibici_2022['Inicio_del_viaje']
mibici_2022['Fin'] = mibici_2022['Fin_del_viaje']
mibici_2022.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,21782677,1304044,M,2000.0,2022-01-01 00:05:41,2022-01-01 00:14:37,227,236,8.933333,2022-01-01 00:05:41,2022-01-01 00:14:37
1,21782678,673514,M,1984.0,2022-01-01 00:09:50,2022-01-01 00:20:35,67,53,10.75,2022-01-01 00:09:50,2022-01-01 00:20:35
2,21782679,454306,M,1990.0,2022-01-01 00:10:07,2022-01-01 00:20:31,67,53,10.4,2022-01-01 00:10:07,2022-01-01 00:20:31
3,21782680,372607,M,1977.0,2022-01-01 00:10:45,2022-01-01 00:13:09,30,23,2.4,2022-01-01 00:10:45,2022-01-01 00:13:09
4,21782681,1277213,F,1973.0,2022-01-01 00:11:22,2022-01-01 00:29:16,86,8,17.9,2022-01-01 00:11:22,2022-01-01 00:29:16


In [39]:
mibici_2022.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2022.csv', index = False)
mibici_2022 = load_data('mibici/mibici_2022.csv', flag_dir)
mibici_2022.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,21782677,1304044,M,2000.0,2022-01-01 00:05:41,2022-01-01 00:14:37,227,236,8.933333,2022-01-01 00:05:41,2022-01-01 00:14:37
1,21782678,673514,M,1984.0,2022-01-01 00:09:50,2022-01-01 00:20:35,67,53,10.75,2022-01-01 00:09:50,2022-01-01 00:20:35
2,21782679,454306,M,1990.0,2022-01-01 00:10:07,2022-01-01 00:20:31,67,53,10.4,2022-01-01 00:10:07,2022-01-01 00:20:31
3,21782680,372607,M,1977.0,2022-01-01 00:10:45,2022-01-01 00:13:09,30,23,2.4,2022-01-01 00:10:45,2022-01-01 00:13:09
4,21782681,1277213,F,1973.0,2022-01-01 00:11:22,2022-01-01 00:29:16,86,8,17.9,2022-01-01 00:11:22,2022-01-01 00:29:16


In [40]:
del mibici_2022

In [41]:
mibici_2023 = load_data('mibici/mibici_2023.csv', flag_dir)
mibici_2023.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
0,26751198,1350606,M,1983.0,2023-01-01 00:00:41,2023-01-01 00:26:52,154,50,,26.183333
1,26751199,15163,M,1986.0,2023-01-01 00:04:48,2023-01-01 00:07:46,64,79,,2.966667
2,26751200,1534764,M,1987.0,2023-01-01 00:06:49,2023-01-01 00:17:36,35,12,,10.783333
3,26751201,1240140,M,1996.0,2023-01-01 00:08:05,2023-01-01 00:09:00,35,35,,0.916667
4,26751202,1732214,M,1969.0,2023-01-01 00:09:14,2023-01-01 00:14:57,54,171,,5.716667


In [43]:
columns = mibici_2023.columns
columns[-2]

'A}äe_nacimiento'

In [48]:
col_name = columns[-2]

problem = mibici_2023[mibici_2023[col_name].isna() != True]
problem.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
1123390,28080053,2125076,M,,2023-04-01 00:00:13,2023-04-01 00:19:22,154,95,1996.0,19.15
1123391,28080054,316101,M,,2023-04-01 00:00:13,2023-04-01 00:06:26,154,170,1990.0,6.216667
1123392,28080056,1115612,M,,2023-04-01 00:01:54,2023-04-01 00:22:12,193,34,1998.0,20.3
1123393,28080059,611033,M,,2023-04-01 00:03:13,2023-04-01 00:19:46,79,275,2001.0,16.55
1123394,28080062,21043,M,,2023-04-01 00:04:28,2023-04-01 00:24:49,50,303,1990.0,20.35


In [49]:
problem.tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
1820351,28897242,1716032,M,,2023-05-31 23:59:22,2023-06-01 00:09:55,200,20,1995.0,10.55
1820352,28897243,1542511,,,2023-05-31 23:59:40,2023-06-01 00:24:55,88,327,1998.0,25.25
1820353,28897244,631056,M,,2023-05-31 23:59:43,2023-06-01 00:02:58,66,43,1998.0,3.25
1820354,28897245,610456,M,,2023-05-31 23:59:47,2023-06-01 00:13:06,153,234,1993.0,13.316667
1820355,28897246,1667143,M,,2023-05-31 23:59:48,2023-06-01 00:05:47,227,232,1997.0,5.983333


In [50]:
mibici_2023['Año_de_nacimiento'].fillna(mibici_2023[col_name], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  mibici_2023['Año_de_nacimiento'].fillna(mibici_2023[col_name], inplace=True)


In [51]:
mibici_2023[mibici_2023[col_name].isna() != True].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
1123390,28080053,2125076,M,1996.0,2023-04-01 00:00:13,2023-04-01 00:19:22,154,95,1996.0,19.15
1123391,28080054,316101,M,1990.0,2023-04-01 00:00:13,2023-04-01 00:06:26,154,170,1990.0,6.216667
1123392,28080056,1115612,M,1998.0,2023-04-01 00:01:54,2023-04-01 00:22:12,193,34,1998.0,20.3
1123393,28080059,611033,M,2001.0,2023-04-01 00:03:13,2023-04-01 00:19:46,79,275,2001.0,16.55
1123394,28080062,21043,M,1990.0,2023-04-01 00:04:28,2023-04-01 00:24:49,50,303,1990.0,20.35


In [52]:
mibici_2023[mibici_2023[col_name].isna() != True].tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
1820351,28897242,1716032,M,1995.0,2023-05-31 23:59:22,2023-06-01 00:09:55,200,20,1995.0,10.55
1820352,28897243,1542511,,1998.0,2023-05-31 23:59:40,2023-06-01 00:24:55,88,327,1998.0,25.25
1820353,28897244,631056,M,1998.0,2023-05-31 23:59:43,2023-06-01 00:02:58,66,43,1998.0,3.25
1820354,28897245,610456,M,1993.0,2023-05-31 23:59:47,2023-06-01 00:13:06,153,234,1993.0,13.316667
1820355,28897246,1667143,M,1997.0,2023-05-31 23:59:48,2023-06-01 00:05:47,227,232,1997.0,5.983333


In [53]:
mibici_2023.drop(columns = [col_name], inplace = True)
mibici_2023.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,26751198,1350606,M,1983.0,2023-01-01 00:00:41,2023-01-01 00:26:52,154,50,26.183333
1,26751199,15163,M,1986.0,2023-01-01 00:04:48,2023-01-01 00:07:46,64,79,2.966667
2,26751200,1534764,M,1987.0,2023-01-01 00:06:49,2023-01-01 00:17:36,35,12,10.783333
3,26751201,1240140,M,1996.0,2023-01-01 00:08:05,2023-01-01 00:09:00,35,35,0.916667
4,26751202,1732214,M,1969.0,2023-01-01 00:09:14,2023-01-01 00:14:57,54,171,5.716667


In [54]:
mibici_2023['Inicio'] = mibici_2023['Inicio_del_viaje']
mibici_2023['Fin'] = mibici_2023['Fin_del_viaje']
mibici_2023.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,26751198,1350606,M,1983.0,2023-01-01 00:00:41,2023-01-01 00:26:52,154,50,26.183333,2023-01-01 00:00:41,2023-01-01 00:26:52
1,26751199,15163,M,1986.0,2023-01-01 00:04:48,2023-01-01 00:07:46,64,79,2.966667,2023-01-01 00:04:48,2023-01-01 00:07:46
2,26751200,1534764,M,1987.0,2023-01-01 00:06:49,2023-01-01 00:17:36,35,12,10.783333,2023-01-01 00:06:49,2023-01-01 00:17:36
3,26751201,1240140,M,1996.0,2023-01-01 00:08:05,2023-01-01 00:09:00,35,35,0.916667,2023-01-01 00:08:05,2023-01-01 00:09:00
4,26751202,1732214,M,1969.0,2023-01-01 00:09:14,2023-01-01 00:14:57,54,171,5.716667,2023-01-01 00:09:14,2023-01-01 00:14:57


In [55]:
mibici_2023.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2023.csv', index = False)
mibici_2023 = load_data('mibici/mibici_2023.csv', flag_dir)
mibici_2023.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje,Inicio,Fin
0,26751198,1350606,M,1983.0,2023-01-01 00:00:41,2023-01-01 00:26:52,154,50,26.183333,2023-01-01 00:00:41,2023-01-01 00:26:52
1,26751199,15163,M,1986.0,2023-01-01 00:04:48,2023-01-01 00:07:46,64,79,2.966667,2023-01-01 00:04:48,2023-01-01 00:07:46
2,26751200,1534764,M,1987.0,2023-01-01 00:06:49,2023-01-01 00:17:36,35,12,10.783333,2023-01-01 00:06:49,2023-01-01 00:17:36
3,26751201,1240140,M,1996.0,2023-01-01 00:08:05,2023-01-01 00:09:00,35,35,0.916667,2023-01-01 00:08:05,2023-01-01 00:09:00
4,26751202,1732214,M,1969.0,2023-01-01 00:09:14,2023-01-01 00:14:57,54,171,5.716667,2023-01-01 00:09:14,2023-01-01 00:14:57


In [56]:
del mibici_2023

In [59]:
mibici_2024_1 = pd.read_csv('data_mibici/datos_abiertos_2024_01.csv', encoding='latin1')
mibici_2024_2 = pd.read_csv('data_mibici/datos_abiertos_2024_02.csv', encoding='latin1')
mibici_2024_3 = pd.read_csv('data_mibici/datos_abiertos_2024_03.csv', encoding='latin1')
mibici_2024_4 = pd.read_csv('data_mibici/datos_abiertos_2024_04.csv', encoding='latin1')
mibici_2024_5 = pd.read_csv('data_mibici/datos_abiertos_2024_05.csv', encoding='latin1')
mibici_2024_6 = pd.read_csv('data_mibici/datos_abiertos_2024_06.csv', encoding='latin1')
mibici_2024_7 = pd.read_csv('data_mibici/datos_abiertos_2024_07.csv', encoding='latin1')
mibici_2024_8 = pd.read_csv('data_mibici/datos_abiertos_2024_08.csv', encoding='latin1')
mibici_2024_9 = pd.read_csv('data_mibici/datos_abiertos_2024_09.csv', encoding='latin1')
mibici_2024_10 = pd.read_csv('data_mibici/datos_abiertos_2024_10.csv', encoding='latin1')
mibici_2024_11 = pd.read_csv('data_mibici/datos_abiertos_2024_11.csv', encoding='latin1')
mibici_2024_12 = pd.read_csv('data_mibici/datos_abiertos_2024_12.csv', encoding='latin1')
mibici_2024_list = [mibici_2024_1, mibici_2024_2, mibici_2024_3, mibici_2024_4, mibici_2024_5, mibici_2024_6, mibici_2024_7, mibici_2024_8, mibici_2024_9, mibici_2024_10, mibici_2024_11, mibici_2024_12]

In [60]:
mibici_2024_list[0].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,31789291,640715,M,1984.0,2024-01-01 00:05:25,2024-01-01 00:29:37,294,271
1,31789292,2353010,F,1995.0,2024-01-01 00:07:23,2024-01-01 00:16:37,182,254
2,31789293,1556365,M,1984.0,2024-01-01 00:07:31,2024-01-01 00:16:37,182,254
3,31789294,2626233,M,1994.0,2024-01-01 00:07:56,2024-01-01 00:31:51,35,154
4,31789295,2602006,M,1998.0,2024-01-01 00:08:09,2024-01-01 00:31:51,35,154


In [61]:
mibici_2024_list[1].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,A}äe_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,32244898,133007,M,1994.0,2024-02-01 00:01:46,2024-02-01 00:13:00,62,195
1,32244900,2524051,F,1997.0,2024-02-01 00:02:26,2024-02-01 00:12:41,196,25
2,32244901,1431452,M,2001.0,2024-02-01 00:02:36,2024-02-01 00:02:39,383,383
3,32244902,1405511,F,1969.0,2024-02-01 00:02:52,2024-02-01 00:14:38,66,14
4,32244903,276725,M,1990.0,2024-02-01 00:02:57,2024-02-01 00:12:17,75,179


In [62]:
mibici_2024_list[1].rename(columns = {'A}äe_nacimiento': 'Año_de_nacimiento'}, inplace = True)
mibici_2024_list[1].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,32244898,133007,M,1994.0,2024-02-01 00:01:46,2024-02-01 00:13:00,62,195
1,32244900,2524051,F,1997.0,2024-02-01 00:02:26,2024-02-01 00:12:41,196,25
2,32244901,1431452,M,2001.0,2024-02-01 00:02:36,2024-02-01 00:02:39,383,383
3,32244902,1405511,F,1969.0,2024-02-01 00:02:52,2024-02-01 00:14:38,66,14
4,32244903,276725,M,1990.0,2024-02-01 00:02:57,2024-02-01 00:12:17,75,179


In [63]:
mibici_2024_list[2].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,32689032,2144362,,1997.0,2024-03-01 00:00:22,2024-03-01 00:20:38,70,247
1,32689037,2703032,M,1970.0,2024-03-01 00:00:52,2024-03-01 00:25:33,78,289
2,32689038,513364,M,1992.0,2024-03-01 00:01:26,2024-03-01 00:26:04,78,261
3,32689039,1467106,M,1995.0,2024-03-01 00:02:04,2024-03-01 00:09:56,154,30
4,32689042,641553,M,1985.0,2024-03-01 00:02:22,2024-03-01 00:07:13,86,49


In [64]:
mibici_2024_list[3].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,33143294,2540332,M,1989.0,2024-04-01 00:00:48,2024-04-01 00:05:06,21,33
1,33143296,531743,M,1976.0,2024-04-01 00:02:10,2024-04-01 00:18:36,79,207
2,33143297,1264247,M,1999.0,2024-04-01 00:02:18,2024-04-01 00:18:58,79,207
3,33143298,2071356,M,2004.0,2024-04-01 00:02:19,2024-04-01 00:18:49,79,207
4,33143299,745046,M,1995.0,2024-04-01 00:02:42,2024-04-01 00:10:45,41,52


In [65]:
mibici_2024_list[4].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,A}äe_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,33617601,1010367,M,1990.0,2024-05-01 00:01:35,2024-05-01 00:09:08,196,49
1,33617605,2240043,M,1994.0,2024-05-01 00:02:00,2024-05-01 00:22:20,174,229
2,33617607,2762155,M,2003.0,2024-05-01 00:02:29,2024-05-01 00:02:46,33,33
3,33617610,1452765,M,1988.0,2024-05-01 00:02:57,2024-05-01 00:14:42,30,3
4,33617611,2431176,M,2000.0,2024-05-01 00:03:17,2024-05-01 00:10:59,196,177


In [66]:
mibici_2024_list[4].rename(columns = {'A}äe_nacimiento': 'Año_de_nacimiento'}, inplace = True)
mibici_2024_list[4].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,33617601,1010367,M,1990.0,2024-05-01 00:01:35,2024-05-01 00:09:08,196,49
1,33617605,2240043,M,1994.0,2024-05-01 00:02:00,2024-05-01 00:22:20,174,229
2,33617607,2762155,M,2003.0,2024-05-01 00:02:29,2024-05-01 00:02:46,33,33
3,33617610,1452765,M,1988.0,2024-05-01 00:02:57,2024-05-01 00:14:42,30,3
4,33617611,2431176,M,2000.0,2024-05-01 00:03:17,2024-05-01 00:10:59,196,177


In [67]:
mibici_2024_list[5].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,34099262,1024463,M,1988.0,2024-06-01 00:00:17,2024-06-01 00:08:09,82,190
1,34099263,370373,M,1990.0,2024-06-01 00:00:26,2024-06-01 00:08:26,66,53
2,34099264,576623,M,1999.0,2024-06-01 00:01:02,2024-06-01 00:09:41,131,320
3,34099265,1040604,M,1991.0,2024-06-01 00:01:04,2024-06-01 00:08:32,272,330
4,34099266,2701416,M,1996.0,2024-06-01 00:01:56,2024-06-01 00:11:38,66,195


In [68]:
mibici_2024_list[6].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,AÃ±o_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,34524982,556754,M,1981.0,2024-07-01 00:00:34,2024-07-01 00:09:40,88,88
1,34524983,3477334,M,1987.0,2024-07-01 00:01:09,2024-07-01 00:43:56,210,210
2,34524986,4040711,M,1998.0,2024-07-01 00:03:32,2024-07-01 00:26:51,206,197
3,34524987,21120,M,1981.0,2024-07-01 00:04:15,2024-07-01 00:08:19,26,52
4,34524988,4044423,M,2001.0,2024-07-01 00:04:20,2024-07-01 00:30:54,213,160


In [69]:
mibici_2024_list[6].rename(columns = {'AÃ±o_de_nacimiento': 'Año_de_nacimiento'}, inplace = True)
mibici_2024_list[6].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,34524982,556754,M,1981.0,2024-07-01 00:00:34,2024-07-01 00:09:40,88,88
1,34524983,3477334,M,1987.0,2024-07-01 00:01:09,2024-07-01 00:43:56,210,210
2,34524986,4040711,M,1998.0,2024-07-01 00:03:32,2024-07-01 00:26:51,206,197
3,34524987,21120,M,1981.0,2024-07-01 00:04:15,2024-07-01 00:08:19,26,52
4,34524988,4044423,M,2001.0,2024-07-01 00:04:20,2024-07-01 00:30:54,213,160


In [70]:
mibici_2024_list[7].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,34950758,1653754,M,2001.0,2024-08-01 00:00:09,2024-08-01 00:14:18,12,159
1,34950759,2703032,M,1970.0,2024-08-01 00:01:08,2024-08-01 00:23:41,78,341
2,34950760,2077125,M,1990.0,2024-08-01 00:01:16,2024-08-01 00:26:01,85,53
3,34950764,3503076,F,1981.0,2024-08-01 00:05:12,2024-08-01 00:25:26,255,349
4,34950765,3470220,M,1997.0,2024-08-01 00:05:47,2024-08-01 00:21:17,21,178


In [71]:
mibici_2024_list[8].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,35406564,3366663,M,2000.0,01/09/2024 0:00,01/09/2024 0:07,154,161
1,35406567,631455,M,1990.0,01/09/2024 0:02,01/09/2024 0:08,50,156
2,35406571,216004,M,1990.0,01/09/2024 0:02,01/09/2024 0:18,64,273
3,35406572,676145,M,1988.0,01/09/2024 0:03,01/09/2024 0:10,271,164
4,35406573,2244256,M,1998.0,01/09/2024 0:03,01/09/2024 0:19,199,63


In [73]:
mibici_2024_list[8].tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
390067,35847242,1452352,M,2001.0,30/09/2024 23:57,01/10/2024 0:12,32,249
390068,35847244,2026005,M,2003.0,30/09/2024 23:58,01/10/2024 0:03,269,354
390069,35847245,1302130,M,1970.0,30/09/2024 23:59,01/10/2024 0:15,184,176
390070,35847246,476143,M,1992.0,30/09/2024 23:59,01/10/2024 0:17,169,169
390071,35847247,513364,M,1992.0,30/09/2024 23:59,01/10/2024 0:11,158,261


In [74]:
mibici_2024_list[8]['Inicio_del_viaje'] = pd.to_datetime(mibici_2024_list[8]['Inicio_del_viaje'], format='%d/%m/%Y %H:%M').dt.strftime('%Y-%m-%d %H:%M:%S')
mibici_2024_list[8]['Fin_del_viaje'] = pd.to_datetime(mibici_2024_list[8]['Fin_del_viaje'], format='%d/%m/%Y %H:%M').dt.strftime('%Y-%m-%d %H:%M:%S')
mibici_2024_list[8].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,35406564,3366663,M,2000.0,2024-09-01 00:00:00,2024-09-01 00:07:00,154,161
1,35406567,631455,M,1990.0,2024-09-01 00:02:00,2024-09-01 00:08:00,50,156
2,35406571,216004,M,1990.0,2024-09-01 00:02:00,2024-09-01 00:18:00,64,273
3,35406572,676145,M,1988.0,2024-09-01 00:03:00,2024-09-01 00:10:00,271,164
4,35406573,2244256,M,1998.0,2024-09-01 00:03:00,2024-09-01 00:19:00,199,63


In [75]:
mibici_2024_list[9].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,35847248,701161,M,1983.0,01/10/2024 0:00,01/10/2024 0:21,40,213
1,35847249,4453122,M,1997.0,01/10/2024 0:01,01/10/2024 0:18,189,3
2,35847250,302530,M,1994.0,01/10/2024 0:01,01/10/2024 0:11,131,342
3,35847251,4062235,F,1999.0,01/10/2024 0:01,01/10/2024 0:18,189,3
4,35847252,2664376,M,1979.0,01/10/2024 0:02,01/10/2024 0:12,244,46


In [76]:
mibici_2024_list[9]['Inicio_del_viaje'] = pd.to_datetime(mibici_2024_list[9]['Inicio_del_viaje'], format='%d/%m/%Y %H:%M').dt.strftime('%Y-%m-%d %H:%M:%S')
mibici_2024_list[9]['Fin_del_viaje'] = pd.to_datetime(mibici_2024_list[9]['Fin_del_viaje'], format='%d/%m/%Y %H:%M').dt.strftime('%Y-%m-%d %H:%M:%S')
mibici_2024_list[9].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,35847248,701161,M,1983.0,2024-10-01 00:00:00,2024-10-01 00:21:00,40,213
1,35847249,4453122,M,1997.0,2024-10-01 00:01:00,2024-10-01 00:18:00,189,3
2,35847250,302530,M,1994.0,2024-10-01 00:01:00,2024-10-01 00:11:00,131,342
3,35847251,4062235,F,1999.0,2024-10-01 00:01:00,2024-10-01 00:18:00,189,3
4,35847252,2664376,M,1979.0,2024-10-01 00:02:00,2024-10-01 00:12:00,244,46


In [77]:
mibici_2024_list[10].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,36319536,1176651,M,1995.0,2024-11-01 00:00:04,2024-11-01 00:04:32,271,8
1,36319537,726517,F,1996.0,2024-11-01 00:00:14,2024-11-01 00:07:19,291,108
2,36319538,536255,F,2000.0,2024-11-01 00:00:16,2024-11-01 00:06:59,272,58
3,36319539,1463306,M,1993.0,2024-11-01 00:00:26,2024-11-01 00:04:30,64,83
4,36319549,2135111,M,1981.0,2024-11-01 00:01:54,2024-11-01 00:21:11,108,154


In [78]:
mibici_2024_list[11].head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id
0,36768628,4045170,M,1998.0,2024-12-01 00:00:17,2024-12-01 00:26:04,55,361
1,36768629,4031470,F,1999.0,2024-12-01 00:00:38,2024-12-01 00:12:36,82,52
2,36768630,2057773,,1996.0,2024-12-01 00:00:41,2024-12-01 00:06:23,68,277
3,36768631,4054364,M,1993.0,2024-12-01 00:00:52,2024-12-01 00:25:35,62,204
4,36768632,4045237,M,1990.0,2024-12-01 00:00:55,2024-12-01 00:06:35,68,277


In [79]:
for i in range(len(mibici_2024_list)):
    mibici_2024_list[i]['Inicio'] = mibici_2024_list[i]['Inicio_del_viaje']
    mibici_2024_list[i]['Fin'] = mibici_2024_list[i]['Fin_del_viaje']

In [80]:
mibici_2024 = pd.concat(mibici_2024_list, ignore_index=True)
mibici_2024.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Inicio,Fin
0,31789291,640715,M,1984.0,2024-01-01 00:05:25,2024-01-01 00:29:37,294,271,2024-01-01 00:05:25,2024-01-01 00:29:37
1,31789292,2353010,F,1995.0,2024-01-01 00:07:23,2024-01-01 00:16:37,182,254,2024-01-01 00:07:23,2024-01-01 00:16:37
2,31789293,1556365,M,1984.0,2024-01-01 00:07:31,2024-01-01 00:16:37,182,254,2024-01-01 00:07:31,2024-01-01 00:16:37
3,31789294,2626233,M,1994.0,2024-01-01 00:07:56,2024-01-01 00:31:51,35,154,2024-01-01 00:07:56,2024-01-01 00:31:51
4,31789295,2602006,M,1998.0,2024-01-01 00:08:09,2024-01-01 00:31:51,35,154,2024-01-01 00:08:09,2024-01-01 00:31:51


In [81]:
mibici_2024.tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Inicio,Fin
4645916,37162337,460001,M,1989.0,2024-12-31 23:44:12,2025-01-01 00:10:43,188,269,2024-12-31 23:44:12,2025-01-01 00:10:43
4645917,37162338,543744,M,1985.0,2024-12-31 23:46:38,2024-12-31 23:50:08,39,49,2024-12-31 23:46:38,2024-12-31 23:50:08
4645918,37162339,737764,M,1996.0,2024-12-31 23:47:37,2024-12-31 23:51:12,158,276,2024-12-31 23:47:37,2024-12-31 23:51:12
4645919,37162340,43277,M,1979.0,2024-12-31 23:54:40,2025-01-01 01:17:01,57,77,2024-12-31 23:54:40,2025-01-01 01:17:01
4645920,37162341,2711370,M,1996.0,2024-12-31 23:55:17,2025-01-01 00:05:00,51,268,2024-12-31 23:55:17,2025-01-01 00:05:00


In [82]:
mibici_2024.to_csv('/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/mibici/mibici_2024.csv', index=False, encoding='utf-8')
mibici_2024 = load_data('mibici/mibici_2024.csv', flag_dir)
mibici_2024.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Inicio,Fin
0,31789291,640715,M,1984.0,2024-01-01 00:05:25,2024-01-01 00:29:37,294,271,2024-01-01 00:05:25,2024-01-01 00:29:37
1,31789292,2353010,F,1995.0,2024-01-01 00:07:23,2024-01-01 00:16:37,182,254,2024-01-01 00:07:23,2024-01-01 00:16:37
2,31789293,1556365,M,1984.0,2024-01-01 00:07:31,2024-01-01 00:16:37,182,254,2024-01-01 00:07:31,2024-01-01 00:16:37
3,31789294,2626233,M,1994.0,2024-01-01 00:07:56,2024-01-01 00:31:51,35,154,2024-01-01 00:07:56,2024-01-01 00:31:51
4,31789295,2602006,M,1998.0,2024-01-01 00:08:09,2024-01-01 00:31:51,35,154,2024-01-01 00:08:09,2024-01-01 00:31:51


In [83]:
mibici_2024.tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Inicio,Fin
4645916,37162337,460001,M,1989.0,2024-12-31 23:44:12,2025-01-01 00:10:43,188,269,2024-12-31 23:44:12,2025-01-01 00:10:43
4645917,37162338,543744,M,1985.0,2024-12-31 23:46:38,2024-12-31 23:50:08,39,49,2024-12-31 23:46:38,2024-12-31 23:50:08
4645918,37162339,737764,M,1996.0,2024-12-31 23:47:37,2024-12-31 23:51:12,158,276,2024-12-31 23:47:37,2024-12-31 23:51:12
4645919,37162340,43277,M,1979.0,2024-12-31 23:54:40,2025-01-01 01:17:01,57,77,2024-12-31 23:54:40,2025-01-01 01:17:01
4645920,37162341,2711370,M,1996.0,2024-12-31 23:55:17,2025-01-01 00:05:00,51,268,2024-12-31 23:55:17,2025-01-01 00:05:00
