In [1]:
#libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import folium
import os
import sys
import abstract_flows.arrow as arrow
import abstract_flows.grid as grid
import abstract_flows.flows as flows

In [2]:
# Selects which hard-coded data directory load_data uses:
# True -> CIMAT computer, False -> Antonio's computer (see the load_data docstring).
flag_dir = False

def load_data(name, flag_dir = True, base_dir = None):
    '''
    Load a dataset stored as a CSV or NumPy file from the data directory.

    Params:
    name: str
        Name of the file to load, relative to the data directory (it may
        include sub-directories). Names ending in 'csv' are read with
        pandas.read_csv; names ending in 'npy' are read with numpy.load.
    flag_dir: bool
        If True, the data directory is the one on the CIMAT computer.
        If False, it is the one on Antonio's computer.
        Ignored when base_dir is given.
    base_dir: str or None
        Optional explicit data directory. When provided it replaces the two
        hard-coded locations, so the notebook can run on other machines.

    Returns:
    data
        Whatever pandas.read_csv (csv) or numpy.load (npy) returns.

    Raises:
    SystemExit
        After printing a message, when the extension is not supported or the
        file cannot be read.
    '''
    if base_dir is None:
        if flag_dir:
            base_dir = '/home/user/Desktop/Datos/'
        else:
            base_dir = '/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/'
    elif base_dir and not base_dir.endswith(('/', os.sep)):
        # The file name is appended directly, so make sure a separator is present.
        base_dir += os.sep
    path = base_dir + name

    # Pick the reader from the file name suffix (same test as name[-3:] == ...).
    if name.endswith('csv'):
        loader, kind = pd.read_csv, 'csv'
    elif name.endswith('npy'):
        loader, kind = np.load, 'numpy'
    else:
        # No file access has happened yet: the problem is the extension itself.
        print('Error: unsupported file extension (expected csv or npy)')
        sys.exit()

    try:
        return loader(path)
    except Exception as exc:
        # Report the underlying cause instead of hiding it, then stop as before.
        print(f'Error loading {kind} file: {exc}')
        sys.exit()

In [3]:
# Read mibici/mibici_2019.csv from the data directory selected by flag_dir
# and preview its first rows.
mibici_2019 = load_data('mibici/mibici_2019.csv', flag_dir)
mibici_2019.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667


In [4]:
# Keep only the trips that started on 2019-01-23. 'Inicio_del_viaje' holds strings
# of the form 'YYYY-MM-DD HH:MM:SS', so an anchored, literal prefix test selects the
# day without running an unanchored regular-expression search over every value.
data_prueba = mibici_2019[mibici_2019['Inicio_del_viaje'].str.startswith('2019-01-23')]
data_prueba.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
269711,9216477,151501,M,1987.0,2019-01-23 00:00:01,2019-01-23 00:11:50,111,98,11.816667
269712,9216527,273367,M,1995.0,2019-01-23 06:01:06,2019-01-23 06:12:07,162,62,11.016667
269713,9216528,413647,M,1962.0,2019-01-23 06:01:10,2019-01-23 06:07:56,54,173,6.766667
269714,9216529,200420,F,1994.0,2019-01-23 06:01:19,2019-01-23 06:17:01,268,190,15.7
269715,9216530,64163,M,1986.0,2019-01-23 06:01:28,2019-01-23 06:21:23,78,156,19.916667


In [5]:
# Show the last rows of the single-day subset.
data_prueba.tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
286864,9236087,441444,M,1974.0,2019-01-23 23:58:09,2019-01-24 00:02:48,44,40,4.65
286865,9236088,421237,M,1988.0,2019-01-23 23:58:47,2019-01-24 00:21:19,47,246,22.533333
286866,9236089,266101,M,1976.0,2019-01-23 23:58:59,2019-01-24 00:06:41,42,255,7.7
286867,9236090,361524,F,1994.0,2019-01-23 23:59:03,2019-01-24 00:17:53,19,69,18.833333
286868,9236091,73255,F,1994.0,2019-01-23 23:59:48,2019-01-24 00:27:51,53,256,28.05


In [None]:
def list_hours(init_hour, end_hour, time_interval):
    '''
    Build the list of 'HH:MM:SS' boundaries from init_hour to end_hour.

    Params:
    init_hour: str
        First boundary, formatted as 'HH:MM:SS'.
    end_hour: str
        Last boundary, formatted as 'HH:MM:SS'. If it is earlier than
        init_hour the range is taken to wrap past midnight into the next day.
    time_interval: int or float
        Step between consecutive boundaries, in minutes. Must be positive.

    Returns:
    list of str
        init_hour, then one entry every time_interval minutes, and finally
        end_hour. end_hour appears exactly once, even when it coincides with
        a step.

    Raises:
    ValueError
        If time_interval is not positive (the loop would never terminate).
    '''
    if time_interval <= 0:
        raise ValueError('time_interval must be a positive number of minutes')

    fmt = '%H:%M:%S'
    current = datetime.datetime.strptime(init_hour, fmt)
    end = datetime.datetime.strptime(end_hour, fmt)
    step = datetime.timedelta(minutes=time_interval)

    # A start later than the end means the range crosses midnight. Moving the end
    # to the following day lets one loop handle it, and it terminates even when
    # the steps never land exactly on 00:00:00.
    if current > end:
        end += datetime.timedelta(days=1)

    lh = []
    while current <= end:
        lh.append(current.strftime(fmt))
        current += step

    # Close the list with the end boundary unless the last step already hit it.
    end_label = end.strftime(fmt)
    if lh[-1] != end_label:
        lh.append(end_label)

    return lh

In [19]:
# Quick check of list_hours: 60-minute boundaries between 06:00:00 and 23:59:59.
l_prueba = list_hours('06:00:00', '23:59:59', 60)
l_prueba

['06:00:00',
 '07:00:00',
 '08:00:00',
 '09:00:00',
 '10:00:00',
 '11:00:00',
 '12:00:00',
 '13:00:00',
 '14:00:00',
 '15:00:00',
 '16:00:00',
 '17:00:00',
 '18:00:00',
 '19:00:00',
 '20:00:00',
 '21:00:00',
 '22:00:00',
 '23:00:00',
 '23:59:59']

In [7]:
# Load est_2019.npy with numpy; the result is passed as `stations` to
# flows.stations_and_cells and flows.abstract_flows in the per-window cells.
stations = load_data('Adj_mibici/matrices_estaciones/est_2019.npy', flag_dir)

In [14]:
# Build one abstract-flow map per 60-minute window of 2019-01-23 and save each as HTML.
l = 60
l_hours1 = list_hours('06:00:00', '23:59:59', l)
dir_name = f'/home/user/Desktop/Viajes_por_hora/con_{l}min/'
start_times = data_prueba['Inicio_del_viaje']

# Walk over consecutive (start, end) boundaries instead of looking each one up by value.
for i, (hour, next_hour) in enumerate(zip(l_hours1, l_hours1[1:])):
    window_start = '2019-01-23 ' + hour
    window_end = '2019-01-23 ' + next_hour
    if i == len(l_hours1) - 2:
        # Last window: include its end boundary so a trip starting exactly then is kept.
        in_window = (start_times >= window_start) & (start_times <= window_end)
    else:
        in_window = (start_times >= window_start) & (start_times < window_end)
    current_data = data_prueba[in_window]
    print(f"Processing hour: {hour}")
    current_counter = flows.count_trips_mibici(current_data, threshold=1, directed=True)
    try:
        current_stations = pd.unique(np.concatenate((current_counter['Est_A'].unique(), current_counter['Est_B'].unique())))
        current_grid = grid.Grid(5, 5, 'mibici')
        current_map = current_grid.map_around()
        current_station_cells = flows.stations_and_cells(current_grid.geodataframe(), current_stations, stations)
        current_graph_df = flows.abstract_flows(current_counter, current_grid.geodataframe(), current_station_cells, stations, threshold=1)
        current_map = flows.plot_flows_dataframe(current_graph_df, current_grid.geodataframe(), current_map, title=f'Flows MiBici {hour} - {next_hour}')
        os.makedirs(dir_name, exist_ok=True)
        current_map.save(f'{dir_name}mibici_{hour}.html')
    except Exception as e:
        # Report the failing window and move on to the next one.
        print(f"Error processing hour {hour}: {e}")

Processing hour: 06:00:00
Processing hour: 07:00:00
Processing hour: 08:00:00
Processing hour: 09:00:00
Processing hour: 10:00:00
Processing hour: 11:00:00
Processing hour: 12:00:00
Processing hour: 13:00:00
Processing hour: 14:00:00
Processing hour: 15:00:00
Processing hour: 16:00:00
Processing hour: 17:00:00
Processing hour: 18:00:00
Processing hour: 19:00:00
Processing hour: 20:00:00
Processing hour: 21:00:00
Processing hour: 22:00:00
Processing hour: 23:00:00


In [15]:
# Build one abstract-flow map per 120-minute window of 2019-01-23 and save each as HTML.
l = 120
l_hours1 = list_hours('06:00:00', '23:59:59', l)
dir_name = f'/home/user/Desktop/Viajes_por_hora/con_{l}min/'
start_times = data_prueba['Inicio_del_viaje']

# Walk over consecutive (start, end) boundaries instead of looking each one up by value.
for i, (hour, next_hour) in enumerate(zip(l_hours1, l_hours1[1:])):
    window_start = '2019-01-23 ' + hour
    window_end = '2019-01-23 ' + next_hour
    if i == len(l_hours1) - 2:
        # Last window: include its end boundary so a trip starting exactly then is kept.
        in_window = (start_times >= window_start) & (start_times <= window_end)
    else:
        in_window = (start_times >= window_start) & (start_times < window_end)
    current_data = data_prueba[in_window]
    print(f"Processing hour: {hour}")
    current_counter = flows.count_trips_mibici(current_data, threshold=1, directed=True)
    try:
        current_stations = pd.unique(np.concatenate((current_counter['Est_A'].unique(), current_counter['Est_B'].unique())))
        current_grid = grid.Grid(5, 5, 'mibici')
        current_map = current_grid.map_around()
        current_station_cells = flows.stations_and_cells(current_grid.geodataframe(), current_stations, stations)
        current_graph_df = flows.abstract_flows(current_counter, current_grid.geodataframe(), current_station_cells, stations, threshold=1)
        current_map = flows.plot_flows_dataframe(current_graph_df, current_grid.geodataframe(), current_map, title=f'Flows MiBici {hour} - {next_hour}')
        os.makedirs(dir_name, exist_ok=True)
        current_map.save(f'{dir_name}mibici_{hour}.html')
    except Exception as e:
        # Report the failing window and move on to the next one.
        print(f"Error processing hour {hour}: {e}")

Processing hour: 06:00:00
Processing hour: 08:00:00
Processing hour: 10:00:00
Processing hour: 12:00:00
Processing hour: 14:00:00
Processing hour: 16:00:00
Processing hour: 18:00:00
Processing hour: 20:00:00
Processing hour: 22:00:00


In [16]:
# Build one abstract-flow map per 180-minute window of 2019-01-23 and save each as HTML.
l = 180
l_hours1 = list_hours('06:00:00', '23:59:59', l)
dir_name = f'/home/user/Desktop/Viajes_por_hora/con_{l}min/'
start_times = data_prueba['Inicio_del_viaje']

# Walk over consecutive (start, end) boundaries instead of looking each one up by value.
for i, (hour, next_hour) in enumerate(zip(l_hours1, l_hours1[1:])):
    window_start = '2019-01-23 ' + hour
    window_end = '2019-01-23 ' + next_hour
    if i == len(l_hours1) - 2:
        # Last window: include its end boundary so a trip starting exactly then is kept.
        in_window = (start_times >= window_start) & (start_times <= window_end)
    else:
        in_window = (start_times >= window_start) & (start_times < window_end)
    current_data = data_prueba[in_window]
    print(f"Processing hour: {hour}")
    current_counter = flows.count_trips_mibici(current_data, threshold=1, directed=True)
    try:
        current_stations = pd.unique(np.concatenate((current_counter['Est_A'].unique(), current_counter['Est_B'].unique())))
        current_grid = grid.Grid(5, 5, 'mibici')
        current_map = current_grid.map_around()
        current_station_cells = flows.stations_and_cells(current_grid.geodataframe(), current_stations, stations)
        current_graph_df = flows.abstract_flows(current_counter, current_grid.geodataframe(), current_station_cells, stations, threshold=1)
        current_map = flows.plot_flows_dataframe(current_graph_df, current_grid.geodataframe(), current_map, title=f'Flows MiBici {hour} - {next_hour}')
        os.makedirs(dir_name, exist_ok=True)
        current_map.save(f'{dir_name}mibici_{hour}.html')
    except Exception as e:
        # Report the failing window and move on to the next one.
        print(f"Error processing hour {hour}: {e}")

Processing hour: 06:00:00
Processing hour: 09:00:00
Processing hour: 12:00:00
Processing hour: 15:00:00
Processing hour: 18:00:00
Processing hour: 21:00:00


In [17]:
# Build one abstract-flow map per 240-minute window of 2019-01-23 and save each as HTML.
l = 240
l_hours1 = list_hours('06:00:00', '23:59:59', l)
dir_name = f'/home/user/Desktop/Viajes_por_hora/con_{l}min/'
start_times = data_prueba['Inicio_del_viaje']

# Walk over consecutive (start, end) boundaries instead of looking each one up by value.
for i, (hour, next_hour) in enumerate(zip(l_hours1, l_hours1[1:])):
    window_start = '2019-01-23 ' + hour
    window_end = '2019-01-23 ' + next_hour
    if i == len(l_hours1) - 2:
        # Last window: include its end boundary so a trip starting exactly then is kept.
        in_window = (start_times >= window_start) & (start_times <= window_end)
    else:
        in_window = (start_times >= window_start) & (start_times < window_end)
    current_data = data_prueba[in_window]
    print(f"Processing hour: {hour} - {next_hour}")
    current_counter = flows.count_trips_mibici(current_data, threshold=1, directed=True)
    try:
        current_stations = pd.unique(np.concatenate((current_counter['Est_A'].unique(), current_counter['Est_B'].unique())))
        current_grid = grid.Grid(5, 5, 'mibici')
        current_map = current_grid.map_around()
        current_station_cells = flows.stations_and_cells(current_grid.geodataframe(), current_stations, stations)
        current_graph_df = flows.abstract_flows(current_counter, current_grid.geodataframe(), current_station_cells, stations, threshold=1)
        current_map = flows.plot_flows_dataframe(current_graph_df, current_grid.geodataframe(), current_map, title=f'Flows MiBici {hour} - {next_hour}')
        os.makedirs(dir_name, exist_ok=True)
        current_map.save(f'{dir_name}mibici_{hour}.html')
    except Exception as e:
        # Report the failing window and move on to the next one.
        print(f"Error processing hour {hour}: {e}")

Processing hour: 06:00:00 - 10:00:00
Processing hour: 10:00:00 - 14:00:00
Processing hour: 14:00:00 - 18:00:00
Processing hour: 18:00:00 - 22:00:00
Processing hour: 22:00:00 - 23:59:59
