In [249]:
import pandas as pd
from haversine import haversine as hvs, Unit
import numpy as np

In [208]:
# Haversine usage example (kept commented out, for reference only).
# Both points are written as (lat, lon) tuples; the stop below is the
# "primer stop" row of the bus-stop table.
# lyon = (-13.5549784, -71.8559785) # (lat, lon)
# stop = (-13.5548123, -71.8573001) # (lat, lon)

# print(stop)
# hvs(lyon, stop)

# Análisis de Datos

### Busstop ida

In [127]:
# Load the outbound ("ida") bus stops and fold the separate latitude and
# longitude columns into a single (lat, lon) tuple column. The tuple is
# written into the existing 'latitude' slot first so the column keeps its
# position, then renamed; 'longitude' and 'number' are no longer needed.
df_stops_ida = (
    pd.read_csv('busstops_ida.csv')
    .assign(latitude=lambda stops: list(zip(stops['latitude'], stops['longitude'])))
    .drop(columns=['longitude', 'number'])
    .rename(columns={'latitude': 'latitude_longitude'})
)

df_stops_ida

Unnamed: 0,id,name,navigation,latitude_longitude,radio
0,0,inicio,159,"(-13.5549784, -71.856025)",30
1,36,primer stop,304,"(-13.5548123, -71.8573001)",18
2,37,segundo stop,281,"(-13.5532944, -71.8619442)",13
3,38,Ccollana,283,"(-13.552296, -71.867299)",9
4,39,Puente Huaccoto,283,"(-13.551592, -71.870211)",10
...,...,...,...,...,...
79,114,Segundo,221,"(-13.547528, -71.985248)",10
80,115,Reservorio,225,"(-13.54966, -71.988003)",10
81,116,Cuarto,230,"(-13.550408, -71.98888)",10
82,117,Quinto,203,"(-13.550841, -71.989448)",10


### Crear links

In [128]:
def generateLinks(row, stops=None):
    """Attach the next stop and the "<from>:<to>" link reference to a stop row.

    Meant to be used with ``DataFrame.apply(..., axis=1)`` over every stop
    except the last one, because the stop labelled ``row.name + 1`` must exist.

    Parameters
    ----------
    row : pandas.Series
        One stop row. ``row.name`` must be its integer index label and
        ``row['id']`` its stop id. The row is modified in place.
    stops : pandas.DataFrame, optional
        Table in which the next stop is looked up by index label. Defaults to
        the notebook-level ``df_linkref`` so one-argument calls keep working.

    Returns
    -------
    pandas.Series
        The same row with two extra entries: ``stop`` (id of the next stop)
        and ``linkref`` (``"<id>:<next id>"``).

    Raises
    ------
    KeyError
        If ``stops`` has no row labelled ``row.name + 1``.
    """
    if stops is None:
        # Resolved at call time, so the global only needs to exist when the
        # function is actually applied.
        stops = df_linkref

    next_stop_id = stops.loc[row.name + 1, 'id']  # looked up once, used twice
    row['stop'] = next_stop_id
    row['linkref'] = str(row['id']) + ':' + str(next_stop_id)
    return row

# Build the link table: every stop except the last is paired with the stop
# that follows it.
df_linkref = df_stops_ida.copy()
df_linkref = df_linkref.iloc[:-1].apply(generateLinks, axis=1)

# The first five columns are the original stop attributes; dropping them leaves
# only the two columns added by generateLinks ('stop' and 'linkref').
# NOTE(review): 'id' is among the dropped columns, so the 'id' -> 'init_stop'
# entry of the rename below never matches anything.
df_linkref = (
    df_linkref
    .drop(columns=df_linkref.columns[:5])
    .rename(columns={'id': 'init_stop', 'stop': 'end_stop'})
)

# Stop ids are stored as text so they can be matched against string stop ids.
df_linkref['end_stop'] = df_linkref['end_stop'].to_numpy().astype(str)
df_linkref

Unnamed: 0,end_stop,linkref
0,36,0:36
1,37,36:37
2,38,37:38
3,39,38:39
4,40,39:40
...,...,...
78,114,113:114
79,115,114:115
80,116,115:116
81,117,116:117


### Monitoring

In [218]:
# Load the raw monitoring records.
data = pd.read_csv('monitoring.csv')

# Reshape the frame:
#   * 'date' and 'time' are merged into one "date time" string,
#   * 'latitude' and 'longitude' are merged into one (lat, lon) tuple,
#   * rows repeating an already seen position are dropped
#     (shapes noted by the author: (65155, 7) and (64702, 7)),
#   * rows are ordered by the timestamp, which is still text at that point.
data = (
    data
    .assign(
        date=lambda df: df['date'] + (' ' + df['time']),
        latitude=lambda df: list(zip(df['latitude'], df['longitude'])),
    )
    .drop(columns=['time', 'longitude'])
    .rename(columns={'date': 'date_time', 'latitude': 'latitude_longitude', 'vehicle_id_id': 'vehicle_id'})
    .drop_duplicates(subset=['latitude_longitude'])
    .sort_values(by='date_time')
)

# Convert the timestamp column from object (str) to datetime.
data['date_time'] = pd.to_datetime(data['date_time'], format='%Y-%m-%d %H:%M:%S')
data.shape

(64702, 7)

### Análisis General

In [130]:
# Size of the dataset: distinct laps and distinct vehicles.
vehicles = data['vehicle_id'].unique()
for label, values in (('# laps: ', data['lap'].unique()), ('# vehicles: ', vehicles)):
    print(label, len(values), '->', values)

# laps:  7 -> [0 1 3 2 4 5 6]
# vehicles:  37 -> ['SJ03' 'SJ07' 'SJ02' 'SJ04' 'SJ20' 'SJ31' 'SJ27' 'SJ29' 'SJ28' 'SJ10'
 'SJ30' 'SJ37' 'SJ23' 'SJ35' 'SJ42' 'SJ36' 'SJ21' 'SJ38' 'SJ19' 'SJ13'
 'SJ39' 'SJ12' 'SJ34' 'SJ18' 'SJ41' 'SJ08' 'SJ14' 'SJ16' 'SJ09' 'SJ05'
 'SJ11' 'SJ01' 'SJ24' 'SJ40' 'SJ26' 'SJ25' 'SJ06']


In [131]:
# Number of monitoring rows recorded for each vehicle, keyed by vehicle id
# in order of first appearance.
veh_rows = {}
for veh in vehicles:
    # Counting the True entries of the mask equals the length of the filtered frame.
    veh_rows[veh] = int((data['vehicle_id'] == veh).sum())

print('# Max de datos: ', max(veh_rows.values()))
print('# Min de datos: ', min(veh_rows.values()))
veh_rows.items()

# Max de datos:  2607
# Min de datos:  3


dict_items([('SJ03', 1759), ('SJ07', 1862), ('SJ02', 2416), ('SJ04', 2356), ('SJ20', 2376), ('SJ31', 2191), ('SJ27', 2607), ('SJ29', 1391), ('SJ28', 1848), ('SJ10', 2194), ('SJ30', 2203), ('SJ37', 2561), ('SJ23', 1761), ('SJ35', 1994), ('SJ42', 1571), ('SJ36', 1985), ('SJ21', 1027), ('SJ38', 2310), ('SJ19', 2110), ('SJ13', 1884), ('SJ39', 809), ('SJ12', 1756), ('SJ34', 2325), ('SJ18', 2371), ('SJ41', 2245), ('SJ08', 1366), ('SJ14', 2078), ('SJ16', 2156), ('SJ09', 934), ('SJ05', 2324), ('SJ11', 1319), ('SJ01', 1978), ('SJ24', 1346), ('SJ40', 1227), ('SJ26', 3), ('SJ25', 55), ('SJ06', 4)])

### Analizando un vehículo

In [352]:
# Check whether a monitoring point passed by one of the bus stops.
def isStop(row, stops, r_nav=45, r_err=25):
    """Tag a monitoring point with the closest bus stop it plausibly passed.

    A stop is a candidate when its heading is within ``r_nav`` of the point's
    heading and the haversine distance between them is at most the stop's
    ``radio`` plus ``r_err``. Among the candidates the nearest one wins.

    Parameters
    ----------
    row : pandas.Series
        Monitoring point with ``navigation`` and ``latitude_longitude``
        entries. It is modified in place and returned.
    stops : pandas.DataFrame
        Stops with ``id``, ``navigation``, ``latitude_longitude`` and
        ``radio`` columns. It is not modified.
    r_nav : float, optional
        Maximum heading difference between point and stop (default 45).
    r_err : float, optional
        Extra distance tolerance in meters added to each stop's ``radio``
        (default 25).

    Returns
    -------
    pandas.Series
        ``row`` with ``stop`` (stop id as ``str``, or NaN when nothing
        matches) and ``dis`` (distance in meters rounded to 2 decimals, or 0
        when nothing matches).
    """
    # NOTE(review): assumes `navigation` is a compass heading in degrees
    # (0-360) - confirm against the data source. Headings are circular, so
    # 355 and 5 are 10 apart, not 350; a plain abs() difference would reject
    # every stop whose heading sits on the other side of north.
    heading_gap = (stops['navigation'] - row['navigation']).abs() % 360
    heading_gap = np.minimum(heading_gap, 360 - heading_gap)

    # Work on a copy: adding the 'dis' column to a filtered slice of the
    # caller's frame is what triggers pandas' SettingWithCopy warning.
    candidates = stops.loc[heading_gap <= r_nav].copy()
    candidates['dis'] = candidates['latitude_longitude'].apply(
        lambda stop: round(hvs(row['latitude_longitude'], stop, unit=Unit.METERS), 2)
    )
    candidates = candidates[candidates['dis'] <= candidates['radio'] + r_err]

    if len(candidates) != 0:
        nearest = candidates['dis'].idxmin()  # label of the closest candidate
        row['stop'] = str(candidates.loc[nearest, 'id'])
        row['dis'] = float(candidates.loc[nearest, 'dis'])
    else:
        row['stop'] = np.nan
        row['dis'] = 0

    return row

# Seconds elapsed between a point and the point right before it.
def time_travel(row, df):
    """Return the seconds between ``row`` and its predecessor in ``df``.

    ``df`` must be indexed 0..n-1 and have a datetime ``date_time`` column;
    ``row.name`` is the row's label in that index. The row labelled 0 has no
    predecessor and yields 0.
    """
    position = row.name
    if position == 0:
        return 0

    elapsed = df.loc[position, 'date_time'] - df.loc[position - 1, 'date_time']
    return elapsed.total_seconds()


In [356]:
# Vehicle and lap under analysis.
veh = vehicles[2]
lap = 1

# Monitoring points of that vehicle on that lap, re-indexed from 0.
is_selected = (data['vehicle_id'] == veh) & (data['lap'] == lap)
data_veh = data.loc[is_selected].reset_index(drop=True)

# Tag every point with the stop it matches, discard unmatched points and, for
# each stop, keep only the closest point. sort_index() puts the survivors back
# in their original row order before the index is rebuilt.
data_veh_stop = (
    data_veh
    .apply(isStop, axis=1, args=(df_stops_ida,))
    .dropna(subset=['stop'])
    .sort_values('dis')
    .drop_duplicates('stop')
    .sort_index()
    .reset_index(drop=True)
)

# Seconds between each matched stop and the previously matched one.
# NOTE(review): only detected stops are in this frame, so when a stop is missed
# the next value covers more than one link.
data_veh_stop['time_travel'] = data_veh_stop.apply(time_travel, axis=1, args=(data_veh_stop,))

# Attach the measurements to the link table; links whose end stop was never
# matched keep NaN in the joined columns.
df_out = df_linkref.join(data_veh_stop.set_index('stop'), on='end_stop')

In [357]:
# Optional clean-up, left disabled:
# df_out = df_out.drop(['id', 'velocity', 'navigation', 'vehicle_id'], axis=1)

# Links without a matched monitoring point have NaN in the joined 'id' column.
missing_links = df_out['id'].isna()
print('# nan: ', int(missing_links.sum()))
df_out

# nan:  26


Unnamed: 0,end_stop,linkref,id,date_time,lap,latitude_longitude,velocity,navigation,vehicle_id,dis,time_travel
0,36,0:36,21941715.0,2022-07-14 05:51:47,1.0,"(-13.5547672, -71.8574325)",11.0,300.0,SJ02,15.17,96.0
1,37,36:37,21941857.0,2022-07-14 05:53:21,1.0,"(-13.5533357, -71.861639)",11.0,280.0,SJ02,33.31,94.0
2,38,37:38,21942015.0,2022-07-14 05:54:52,1.0,"(-13.5522926, -71.8673)",8.0,287.0,SJ02,0.39,91.0
3,39,38:39,21942103.0,2022-07-14 05:55:33,1.0,"(-13.5515668, -71.8701948)",23.0,288.0,SJ02,3.30,41.0
4,40,39:40,21942210.0,2022-07-14 05:56:37,1.0,"(-13.5506637, -71.8737872)",24.0,285.0,SJ02,19.62,64.0
...,...,...,...,...,...,...,...,...,...,...,...
78,114,113:114,21951432.0,2022-07-14 06:56:35,1.0,"(-13.5473144, -71.9851209)",30.0,217.0,SJ02,27.44,24.0
79,115,114:115,,NaT,,,,,,,
80,116,115:116,21951864.0,2022-07-14 06:57:35,1.0,"(-13.5506069, -71.9891003)",33.0,228.0,SJ02,32.50,60.0
81,117,116:117,,NaT,,,,,,,


In [96]:
# Save the per-link table; the file is named "<vehicle id>sample.csv".
out_path = veh + 'sample.csv'
df_out.to_csv(out_path)