In [96]:
import pandas as pd
from haversine import haversine as hvs, Unit

# Extraer en CSV

In [2]:
# leer dataset monitoring
dt = pd.read_csv('monitoring.csv')

# parametros de extraccion
lap = 4
vehicle =  dt['vehicle_id_id'].unique()[1]

# diseño del df
dt_veh = dt[dt['vehicle_id_id'] == vehicle].copy()
dt_veh['date'] += ' ' + dt_veh['time']
dt_veh.drop('time', axis=1)
dt_veh = dt_veh.rename(columns={'date':'date_time'}, inplace=False)
dt_veh = dt_veh[dt_veh['lap'] == lap]

# ordenar por data time
dt_veh = dt_veh.sort_values(by='date_time').reset_index(drop=True)
dt_veh

Unnamed: 0,id,date_time,time,lap,latitude,longitude,velocity,navigation,vehicle_id_id
0,22066198,2022-07-14 15:32:20,15:32:20,4,-13.555669,-71.856324,4,42,SJ07
1,22067017,2022-07-14 15:36:21,15:36:21,4,-13.555698,-71.856394,2,255,SJ07
2,22067552,2022-07-14 15:38:57,15:38:57,4,-13.555688,-71.856388,3,256,SJ07
3,22068422,2022-07-14 15:43:03,15:43:03,4,-13.555729,-71.856400,4,244,SJ07
4,22068695,2022-07-14 15:44:18,15:44:18,4,-13.555952,-71.856371,9,107,SJ07
...,...,...,...,...,...,...,...,...,...
124,22106906,2022-07-14 18:38:35,18:38:35,4,-13.555744,-71.856360,1,302,SJ07
125,22108301,2022-07-14 18:44:57,18:44:57,4,-13.555705,-71.856299,2,213,SJ07
126,22109886,2022-07-14 18:52:24,18:52:24,4,-13.555736,-71.856328,3,232,SJ07
127,22113663,2022-07-14 19:07:14,19:07:14,4,-13.555708,-71.856296,2,250,SJ07


In [3]:
if 'lap' not in dt_veh.columns:
    dt_veh.to_csv('monitoring_veh/' + str(vehicle) + '.csv')
else:
    dt_veh.to_csv('monitoring_veh_lap/' + str(vehicle) + '-lap' + str(lap) + '.csv')

# Analisis de Datos

### Monitoring

In [98]:
# leer dataset monitoring
data = pd.read_csv('monitoring.csv')

# diseño del df: creacion data_time y latitude_longitude
data['date'] += ' ' + data['time']
data['latitude'] = data.apply(lambda row: (row['latitude'], row['longitude']), axis=1)
data.drop(['time', 'longitude'], inplace=True, axis=1)
data = data.rename(columns={'date':'date_time', 'latitude':'latitude_longitude', 'vehicle_id_id':'vehicle_id'}, inplace=False)

# convertir de object a datatime
data['date_time'] = pd.to_datetime(data['date_time'], format='%Y-%m-%d %H:%M:%S')
# data.info()
data

Unnamed: 0,id,date_time,lap,latitude_longitude,velocity,navigation,vehicle_id
0,21940692,2022-07-14 05:41:33,0,"(-13.5414627, -71.9106878)",64,88,SJ03
1,21948830,2022-07-14 06:43:13,1,"(-13.5316772, -71.9270098)",38,285,SJ07
2,21951171,2022-07-14 06:55:11,1,"(-13.5448934, -71.9819569)",9,168,SJ02
3,21953390,2022-07-14 07:05:16,1,"(-13.5402794, -71.9813266)",15,193,SJ04
4,21954612,2022-07-14 07:10:49,1,"(-13.5378226, -71.9802519)",5,24,SJ20
...,...,...,...,...,...,...,...
65150,21954398,2022-07-14 07:09:35,1,"(-13.5214927, -71.9694905)",27,191,SJ10
65151,21954611,2022-07-14 07:10:46,1,"(-13.5282156, -71.9400002)",27,100,SJ38
65152,21954843,2022-07-14 07:11:58,1,"(-13.5331445, -71.9120059)",26,302,SJ21
65153,21955152,2022-07-14 07:13:31,1,"(-13.5280222, -71.9398906)",11,304,SJ13


### Busstop

In [99]:
# leer dataset busstop
data_stops = pd.read_csv('busstop.csv') # id 117 end

# diseño del df: creacion de latitude_longitude
data_stops['latitude'] = data_stops.apply(lambda row: (row['latitude'], row['longitude']), axis=1)
data_stops.drop(['longitude', 'number'], inplace=True, axis=1)
data_stops = data_stops.rename(columns={'latitude':'latitude_longitude'}, inplace=False)

# data_stops.info()   
data_stops

Unnamed: 0,id,name,latitude_longitude,navigation
0,38,Ccollana,"(-13.552296, -71.867299)",283
1,39,Puente Huaccoto,"(-13.551592, -71.870211)",283
2,40,Kayra,"(-13.55064, -71.873952)",289
3,41,Forestal,"(-13.55002, -71.876104)",286
4,42,Grifo Tapia,"(-13.549282, -71.878649)",284
...,...,...,...,...
146,183,Tapia,"(-13.549241, -71.878975)",107
147,184,Forestal,"(-13.549985, -71.876345)",107
148,185,Kayra,"(-13.550608, -71.874402)",107
149,186,Puente,"(-13.551644, -71.870166)",105


### Creacion de linkref

In [100]:
# def ruta de ida y vuelta
data_stops['dir'] = data_stops.apply(lambda row: 0 if row.name <= 79 else 1, axis=1) # 0 ida, 1 vuelta

# select ida
data_stops_ida = data_stops[data_stops['dir'] == 0].copy()
data_stops_ida

Unnamed: 0,id,name,latitude_longitude,navigation,dir
0,38,Ccollana,"(-13.552296, -71.867299)",283,0
1,39,Puente Huaccoto,"(-13.551592, -71.870211)",283,0
2,40,Kayra,"(-13.55064, -71.873952)",289,0
3,41,Forestal,"(-13.55002, -71.876104)",286,0
4,42,Grifo Tapia,"(-13.549282, -71.878649)",284,0
...,...,...,...,...,...
75,113,Segundo,"(-13.547528, -71.985248)",221,0
76,114,Reservorio,"(-13.54966, -71.988003)",225,0
77,115,Cuarto,"(-13.550408, -71.98888)",230,0
78,116,Quinto,"(-13.550841, -71.989448)",203,0


In [101]:
def generateLinks(row):
    row['stop'] = df_linkref.loc[row.name+1, 'id']
    row['linkref'] = str(row['id']) + ':' + str(df_linkref.loc[row.name+1, 'id'])
    return row
    

df_linkref = data_stops_ida.copy()
df_linkref = df_linkref.head(79).apply(generateLinks, axis=1)
df_linkref = df_linkref.drop(df_linkref.columns[:5], axis=1)
df_linkref

Unnamed: 0,stop,linkref
0,39,38:39
1,40,39:40
2,41,40:41
3,42,41:42
4,43,42:43
...,...,...
74,113,112:113
75,114,113:114
76,115,114:115
77,116,115:116


### Analisis de monitoring

In [103]:
# cantidad de datos: laps y vehicles
vehicles = data['vehicle_id'].unique()
print('# laps: ', len(data['lap'].unique()), '->',data['lap'].unique())
print('# vehicles: ', len(vehicles), '->' ,vehicles)

# laps:  7 -> [0 1 3 2 4 5 6]
# vehicles:  37 -> ['SJ03' 'SJ07' 'SJ02' 'SJ04' 'SJ20' 'SJ31' 'SJ27' 'SJ29' 'SJ28' 'SJ10'
 'SJ30' 'SJ37' 'SJ23' 'SJ35' 'SJ42' 'SJ36' 'SJ21' 'SJ38' 'SJ19' 'SJ13'
 'SJ39' 'SJ12' 'SJ34' 'SJ18' 'SJ41' 'SJ08' 'SJ14' 'SJ16' 'SJ09' 'SJ05'
 'SJ11' 'SJ01' 'SJ24' 'SJ40' 'SJ26' 'SJ25' 'SJ06']


In [104]:
# datos por cada vehiculo
veh_rows = {}
for veh in vehicles: 
    veh_rows[veh] = len(data[data['vehicle_id'] == veh])

print('# Max de datos: ', max(veh_rows.values()))
print('# Min de datos: ', min(veh_rows.values()))
veh_rows.items()

# Max de datos:  2607
# Min de datos:  3


dict_items([('SJ03', 1759), ('SJ07', 1882), ('SJ02', 2437), ('SJ04', 2380), ('SJ20', 2409), ('SJ31', 2191), ('SJ27', 2607), ('SJ29', 1403), ('SJ28', 1864), ('SJ10', 2214), ('SJ30', 2225), ('SJ37', 2589), ('SJ23', 1761), ('SJ35', 1994), ('SJ42', 1589), ('SJ36', 2003), ('SJ21', 1049), ('SJ38', 2340), ('SJ19', 2110), ('SJ13', 1884), ('SJ39', 809), ('SJ12', 1756), ('SJ34', 2359), ('SJ18', 2402), ('SJ41', 2245), ('SJ08', 1366), ('SJ14', 2078), ('SJ16', 2156), ('SJ09', 934), ('SJ05', 2333), ('SJ11', 1319), ('SJ01', 2004), ('SJ24', 1346), ('SJ40', 1296), ('SJ26', 3), ('SJ25', 55), ('SJ06', 4)])

### Analizando un vehiculo

In [105]:
# seleccionando vehiculo
veh = vehicles[1]

# filtrando datos del vehiculo
data_veh = data[data['vehicle_id'] == veh]
data_veh = data_veh.sort_values(by='date_time').reset_index(drop=True)
print('laps del vehiculo ' + veh + ' : ', data_veh['lap'].unique())

laps del vehiculo SJ07 :  [0 1 2 3 4 5]


In [106]:
data_veh = data_veh[(data_veh['lap'] != 0) & (data_veh['lap'] != 5)].reset_index(drop=True)
print('laps del vehiculo ' + veh + ' : ', data_veh['lap'].unique())

laps del vehiculo SJ07 :  [1 2 3 4]


In [11]:
# USO DE HAVERSINE
# lyon = (-13.5476402, -71.8805191) # (lat, lon)
# stop = data_stops.loc[0, 'latitude_longitude']

# print(stop)
# hvs(lyon, stop)

In [107]:
data_veh

Unnamed: 0,id,date_time,lap,latitude_longitude,velocity,navigation,vehicle_id
0,21944991,2022-07-14 06:20:19,1,"(-13.5546226, -71.8563921)",7,346,SJ07
1,21945043,2022-07-14 06:20:43,1,"(-13.5549784, -71.8559785)",15,158,SJ07
2,21945096,2022-07-14 06:21:07,1,"(-13.555744, -71.8559083)",12,263,SJ07
3,21945141,2022-07-14 06:21:24,1,"(-13.5553553, -71.8566123)",26,303,SJ07
4,21945179,2022-07-14 06:21:36,1,"(-13.5547807, -71.8574136)",35,305,SJ07
...,...,...,...,...,...,...,...
1494,22106906,2022-07-14 18:38:35,4,"(-13.5557443, -71.8563604)",1,302,SJ07
1495,22108301,2022-07-14 18:44:57,4,"(-13.5557048, -71.8562991)",2,213,SJ07
1496,22109886,2022-07-14 18:52:24,4,"(-13.5557357, -71.8563283)",3,232,SJ07
1497,22113663,2022-07-14 19:07:14,4,"(-13.5557079, -71.8562957)",2,250,SJ07


In [108]:
def isStop(row):
    r_nav = 10
    r_dis = 25 # meters
    stops = data_stops_ida[abs(data_stops_ida['navigation'] - row['navigation']) <= r_nav]
    stops['dis'] = stops['latitude_longitude'].apply(lambda stop: round(hvs(row['latitude_longitude'], stop, unit=Unit.METERS), 2))
    stops = stops[stops['dis'] <= r_dis]

    if len(stops) != 0:
        # print(stops.loc[stops['dis'].idxmin(), ['id', 'navigation', 'latitude_longitude', 'dis']])
        # print(stops.loc[stops['dis'].idxmin(), 'id'])
        return int(stops.loc[stops['dis'].idxmin(), 'id'])


In [109]:
data_veh['stop'] = data_veh.apply(isStop, axis=1)
data_out = data_veh[data_veh['stop'].notna()].reset_index(drop=True)

In [111]:
data_out = data_out[data_out['lap'] == 1]
data_out.shape

(31, 8)

In [112]:
# funcion para calcular el tiempo de un punto con su antecesor en seg
def time_travel(row):
    if row.name == 0:
        return 0
    else:
        return (data_out.loc[row.name, 'date_time'] - data_out.loc[row.name-1, 'date_time']).total_seconds()

In [115]:
data_out['time_travel'] = data_out.apply(time_travel, axis=1)
data_out.head()

Unnamed: 0,id,date_time,lap,latitude_longitude,velocity,navigation,vehicle_id,stop,time_travel
0,21945647,2022-07-14 06:24:31,1,"(-13.5522946, -71.8673761)",15,278,SJ07,38.0,0.0
1,21945772,2022-07-14 06:25:11,1,"(-13.5515776, -71.8702452)",17,286,SJ07,39.0,40.0
2,21946037,2022-07-14 06:26:51,1,"(-13.5499653, -71.8763163)",18,284,SJ07,41.0,100.0
3,21946142,2022-07-14 06:27:33,1,"(-13.5492052, -71.8787996)",28,289,SJ07,42.0,42.0
4,21946414,2022-07-14 06:29:08,1,"(-13.547809, -71.8839344)",38,289,SJ07,46.0,95.0


In [120]:
df_out = df_linkref.join(data_out.set_index('stop'), on='stop')
df_out = df_out.drop(['id', 'velocity', 'navigation', 'vehicle_id', 'latitude_longitude', 'lap', 'stop'], axis=1)
df_out

Unnamed: 0,linkref,date_time,time_travel
0,38:39,2022-07-14 06:25:11,40.0
1,39:40,NaT,
2,40:41,2022-07-14 06:26:51,100.0
3,41:42,2022-07-14 06:27:33,42.0
4,42:43,NaT,
...,...,...,...
74,112:113,NaT,
75,113:114,NaT,
76,114:115,2022-07-14 07:22:32,72.0
77,115:116,NaT,


In [16]:
data_out.to_csv(veh + 'ida_stop.csv')