In [1]:
import pandas as pd
pd.set_option('display.max_columns', 200)
import numpy as np
import folium

import matplotlib.pyplot as plt
plt.style.use('seaborn')
%matplotlib inline
import haversine as hs

import warnings
import os, os.path
import itertools
from tqdm import tqdm
warnings.filterwarnings("ignore")

# Вывод количества водителей в папке
#len([name for name in os.listdir('./drivers_with_gps_and_sl_problems/')])

In [168]:
# Order log shared by all drivers; every timestamp column listed in
# `time_cols` is parsed from text into datetimes right after loading.
time_cols = ['ORDER_DTTM', 'DRIVER_ASSIGNED_DTTM', 'READY_FOR_COLL_DTTM', 'CLIENT_COLL_DTTM', 'ORDER_FINISHED_DTTM']
orders_df = pd.read_csv('rides_of_drivers.csv', index_col=0)
orders_df = orders_df.assign(**{col: pd.to_datetime(orders_df[col]) for col in time_cols})

# Sort the data chronologically
def sort_by_time(df):
    """Parse the ``time`` column and return the rows ordered by it.

    The ``time`` column of the frame that is passed in is converted to
    datetimes in place; the returned frame is a sorted copy whose index is
    renumbered from 0.
    """
    df['time'] = pd.to_datetime(df['time'])
    ordered = df.sort_values(by='time')
    return ordered.reset_index(drop=True)


# Weights used to blend the per-window velocity estimates
def doubled_koeffs(n, k=1.2):
    """Return ``n`` geometrically growing weights that sum to 1.

    Args:
        n: number of weights to produce.
        k: ratio between consecutive weights (each weight is ``k`` times
            the previous one).

    Returns:
        A list of ``n`` floats ``[f, f*k, f*k**2, ...]`` normalised so that
        they add up to 1. An empty list when ``n <= 0``.
    """
    if n <= 0:
        # Nothing to weight; also avoids 0/0 in the closed-form below for n == 0.
        return []
    if k == 1:
        # The geometric-series formula degenerates to 0/0 here; its limit as
        # k -> 1 is the uniform distribution.
        return [1 / n] * n
    # First term of a geometric series with ratio k whose n terms sum to 1.
    f_1 = (k - 1) / (k ** n - 1)
    return [f_1 * (k ** i) for i in range(n)]


def make_time_statistics(df):
    """Add calendar and clock breakdown columns derived from ``df['time']``.

    The columns ``hour``, ``minutes``, ``seconds``, ``day`` (a ``date``) and
    ``clock`` (a ``time`` of day) are written onto ``df`` itself.

    Args:
        df: frame with a ``time`` column holding datetimes.

    Returns:
        The same frame object, with the five columns added.
    """
    # Vectorised datetime accessor instead of five row-by-row ``apply`` calls;
    # ``to_datetime`` is a no-op for datetime64 columns and also accepts an
    # object column of Timestamps.
    stamps = pd.to_datetime(df['time']).dt
    df['hour'] = stamps.hour
    df['minutes'] = stamps.minute
    df['seconds'] = stamps.second
    df['day'] = stamps.date
    df['clock'] = stamps.time
    return df


# Counts for how many consecutive minutes the same "seconds" value persists
# in the data (written for the accelerometer data).
# Not used at the moment, but kept on purpose.
def check_for_the_same_second(df):
    """Return the lengths of runs in which the ``seconds`` value repeats.

    Rows are scanned in order. A run starts whenever ``seconds`` differs from
    the previous row and grows by one each time ``seconds`` stays the same
    while ``minutes`` changes. A run's length is recorded when the next run
    starts.

    NOTE(review): the run that is still open when the data ends is never
    recorded -- confirm whether that is intended.
    """
    run_lengths = []
    last_second, last_minute = -1, -1
    current_run = 0
    for second, minute in zip(df['seconds'], df['minutes']):
        if second != last_second:
            # Close the previous run (a dummy 0 on the very first row).
            run_lengths.append(current_run)
            current_run = 1
        elif minute != last_minute:
            current_run += 1
        last_second, last_minute = second, minute
    # Drop the placeholder recorded before the first real run.
    return run_lengths[1:]


# Velocity computed over n_points + 1 consecutive trailing intervals
def count_velocity(df, n_points=0):
    """Estimate the speed at every fix from its distance to preceding fixes.

    Fixes are truncated to whole seconds and averaged per second. For every
    window size ``i = 1 .. n_points + 1`` the path length over the last ``i``
    segments (as returned by ``hs.haversine``, kilometres with the package's
    default unit) is divided by the elapsed time in hours. The per-window
    speeds are then blended with the weights from ``doubled_koeffs``, which
    give longer windows a larger share.

    Args:
        df: frame with a datetime ``time`` column and ``lat`` / ``lon``
            columns. Its ``time`` column is truncated to seconds in place.
        n_points: number of additional preceding intervals to blend in.

    Returns:
        A new frame with one row per distinct second containing the averaged
        numeric columns, the helper columns ``prev_lat``, ``prev_lon``,
        ``distance_i``, ``time_i``, ``timedelta_i`` and the blended
        ``velocity``; remaining NaN values are replaced with 0.
    """
    df['time'] = df['time'].apply(lambda x: x.replace(microsecond=0))
    # numeric_only=True: text columns cannot be averaged; older pandas dropped
    # them silently, pandas >= 2 raises unless asked to skip them.
    df = df.groupby('time').mean(numeric_only=True).reset_index()

    # Previous fix of every row. The first row has no predecessor and is
    # back-filled with itself, which yields zero distance and zero elapsed time.
    df[['prev_lat', 'prev_lon', 'time_1']] = df[['lat', 'lon', 'time']].shift(periods=1).bfill()
    df['distance_1'] = df[['lat', 'lon', 'prev_lat', 'prev_lon']].apply(
        lambda x: hs.haversine((x.lat, x.lon), (x.prev_lat, x.prev_lon)), axis=1)
    # Elapsed time in hours.
    df['timedelta_1'] = (df['time'] - df['time_1']).dt.total_seconds() / 60 / 60

    for i in range(2, n_points + 2):
        # Segment length and start time from i - 1 rows earlier ...
        df[[f'distance_{i}', f'time_{i}']] = df[['distance_1', 'time_1']].shift(periods=i - 1).bfill()
        # ... accumulated into the path length over the last i segments.
        df[f'distance_{i}'] = df[f'distance_{i}'] + df[f'distance_{i-1}']
        df[f'timedelta_{i}'] = (df['time'] - df[f'time_{i}']).dt.total_seconds() / 60 / 60

    # The weights do not depend on the row, so compute them once.
    koefs = doubled_koeffs(n_points + 1)

    def count_v(x):
        """Blend the per-window speeds of one row into a single value."""
        res = 0
        for i in range(1, n_points + 2):
            if x[f'timedelta_{i}'] == 0:
                if x[f'distance_{i}'] > 0.3:
                    # A jump of more than 0.3 with no elapsed time: infinite speed.
                    res += np.inf
                    break
                # Otherwise the window contributes nothing.
            else:
                res += koefs[i - 1] * x[f'distance_{i}'] / x[f'timedelta_{i}']
        return res

    df['velocity'] = df.apply(count_v, axis=1)
    return df.fillna(0.)
    

# Read one driver's data and transform it right away
def read_data(path):
    """Load one driver's GPS track, accelerometer log and orders.

    Args:
        path: driver directory whose last component is the integer driver
            hash (a trailing slash is optional). It must contain
            ``track.csv`` and ``accelerometer.csv``.

    Returns:
        A tuple ``(track, accelerometer, orders)``:
        ``track`` is the GPS track sorted by ``gps_time`` with velocity and
        time-breakdown columns added; ``accelerometer`` is the raw
        accelerometer frame as read from disk; ``orders`` are the rows of the
        module-level ``orders_df`` for this driver, sorted by ``ORDER_DTTM``.
    """
    # normpath drops a trailing separator so basename is the hash directory
    # whether or not the caller appended '/'.
    driver_dir = os.path.normpath(path)
    hash_ = int(os.path.basename(driver_dir))

    track = pd.read_csv(os.path.join(driver_dir, 'track.csv'))
    accelerometer = pd.read_csv(os.path.join(driver_dir, 'accelerometer.csv'))

    track['time'] = track.gps_time
    track = sort_by_time(track)
    track = count_velocity(track, n_points=1)
    track = make_time_statistics(track)
    # Velocity is deliberately not computed for the accelerometer data: it has
    # many points inside a single second followed by gaps of about a minute.

    orders = orders_df[orders_df.driver_hash == hash_]
    orders = orders.sort_values('ORDER_DTTM').reset_index(drop=True)

    return track, accelerometer, orders


# Split one velocity class of a track into drawable segments
def _split_into_segments(df, anomalous, time_thold, dist_thold):
    """Cut ``df`` into runs of consecutive points separated by gaps.

    Args:
        df: frame indexed 0..n-1 with ``time``, ``lat`` and ``lon`` columns.
        anomalous: True for the high-velocity classes. For those only a time
            gap starts a new segment and a segment needs at least 2 points;
            otherwise a distance jump also starts a new segment and a segment
            needs at least 7 points.
        time_thold: largest allowed pause between neighbouring points, seconds.
        dist_thold: largest allowed jump between neighbouring points, in the
            unit returned by ``hs.haversine``.

    Returns:
        List of sub-frames of ``df``, one per retained segment.
    """
    # A segment first..last is kept when (last - first) > min_span.
    min_span = 0 if anomalous else 5
    segments = []
    start_idx = 0
    prev_time = prev_pos = None
    for idx, (cur_time, lat, lon) in enumerate(zip(df['time'], df['lat'], df['lon'])):
        cur_pos = (lat, lon)
        if idx > 0:
            is_gap = (cur_time - prev_time).total_seconds() > time_thold
            if not anomalous and not is_gap:
                is_gap = hs.haversine(cur_pos, prev_pos) > dist_thold
            if is_gap:
                if (idx - 1) - start_idx > min_span:
                    segments.append(df.loc[start_idx:idx - 1, :])
                start_idx = idx
        prev_time, prev_pos = cur_time, cur_pos

    # Flush the run that is still open when the data ends; without this the
    # tail of every class (or a whole gap-free track) would never be drawn.
    last_idx = len(df) - 1
    if last_idx - start_idx > min_span:
        segments.append(df.loc[start_idx:last_idx, :])
    return segments


# Draw a driver's GPS track on a map, coloured by velocity class
def plot_tracks(driver_data):
    """Render the GPS track of one driver as coloured polylines on a map.

    Points are grouped by their ``velocity`` value into five classes
    (< 10 green, 10-350 darkgreen, 350-500 orange, 500-1000 red,
    >= 1000 darkred). Each class is split into segments at pauses longer than
    20 minutes (and, for the two green classes, at jumps larger than 1.5 in
    haversine units) and every segment is drawn as a polyline whose popup
    shows its mean velocity. Orange/red/darkred segments additionally get
    start and end markers with timestamps.

    Args:
        driver_data: tuple as returned by ``read_data``; only element 0 (the
            track frame with ``lat``, ``lon``, ``time``, ``velocity``) is used.

    Returns:
        The ``folium.Map`` with all segments added.
    """
    m = folium.Map(location=[55.7504461, 37.6174943],
                   zoom_start=10)

    track = driver_data[0]
    velocity = track['velocity']
    anomalous_colors = ('orange', 'red', 'darkred')
    velocity_classes = [
        (track[velocity < 10], 'green'),
        (track[(velocity >= 10) & (velocity < 350)], 'darkgreen'),
        (track[(velocity >= 350) & (velocity < 500)], 'orange'),
        (track[(velocity >= 500) & (velocity < 1000)], 'red'),
        (track[velocity >= 1000], 'darkred'),
    ]

    time_thold = 60 * 20  # seconds
    dist_thold = 1.5
    skipped = 0
    for df, color in velocity_classes:
        anomalous = color in anomalous_colors
        df = df.reset_index(drop=True)
        components = _split_into_segments(df, anomalous, time_thold, dist_thold)

        for comp in components:
            # Drawing stays best-effort: one bad segment must not abort the
            # whole map. Only ordinary errors are caught, so Ctrl-C still works.
            try:
                start = comp.iloc[0, :]
                end = comp.iloc[-1, :]
                folium.PolyLine(comp[['lat', 'lon']].values,
                                color=color,
                                popup=f'V = {round(comp.velocity.mean(), 1)} km/h',
                                weight=3,
                                opacity=1).add_to(m)
                if anomalous:
                    folium.Marker(
                            location=[start.lat, start.lon],
                            popup=f'Start: {start.time.ctime()}',
                            icon=folium.Icon(color=color)).add_to(m)
                    folium.Marker(
                            location=[end.lat, end.lon],
                            popup=f'End: {end.time.ctime()}',
                            icon=folium.Icon(color=color)).add_to(m)
            except Exception:
                skipped += 1

    if skipped:
        print(f'plot_tracks: skipped {skipped} segment(s) that could not be drawn')

    # Optional: mark individual points on the track (disabled -- drawing every
    # point is very slow).
#     for i, (x, y, time_) in enumerate(driver_data[0][['lat', 'lon', 'time']].values[::2]):
#         folium.CircleMarker(
#             location=[x, y],
#             popup=f'{time_.ctime()}',
#             color='blue',
#             fill=False,
#             radius=3).add_to(m)

    return m

## Фродеры

In [160]:
folder = './drivers_with_gps_and_sl_problems/'
f_drivers = []
TO_PLOT = True

# Only per-driver sub-directories are processed: os.listdir also returns plain
# files (e.g. '.DS_Store'), which read_data cannot interpret as a driver hash.
# Sorting makes the order of `f_drivers` reproducible between runs.
driver_dirs = sorted(name for name in os.listdir(folder)
                     if os.path.isdir(os.path.join(folder, name)))

for driver in tqdm(driver_dirs):
    driver_path = folder + driver + '/'
    data = read_data(driver_path)
    f_drivers.append(data)
    if TO_PLOT:
        plot_tracks(data).save(driver_path + 'plot.html')

100%|██████████| 14/14 [00:54<00:00,  3.92s/it]


## Не фродеры

In [161]:
folder = './data_good_right_dates/'
good_drivers = []
TO_PLOT = True

# Only per-driver sub-directories are processed: os.listdir also returns plain
# files (e.g. '.DS_Store'), which read_data cannot interpret as a driver hash.
# Sorting makes the order of `good_drivers` reproducible between runs.
driver_dirs = sorted(name for name in os.listdir(folder)
                     if os.path.isdir(os.path.join(folder, name)))

for driver in tqdm(driver_dirs):
    driver_path = folder + driver + '/'
    data = read_data(driver_path)
    good_drivers.append(data)
    if TO_PLOT:
        plot_tracks(data).save(driver_path + 'plot.html')

100%|██████████| 23/23 [07:11<00:00, 18.76s/it]


In [191]:
folder = './data/'
drivers = []
TO_PLOT = True

# Only per-driver sub-directories are processed; this also excludes stray
# files such as '.DS_Store'. Sorting makes the processing order reproducible.
driver_dirs = sorted(name for name in os.listdir(folder)
                     if os.path.isdir(os.path.join(folder, name)))

for driver in tqdm(driver_dirs):
    driver_path = folder + driver + '/'
    # Drivers that already have a rendered map are skipped entirely, so on a
    # re-run `drivers` only contains the newly processed ones.
    if os.path.exists(driver_path + 'plot.html'):
        continue
    data = read_data(driver_path)
    drivers.append(data)
    if TO_PLOT:
        plot_tracks(data).save(driver_path + 'plot.html')

100%|██████████| 280/280 [30:01<00:00,  6.44s/it]   


In [19]:
# import webbrowser
# i = 0 # max - 13

# k = [0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280]
# folder = './drivers_with_gps_and_sl_problems/'
# #folder = './data/'
# for driver in tqdm(os.listdir(folder)): #[k[i]: k[i+1]]):
#     webbrowser.open(folder + driver + '/' + 'plot.html')

100%|██████████| 20/20 [00:08<00:00,  2.30it/s]
