## Imports

In [47]:
import pandas as pd
import sys, os, importlib
from datetime import datetime
import numpy as np

In [48]:
# Let pandas render up to 100 rows before it truncates a displayed frame.
pd.set_option('display.max_rows', 100)

# The helper modules below are not installed packages; they are resolved through
# this relative folder (presumably where the *_manipulator modules live), so it
# has to be on sys.path before they are imported. The guard avoids adding the
# same entry again when the cell is re-run.
if os.path.join('..','0_funcoes_base') not in sys.path:
    sys.path.append(os.path.join('..','0_funcoes_base'))

# Project-local helpers, bound to the names the rest of the notebook uses.
import df_manipulator
import date_manipulator as dt_manipulator
import plot_manipulator
import file_manipulator



## Dados de configuração

In [49]:
# Location and format of the CSV this notebook reads.
input_config = {
    'file': dict(
        ref_dir='./out/',
        filename='3_export_lunar_20200517.csv',
        delimiter=';',
    ),
}

# Location and format of the CSV this notebook writes; 'prefix' is handed to
# file_manipulator.get_out_filename together with the input filename.
output_config = {
    'file': dict(
        ref_dir='./out',
        delimiter=';',
        with_header=True,
        prefix='4_',
    ),
}

## Funções

In [50]:
def map_region_into_df(df, speed_bump_id, region_id, relative_start_region, relative_end_region, verbose=False):
    """Label the rows surrounding each speed-bump occurrence with ``region_id``.

    For every index value returned by ``df_manipulator.get_speed_bumps_idx`` a
    window ``[idx - relative_start_region, idx + relative_end_region]`` is
    requested from ``dt_manipulator.get_date_window``, bounded by the smallest
    and largest index values of ``df``. Rows whose index belongs to any
    retained window get ``region_id`` written to their ``region_id`` column.

    Args:
        df: Source dataframe. It is not modified; a copy is labelled and
            returned. It must already contain a ``region_id`` column. The
            index values are expected to be pandas timestamps (the speed-bump
            index values are converted with ``to_pydatetime``).
        speed_bump_id: Forwarded to ``df_manipulator.get_speed_bumps_idx`` to
            select which speed-bump rows anchor the windows.
        region_id: Label written to the selected rows. When it is ``0``, any
            window that contains a speed-bump timestamp is discarded.
        relative_start_region: Offset subtracted from each speed-bump index
            value to obtain the window start.
        relative_end_region: Offset added to each speed-bump index value to
            obtain the window end.
        verbose: When true, print the bounds of every window.

    Returns:
        A copy of ``df`` with the ``region_id`` column updated. When no
        speed-bump occurrence is found the copy is returned unchanged.

    Raises:
        KeyError: If windows were found but ``df`` has no ``region_id`` column.
    """
    _df = df.copy()

    # Speed-bump index values, grouped by speed-bump id.
    sb_indexes_by_id = df_manipulator.get_speed_bumps_idx(_df, speed_bump_id=speed_bump_id)

    # Flatten once into a set: it is only used for membership tests, and
    # building it here avoids rebuilding it for every window below.
    speed_bump_set = {
        sb.to_pydatetime()
        for sb_indexes in sb_indexes_by_id.values()
        for sb in sb_indexes
    }

    # Windows must not extend beyond the data that is actually available.
    available_min_idx = _df.index.min()
    available_max_idx = _df.index.max()

    region_windows = []
    for sb_id, sb_indexes in sb_indexes_by_id.items():
        for sb_idx in sb_indexes:
            window_start = sb_idx - relative_start_region
            window_end = sb_idx + relative_end_region

            if verbose:
                print(f'Region {region_id}: [{sb_id}] {sb_idx} -> from {window_start} to {window_end}')

            region_windows.append(
                dt_manipulator.get_date_window(
                    start_date=window_start,
                    end_date=window_end,
                    bound=(available_min_idx, available_max_idx),
                )
            )

    if not region_windows:
        return _df

    # Rules
    ## Region 0: without speedbump -> drop windows that contain one
    ## Region 1: just one speedbump -> no extra filtering is applied
    region_timestamps = set()
    for window in region_windows:
        if region_id == 0 and not speed_bump_set.isdisjoint(window):
            continue
        region_timestamps.update(window)

    # Same failure mode as reading the column directly: the label column has
    # to exist, otherwise .loc would silently create it filled with NaN.
    if 'region_id' not in _df.columns:
        raise KeyError('region_id')

    # Single .loc assignment instead of chained indexing, which may write to a
    # temporary object (SettingWithCopyWarning) and is a no-op under
    # Copy-on-Write.
    in_region = _df.index.isin(list(region_timestamps))
    _df.loc[in_region, 'region_id'] = region_id

    return _df

## Carrega dataframe

In [51]:
# Load the input file described by input_config (filename, directory, delimiter).
df = df_manipulator.load_dataframe(
    input_config['file']['filename'],
    input_config['file']['ref_dir'],
    input_config['file']['delimiter'],
)

# Convert the timestamp column to datetimes, then hand it to the project helper
# that installs it as the index (third argument presumably means "in place").
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_manipulator.set_index(df, 'timestamp', True)

In [52]:
# Preview the loaded dataframe (pandas shortens long frames when rendering).
df

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-17 20:10:48.400,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.500,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.600,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.700,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.800,0,9.746048,1.806213,-22.921991,-42.473372
...,...,...,...,...,...
2020-05-17 20:35:51.100,0,9.250443,1.770294,-22.925816,-42.484631
2020-05-17 20:35:51.200,0,10.105179,1.660172,-22.925816,-42.484631
2020-05-17 20:35:51.300,0,9.920822,2.608276,-22.925816,-42.484631
2020-05-17 20:35:51.400,0,9.523392,2.505325,-22.925816,-42.484631


## Adiciona regiões nos registros

Seja *sb(t)* o instante em que o veículo passou pelo quebra-molas.

Serão classificados **dois tipos** de regiões:

- Região 1 (`region_id = 0`)
    - Momentos da direção em que não constam quebra-molas próximos;
    - Intervalo:  \[sb(t-inf), sb(t-5)\] ∪ \[sb(t+2), sb(t+inf)\]
- Região 2 (`region_id = 1`)
    - Momentos que precedem o quebra-molas;
    - Inclui a ação do motorista de frear para o quebra-molas, passar pelo mesmo e acelerar;
    - Intervalo:  \[sb(t-5), sb(t+2)\]


**Obs.:** em sb(t-x), x é expresso em segundos.

In [53]:
# Work on a copy of the loaded frame in which every row starts with label 0.
df_out = df.assign(region_id=0)

# Relabel as 1 the rows from 5 s before to 2 s after each occurrence selected
# by speed_bump_id=1.
df_out = map_region_into_df(
    df_out,
    speed_bump_id=1,
    region_id=1,
    relative_start_region=pd.Timedelta(seconds=5),
    relative_end_region=pd.Timedelta(seconds=2),
    verbose=False,
)

In [54]:
# Number of rows carrying each region label, as a sanity check of the mapping.
df_out.region_id.value_counts()

0    11443
1     2680
Name: region_id, dtype: int64

## Adiciona normalização, diff, média móvel e desvio padrão móvel do eixo Z

## Salva o dataframe de saída

In [55]:
# Standardise the z axis (zero mean, unit standard deviation over the whole frame).
z_norm = (df_out['z'] - df_out['z'].mean()) / df_out['z'].std()
df_out['z-norm'] = z_norm

# For each window size add, in this column order: the difference to the value
# `window` rows earlier, the rolling mean and the rolling standard deviation.
for window in (10, 20):
    z_rolling = z_norm.rolling(window)
    df_out[f'z-diff_{window}'] = z_norm.diff(periods=window)
    df_out[f'z-mean_{window}'] = z_rolling.mean()
    df_out[f'z-std_{window}'] = z_rolling.std()

# The leading rows have no complete window/lag, so their features are NaN; drop them.
# NOTE: this cell reads and rewrites df_out, so re-running it alone drops more rows.
df_out = df_out.dropna(how='any')

In [56]:
# Preview the labelled frame with the derived z-axis feature columns.
df_out

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng,region_id,z-norm,z-diff_10,z-mean_10,z-std_10,z-diff_20,z-mean_20,z-std_20
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-05-17 20:10:50.400,0,3.212265,15.888901,-22.921991,-42.473372,0,6.462035,6.814763,0.006528,2.595076,6.793177,-0.218775,1.806092
2020-05-17 20:10:50.500,0,7.222565,-8.127335,-22.921991,-42.473372,0,-4.934918,-4.582190,-0.451691,3.033131,-4.603776,-0.448964,2.091927
2020-05-17 20:10:50.600,0,6.200241,-0.556870,-22.921991,-42.473372,0,-1.342339,-1.325916,-0.584283,3.040960,-0.877128,-0.492820,2.101458
2020-05-17 20:10:50.700,0,6.887375,-2.287872,-22.921991,-42.473372,0,-2.163790,-2.147367,-0.799020,3.072064,-1.698579,-0.577749,2.134350
2020-05-17 20:10:50.800,0,6.887375,-2.287872,-22.921991,-42.473372,0,-2.163790,-1.694031,-0.968423,3.098484,-1.942857,-0.674892,2.161298
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-17 20:35:51.100,0,9.250443,1.770294,-22.925816,-42.484631,0,-0.237979,-0.313583,0.111056,0.930938,-0.160202,0.145595,0.644656
2020-05-17 20:35:51.200,0,10.105179,1.660172,-22.925816,-42.484631,0,-0.290238,-0.428332,0.068222,0.939371,-0.553313,0.117929,0.651188
2020-05-17 20:35:51.300,0,9.920822,2.608276,-22.925816,-42.484631,0,0.159688,0.021593,0.070382,0.939574,-0.068168,0.114521,0.650761
2020-05-17 20:35:51.400,0,9.523392,2.505325,-22.925816,-42.484631,0,0.110832,-0.212461,0.049136,0.935614,-0.193149,0.104863,0.649233


In [57]:
# Ask the project helper for the output path, built from the configured prefix,
# the output directory and the input filename.
out_filename = file_manipulator.get_out_filename(
    output_config['file']['prefix'],
    output_config['file']['ref_dir'],
    input_config['file']['filename'],
)

# Write the frame (index included) using the configured delimiter and header flag.
df_out.to_csv(
    out_filename,
    sep=output_config['file']['delimiter'],
    header=output_config['file']['with_header'],
)

print(f'O arquivo {out_filename} foi gerado!')

O arquivo ./out/4_export_lunar_20200517.csv foi gerado!
