## Imports

In [21]:
import pandas as pd
import sys, os, importlib
from datetime import datetime
import numpy as np

In [22]:
# Render up to 100 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 100)

# Register the sibling helper directory on the import path only once, so
# re-running this cell does not keep growing sys.path.
_helpers_dir = os.path.join('..', '0_funcoes_base')
if _helpers_dir not in sys.path:
    sys.path.append(_helpers_dir)

# Project-local helper modules, resolved by name through the path added above.
df_manipulator   = importlib.import_module('df_manipulator')
dt_manipulator   = importlib.import_module('date_manipulator')
plot_manipulator = importlib.import_module('plot_manipulator')
file_manipulator = importlib.import_module('file_manipulator')



## Dados de configuração

In [23]:
# Where the input CSV (output of the previous pipeline step) is read from.
input_config = dict(
    file=dict(
        ref_dir='./out/',
        filename='3_export_lunar_20200517.csv',
        delimiter=';',
    ),
)

# How and where the CSV produced by this notebook is written.
output_config = dict(
    file=dict(
        ref_dir='./out',
        delimiter=';',
        with_header=True,
        prefix='4_',
    ),
)

## Funções

In [24]:
def map_region_into_df(df, speed_bump_id, region_id, relative_start_region, relative_end_region, verbose=False):
    """Label the rows surrounding each speed-bump occurrence with ``region_id``.

    For every index label returned by ``df_manipulator.get_speed_bumps_idx`` a
    window ``[label - relative_start_region, label + relative_end_region]`` is
    requested from ``dt_manipulator.get_date_window``, bounded by the smallest
    and largest index label of ``df``. Rows whose index label belongs to an
    accepted window get ``region_id`` written to their ``'region_id'`` column.

    Window acceptance rules:
        * ``region_id == 1``: a window is discarded when it contains any
          speed-bump label.
        * any other ``region_id``: every window is accepted. (The source
          comments mention a "just one speed bump" rule for region 2, but
          it is not enforced by this function.)

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is copied and never modified. It must already have
        a ``'region_id'`` column whenever at least one window is produced.
    speed_bump_id
        Forwarded to ``df_manipulator.get_speed_bumps_idx``.
    region_id
        Value written to the ``'region_id'`` column of the selected rows.
    relative_start_region, relative_end_region
        Offsets subtracted from / added to each speed-bump label to obtain
        the window limits (the notebook passes ``pandas.Timedelta`` values).
    verbose : bool, default False
        When true, print the limits of every window.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``'region_id'`` updated for the selected rows.
        When no speed-bump label is found the copy is returned unchanged.

    Raises
    ------
    KeyError
        If windows were found but ``df`` has no ``'region_id'`` column.
    """
    _df = df.copy()

    # The code below relies on this being a mapping {speed bump id: labels}.
    sb_indexes_by_id = df_manipulator.get_speed_bumps_idx(_df, speed_bump_id=speed_bump_id)

    # Index.min()/max() are vectorised and do not raise on an empty frame.
    first_label = _df.index.min()
    last_label = _df.index.max()

    windows = []
    for sb_id, sb_labels in sb_indexes_by_id.items():
        for sb_idx in sb_labels:
            window_start = sb_idx - relative_start_region
            window_end = sb_idx + relative_end_region

            if verbose:
                print(f'Region {region_id}: [{sb_id}] {sb_idx} -> from {window_start} to {window_end}')

            windows.append(
                dt_manipulator.get_date_window(
                    start_date=window_start,
                    end_date=window_end,
                    bound=(first_label, last_label),
                )
            )

    if not windows:
        return _df

    if region_id == 1:
        # Region 1 must be free of speed bumps: drop every window that
        # contains one. A set gives constant-time membership checks.
        speed_bumps = {
            sb.to_pydatetime()
            for sb_labels in sb_indexes_by_id.values()
            for sb in sb_labels
        }
        windows = [window for window in windows if speed_bumps.isdisjoint(window)]

    # Union of all accepted windows; the set removes overlapping labels.
    selected_labels = set()
    for window in windows:
        selected_labels.update(window)

    # Unlike the chained form it replaces, ``.loc`` would silently create a
    # missing column, so keep the original failure explicit.
    if 'region_id' not in _df.columns:
        raise KeyError('region_id')

    # One ``.loc`` call instead of chained indexing (``_df[col][mask] = v``),
    # which writes to a temporary and is a no-op under pandas Copy-on-Write.
    _df.loc[_df.index.isin(list(selected_labels)), 'region_id'] = region_id

    return _df

## Carrega dataframe

In [25]:
# Read the CSV exported by the previous step using the input configuration.
df = df_manipulator.load_dataframe(
    input_config['file']['filename'],
    input_config['file']['ref_dir'],
    input_config['file']['delimiter'],
)

# Parse the timestamp column and hand it to the project helper that sets it
# as the frame's index.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_manipulator.set_index(df, 'timestamp', True)

In [26]:
# Preview the loaded frame (pandas truncates the rendering to the
# configured display.max_rows).
df

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-17 20:10:48.400,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.500,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.600,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.700,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.800,0,9.746048,1.806213,-22.921991,-42.473372
...,...,...,...,...,...
2020-05-17 20:35:51.100,0,9.250443,1.770294,-22.925816,-42.484631
2020-05-17 20:35:51.200,0,10.105179,1.660172,-22.925816,-42.484631
2020-05-17 20:35:51.300,0,9.920822,2.608276,-22.925816,-42.484631
2020-05-17 20:35:51.400,0,9.523392,2.505325,-22.925816,-42.484631


## Adiciona regiões nos registros

Seja *sb(t)* o instante em que o veículo passou pelo quebra-mola.

Serão classificados **dois tipos** de regiões:

- Região 1
    - Momentos da direção em que não há quebra-molas próximos;
    - Intervalo:  \[sb(t-inf), sb(t-5)\] ∪ \[sb(t+3), sb(t+inf)\]
- Região 2
    - Momentos que precedem o quebra-mola;
    - Inclui a ação do motorista de frear para o quebra-mola, passar por ele e acelerar;
    - Intervalo:  \[sb(t-5), sb(t+3)\]


**Obs.:** em sb(t-x), x é dado em segundos.

In [27]:
# Start from a copy of the loaded frame with every row assigned to region 1.
df_out = df.assign(region_id=1)

# Relabel as region 2 the rows from 5 s before to 3 s after each occurrence
# of speed bump 1.
df_out = map_region_into_df(
    df_out,
    speed_bump_id=1,
    region_id=2,
    relative_start_region=pd.Timedelta(seconds=5),
    relative_end_region=pd.Timedelta(seconds=3),
    verbose=False,
)

In [28]:
# Number of rows assigned to each region.
df_out['region_id'].value_counts()

1    11103
2     3020
Name: region_id, dtype: int64

## Adiciona média móvel, desvio padrão, normalização e diferenças de Z

## Salva o dataframe de saída

In [29]:
# Derive features from the z column: 10-sample rolling mean and standard
# deviation, z standardised by those two, and 5-/10-sample differences.
# Later entries may reference columns created by earlier ones.
# NOTE(review): where the rolling std is 0 the division yields NaN or ±inf;
# NaN rows are removed by dropna below, inf rows are not. Confirm intended.
df_out = (
    df_out
    .assign(**{
        'z-mean_10': lambda frame: frame['z'].rolling(10).mean(),
        'z-std_10': lambda frame: frame['z'].rolling(10).std(),
        'z-norm': lambda frame: (frame['z'] - frame['z-mean_10']) / frame['z-std_10'],
        'z-diff_5': lambda frame: frame['z'].diff(periods=5),
        'z-diff_10': lambda frame: frame['z'].diff(periods=10),
    })
    # The first rows have incomplete windows/differences (NaN): drop them.
    .dropna(how='any')
)

In [31]:
# Preview the frame with the region labels and the derived z features.
df_out

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng,region_id,z-mean_10,z-std_10,z-diff_5,z-diff_10,z-norm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-05-17 20:10:49.400,0,9.188202,1.528488,-22.921991,-42.473372,1,1.335992,0.416962,-0.320816,-0.045486,0.461662
2020-05-17 20:10:49.500,0,9.188202,1.528488,-22.921991,-42.473372,1,1.331444,0.414317,1.005569,-0.045486,0.475588
2020-05-17 20:10:49.600,0,9.365372,2.237167,-22.921991,-42.473372,1,1.426015,0.502685,1.448486,0.945709,1.613640
2020-05-17 20:10:49.700,0,9.365372,2.237167,-22.921991,-42.473372,1,1.520586,0.560224,0.986404,0.945709,1.279099
2020-05-17 20:10:49.800,0,9.743652,1.281876,-22.921991,-42.473372,1,1.468152,0.555034,-0.174789,-0.524338,-0.335612
...,...,...,...,...,...,...,...,...,...,...,...
2020-05-17 20:35:51.100,0,9.250443,1.770294,-22.925816,-42.484631,1,2.505797,1.961719,1.826767,-0.660797,-0.374928
2020-05-17 20:35:51.200,0,10.105179,1.660172,-22.925816,-42.484631,1,2.415536,1.979490,-4.141968,-0.902603,-0.381596
2020-05-17 20:35:51.300,0,9.920822,2.608276,-22.925816,-42.484631,1,2.420087,1.979918,-3.193863,0.045502,0.095049
2020-05-17 20:35:51.400,0,9.523392,2.505325,-22.925816,-42.484631,1,2.375316,1.971573,1.259354,-0.447708,0.065942


In [32]:
# Build the output path from the configured prefix and directory plus the
# input file name.
out_filename = file_manipulator.get_out_filename(
    output_config['file']['prefix'],
    output_config['file']['ref_dir'],
    input_config['file']['filename'],
)

# Write the labelled frame using the configured delimiter and header flag.
df_out.to_csv(
    out_filename,
    sep=output_config['file']['delimiter'],
    header=output_config['file']['with_header'],
)

print(f'O arquivo {out_filename} foi gerado!')

O arquivo ./out/4_export_lunar_20200517.csv foi gerado!
