## Imports

In [25]:
import pandas as pd
import sys, os, importlib
from datetime import datetime

In [26]:
# Let pandas render up to 100 rows before it truncates a displayed frame.
pd.set_option('display.max_rows', 100)

# Put the '../0_funcoes_base' folder on the import path, but only if it is not
# already there, so re-running this cell does not add duplicate entries.
if not any(entry == os.path.join('..', '0_funcoes_base') for entry in sys.path):
    sys.path.append(os.path.join('..', '0_funcoes_base'))

# The helper modules are loaded by name at runtime; they are expected to be
# found through the path entry registered above.
df_manipulator = importlib.import_module(name='df_manipulator')
file_manipulator = importlib.import_module(name='file_manipulator')

## Dados de configuração

In [27]:
# Location and CSV delimiter of the file this notebook reads.
input_config = dict(
    file=dict(
        ref_dir='./out/',
        filename='2_export_lunar_20200517.csv',
        delimiter=';',
    ),
)

# Settings used when writing the result, plus the columns dropped beforehand.
output_config = dict(
    file=dict(
        ref_dir='./out',
        delimiter=';',
        with_header=True,
        prefix='3_',
    ),
    remove_columns=['id', 'x'],
)

## Funções

In [28]:
def drop_unnecessary_columns(df, columns):
    """Return a new DataFrame without the given columns.

    The input frame is never modified.

    Args:
        df: Source DataFrame.
        columns: Column label(s) to remove. May be a list, tuple, pandas
            Index, numpy array or a single string label. ``None`` or an
            empty collection means "remove nothing".

    Returns:
        A new DataFrame: a plain copy of ``df`` when there is nothing to
        remove, otherwise ``df`` without ``columns``.

    Raises:
        KeyError: If any requested column is not present in ``df``.
    """
    # Use an explicit None/length check instead of truthiness: `not columns`
    # raises ValueError for array-likes such as pd.Index or np.ndarray.
    if columns is None or len(columns) == 0:
        return df.copy()

    # DataFrame.drop already returns a new object, so no extra copy is needed,
    # and `columns=` alone selects the axis (no `axis=1` required).
    return df.drop(columns=columns)

def drop_unnecessary_speed_bumps(df, speed_bump_id, mapping_window_width=(5000, 5000), mapping_window_freq='milliseconds'):
    """Return a copy of ``df`` without the rows tied to one speed bump id.

    The rows to remove are determined by two ``df_manipulator`` helpers:
    ``get_speed_bumps_idx`` locates the speed bumps with ``speed_bump_id`` and
    ``get_affected_timestamps_by_speed_bumps`` reports the index labels
    affected by them. Every label returned by the second helper (both the keys
    and the values of its inner mappings) is dropped from the result.

    Args:
        df: Source DataFrame; it is not modified (the helpers receive a copy).
        speed_bump_id: Identifier of the speed bumps to remove.
        mapping_window_width: Forwarded unchanged to
            ``get_affected_timestamps_by_speed_bumps``. Presumably the
            (before, after) width of the window around each speed bump --
            confirm against df_manipulator.
        mapping_window_freq: Forwarded unchanged to the same helper;
            presumably the unit of ``mapping_window_width``.

    Returns:
        A new DataFrame containing only the rows whose index label was not
        reported by the helper.
    """
    # Work on a copy so the opaque helpers cannot alter the caller's frame.
    _df = df.copy()
    sb_indexes = df_manipulator.get_speed_bumps_idx(_df, speed_bump_id=speed_bump_id)

    timestamps_to_delete = df_manipulator.get_affected_timestamps_by_speed_bumps(
        _df,
        sb_indexes,
        mapping_window_width=mapping_window_width,
        mapping_window_freq=mapping_window_freq,
    )

    # The helper result is consumed as a mapping of mappings: each inner
    # mapping's keys and the items of each of its values are index labels.
    indexes_to_delete = []
    for group in timestamps_to_delete.values():
        # Keys are collected once per group rather than once per inner value;
        # the resulting set of labels is the same.
        indexes_to_delete.extend(group.keys())
        for affected in group.values():
            indexes_to_delete.extend(affected)

    return _df[~_df.index.isin(indexes_to_delete)]

## Carrega dataframe

In [29]:
# Load the input CSV through the project helper; filename, directory and
# delimiter all come from input_config['file'].
df = df_manipulator.load_dataframe(input_config['file']['filename'], input_config['file']['ref_dir'], input_config['file']['delimiter'])
# Convert the timestamp column to pandas datetimes.
df.timestamp = pd.to_datetime(df.timestamp)
# NOTE(review): the return value is discarded, so set_index presumably changes
# df in place; the meaning of the trailing True is defined in df_manipulator --
# confirm there.
df_manipulator.set_index(df, 'timestamp', True)

In [30]:
# Inspect the loaded frame (pandas truncates the rendering of long frames).
df

Unnamed: 0_level_0,id,speed_bump_id,x,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-05-17 20:10:48.400,d198e52b-bd5b-424a-8ea5-d391fbdb7b20,0,-3.399200,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.500,d198e52b-bd5b-424a-8ea5-d391fbdb7b20,0,-3.399200,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.600,a9df8305-e4ed-4530-ab78-e1aa3ca9ffbd,0,-2.987396,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.700,a9df8305-e4ed-4530-ab78-e1aa3ca9ffbd,0,-2.987396,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.800,74dedc33-8d9b-4aa6-a352-df1ee9364ed7,0,-4.376038,9.746048,1.806213,-22.921991,-42.473372
...,...,...,...,...,...,...,...
2020-05-17 20:35:51.100,72df7adf-cb5f-4f91-b76d-f08fa6da21ed,0,-0.700928,9.250443,1.770294,-22.925816,-42.484631
2020-05-17 20:35:51.200,0f15d4aa-c4c3-40ba-9d1f-1746e938a5eb,0,0.386032,10.105179,1.660172,-22.925816,-42.484631
2020-05-17 20:35:51.300,c044a484-bb8b-421b-8cc8-a6ad4713e28f,0,-1.378494,9.920822,2.608276,-22.925816,-42.484631
2020-05-17 20:35:51.400,c9d94f9a-6211-4198-b39b-70db0ef2e890,0,-1.215683,9.523392,2.505325,-22.925816,-42.484631


In [31]:
# Count how many rows carry each speed_bump_id value before any filtering.
df.speed_bump_id.value_counts()

0    14978
1       45
2        9
Name: speed_bump_id, dtype: int64

## Remove colunas desnecessárias

In [32]:
# Drop the columns listed in output_config['remove_columns']; the result is a
# new frame bound to df_out, and df itself is left as loaded.
df_out = drop_unnecessary_columns(df, columns=output_config['remove_columns'])

In [33]:
# Inspect the frame after the configured columns were removed.
df_out

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-17 20:10:48.400,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.500,0,9.516205,1.573975,-22.921991,-42.473372
2020-05-17 20:10:48.600,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.700,0,10.203339,1.291458,-22.921991,-42.473372
2020-05-17 20:10:48.800,0,9.746048,1.806213,-22.921991,-42.473372
...,...,...,...,...,...
2020-05-17 20:35:51.100,0,9.250443,1.770294,-22.925816,-42.484631
2020-05-17 20:35:51.200,0,10.105179,1.660172,-22.925816,-42.484631
2020-05-17 20:35:51.300,0,9.920822,2.608276,-22.925816,-42.484631
2020-05-17 20:35:51.400,0,9.523392,2.505325,-22.925816,-42.484631


## Remove speed_bumps de id 2 ("passarela alta")

In [34]:
# Remove the speed bumps with id 2 (labelled "passarela alta" in this section's
# heading) along with the rows the helper reports as affected by them.
# Note: this rebinds df_out, so from here on df_out no longer holds the
# column-only-filtered frame.
df_out = drop_unnecessary_speed_bumps(df_out, speed_bump_id=2)

In [35]:
# Re-count rows per speed_bump_id to verify the removal: id 2 should no longer
# be listed.
df_out.speed_bump_id.value_counts()

0    14078
1       45
Name: speed_bump_id, dtype: int64

## Salva o dataframe de saída

In [36]:
# Build the output file path from the configured prefix, the output directory
# and the input filename.
out_filename = file_manipulator.get_out_filename(output_config['file']['prefix'], output_config['file']['ref_dir'], input_config['file']['filename'])

# Write the filtered frame as CSV using the configured delimiter and header flag.
df_out.to_csv(out_filename, sep=output_config['file']['delimiter'], header=output_config['file']['with_header'])

# Report the path of the file that was just written.
print(f'O arquivo {out_filename} foi gerado!')

O arquivo ./out/3_export_lunar_20200517.csv foi gerado!
