## Imports

In [259]:
import pandas as pd
import sys, os, importlib
from datetime import datetime

In [260]:
# Let pandas print up to 100 rows before it starts truncating displayed frames.
pd.set_option('display.max_rows', 100)

# Register the shared helper directory on the import path. The membership
# check keeps the entry from being appended again when this cell is re-run.
base_functions_dir = os.path.join('..', '0_funcoes_base')
if base_functions_dir not in sys.path:
    sys.path.append(base_functions_dir)

# Project helper modules, loaded by name.
# NOTE(review): presumably they live in the directory registered above — confirm.
df_manipulator = importlib.import_module('df_manipulator')
date_manipulator = importlib.import_module('date_manipulator')


## Dados de configuração

In [261]:
# Input CSV: directory, file name and field delimiter handed to the loader.
input_config = {
    'ref_dir': './out/',
    'filename': '1_export_lunar_20200517.csv',
    'delimiter': ';',
}

# Output CSV: destination directory, field delimiter and whether to write a header row.
out_config = {
    'ref_dir': './out',
    'delimiter': ';',
    'with_header': True,
}

## Funções

In [262]:
def round_timestamp(df, col, expression):
    """Return a copy of ``df`` with the values of ``col`` rounded.

    Args:
        df: DataFrame holding the column to round.
        col: Name of a column that supports the ``.dt`` accessor.
        expression: Frequency string passed to ``Series.dt.round``
            (for example ``'50ms'``).

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    rounded_values = df[col].dt.round(expression)
    result = df.copy()
    result[col] = rounded_values
    return result

def remove_duplicated_timestamp(df):
    """Drop repeated-timestamp rows that carry no speed-bump marker.

    A row is removed when its ``timestamp`` already appeared in an earlier
    row (the first occurrence is always kept) and its ``speedBumpId`` equals
    ``0``. Repeated rows with a non-zero ``speedBumpId`` are kept, so the
    result may still contain repeated timestamps.

    Rows are selected with a boolean mask aligned on the index, so the
    function works for any index (non-default, non-contiguous or with
    repeated labels) rather than only for a default ``RangeIndex``.

    Args:
        df: DataFrame with ``timestamp`` and ``speedBumpId`` columns.

    Returns:
        A new DataFrame with the selected rows removed and the original
        index labels preserved; ``df`` itself is not modified.
    """
    is_repeated = df['timestamp'].duplicated(keep='first')
    has_no_speedbump = df['speedBumpId'] == 0

    # Keep everything except repeated rows that carry no speed-bump marker.
    return df.loc[~(is_repeated & has_no_speedbump)].copy()

def get_out_filename(prefix):
    """Build the output path for this stage from the configured input name.

    Only a leading ``'1_'`` marker is stripped from
    ``input_config['filename']``; any later ``'1_'`` inside the name (for
    instance in a date such as ``20200521_b``) is left intact. ``prefix`` is
    then prepended and the result is joined to ``out_config['ref_dir']``.

    Args:
        prefix: Text placed in front of the stripped input file name.

    Returns:
        The output file path as a string.
    """
    input_marker = '1_'
    base_name = input_config['filename']
    if base_name.startswith(input_marker):
        base_name = base_name[len(input_marker):]
    return os.path.join(out_config['ref_dir'], prefix + base_name)

## Carrega dataframe

In [263]:
# Read the configured CSV through the project loader.
df = df_manipulator.load_dataframe(
    input_config['filename'],
    input_config['ref_dir'],
    input_config['delimiter'],
)

# Parse the timestamp column into datetimes; the rounding step relies on `.dt`.
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [264]:
# Preview the loaded frame as a sanity check before any transformation.
df

Unnamed: 0,timestamp,id,speedBumpId,x,y,z,lat,lng
0,2020-05-17 20:10:48.346,d198e52b-bd5b-424a-8ea5-d391fbdb7b20,0,-3.399200,9.516205,1.573975,-22.921991,-42.473372
1,2020-05-17 20:10:48.543,a9df8305-e4ed-4530-ab78-e1aa3ca9ffbd,0,-2.987396,10.203339,1.291458,-22.921991,-42.473372
2,2020-05-17 20:10:48.743,74dedc33-8d9b-4aa6-a352-df1ee9364ed7,0,-4.376038,9.746048,1.806213,-22.921991,-42.473372
3,2020-05-17 20:10:48.843,4a64f1bf-ea98-49db-baff-d7d08baf2dce,0,-3.698471,9.317490,1.849304,-22.921991,-42.473372
4,2020-05-17 20:10:48.944,25a3976b-a812-4bf2-a2fe-113d7b01cd11,0,-5.039230,9.372559,0.522919,-22.921991,-42.473372
...,...,...,...,...,...,...,...,...
11131,2020-05-17 20:35:51.161,0f15d4aa-c4c3-40ba-9d1f-1746e938a5eb,0,0.386032,10.105179,1.660172,-22.925816,-42.484631
11132,2020-05-17 20:35:51.261,c044a484-bb8b-421b-8cc8-a6ad4713e28f,0,-1.378494,9.920822,2.608276,-22.925816,-42.484631
11133,2020-05-17 20:35:51.361,c9d94f9a-6211-4198-b39b-70db0ef2e890,0,-1.215683,9.523392,2.505325,-22.925816,-42.484631
11134,2020-05-17 20:35:51.461,2fa87037-71b8-42e0-bd89-64bff33cfead,0,-0.435181,8.999054,2.871628,-22.925816,-42.484631


## Arredonda timestamp do dataframe e remove duplicatas

In [265]:
# Snap timestamps to a 50 ms grid first, so samples that land on the same
# instant can be detected as duplicates by the next step.
df_rounded = round_timestamp(df, 'timestamp', '50ms')
df_filtered = remove_duplicated_timestamp(df_rounded)


In [266]:
# Compare shapes before/after filtering; equal row counts mean nothing was dropped.
print(
    f'Antes  do filtro: {df.shape}',
    f'Depois do filtro: {df_filtered.shape}',
    sep='\n',
)

Antes  do filtro: (11136, 8)
Depois do filtro: (11136, 8)


In [267]:
# Inspect the result: timestamps should now sit on 50 ms boundaries.
df_filtered

Unnamed: 0,timestamp,id,speedBumpId,x,y,z,lat,lng
0,2020-05-17 20:10:48.350,d198e52b-bd5b-424a-8ea5-d391fbdb7b20,0,-3.399200,9.516205,1.573975,-22.921991,-42.473372
1,2020-05-17 20:10:48.550,a9df8305-e4ed-4530-ab78-e1aa3ca9ffbd,0,-2.987396,10.203339,1.291458,-22.921991,-42.473372
2,2020-05-17 20:10:48.750,74dedc33-8d9b-4aa6-a352-df1ee9364ed7,0,-4.376038,9.746048,1.806213,-22.921991,-42.473372
3,2020-05-17 20:10:48.850,4a64f1bf-ea98-49db-baff-d7d08baf2dce,0,-3.698471,9.317490,1.849304,-22.921991,-42.473372
4,2020-05-17 20:10:48.950,25a3976b-a812-4bf2-a2fe-113d7b01cd11,0,-5.039230,9.372559,0.522919,-22.921991,-42.473372
...,...,...,...,...,...,...,...,...
11131,2020-05-17 20:35:51.150,0f15d4aa-c4c3-40ba-9d1f-1746e938a5eb,0,0.386032,10.105179,1.660172,-22.925816,-42.484631
11132,2020-05-17 20:35:51.250,c044a484-bb8b-421b-8cc8-a6ad4713e28f,0,-1.378494,9.920822,2.608276,-22.925816,-42.484631
11133,2020-05-17 20:35:51.350,c9d94f9a-6211-4198-b39b-70db0ef2e890,0,-1.215683,9.523392,2.505325,-22.925816,-42.484631
11134,2020-05-17 20:35:51.450,2fa87037-71b8-42e0-bd89-64bff33cfead,0,-0.435181,8.999054,2.871628,-22.925816,-42.484631


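## Ajusta clique de quebra-mola

**TODO:** esta etapa ainda não possui células neste notebook; nenhum ajuste de clique de quebra-mola é aplicado antes de salvar.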

## Salva o dataframe de saída

In [268]:
# Make `timestamp` the index through the project helper. It is called with
# inplace=True, so `df_filtered` itself is presumably modified — confirm in
# df_manipulator. NOTE(review): if so, re-running only this cell may fail
# because the `timestamp` column would already have been moved to the index.
df_manipulator.set_index(df_filtered, 'timestamp', inplace=True)

# Output path: the '2_' prefix plus the input file name, inside out_config['ref_dir'].
out_filename = get_out_filename(prefix='2_')
# Write the CSV with the configured delimiter and header flag; `to_csv` also
# writes the index by default.
df_filtered.to_csv(out_filename, sep=out_config['delimiter'], header=out_config['with_header'])
print(f'O arquivo {out_filename} foi gerado!')

O arquivo ./out/2_export_lunar_20200517.csv foi gerado!
