# Imports

In [20]:
import pandas as pd
import sys, os, importlib
from datetime import datetime

In [21]:
# Show up to 100 rows before pandas truncates a displayed DataFrame.
pd.set_option('display.max_rows', 100)

# Register the shared helper directory on sys.path only once, so re-running
# this cell does not append duplicates. The path is relative, so it is
# resolved against the current working directory.
base_functions_dir = os.path.join('..', '0_funcoes_base')
if base_functions_dir not in sys.path:
    sys.path.append(base_functions_dir)

# Helper modules loaded by name; they are expected to be found in the
# directory registered above.
df_manipulator = importlib.import_module('df_manipulator')
file_manipulator = importlib.import_module('file_manipulator')

# Dados de configuração

In [22]:
# Description of the CSV this notebook reads; the three values are passed to
# df_manipulator.load_dataframes in the loading cell.
input_config = {
    'file': {
        'ref_dir':'./out/',
        'filename':'2_export_lunar_20200517_20200620.csv',
        'delimiter': ';'
    }
}

# Settings for the cleaned dataset this notebook writes.
output_config = {
    'file': {
        # 'prefix' and 'ref_dir' are passed, together with the input filename,
        # to file_manipulator.get_out_filename to build the output path.
        'ref_dir': './out',
        # Passed to DataFrame.to_csv as `sep`.
        'delimiter':';',
        # Passed to DataFrame.to_csv as `header`.
        'with_header': True,
        'prefix':'3_'
    },
    # Columns removed by drop_unnecessary_columns.
    'remove_columns':['id','x'],
    # Forwarded by drop_unnecessary_speed_bumps to
    # df_manipulator.get_affected_timestamps_by_speed_bumps as
    # mapping_window_width / mapping_window_freq.
    # NOTE(review): presumably (before, after) spans around each speed bump,
    # expressed in 'window_freq' units - confirm against df_manipulator.
    'window_width': (5000, 3000),
    'window_freq': 'milliseconds'
}

# Funções

In [23]:
def drop_unnecessary_columns(df, columns):
    """Return a copy of ``df`` with the given columns removed.

    Args:
        df: DataFrame to clean; it is never modified.
        columns: Column label(s) to remove. ``None`` or an empty collection
            means "remove nothing".

    Returns:
        A new DataFrame. When there is nothing to remove it is a plain copy
        of ``df``.

    Raises:
        KeyError: Raised by pandas if a requested column is not in ``df``.
    """
    result = df.copy()

    # Only call drop when there is at least one label to remove.
    has_columns = bool(columns) and len(columns) >= 1
    if has_columns:
        result = result.drop(columns=columns)

    return result

def drop_unnecessary_speed_bumps(df, speed_bump_id, window_width=None, window_freq=None):
    """Return a copy of ``df`` without the rows tied to one speed-bump id.

    The rows to remove are the index labels found in the nested mapping
    returned by ``df_manipulator.get_affected_timestamps_by_speed_bumps``:
    both the keys of each inner mapping and every label contained in its
    values.
    NOTE(review): presumably the keys are the speed-bump timestamps and the
    values are the surrounding window of timestamps - confirm against
    ``df_manipulator``.

    Args:
        df: DataFrame to filter; it is never modified.
        speed_bump_id: Id passed to ``df_manipulator.get_speed_bumps_idx`` to
            locate the speed bumps to remove.
        window_width: Forwarded as ``mapping_window_width``. Defaults to
            ``output_config['window_width']``.
        window_freq: Forwarded as ``mapping_window_freq``. Defaults to
            ``output_config['window_freq']``.

    Returns:
        A new DataFrame holding only the rows whose index label was not
        selected for removal.
    """
    # Fall back to the notebook-level configuration so existing calls that
    # pass only (df, speed_bump_id) behave exactly as before.
    if window_width is None:
        window_width = output_config['window_width']
    if window_freq is None:
        window_freq = output_config['window_freq']

    _df = df.copy()
    sb_indexes = df_manipulator.get_speed_bumps_idx(_df, speed_bump_id=speed_bump_id)

    timestamps_to_delete = df_manipulator.get_affected_timestamps_by_speed_bumps(
        _df,
        sb_indexes,
        mapping_window_width=window_width,
        mapping_window_freq=window_freq,
    )

    # A set keeps each label once; the keys of every inner mapping are added a
    # single time instead of once per value.
    indexes_to_delete = set()
    for affected in timestamps_to_delete.values():
        indexes_to_delete.update(affected.keys())
        for window in affected.values():
            indexes_to_delete.update(window)

    # isin is given a list because a set is not reliably treated as list-like
    # by every index type.
    return _df[~_df.index.isin(list(indexes_to_delete))]

# Carrega dataframe

In [24]:
# Load the CSV described by input_config['file'].
df = df_manipulator.load_dataframes(
    input_config['file']['filename'],
    input_config['file']['ref_dir'],
    input_config['file']['delimiter'],
)

# Bracket indexing is the explicit way to assign a column; attribute-style
# assignment only works for columns that already exist.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# The return value is not used, so set_index is expected to modify df itself.
# NOTE(review): the third positional argument looks like an in-place flag -
# confirm against df_manipulator.set_index.
df_manipulator.set_index(df, 'timestamp', True)

df

Unnamed: 0_level_0,id,speed_bump_id,x,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-05-17 20:14:32.800,b78d7d2f-3fd1-4d81-81ee-d00afc039814,0,0.843323,8.838654,3.714386,-22.919592,-42.473961
2020-05-17 20:14:32.900,1c59acca-2bb8-4686-9e93-95aa791f6f70,0,-2.240402,10.248825,3.108658,-22.919592,-42.473961
2020-05-17 20:14:33.000,de01a2a1-dd19-4081-921f-890907e5ca4d,0,-1.998596,10.842590,2.215622,-22.919592,-42.473961
2020-05-17 20:14:33.100,8864e0ac-54c0-4b94-b018-cd764c6dcef0,0,-2.326599,9.702957,2.493347,-22.919592,-42.473961
2020-05-17 20:14:33.200,8864e0ac-54c0-4b94-b018-cd764c6dcef0,0,-2.326599,9.702957,2.493347,-22.919592,-42.473961
...,...,...,...,...,...,...,...
2020-06-20 14:41:23.800,9045a063-725e-4d4f-abf7-d73d8bba8a89,0,0.548843,8.189819,6.620956,-22.871247,-42.340474
2020-06-20 14:41:23.900,9045a063-725e-4d4f-abf7-d73d8bba8a89,0,0.548843,8.189819,6.620956,-22.871247,-42.340474
2020-06-20 14:41:24.000,145b6c26-33d5-4c20-b585-3e620dd871ec,0,0.146622,8.970322,5.715942,-22.871481,-42.340609
2020-06-20 14:41:24.100,22d48c68-3114-4d47-9e98-7aae0bb74ecb,0,0.589539,11.125107,4.638550,-22.871481,-42.340609


In [25]:
# Number of rows per speed_bump_id value in the loaded data.
df['speed_bump_id'].value_counts()

0    21252
1       84
2        9
Name: speed_bump_id, dtype: int64

# Remove colunas desnecessárias

In [26]:
# Start the output frame as a copy of df without the columns listed in
# output_config['remove_columns']; df itself is left untouched.
df_out = drop_unnecessary_columns(df, columns=output_config['remove_columns'])

# Remove speed_bumps de id 2 ("passarela alta")

In [27]:
# Remove the rows associated with speed_bump_id 2, the class this notebook
# labels "passarela alta".
df_out = drop_unnecessary_speed_bumps(df_out, speed_bump_id=2)

# Check which speed_bump_id values remain after the removal.
df_out['speed_bump_id'].value_counts()

0    20532
1       84
Name: speed_bump_id, dtype: int64

In [28]:
# Preview the first 10 rows of the cleaned frame before saving it.
df_out.head(10)

Unnamed: 0_level_0,speed_bump_id,y,z,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-17 20:14:32.800,0,8.838654,3.714386,-22.919592,-42.473961
2020-05-17 20:14:32.900,0,10.248825,3.108658,-22.919592,-42.473961
2020-05-17 20:14:33.000,0,10.84259,2.215622,-22.919592,-42.473961
2020-05-17 20:14:33.100,0,9.702957,2.493347,-22.919592,-42.473961
2020-05-17 20:14:33.200,0,9.702957,2.493347,-22.919592,-42.473961
2020-05-17 20:14:33.300,0,8.345444,2.601089,-22.919592,-42.473961
2020-05-17 20:14:33.400,0,9.590424,3.173294,-22.919592,-42.473961
2020-05-17 20:14:33.500,0,8.21376,3.173294,-22.919592,-42.473961
2020-05-17 20:14:33.600,0,9.37973,3.443848,-22.919592,-42.473961
2020-05-17 20:14:33.700,0,9.386917,2.984161,-22.919592,-42.473961


# Salva o dataframe de saída

In [29]:
# Build the output path from the configured prefix and directory plus the
# input file name.
out_filename = file_manipulator.get_out_filename(
    output_config['file']['prefix'],
    output_config['file']['ref_dir'],
    input_config['file']['filename'],
)

# Write the cleaned frame using the configured delimiter and header setting.
df_out.to_csv(
    out_filename,
    sep=output_config['file']['delimiter'],
    header=output_config['file']['with_header'],
)

print(f'O arquivo {out_filename} foi gerado!')

O arquivo ./out/3_export_lunar_20200517_20200620.csv foi gerado!
