In [89]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [90]:
# Load the source CSV; its fields are separated by ';'.
# NOTE(review): absolute Google Drive path — presumably needs Drive mounted in Colab; confirm.
csv_path = "/content/drive/MyDrive/dados_colab/ponta_negra/todos_202308121223.csv"
df = pd.read_csv(csv_path, sep=";")

Separar os valores e adicionar as novas colunas

In [91]:
# Names, in order, for the fields packed into the ';'-separated 'valores' column.
new_column_names = [
    'contador',
    'hora', 'minuto', 'segundo',
    'dia', 'mes', 'ano',
    'latitude', 'longitude',
    'temperatura', 'umidade',
    'tamanho',
    'rssi',
]

# One column per field; the split leaves every value as a string.
new_columns = df['valores'].str.split(';', expand=True)
# The number of names must match the number of columns produced by the split.
new_columns.columns = new_column_names

# Append the parsed fields to the original frame, then discard the columns
# that are no longer needed ('id' and the now-expanded 'valores').
df = pd.concat([df, new_columns], axis=1).drop(columns=['id', 'valores'])

Criando o DataFrame do Mock

In [92]:
# Rows whose GPS date fields are exactly day '4', month '5', year '6' are
# classified as mock data. The comparison is against strings because the
# fields come from a str.split and have not been converted to numbers.
# NOTE(review): presumably these are fixed placeholder values sent by the
# mock device — confirm against the data source.
is_mock = (df['dia'] == '4') & (df['mes'] == '5') & (df['ano'] == '6')

# Explicit copy: df_mock gets new columns later and must not alias df.
df_mock = df[is_mock].copy()

# len() counts rows. The previous df.count()[0] counted non-null cells of the
# first column and relied on positional indexing of a label-indexed Series,
# which is deprecated in recent pandas versions.
print(f'Qtd total {len(df)}')
print(f'Qtd mock {len(df_mock)}')
print(f'Qtd barco {len(df) - len(df_mock)}')

Qtd total 7793
Qtd mock 6156
Qtd barco 1637


Criando o DataFrame do barco

In [93]:
# Boat rows are every row where at least one GPS date field differs from the
# mock marker (day '4', month '5', year '6'). Fields are still strings here.
is_barco = (df['dia'] != '4') | (df['mes'] != '5') | (df['ano'] != '6')

# Explicit copy: df_barco gets new columns later and must not alias df.
df_barco = df[is_barco].copy()

# len() counts rows. The previous df_barco.count()[0] counted non-null cells
# of the first column and relied on positional indexing of a label-indexed
# Series, which is deprecated in recent pandas versions.
print(f'Qtd barco {len(df_barco)}')

Qtd barco 1637


Formatando a data hora do GPS

In [94]:
def create_datetime(row):
    """Build a 'YYYY-MM-DD HH:MM:SS' string from the GPS date/time fields of a row.

    Each field is converted with str() and left-padded with zeros: 4 characters
    for 'ano', 2 for every other field. Values are not validated, so anything
    out of range is passed through into the result unchanged.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'ano', 'mes', 'dia', 'hora', 'minuto' and 'segundo'.

    Returns:
        The formatted date-time string.
    """
    # Minimum width of each field, listed in the order the fields are read.
    widths = {'ano': 4, 'mes': 2, 'dia': 2, 'hora': 2, 'minuto': 2, 'segundo': 2}
    ano, mes, dia, hora, minuto, segundo = (
        str(row[campo]).zfill(largura) for campo, largura in widths.items()
    )
    return f'{ano}-{mes}-{dia} {hora}:{minuto}:{segundo}'

# Raw GPS date/time parts; they are dropped once combined into 'data_hora_gps'.
gps_parts = ['hora', 'minuto', 'segundo', 'dia', 'mes', 'ano']

# Apply the same transformation to the full, mock and boat frames, in that
# order. Each frame is modified in place.
for frame in (df, df_mock, df_barco):
    frame['data_hora_gps'] = frame.apply(create_datetime, axis=1)
    frame.drop(columns=gps_parts, inplace=True)

Exporta os dados para arquivos CSV (todos, mock e barco)

In [95]:
# Write each frame to its own ';'-separated CSV file, without the index column.
exports = (
    ('todos.csv', df),
    ('mock.csv', df_mock),
    ('barco.csv', df_barco),
)
for filename, frame in exports:
    frame.to_csv(filename, index=False, sep=';')

Converte as colunas contador e rssi para int32 e latitude e longitude para float64 (as demais colunas permanecem como object)

In [96]:
# Renumber the boat rows from 0, discarding the index inherited from df.
df_barco.reset_index(drop=True, inplace=True)

# Target dtype for each column that is converted; columns not listed here
# are left untouched.
target_dtypes = {
    'contador': np.int32,
    'latitude': np.float64,
    'longitude': np.float64,
    'rssi': np.int32,
}
for column, dtype in target_dtypes.items():
    df_barco[column] = df_barco[column].astype(dtype)

# Show the resulting dtype of every column.
print(df_barco.dtypes)

data_hora_criacao     object
contador               int32
latitude             float64
longitude            float64
temperatura           object
umidade               object
tamanho               object
rssi                   int32
data_hora_gps         object
dtype: object


In [97]:
# Preview the first five boat rows.
df_barco.head(n=5)

Unnamed: 0,data_hora_criacao,contador,latitude,longitude,temperatura,umidade,tamanho,rssi,data_hora_gps
0,2023-08-06 20:43:55.317,6842,-3.063638,-60.10954,31.1,68.4,52,-100,214521719-32776--1877958650 655380:43:-1599471561
1,2023-08-06 20:43:52.156,6838,121898.0,-0.0,8.154514060939243e+34,0.0,52,-98,1610663911-08-349446 20:43:50
2,2023-08-06 20:43:43.978,6832,-3.064039,-60.10912,31.1,64.2,52,-97,9439207-08-06 20:43:44
3,2023-08-06 20:43:38.323,6827,4.556444e+19,-94.03112,13.2,4.269160140372144e+30,52,-97,-1675181147--2008266680-06 20:43:808452134
4,2023-08-06 20:43:36.135,6825,-0.0,-2.481117e+27,-2.285208600532596e+30,-0.8,52,-97,2023--1069232928-264198 20:43:36
