In [74]:
# Carregar os módulos necessários para análise
import pandas as pd
import folium
from folium.plugins import HeatMap
from tqdm import tqdm
import numpy as np
from locale import atof

In [93]:
# Importando os dados
# Dados foram extraídos de um experimento a partir de:
# https://github.com/pires/android-obd-reader
df_vehicle = pd.read_csv("Log_12_05_2017_06_55_03.csv", 
                   encoding = 'latin2',
                   sep=';',
                   skiprows=[0],
                   low_memory=False)

In [94]:
# Visualizando as cinco primeiras linhas da base de dados
df_vehicle.head()

Unnamed: 0,TIME,LATITUDE,LONGITUDE,ALTITUDE,VEHICLE_ID,BAROMETRIC_PRESSURE,ENGINE_COOLANT_TEMP,FUEL_LEVEL,ENGINE_LOAD,AMBIENT_AIR_TEMP,...,FUEL_PRESSURE,SPEED,Short Term Fuel Trim Bank 2,Short Term Fuel Trim Bank 1,ENGINE_RUNTIME,THROTTLE_POS,DTC_NUMBER,TROUBLE_CODES,TIMING_ADVANCE,EQUIV_RATIO
0,1494582908210,-5.876252,-35.225795,8.721269,qgj1172,,29C,,45.1%,,...,,0km/h,,,00:00:25,16.1%,,,,1.0%
1,1494582909227,-5.876105,-35.225835,41.425589,qgj1172,,,,44.3%,,...,,,,,00:00:26,15.7%,,,,1.0%
2,1494582911243,-5.876105,-35.225835,41.425589,qgj1172,,30C,,44.3%,,...,,0km/h,,,00:00:28,15.7%,,,,1.0%
3,1494582912262,-5.876105,-35.225835,41.425589,qgj1172,,31C,,42.4%,,...,,0km/h,,,00:00:29,16.1%,,,,1.0%
4,1494582914284,-5.876158,-35.225838,27.910395,qgj1172,,31C,,35.7%,,...,,0km/h,,,00:00:31,17.6%,,,,1.0%


In [95]:
# Substituindo a string "null" e "canerror" na base de dados por NaN - not a number
df_vehicle.replace('null',np.NaN, inplace=True)
df_vehicle.replace('CANERROR',np.NaN, inplace=True)
df_vehicle.head()

Unnamed: 0,TIME,LATITUDE,LONGITUDE,ALTITUDE,VEHICLE_ID,BAROMETRIC_PRESSURE,ENGINE_COOLANT_TEMP,FUEL_LEVEL,ENGINE_LOAD,AMBIENT_AIR_TEMP,...,FUEL_PRESSURE,SPEED,Short Term Fuel Trim Bank 2,Short Term Fuel Trim Bank 1,ENGINE_RUNTIME,THROTTLE_POS,DTC_NUMBER,TROUBLE_CODES,TIMING_ADVANCE,EQUIV_RATIO
0,1494582908210,-5.876252,-35.225795,8.721269,qgj1172,,29C,,45.1%,,...,,0km/h,,,00:00:25,16.1%,,,,1.0%
1,1494582909227,-5.876105,-35.225835,41.425589,qgj1172,,,,44.3%,,...,,,,,00:00:26,15.7%,,,,1.0%
2,1494582911243,-5.876105,-35.225835,41.425589,qgj1172,,30C,,44.3%,,...,,0km/h,,,00:00:28,15.7%,,,,1.0%
3,1494582912262,-5.876105,-35.225835,41.425589,qgj1172,,31C,,42.4%,,...,,0km/h,,,00:00:29,16.1%,,,,1.0%
4,1494582914284,-5.876158,-35.225838,27.910395,qgj1172,,31C,,35.7%,,...,,0km/h,,,00:00:31,17.6%,,,,1.0%


In [96]:
# Verificando as colunas que fazem sentido explorar
df_vehicle.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1349 entries, 0 to 1348
Data columns (total 28 columns):
TIME                           1349 non-null int64
LATITUDE                       1349 non-null float64
LONGITUDE                      1349 non-null float64
ALTITUDE                       1349 non-null float64
VEHICLE_ID                     1349 non-null object
BAROMETRIC_PRESSURE            0 non-null float64
ENGINE_COOLANT_TEMP            1317 non-null object
FUEL_LEVEL                     0 non-null float64
ENGINE_LOAD                    1348 non-null object
AMBIENT_AIR_TEMP               0 non-null float64
ENGINE_RPM                     1349 non-null object
INTAKE_MANIFOLD_PRESSURE       0 non-null float64
MAF                            1349 non-null object
Term Fuel Trim Bank 1          0 non-null float64
FUEL_ECONOMY                   0 non-null float64
Long Term Fuel Trim Bank 2     0 non-null float64
FUEL_TYPE                      0 non-null float64
AIR_INTAKE_TEMP        

In [97]:
df_vehicle.columns

Index(['TIME', 'LATITUDE', 'LONGITUDE', 'ALTITUDE', 'VEHICLE_ID',
       'BAROMETRIC_PRESSURE', 'ENGINE_COOLANT_TEMP', 'FUEL_LEVEL',
       'ENGINE_LOAD', 'AMBIENT_AIR_TEMP', 'ENGINE_RPM',
       'INTAKE_MANIFOLD_PRESSURE', 'MAF', 'Term Fuel Trim Bank 1',
       'FUEL_ECONOMY', 'Long Term Fuel Trim Bank 2', 'FUEL_TYPE',
       'AIR_INTAKE_TEMP', 'FUEL_PRESSURE', 'SPEED',
       'Short Term Fuel Trim Bank 2', 'Short Term Fuel Trim Bank 1',
       'ENGINE_RUNTIME', 'THROTTLE_POS', 'DTC_NUMBER', 'TROUBLE_CODES',
       'TIMING_ADVANCE', 'EQUIV_RATIO'],
      dtype='object')

In [98]:
# Limitando as colunas
df_vehicle = df_vehicle[[
    'TIME',
    'LATITUDE',
    'LONGITUDE',
    'ALTITUDE',
    'ENGINE_COOLANT_TEMP',
    'ENGINE_LOAD',
    'ENGINE_RPM',
    'MAF',
    'SPEED',
    'THROTTLE_POS',
    'ENGINE_RUNTIME',
]
]


In [99]:
# Analisando a nova base de dados
df_vehicle.head()

Unnamed: 0,TIME,LATITUDE,LONGITUDE,ALTITUDE,ENGINE_COOLANT_TEMP,ENGINE_LOAD,ENGINE_RPM,MAF,SPEED,THROTTLE_POS,ENGINE_RUNTIME
0,1494582908210,-5.876252,-35.225795,8.721269,29C,45.1%,1004RPM,6.47g/s,0km/h,16.1%,00:00:25
1,1494582909227,-5.876105,-35.225835,41.425589,,44.3%,1029RPM,6.41g/s,,15.7%,00:00:26
2,1494582911243,-5.876105,-35.225835,41.425589,30C,44.3%,1015RPM,6.33g/s,0km/h,15.7%,00:00:28
3,1494582912262,-5.876105,-35.225835,41.425589,31C,42.4%,1029RPM,6.72g/s,0km/h,16.1%,00:00:29
4,1494582914284,-5.876158,-35.225838,27.910395,31C,35.7%,957RPM,8.91g/s,0km/h,17.6%,00:00:31


In [100]:
# A nova base de dados ainda contem dados faltantes
df_vehicle.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1349 entries, 0 to 1348
Data columns (total 11 columns):
TIME                   1349 non-null int64
LATITUDE               1349 non-null float64
LONGITUDE              1349 non-null float64
ALTITUDE               1349 non-null float64
ENGINE_COOLANT_TEMP    1317 non-null object
ENGINE_LOAD            1348 non-null object
ENGINE_RPM             1349 non-null object
MAF                    1349 non-null object
SPEED                  1065 non-null object
THROTTLE_POS           1349 non-null object
ENGINE_RUNTIME         1348 non-null object
dtypes: float64(3), int64(1), object(7)
memory usage: 116.0+ KB


In [101]:
# Eliminar as linhas com algum valor nulo
df_vehicle.dropna(axis=0, inplace=True)
df_vehicle.reset_index(drop=True,inplace=True)

# Chegamos em uma base de dados higienizada
df_vehicle.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1062 entries, 0 to 1061
Data columns (total 11 columns):
TIME                   1062 non-null int64
LATITUDE               1062 non-null float64
LONGITUDE              1062 non-null float64
ALTITUDE               1062 non-null float64
ENGINE_COOLANT_TEMP    1062 non-null object
ENGINE_LOAD            1062 non-null object
ENGINE_RPM             1062 non-null object
MAF                    1062 non-null object
SPEED                  1062 non-null object
THROTTLE_POS           1062 non-null object
ENGINE_RUNTIME         1062 non-null object
dtypes: float64(3), int64(1), object(7)
memory usage: 91.3+ KB


In [102]:
# Observe que alugmas colunas possuem valores com unidades
df_vehicle.head()

Unnamed: 0,TIME,LATITUDE,LONGITUDE,ALTITUDE,ENGINE_COOLANT_TEMP,ENGINE_LOAD,ENGINE_RPM,MAF,SPEED,THROTTLE_POS,ENGINE_RUNTIME
0,1494582908210,-5.876252,-35.225795,8.721269,29C,45.1%,1004RPM,6.47g/s,0km/h,16.1%,00:00:25
1,1494582911243,-5.876105,-35.225835,41.425589,30C,44.3%,1015RPM,6.33g/s,0km/h,15.7%,00:00:28
2,1494582912262,-5.876105,-35.225835,41.425589,31C,42.4%,1029RPM,6.72g/s,0km/h,16.1%,00:00:29
3,1494582914284,-5.876158,-35.225838,27.910395,31C,35.7%,957RPM,8.91g/s,0km/h,17.6%,00:00:31
4,1494582916314,-5.876158,-35.225838,27.910395,31C,51.0%,1059RPM,7.61g/s,0km/h,16.9%,00:00:33


In [103]:
# Eliminando caracteres indesejáveis (C, %, km/h, RPM)
df_vehicle['ENGINE_COOLANT_TEMP'] = df_vehicle['ENGINE_COOLANT_TEMP'].map(lambda x: x.rstrip('C'))
df_vehicle['ENGINE_LOAD'] = df_vehicle['ENGINE_LOAD'].map(lambda x: x.rstrip('%'))
df_vehicle['SPEED'] = df_vehicle['SPEED'].map(lambda x: x.rstrip('km/h'))
df_vehicle['THROTTLE_POS'] = df_vehicle['THROTTLE_POS'].map(lambda x: x.rstrip('%'))
df_vehicle['ENGINE_RPM'] = df_vehicle['ENGINE_RPM'].map(lambda x: x.rstrip('RPM'))
df_vehicle['MAF'] = df_vehicle['MAF'].map(lambda x: x.rstrip('g/s'))


In [104]:
# Dado higienizados
df_vehicle.head()

Unnamed: 0,TIME,LATITUDE,LONGITUDE,ALTITUDE,ENGINE_COOLANT_TEMP,ENGINE_LOAD,ENGINE_RPM,MAF,SPEED,THROTTLE_POS,ENGINE_RUNTIME
0,1494582908210,-5.876252,-35.225795,8.721269,29,45.1,1004,6.47,0,16.1,00:00:25
1,1494582911243,-5.876105,-35.225835,41.425589,30,44.3,1015,6.33,0,15.7,00:00:28
2,1494582912262,-5.876105,-35.225835,41.425589,31,42.4,1029,6.72,0,16.1,00:00:29
3,1494582914284,-5.876158,-35.225838,27.910395,31,35.7,957,8.91,0,17.6,00:00:31
4,1494582916314,-5.876158,-35.225838,27.910395,31,51.0,1059,7.61,0,16.9,00:00:33


In [105]:
#cast
df_vehicle['ENGINE_COOLANT_TEMP'] = df_vehicle['ENGINE_COOLANT_TEMP'].astype(np.int64)
df_vehicle['ENGINE_LOAD'] = df_vehicle['ENGINE_LOAD'].astype(np.float64)
df_vehicle['ENGINE_RPM'] = df_vehicle['ENGINE_RPM'].astype(np.int64)
df_vehicle['MAF'] = df_vehicle['MAF'].astype(np.float64)
df_vehicle['SPEED'] = df_vehicle['SPEED'].astype(np.int64)
df_vehicle['THROTTLE_POS'] = df_vehicle['THROTTLE_POS'].astype(np.float64)

In [161]:
# se for necessário salvar a base de dados
# df_vehicle.to_csv('dataset_higienizado.csv')

In [107]:
# Criar uma lista com [[lat,log,propriedade]]
coordinates = []
for i in tqdm(range(len(df_vehicle))):
        coordinates.append([df_vehicle.ix[i,'LATITUDE'], 
                        df_vehicle.ix[i,'LONGITUDE'], 
                        df_vehicle.ix[i,'SPEED']])
    
#create map object
map_speed = folium.Map(
    location = [-5.791659, -35.228385],
    zoom_start= 12
)

# Valor máximo da propriedade com o intuito de normalizar os graficos
max_speed = df_vehicle['SPEED'].max()
print(max_speed)

HeatMap(coordinates,max_val=max_speed).add_to(map_speed)
map_speed

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
100%|██████████| 1062/1062 [00:00<00:00, 50321.98it/s]


80
