# Prevendo Consumo de Carros Elétricos

Link dos dados: https://data.mendeley.com/datasets/tb9yrptydn/2

## Importando Dados

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_excel('dados/FEV-data-Excel.xlsx')
df.head()

Unnamed: 0,Car full name,Make,Model,Minimal price (gross) [PLN],Engine power [KM],Maximum torque [Nm],Type of brakes,Drive type,Battery capacity [kWh],Range (WLTP) [km],...,Permissable gross weight [kg],Maximum load capacity [kg],Number of seats,Number of doors,Tire size [in],Maximum speed [kph],Boot capacity (VDA) [l],Acceleration 0-100 kph [s],Maximum DC charging power [kW],mean - Energy consumption [kWh/100 km]
0,Audi e-tron 55 quattro,Audi,e-tron 55 quattro,345700,360,664,disc (front + rear),4WD,95.0,438,...,3130.0,640.0,5,5,19,200,660.0,5.7,150,24.45
1,Audi e-tron 50 quattro,Audi,e-tron 50 quattro,308400,313,540,disc (front + rear),4WD,71.0,340,...,3040.0,670.0,5,5,19,190,660.0,6.8,150,23.8
2,Audi e-tron S quattro,Audi,e-tron S quattro,414900,503,973,disc (front + rear),4WD,95.0,364,...,3130.0,565.0,5,5,20,210,660.0,4.5,150,27.55
3,Audi e-tron Sportback 50 quattro,Audi,e-tron Sportback 50 quattro,319700,313,540,disc (front + rear),4WD,71.0,346,...,3040.0,640.0,5,5,19,190,615.0,6.8,150,23.3
4,Audi e-tron Sportback 55 quattro,Audi,e-tron Sportback 55 quattro,357000,360,664,disc (front + rear),4WD,95.0,447,...,3130.0,670.0,5,5,19,200,615.0,5.7,150,23.85


In [3]:
df.isna().sum()

Car full name                             0
Make                                      0
Model                                     0
Minimal price (gross) [PLN]               0
Engine power [KM]                         0
Maximum torque [Nm]                       0
Type of brakes                            1
Drive type                                0
Battery capacity [kWh]                    0
Range (WLTP) [km]                         0
Wheelbase [cm]                            0
Length [cm]                               0
Width [cm]                                0
Height [cm]                               0
Minimal empty weight [kg]                 0
Permissable gross weight [kg]             8
Maximum load capacity [kg]                8
Number of seats                           0
Number of doors                           0
Tire size [in]                            0
Maximum speed [kph]                       0
Boot capacity (VDA) [l]                   1
Acceleration 0-100 kph [s]      

## Data Wrangling

In [4]:
# Buscando o valor mais frequente da coluna Type of brakes
top_brakes = df['Type of brakes'].value_counts().reset_index()
top_brakes = top_brakes.iloc[0, 0]

# Buscando a valor médio da coluna Boot capacity (VDA) [l]
boot_capacity_mean = df['Boot capacity (VDA) [l]'].mean()

# Fazendo substituição dos valores NA
df['Type of brakes'].fillna(top_brakes, inplace = True) # Substituindo pelo valor mais frequente
df['Boot capacity (VDA) [l]'].fillna(boot_capacity_mean, inplace = True) # Substituindo pela média

# Aplicando One Hot Encoding nas colunas categoricas
df = pd.get_dummies(df, columns = ['Type of brakes', 'Drive type'], dtype = int, prefix = '', prefix_sep = '')
df = pd.get_dummies(df, columns = ['Number of seats', 'Number of doors'], dtype = int, prefix_sep = ' ')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 35 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Car full name                           53 non-null     object 
 1   Make                                    53 non-null     object 
 2   Model                                   53 non-null     object 
 3   Minimal price (gross) [PLN]             53 non-null     int64  
 4   Engine power [KM]                       53 non-null     int64  
 5   Maximum torque [Nm]                     53 non-null     int64  
 6   Battery capacity [kWh]                  53 non-null     float64
 7   Range (WLTP) [km]                       53 non-null     int64  
 8   Wheelbase [cm]                          53 non-null     float64
 9   Length [cm]                             53 non-null     float64
 10  Width [cm]                              53 non-null     float64


In [7]:
df['Boot capacity (VDA) [l]'].value_counts()

Boot capacity (VDA) [l]
660.0    3
260.0    3
615.0    3
425.0    3
447.0    2
338.0    2
250.0    2
435.0    2
745.0    2
315.0    2
451.0    2
857.0    2
332.0    2
385.0    2
171.0    2
350.0    2
488.0    2
185.0    1
543.0    1
603.0    1
267.0    1
434.0    1
311.0    1
310.0    1
211.0    1
500.0    1
656.0    1
357.0    1
380.0    1
510.0    1
870.0    1
Name: count, dtype: int64