# Machine Learning Model (Sprint 4)


## Importando e instalando bibliotecas


In [1]:
%pip install -r ./requirements.txt

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
import joblib
from api.utils.utils import create_folder

Note: you may need to restart the kernel to use updated packages.


## 1. Pré-processamento dos dados.


In [2]:
# Location of the raw vehicle CSV, relative to the notebook's working directory.
data_path = "./data/vehicle_dataset.csv"

# Load the file, discard the "Date" column and convert "Model Year" to integers
# in a single chain. The two non-numeric labels ("<2010" and "Unk") are both
# rewritten to "2009" ahead of the integer cast.
vehicle_data = (
    pd.read_csv(data_path)
    .drop(columns=["Date"])
    .assign(
        **{
            "Model Year": lambda frame: frame["Model Year"]
            .replace({"<2010": "2009", "Unk": "2009"})
            .astype(int)
        }
    )
)

# Last expression of the cell: preview the first rows.
vehicle_data.head()

Unnamed: 0,ZIP Code,Model Year,Fuel,Make,Duty,Vehicles
0,91302,2009,Unk,OTHER/UNK,Unk,490
1,93631,2009,Unk,OTHER/UNK,Unk,414
2,90603,2009,Unk,OTHER/UNK,Unk,504
3,93561,2009,Unk,OTHER/UNK,Unk,778
4,94505,2009,Unk,OTHER/UNK,Unk,387


## 2. Armazenando encoders para uso na API.


In [3]:
# Columns that are converted to integer codes before training.
categorical_columns = ["ZIP Code", "Fuel", "Make", "Duty"]

# One fitted LabelEncoder per column, keyed by column name. The dict is kept so
# the same value -> code mapping can be reapplied outside this notebook.
label_encoders = {
    name: LabelEncoder().fit(vehicle_data[name]) for name in categorical_columns
}

# Overwrite each column with its integer codes.
# NOTE: this mutates vehicle_data in place. Re-running this cell without first
# re-running the loading cell would fit the encoders on the already-encoded
# integers instead of the original values.
for name, encoder in label_encoders.items():
    vehicle_data[name] = encoder.transform(vehicle_data[name])

# Last expression of the cell: preview the encoded rows.
vehicle_data.head()

Unnamed: 0,ZIP Code,Model Year,Fuel,Make,Duty,Vehicles
0,536,2009,9,80,2,490
1,2032,2009,9,80,2,414
2,260,2009,9,80,2,504
3,1984,2009,9,80,2,778
4,2415,2009,9,80,2,387


## 3. Treinando o Modelo.


In [4]:
# The target is the "Vehicles" column; every other column is used as a feature.
y = vehicle_data["Vehicles"]
x = vehicle_data.drop(columns=["Vehicles"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(random_state=42, n_jobs=1).fit(x_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2}")

Mean Squared Error: 34514.58334330119
R2 Score: 0.3917877311578938


## 4. Salvando o modelo e os encoders.


In [5]:
# create_folder is a project helper (api.utils.utils); presumably it ensures the
# output directory exists before anything is written into it — TODO confirm.
output_folder = "models"
create_folder(output_folder)

# Destination files for the trained regressor and the per-column encoders.
model_file = "./models/vehicle_regression_model.joblib"
encoders_file = "./models/label_encoders.joblib"

joblib.dump(model, model_file)
# Kept as the last expression so the cell still displays the written file list.
joblib.dump(label_encoders, encoders_file)

D:\Programmer\Faculdade\Fiap\2° Semestre\Challenge\Sprint 4\IA & ChatBot\Resolução\models


['./models/label_encoders.joblib']