In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fictitious sample data: fuel type, age, mileage and price (the target).
data = {
    "Combustivel": [
        "Gasolina", "Diesel", "Etanol",
        "Gasolina", "Diesel", "Etanol",
        "Gasolina", "Diesel", "Etanol",
    ],
    "Idade": [3, 5, 2, 8, 10, 1, 4, 6, 2],
    "Quilometragem": [30000, 50000, 20000, 80000, 120000, 15000, 40000, 60000, 25000],
    "Preco": [40000, 25000, 50000, 18000, 12000, 55000, 35000, 20000, 48000],
}
df = pd.DataFrame(data)

# Column groups: each group is routed to its own transformer below.
categorical_features = ["Combustivel"]
numerical_features = ["Idade", "Quilometragem"]

# The price is the dependent variable; every other column is a predictor.
y = df["Preco"]
X = df[categorical_features + numerical_features]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible. With only 9 rows the held-out set is tiny, so the score
# printed at the end is a rough indication at best.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Per-group preprocessing: standardise the numeric columns and one-hot encode
# the categorical one (categories not seen during fit are ignored rather than
# raising at predict time).
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, numerical_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Preprocessing and the linear model are chained so that both are fitted on
# the training rows only.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# Fit on the training split, then predict the held-out rows.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Report the mean squared error on the held-out rows.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Erro Quadrático Médio (MSE): {mse:.2f}")


Erro Quadrático Médio (MSE): 154178359.10
