In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pickle

# Carregar os dados
url = "https://raw.githubusercontent.com/caioooooo3/Desafio_Lighthouse/main/teste_indicium_precificacao.csv"
data = pd.read_csv(url)

# Pré-processamento
data['reviews_por_mes'].fillna(0, inplace=True)
data = data.drop(columns=['nome', 'host_name', 'ultima_review'])
data = pd.get_dummies(data, columns=['room_type', 'bairro_group', 'bairro'], drop_first=True)

# Separar variáveis preditoras e alvo
X = data.drop(columns=['id', 'host_id', 'price'])
y = data['price']

# Divisão dos dados
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinamento do modelo
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Avaliação do modelo
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}")

# Salvar modelo treinado
with open('modelo_precificacao.pkl', 'wb') as file:
    pickle.dump(model, file)

print("Modelo salvo como 'modelo_precificacao.pkl'!")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['reviews_por_mes'].fillna(0, inplace=True)


MAE: 66.76, RMSE: 233.93
Modelo salvo como 'modelo_precificacao.pkl'!
