In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle

# Global settings
n_jobs = -1  # Controls parallel processing; -1 means use all processors.
random_state = 40  # Single seed shared by the train/test split and the model.


# URLs of the CSV files in the GitHub repository
url1 = 'https://raw.githubusercontent.com/jsalcedo14/DSA_project/master/dataTrain_carListings.csv'
url2 = 'https://raw.githubusercontent.com/jsalcedo14/DSA_project/master/dataTest_carListings.csv'
# Read the CSV files into pandas DataFrames
data = pd.read_csv(url1)
test = pd.read_csv(url2)


# Encode the categorical variables as integer codes.
# The code -> label lookup of every column is kept in `category_levels`
# (instead of being discarded) so the same encoding can be reproduced later,
# e.g. on `test`, which is loaded above but not encoded in this cell.
cat = ['State','Make','Model']
category_levels = {}
for i in cat:
    idx, codex = pd.factorize(data[i])
    data[i] = idx
    category_levels[i] = codex

# Remove outliers: rows whose Price falls outside the 1.5 * IQR fences
Q1 = data['Price'].quantile(0.25)
Q3 = data['Price'].quantile(0.75)
IQR = Q3 - Q1
BI_Calculado = (Q1 - 1.5 * IQR)  # lower fence
BS_Calculado = (Q3 + 1.5 * IQR)  # upper fence
ubicacion_outliers = (data['Price'] < BI_Calculado) | (data['Price'] > BS_Calculado)
outliers = data[ubicacion_outliers]
# `~` negates the boolean mask (replaces the `== False` comparison)
data = data[~ubicacion_outliers]

# Split the data into training and hold-out sets.
# The shared `random_state` setting is used rather than a second hard-coded 40,
# so changing the seed in one place affects both the split and the model.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(['Price'], axis=1),
    data['Price'],
    test_size=0.33,
    random_state=random_state,
)

# Model hyperparameters
learning_rate = 0.1
max_depth = 10
n_estimators = 300
subsample = 1

# Build the model with the hyperparameters defined above.
# `n_jobs` is now actually forwarded; previously the global setting was unused.
model = xgb.XGBRegressor(
    learning_rate=learning_rate,
    max_depth=max_depth,
    n_estimators=n_estimators,
    subsample=subsample,
    verbosity=0,
    random_state=random_state,
    n_jobs=n_jobs
)

# Fit on the training data
model.fit(X_train, y_train)

# Persist the fitted model to disk
with open("model.pkl", "wb") as pkl_file:
    pickle.dump(model, pkl_file)

In [6]:
from nbconvert import PythonExporter
import nbformat

def notebook_to_python(notebook_path, python_path):
    """
    Export a Jupyter notebook (.ipynb) as a Python script (.py).

    Parameters:
    - notebook_path (str): Path of the notebook to convert.
    - python_path (str): Path of the Python file to write.
    """
    # Parse the notebook file, reading it as nbformat version 4.
    with open(notebook_path, 'r', encoding='utf-8') as src:
        nb_node = nbformat.read(src, as_version=4)

    # The exporter yields a pair; only the generated source text is used.
    script_source, _resources = PythonExporter().from_notebook_node(nb_node)

    # Write the generated source to the destination file.
    with open(python_path, 'w', encoding='utf-8') as dst:
        dst.write(script_source)

# Input notebook and output script locations
notebook_path = 'train.ipynb'
python_path = 'train.py'

# Convert the notebook into a Python script
notebook_to_python(notebook_path=notebook_path, python_path=python_path)
