In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [5]:
# Read the dataset from the CSV file and print its first rows as a quick sanity check.
df_selected_clean = pd.read_csv('../Data/df_selected_clean.csv')
preview_rows = df_selected_clean.head()
print(preview_rows)

    price  base_price  available_quantity  condition_binary
0    80.0        80.0                   1                 1
1  2650.0      2650.0                   1                 0
2    60.0        60.0                   1                 0
3   580.0       580.0                   1                 1
4    30.0        30.0                   1                 0


In [6]:
# Target (y) is the `condition_binary` column; features (X) are every other column.
y = df_selected_clean['condition_binary']
X = df_selected_clean.drop('condition_binary', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [9]:
# Build a logistic regression with balanced class weights and train it on the
# training split. `fit` returns the estimator itself, so both steps are chained.
model = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = model.predict(X_test)


In [10]:
# Compute every test-set metric for the logistic regression up front ...
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# ... then print them: accuracy, confusion matrix, per-class report.
print("Precisión del modelo:", accuracy)

print("Matriz de confusión:")
print(conf_matrix)

print("\nReporte de clasificación:")
print(report)


Precisión del modelo: 0.65315
Matriz de confusión:
[[9158  125]
 [6812 3905]]

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.57      0.99      0.73      9283
           1       0.97      0.36      0.53     10717

    accuracy                           0.65     20000
   macro avg       0.77      0.68      0.63     20000
weighted avg       0.79      0.65      0.62     20000



In [12]:
from sklearn.model_selection import GridSearchCV

# Candidate values for C (inverse regularization strength in scikit-learn).
# The previous grid [0.01, 0.1, 1, 10, 100] selected its smallest value (C=0.01),
# i.e. the optimum sat on the grid boundary, so the grid is extended towards
# stronger regularization to check whether an even smaller C does better.
param_grid = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated grid search over C; n_jobs=-1 runs the independent
# fits in parallel on all available CPU cores.
grid_search = GridSearchCV(
    LogisticRegression(class_weight='balanced'),
    param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Mejor parámetro:", grid_search.best_params_)


Mejor parámetro: {'C': 0.01}


In [11]:
from sklearn.ensemble import RandomForestClassifier

# Train a random forest with balanced class weights and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained.
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred_rf = rf_model.predict(X_test)

# Compute the test-set metrics first, then print them in a fixed order:
# accuracy, confusion matrix, per-class report.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_conf_matrix = confusion_matrix(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("Precisión del modelo Random Forest:", rf_accuracy)
print("Matriz de confusión:")
print(rf_conf_matrix)
print("\nReporte de clasificación:")
print(rf_report)


Precisión del modelo Random Forest: 0.78625
Matriz de confusión:
[[8283 1000]
 [3275 7442]]

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.72      0.89      0.79      9283
           1       0.88      0.69      0.78     10717

    accuracy                           0.79     20000
   macro avg       0.80      0.79      0.79     20000
weighted avg       0.81      0.79      0.79     20000



In [13]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the random forest: 3 * 3 * 3 * 3 = 81 candidates.
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 10, 20],
    'min_samples_leaf': [1, 2, 5]
}

# With cv=5 this amounts to 405 forest fits plus the final refit. Run serially,
# the recorded execution of this cell was interrupted before finishing, so the
# fits are spread over all CPU cores (n_jobs=-1) and verbose=1 reports progress.
# NOTE(review): if the search is still too slow, consider a smaller grid or
# RandomizedSearchCV with a bounded number of iterations.
grid_search_rf = GridSearchCV(
    RandomForestClassifier(class_weight='balanced', random_state=42),
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search_rf.fit(X_train, y_train)

print("Mejor parámetro Random Forest:", grid_search_rf.best_params_)

KeyboardInterrupt: 

In [14]:
# Print the fitted random forest's importance score for each training column,
# one "name: value" line per feature, in column order.
feature_importances = rf_model.feature_importances_
for position, column in enumerate(X_train.columns):
    print(f"{column}: {feature_importances[position]}")


price: 0.22438410471784168
base_price: 0.22655385788026983
available_quantity: 0.5490620374018885
