In [1]:
import sys
import os

sys.path.append(os.path.abspath('..'))

from src.models.train import ModelTrainer
from src.models.evaluate import ModelEvaluator
from src.monitoring.experiment_config import MLFLOW_CONFIG
from src.data.preprocessing import DataPreprocessor
import pandas as pd

df = pd.read_csv('../data/raw/winequality-red.csv')
X = df.drop(columns=['quality'])
y = df['quality']

dp = DataPreprocessor()
X_train, X_test, y_train, y_test = dp.fit_transform(X, y)

Config: Project root detected at C:\Users\илья\Desktop\pet_projects\red-wine-quality-prediction\src
Config: Using MLflow directory at C:\Users\илья\Desktop\pet_projects\red-wine-quality-prediction\src/mlflow/mlruns


In [2]:
trainer = ModelTrainer(
    cv_folds=5,
    use_mlflow=True,
    mlflow_config=MLFLOW_CONFIG
)

Project root: C:\Users\илья\Desktop\pet_projects\red-wine-quality-prediction\src
MLflow directory: C:\Users\илья\Desktop\pet_projects\red-wine-quality-prediction\src\mlflow
Auto-configured tracking URI: file:///C:/Users/илья/Desktop/pet_projects/red-wine-quality-prediction/src/mlflow/mlruns
Final tracking URI: file:///C:/Users/илья/Desktop/pet_projects/red-wine-quality-prediction/src/mlflow/mlruns
Successfully set tracking URI
Current MLflow tracking URI: file:///C:/Users/илья/Desktop/pet_projects/red-wine-quality-prediction/src/mlflow/mlruns
Experiment 'Wine_Quality_Experiment' found (ID: 866552911109440203)
Active experiment set to: Wine_Quality_Experiment
✅ MLflow connection successful. Found 1 experiments.


  return FileStore(store_uri, store_uri)


In [3]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression


models_to_train = [
    (RandomForestClassifier(n_estimators=100, random_state=42), "RandomForest"),
    (LogisticRegression(max_iter=1000, random_state=42), "LogisticRegression"),
    (GradientBoostingClassifier(n_estimators=100, random_state=42), "GradientBoosting")
]

for model, model_name in models_to_train:
    print(f"\n{'='*50}")
    print(f"Training {model_name}")
    print('='*50)
    
    trainer.train(
        model=model,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
        params={k: v for k, v in model.get_params().items() 
                if not k.startswith('base_')},
        use_cv=True,
    )
    
    trainer.save(f"../saved-models/{model_name}.pkl")

evaluator = ModelEvaluator(use_mlflow=True)

best_model = trainer.model 

metrics = evaluator.evaluate_classification(
    model=best_model,
    X_test=X_test,
    y_test=y_test,
    run_name="detailed_evaluation",
    tags={"phase": "final_evaluation"},
    average='weighted',
    plot_confusion_matrix=True,
    plot_roc_curve=True
)

print("\n" + "="*50)
print("Final Model Evaluation Results:")
print("="*50)
for key, value in metrics.items():
    if isinstance(value, (int, float)):
        print(f"{key}: {value:.4f}")

print("\n" + "="*50)
print("Comparing all trained models:")
print("="*50)


Training RandomForest




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]


Training LogisticRegression




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]


Training GradientBoosting




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Feature importance extracted from model (shape: (11,))

Final Model Evaluation Results:
accuracy: 0.9062
precision: 0.8973
recall: 0.9062
f1: 0.8953
roc_auc: 0.8963

Comparing all trained models:
