In [1]:
#!/usr/bin/env python
"""
Main CLI execution file for ML Pipeline
Execute different pipeline steps via command-line arguments.

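Usage:
    python main.py --prepare              # Prepare data only
    python main.py --train                # Prepare data, then train model
    python main.py --evaluate             # Prepare, train, then evaluate model
    python main.py --save                 # Prepare, train, then save model
    python main.py --full                 # Run complete pipeline
    python main.py --full --tune          # Complete pipeline with hyperparameter tuning
    python main.py --load-predict         # Load saved model and predict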
"""

import argparse
import sys
from model_pipeline import *

def prepare_step():
    """Run the data-preparation stage and print a short summary.

    Calls ``prepare_data`` with the dataset path, target column and
    categorical columns read from ``CONFIG``, then prints ``df.shape``,
    the number of feature columns and the total count of missing values.

    Returns:
        The ``(df, feature_cols)`` pair unpacked from ``prepare_data``.

    Any exception raised inside the stage is reported on stdout and the
    process exits with status 1.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("STEP 1: DATA PREPARATION")
    print(rule)

    try:
        # Read the three settings in the order prepare_data expects them.
        settings = [
            CONFIG[key]
            for key in ('DATASET_PATH', 'TARGET_COLUMN', 'CATEGORICAL_COLS')
        ]
        frame, features = prepare_data(*settings)

        print("\n‚úÖ Data preparation completed!")
        print(f"   Dataset shape: {frame.shape}")
        print(f"   Features: {len(features)}")
        print(f"   Missing values: {frame.isnull().sum().sum()}")
    except Exception as exc:
        print(f"\n‚ùå Error during data preparation: {exc!s}")
        sys.exit(1)
    else:
        return frame, features

def train_step(df, feature_cols, tune=False):
    """Split the data, build the preprocessor and fit the model.

    Args:
        df: Dataset passed to ``split_data`` and
            ``get_numeric_categorical_cols``.
        feature_cols: Feature columns forwarded to ``split_data``.
        tune: When truthy the model comes from ``tune_model``; otherwise
            ``train_model`` is called with ``n_neighbors=5``.

    Returns:
        ``(model, X_train, X_test, y_train, y_test, preprocessor)``.

    Any exception raised inside the stage is reported on stdout and the
    process exits with status 1.
    """
    divider = "=" * 70
    print(f"\n{divider}")
    print("STEP 2: MODEL TRAINING")
    print(divider)

    try:
        # The train/test split happens before anything is fitted.
        X_train, X_test, y_train, y_test = split_data(
            df, feature_cols, CONFIG['TARGET_COLUMN']
        )
        print("\nData split:")
        print(f"   Train set: {X_train.shape}")
        print(f"   Test set: {X_test.shape}")

        # The preprocessor is built from the numeric/categorical grouping.
        column_groups = get_numeric_categorical_cols(
            df, CONFIG['CATEGORICAL_COLS']
        )
        numeric_cols, categorical_cols = column_groups
        preprocessor = create_preprocessor(numeric_cols, categorical_cols)

        if not tune:
            print("\nüöÄ Training model...")
            model = train_model(X_train, y_train, preprocessor, n_neighbors=5)
            print("‚úÖ Model training completed!")
        else:
            print("\nüîç Tuning model (this may take a moment)...")
            model = tune_model(X_train, y_train, preprocessor)
            print("‚úÖ Model tuning completed!")
    except Exception as exc:
        print(f"\n‚ùå Error during model training: {exc!s}")
        sys.exit(1)
    else:
        return model, X_train, X_test, y_train, y_test, preprocessor

def evaluate_step(model, X_test, y_test):
    """Score the model on the test split and print the metrics.

    Args:
        model: Fitted model handed to ``evaluate_model``.
        X_test: Test features handed to ``evaluate_model``.
        y_test: Test targets handed to ``evaluate_model``.

    Returns:
        Whatever ``evaluate_model`` returned; the same object is also
        printed through ``print_metrics`` under the label "KNN Model".

    Any exception raised inside the stage is reported on stdout and the
    process exits with status 1.
    """
    bar = "=" * 70
    for banner_line in ("\n" + bar, "STEP 3: MODEL EVALUATION", bar):
        print(banner_line)

    try:
        scores = evaluate_model(model, X_test, y_test)

        print("\nüìä Model Performance Metrics:")
        print_metrics(scores, "KNN Model")
    except Exception as exc:
        print(f"\n‚ùå Error during evaluation: {exc!s}")
        sys.exit(1)
    else:
        return scores

def save_step(model, preprocessor):
    """Persist the model and the preprocessor, then print where they went.

    Args:
        model: Object passed to ``save_model``.
        preprocessor: Object passed to ``save_preprocessor``.

    The destinations are ``CONFIG['MODEL_SAVE_PATH']`` and
    ``CONFIG['PREPROCESSOR_SAVE_PATH']``. Any exception raised while saving
    is reported on stdout and the process exits with status 1.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("STEP 4: SAVING MODEL")
    print(rule)

    try:
        # The model is written first; the preprocessor path is only looked
        # up afterwards, mirroring the order in which failures can surface.
        model_path = CONFIG['MODEL_SAVE_PATH']
        save_model(model, model_path)
        preprocessor_path = CONFIG['PREPROCESSOR_SAVE_PATH']
        save_preprocessor(preprocessor, preprocessor_path)

        print("\n‚úÖ Model saved successfully!")
        print(f"   Model path: {model_path}")
        print(f"   Preprocessor path: {preprocessor_path}")
    except Exception as exc:
        print(f"\n‚ùå Error during saving: {exc!s}")
        sys.exit(1)

def load_predict_step():
    """Load the saved artifacts, predict on the test split and print metrics.

    The model and preprocessor are loaded from the paths in ``CONFIG``, the
    dataset is prepared and split again, ``model.predict`` is run on
    ``X_test`` and the metrics from ``evaluate_model`` are printed under the
    label "Loaded KNN Model".

    Returns:
        ``(y_pred, metrics)``: the predictions from ``model.predict(X_test)``
        and the value returned by ``evaluate_model``. Earlier versions
        returned ``None`` and threw the predictions away.

    Any exception raised inside the stage is reported on stdout and the
    process exits with status 1.
    """
    print("\n" + "="*70)
    print("STEP 5: LOAD MODEL & PREDICT")
    print("="*70)

    try:
        model = load_model(CONFIG['MODEL_SAVE_PATH'])
        # Called only so that a missing or unreadable preprocessor artifact
        # is reported here; the result is not used because prediction below
        # goes through ``model`` alone.
        # NOTE(review): presumably the saved model already embeds the
        # preprocessing — confirm against train_model.
        load_preprocessor(CONFIG['PREPROCESSOR_SAVE_PATH'])

        print(f"\n‚úÖ Model loaded successfully!")
        print(f"   Model: {CONFIG['MODEL_SAVE_PATH']}")
        print(f"   Preprocessor: {CONFIG['PREPROCESSOR_SAVE_PATH']}")

        # Rebuild the test split from the dataset.
        df, feature_cols = prepare_data(
            CONFIG['DATASET_PATH'],
            CONFIG['TARGET_COLUMN'],
            CONFIG['CATEGORICAL_COLS']
        )

        # Only the test half is needed here.
        # NOTE(review): assumes split_data reproduces the training-time
        # split (fixed seed); otherwise X_test may contain rows the model
        # was fitted on — confirm.
        _, X_test, _, y_test = split_data(
            df, feature_cols, CONFIG['TARGET_COLUMN']
        )

        # Make predictions and surface them instead of discarding them.
        y_pred = model.predict(X_test)
        print(f"\n   Predictions generated: {len(y_pred)}")

        # Calculate metrics
        metrics = evaluate_model(model, X_test, y_test)
        print_metrics(metrics, "Loaded KNN Model")

        return y_pred, metrics

    except Exception as e:
        print(f"\n‚ùå Error during load/predict: {str(e)}")
        sys.exit(1)

def full_pipeline_step(tune=False):
    """Run prepare, train, evaluate and save back to back.

    Args:
        tune: Forwarded to ``train_step`` as its ``tune`` keyword.

    Each stage handles its own failures, so this function only reaches the
    closing banner when all four stages returned normally.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("COMPLETE ML PIPELINE")
    print(rule)

    # 1) prepare -> 2) train
    frame, features = prepare_step()
    model, _, X_test, _, y_test, preprocessor = train_step(
        frame, features, tune=tune
    )

    # 3) evaluate (metrics are printed by the step itself) -> 4) save
    evaluate_step(model, X_test, y_test)
    save_step(model, preprocessor)

    print(f"\n{rule}")
    print("‚úÖ PIPELINE COMPLETED SUCCESSFULLY")
    print(rule + "\n")

def _prepare_and_train(tune):
    """Run the preparation and training steps and return train_step's result."""
    df, feature_cols = prepare_step()
    return train_step(df, feature_cols, tune=tune)


def main(argv=None):
    """Parse command-line arguments and run the selected pipeline step.

    Exactly one operation flag (``--prepare``, ``--train``, ``--evaluate``,
    ``--save``, ``--full`` or ``--load-predict``) is required. ``--dataset``
    and ``--model-path`` overwrite the matching ``CONFIG`` entries before the
    step runs. ``--tune`` is forwarded to the training step.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            ``argparse`` read ``sys.argv[1:]``, so command-line behaviour is
            unchanged; passing a list allows calling the CLI from code.

    Exit status:
        1 when a step fails, 130 when the user interrupts the run
        (previously 0, which made an aborted run look successful).
    """

    parser = argparse.ArgumentParser(
        description='ML Pipeline: Data Preparation, Training, Evaluation, Saving',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
EXAMPLES:
  python main.py --prepare              Run data preparation only
  python main.py --train                Run training only
  python main.py --evaluate             Run evaluation only
  python main.py --save                 Save trained model
  python main.py --full                 Run complete pipeline
  python main.py --full --tune          Run pipeline with hyperparameter tuning
  python main.py --load-predict         Load model and make predictions
  
PIPELINE WORKFLOW:
  1. --prepare   : Load and preprocess data
  2. --train     : Train KNN model
  3. --evaluate  : Evaluate model performance
  4. --save      : Save model and preprocessor
  5. --full      : Execute all steps (1-4)
        """
    )

    # Create mutually exclusive group for main operations
    operation = parser.add_mutually_exclusive_group(required=True)

    operation.add_argument(
        '--prepare',
        action='store_true',
        help='Execute data preparation step only'
    )

    operation.add_argument(
        '--train',
        action='store_true',
        help='Execute model training step only'
    )

    operation.add_argument(
        '--evaluate',
        action='store_true',
        help='Execute model evaluation step only'
    )

    operation.add_argument(
        '--save',
        action='store_true',
        help='Execute model saving step'
    )

    operation.add_argument(
        '--full',
        action='store_true',
        help='Execute complete pipeline (prepare ‚Üí train ‚Üí evaluate ‚Üí save)'
    )

    operation.add_argument(
        '--load-predict',
        action='store_true',
        help='Load saved model and make predictions'
    )

    # Optional arguments
    parser.add_argument(
        '--tune',
        action='store_true',
        help='Use hyperparameter tuning during training (slower but better)'
    )

    parser.add_argument(
        '--dataset',
        type=str,
        default=CONFIG['DATASET_PATH'],
        help=f"Path to dataset (default: {CONFIG['DATASET_PATH']})"
    )

    parser.add_argument(
        '--model-path',
        type=str,
        default=CONFIG['MODEL_SAVE_PATH'],
        help=f"Path to save/load model (default: {CONFIG['MODEL_SAVE_PATH']})"
    )

    # Parse arguments (argv=None falls back to sys.argv[1:])
    args = parser.parse_args(argv)

    # Update config if custom paths provided
    if args.dataset:
        CONFIG['DATASET_PATH'] = args.dataset
    if args.model_path:
        CONFIG['MODEL_SAVE_PATH'] = args.model_path

    # Execute based on arguments
    try:
        if args.prepare:
            prepare_step()
            print("\nüíæ To train model, run: python main.py --train")

        elif args.train:
            print("\n‚ö†Ô∏è  Running training requires prepared data.")
            print("   Run 'python main.py --full' for complete pipeline")
            print("   Or run 'python main.py --prepare' first")
            _prepare_and_train(args.tune)
            print("\nüíæ To evaluate, run: python main.py --evaluate")

        elif args.evaluate:
            print("\n‚ö†Ô∏è  Running evaluation requires trained model.")
            print("   Run 'python main.py --full' for complete pipeline")
            print("   Or run 'python main.py --train' first")
            # Nothing is persisted between invocations, so the model is
            # retrained here before it is evaluated.
            model, _, X_test, _, y_test, _ = _prepare_and_train(args.tune)
            evaluate_step(model, X_test, y_test)
            print("\nüíæ To save model, run: python main.py --save")

        elif args.save:
            print("\n‚ö†Ô∏è  Running save requires trained model.")
            print("   Run 'python main.py --full' for complete pipeline")
            print("   Or run 'python main.py --train' first")
            model, _, _, _, _, preprocessor = _prepare_and_train(args.tune)
            save_step(model, preprocessor)

        elif args.full:
            full_pipeline_step(tune=args.tune)

        elif args.load_predict:
            load_predict_step()

    except KeyboardInterrupt:
        print("\n\n‚ö†Ô∏è  Pipeline interrupted by user")
        # 128 + SIGINT(2): an aborted run must not look like a success to
        # shells or schedulers chaining on the exit status.
        sys.exit(130)
    except Exception as e:
        print(f"\n‚ùå Pipeline failed: {str(e)}")
        sys.exit(1)

# Run the CLI only when executed as a script, not when imported.
if "__main__" == __name__:
    main()


ModuleNotFoundError: No module named 'model_pipeline'