# Amex Pipeline: Staged Execution
This notebook allows you to run the Amex pipeline in stages: data loading, cleaning, feature engineering, EDA, feature selection, model training, validation, and submission generation.

In [1]:
import os
import pandas as pd
import numpy as np
from data.data_loader import load_all_data
from data.data_cleaning import clean_all_data_advanced
from data.advanced_feature_engineering import create_full_feature_set_advanced
from eda.exploratory_analysis import (
    plot_target_distribution, plot_missing_values, plot_feature_distributions, 
    plot_correlation_heatmap, plot_new_feature_analysis
)
from utils.metrics import map7_from_dataframe
from utils.submission import generate_submission
import warnings
warnings.filterwarnings('ignore')

def validate_pipeline_data(data, stage_name):
    """Print a data-quality summary for one pipeline stage and report whether it is clean.

    Args:
        data: DataFrame to inspect.
        stage_name: Label inserted into the printed section header.

    Returns:
        True when the frame holds no NaN and no infinite values, False otherwise.
    """
    print(f"\n=== {stage_name} Validation ===")
    print(f"Shape: {data.shape}")

    # Missing values are counted across every column, whatever its dtype.
    total_nan = data.isna().sum().sum()
    print(f"NaN values: {total_nan}")

    # The infinity test is restricted to the numeric columns.
    numeric_part = data[data.select_dtypes(include=[np.number]).columns]
    total_inf = np.isinf(numeric_part).sum().sum()
    print(f"Infinite values: {total_inf}")

    # memory_usage reports bytes; two divisions by 1024 convert to MB.
    bytes_used = data.memory_usage(deep=True).sum()
    print(f"Memory usage: {bytes_used / 1024 / 1024:.1f} MB")

    is_clean = not (total_nan > 0 or total_inf > 0)
    if is_clean:
        print("✅ Data validation passed")
        return True

    print("⚠️  Data quality issues detected!")
    return False

In [2]:
# Stage 1: load the raw splits and report their shape and quality
print("=== STAGE 1: DATA LOADING ===")
try:
    data = load_all_data()
    print("✅ Data loaded successfully")
    for _label, _split in (("Train", "train"), ("Test", "test")):
        print(f"{_label} shape: {data[_split].shape}")

    # The validator's return value is ignored here: issues in the raw train
    # split are printed but do not stop the notebook.
    validate_pipeline_data(data['train'], "Raw Train Data")

except Exception as e:
    # Report the failure, then re-raise so later stages never run without data.
    print(f"❌ Data loading failed: {e}")
    raise

=== STAGE 1: DATA LOADING ===
✅ Data loaded successfully
Train shape: (770164, 372)
Test shape: (369301, 371)

=== Raw Train Data Validation ===
Shape: (770164, 372)
NaN values: 68296406
Infinite values: 0
Memory usage: 1084.8 MB
⚠️  Data quality issues detected!


In [3]:
# Stage 2: Advanced cleaning, with a zero-fill safety net and a basic-cleaning fallback
print("\n=== STAGE 2: ADVANCED DATA CLEANING ===")
try:
    cleaned_data = clean_all_data_advanced(data)
    print("✅ Advanced cleaning completed")
    print(f"Cleaned train shape: {cleaned_data['train'].shape}")

    # Validate cleaned data
    is_valid = validate_pipeline_data(cleaned_data['train'], "Cleaned Train Data")

    if not is_valid:
        print("⚠️  Performing emergency data cleanup...")
        # Resolve the numeric columns separately for each split. The train
        # split has a column the test split lacks (the loaded shapes differ by
        # one column), so indexing test with train's column list raises
        # KeyError -- which the handler below would turn into a full fallback
        # that throws away the advanced cleaning result.
        for split_name in ('train', 'test'):
            split_frame = cleaned_data[split_name]
            numeric_cols = split_frame.select_dtypes(include=[np.number]).columns
            # Same order as before: NaN -> 0 first, then +/-inf -> 0.
            split_frame[numeric_cols] = (
                split_frame[numeric_cols].fillna(0).replace([np.inf, -np.inf], 0)
            )

        print("✅ Emergency cleanup completed")
        validate_pipeline_data(cleaned_data['train'], "Emergency Cleaned Data")

except Exception as e:
    # Deliberate best-effort: any failure in advanced cleaning degrades to a
    # plain zero-fill of the raw splits so that later stages can still run.
    print(f"❌ Data cleaning failed: {e}")
    print("Falling back to basic cleaning...")

    # Emergency fallback
    cleaned_data = {
        'train': data['train'].fillna(0),
        'test': data['test'].fillna(0)
    }
    print("✅ Basic cleaning completed as fallback")



=== STAGE 2: ADVANCED DATA CLEANING ===
Starting robust advanced data cleaning pipeline...
Starting robust advanced data cleaning...
Starting robust advanced data cleaning...
Removed 0 duplicate rows
Removed 0 duplicate rows
Creating focused customer behavioral features with priority features...
Creating focused customer behavioral features with priority features...
Creating priority customer aggregations...
Creating priority customer aggregations...
Created customer aggregations for 7 priority features
Creating customer segments with priority features...
Created customer aggregations for 7 priority features
Creating customer segments with priority features...
Created 3 customer segments with 8 priority features
Created 3 customer segments with 8 priority features
Performing focused imputation with priority features...
Performing focused imputation with priority features...
Phase 1: Priority features imputation...
Phase 2: Simplified imputation for remaining features...
Applying focus

In [4]:
# Stage 3: Advanced feature engineering with a cleaned-data fallback
print("\n=== STAGE 3: ADVANCED FEATURE ENGINEERING ===")
try:
    # Validate input data before feature engineering
    print("Validating input data...")
    input_valid = validate_pipeline_data(cleaned_data['train'], "Pre-Feature Engineering")

    if not input_valid:
        print("⚠️  Input data has issues, performing pre-processing cleanup...")
        # Both splits are passed to the feature-engineering call below, so both
        # are repaired here. Numeric columns are resolved per split because the
        # two frames do not share an identical column set.
        for split_name in ('train', 'test'):
            split_frame = cleaned_data[split_name]
            numeric_cols = split_frame.select_dtypes(include=[np.number]).columns
            split_frame[numeric_cols] = (
                split_frame[numeric_cols].fillna(0).replace([np.inf, -np.inf], 0)
            )

    print("Starting feature engineering...")
    train_engineered, selected_features = create_full_feature_set_advanced(cleaned_data['train'])
    # The feature list returned for the test split is discarded; the list
    # derived from the train split is the one used downstream.
    test_engineered, _ = create_full_feature_set_advanced(cleaned_data['test'])

    print("✅ Feature engineering completed")
    print(f"Final train shape: {train_engineered.shape}")
    print(f"Selected features: {len(selected_features)}")

    validate_pipeline_data(train_engineered, "Engineered Train Data")

    print("\n=== Feature Engineering Quality Report ===")
    numeric_cols = train_engineered.select_dtypes(include=[np.number]).columns
    # Count cells still holding the literal -999 value in numeric columns.
    missing_indicator_count = (train_engineered[numeric_cols] == -999).sum().sum()
    print(f"Remaining -999 values: {missing_indicator_count}")
    print(f"Data types: {train_engineered.dtypes.value_counts().to_dict()}")

    if selected_features:
        print("\nTop 10 selected features:")
        for i, feature in enumerate(selected_features[:10]):
            print(f"{i+1:2d}. {feature}")

except Exception as e:
    # Deliberate best-effort: continue with the cleaned frames and a simple
    # name-based feature list when feature engineering fails.
    print(f"❌ Feature engineering failed: {e}")
    print("Using original cleaned data without advanced features...")

    train_engineered = cleaned_data['train'].copy()
    test_engineered = cleaned_data['test'].copy()
    # Fallback feature list: the first 50 columns whose name starts with 'f'.
    selected_features = [col for col in train_engineered.columns if col.startswith('f')][:50]
    print(f"✅ Using {len(selected_features)} basic features as fallback")



=== STAGE 3: ADVANCED FEATURE ENGINEERING ===
Validating input data...

=== Pre-Feature Engineering Validation ===
Shape: (770164, 380)
NaN values: 0
NaN values: 0
Infinite values: 0
Infinite values: 0
Memory usage: 1815.9 MB
✅ Data validation passed
Starting feature engineering...
Starting advanced feature engineering...

=== Data Quality Check: Initial Input ===
Memory usage: 1815.9 MB
✅ Data validation passed
Starting feature engineering...
Starting advanced feature engineering...

=== Data Quality Check: Initial Input ===
Memory usage: 1815.9 MB
✓ Data quality check passed for Initial Input
Creating interaction features...
Created 2 interaction features

=== Data Quality Check: After Interaction Features ===
Memory usage: 1815.9 MB
✓ Data quality check passed for Initial Input
Creating interaction features...
Created 2 interaction features

=== Data Quality Check: After Interaction Features ===
Memory usage: 1821.8 MB
✓ Data quality check passed for After Interaction Features
Crea

In [9]:
# Stage 4: Final validation and summary
print("\n=== STAGE 4: FINAL VALIDATION ===")

def final_pipeline_validation_fixed(train_data, test_data, selected_features):
    """Check that the engineered train/test frames are ready for modelling.

    Prints the frame shapes, the test columns and the submission id columns
    found in the test frame, then collects issues for: selected features absent
    from either frame, NaN values in the usable features, and infinite values
    in the numeric columns shared by both frames (excluding 'y').

    Args:
        train_data: Engineered training DataFrame.
        test_data: Engineered test DataFrame.
        selected_features: Iterable of candidate feature names.

    Returns:
        Tuple (passed, available_features): passed is True when no issue was
        recorded; available_features lists the selected features present in
        both frames, in their original order.
    """
    print("\n=== FINAL PIPELINE VALIDATION (FIXED) ===")

    print(f"Train data shape: {train_data.shape}")
    print(f"Test data shape: {test_data.shape}")
    print(f"Test data columns: {test_data.columns.tolist()}")

    # Submission id columns: a missing one is reported but not recorded as an issue.
    id_columns = ['id1', 'id2', 'id3', 'id5']
    present_ids = [name for name in id_columns if name in test_data.columns]
    absent_ids = [name for name in id_columns if name not in test_data.columns]

    print(f"Available submission columns: {present_ids}")
    if absent_ids:
        print(f"⚠️ Missing submission columns: {absent_ids}")
        print("Will create default values for missing columns")

    # Single pass over the candidates: usable features vs. recorded problems.
    problems = []
    usable = []
    for name in selected_features:
        if name not in train_data.columns or name not in test_data.columns:
            problems.append(f"Feature {name} missing in train or test data")
            continue
        usable.append(name)

    print(f"Available features for modeling: {len(usable)}")

    # NaN check is limited to the usable features.
    nan_in_train = train_data[usable].isna().sum().sum()
    nan_in_test = test_data[usable].isna().sum().sum()
    if nan_in_train > 0 or nan_in_test > 0:
        problems.append(f"NaN values found - Train: {nan_in_train}, Test: {nan_in_test}")

    # Infinity check covers numeric columns present in both frames, target excluded.
    numeric_in_test = test_data.select_dtypes(include=[np.number]).columns
    shared_numeric = [
        name
        for name in train_data.select_dtypes(include=[np.number]).columns
        if name in numeric_in_test and name != 'y'
    ]
    if shared_numeric:
        inf_in_train = np.isinf(train_data[shared_numeric]).sum().sum()
        inf_in_test = np.isinf(test_data[shared_numeric]).sum().sum()
        if inf_in_train > 0 or inf_in_test > 0:
            problems.append(f"Infinite values found - Train: {inf_in_train}, Test: {inf_in_test}")

    if not problems:
        print("\n✅ ALL VALIDATION CHECKS PASSED")
        return True, usable

    print("\n⚠️ VALIDATION ISSUES DETECTED:")
    for number, text in enumerate(problems, start=1):
        print(f"{number}. {text}")
    return False, usable

# Run the fixed validation
validation_passed, final_features = final_pipeline_validation_fixed(
    train_engineered, test_engineered, selected_features
)

if not validation_passed:
    print("\n🔧 APPLYING FINAL FIXES...")
    # Only fix numeric columns present in both train and test; the target 'y'
    # is excluded so it is never rewritten.
    train_numeric_cols = train_engineered.select_dtypes(include=[np.number]).columns
    test_numeric_cols = test_engineered.select_dtypes(include=[np.number]).columns
    common_numeric_cols = [col for col in train_numeric_cols if col in test_numeric_cols and col != 'y']
    # NaN -> 0 first, then +/-inf -> 0, applied identically to both splits.
    train_engineered[common_numeric_cols] = train_engineered[common_numeric_cols].fillna(0).replace([np.inf, -np.inf], 0)
    test_engineered[common_numeric_cols] = test_engineered[common_numeric_cols].fillna(0).replace([np.inf, -np.inf], 0)

    # Re-validate with the same validator used for the first pass. Calling the
    # undefined name `final_pipeline_validation` here raised NameError, which
    # made the whole fix path unusable.
    validation_passed, final_features = final_pipeline_validation_fixed(
        train_engineered, test_engineered, selected_features
    )

print("\n=== PIPELINE SUMMARY ===")
print(f"Status: {'✅ READY FOR MODEL TRAINING' if validation_passed else '❌ ISSUES REMAIN'}")
print(f"Final train shape: {train_engineered.shape}")
print(f"Final test shape: {test_engineered.shape}")
print(f"Features for modeling: {len(final_features)}")
print(f"Memory usage: {(train_engineered.memory_usage(deep=True).sum() / 1024 / 1024):.1f} MB")



=== STAGE 4: FINAL VALIDATION ===

=== FINAL PIPELINE VALIDATION (FIXED) ===
Train data shape: (770164, 386)
Test data shape: (369301, 385)
Test data columns: ['id1', 'id2', 'id3', 'id4', 'id5', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28_x', 'f29_x', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59', 'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69', 'f70', 'f71', 'f72', 'f73', 'f74', 'f75', 'f76', 'f77', 'f78', 'f79', 'f80', 'f81', 'f82', 'f83', 'f84', 'f85', 'f86', 'f87', 'f88', 'f89', 'f90', 'f91', 'f92', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98', 'f99', 'f100', 'f101', 'f102', 'f103', 'f104', 'f105', 'f106', 'f107', 'f108', 'f109', 'f110', 'f111', 'f112', 'f113', 'f11

In [10]:
# Stage 5: Model Training (if validation passed)
print("\n=== STAGE 5: MODEL TRAINING ===")

# IMPORTANT: Only use train data for model selection/validation. Test data is for final predictions only.
if validation_passed and len(final_features) > 0:
    try:
        from sklearn.model_selection import train_test_split
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.metrics import roc_auc_score, classification_report
        from sklearn.preprocessing import LabelEncoder

        X = train_engineered[final_features].copy()
        y = train_engineered['y']

        # Object-dtype features are label-encoded; the fitted encoders are kept
        # so the test split can reuse exactly the same mapping.
        categorical_cols = X.select_dtypes(include=['object']).columns
        label_encoders = {}
        # Most frequent training category per column, used as the replacement
        # for categories that appear only in the test split.
        fallback_categories = {}
        if len(categorical_cols) > 0:
            print(f"Encoding {len(categorical_cols)} categorical features with consistent mapping...")
            for col in categorical_cols:
                le = LabelEncoder()
                train_values = X[col].astype(str)
                # mode() returns its values sorted, so ties resolve deterministically.
                fallback_categories[col] = train_values.mode().iloc[0]
                X[col] = le.fit_transform(train_values)
                label_encoders[col] = le

        # Split only the training data for validation
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print(f"Training set: {X_train.shape}")
        print(f"Validation set: {X_val.shape}")

        print("Training Random Forest model...")
        model = RandomForestClassifier(
            n_estimators=200,
            max_depth=12,
            min_samples_split=5,
            min_samples_leaf=2,
            random_state=42,
            n_jobs=-1
        )

        model.fit(X_train, y_train)

        # Column 1 of predict_proba is the probability of the second class.
        val_pred = model.predict_proba(X_val)[:, 1]
        auc_score = roc_auc_score(y_val, val_pred)

        print("\n✅ Model training completed!")
        print(f"Validation AUC: {auc_score:.4f}")

        feature_importance = pd.DataFrame({
            'feature': final_features,
            'importance': model.feature_importances_
        }).sort_values('importance', ascending=False)

        print("\nTop 10 most important features:")
        print(feature_importance.head(10))

        # --- FINAL PREDICTION ON TEST DATA ---
        print("\n=== FINAL PREDICTION ON TEST DATA ===")
        X_test = test_engineered[final_features].copy()

        # Apply the training-time encoders to the test split.
        for col in categorical_cols:
            le = label_encoders[col]
            test_values = X_test[col].astype(str)

            # LabelEncoder.transform rejects labels it was not fitted on, so
            # unseen test categories are replaced before encoding.
            unseen_mask = ~test_values.isin(le.classes_)
            if unseen_mask.any():
                print(f"Warning: Found {test_values[unseen_mask].nunique()} unseen categories in {col}")
                # Use the genuinely most frequent training category rather than
                # le.classes_[0], which is only the alphabetically first class.
                test_values = test_values.mask(unseen_mask, fallback_categories[col])

            X_test[col] = le.transform(test_values)

        test_pred = model.predict_proba(X_test)[:, 1]
        print(f"Test predictions generated. Shape: {test_pred.shape}")
        print(f"Prediction range: [{test_pred.min():.4f}, {test_pred.max():.4f}]")

        print("\n🎯 Use these predictions for Kaggle/test set submission only. Do not use for validation!")

    except Exception as e:
        # Keep the notebook running, but print the full traceback for debugging.
        print(f"❌ Model training failed: {e}")
        print("Pipeline completed data preparation successfully, but model training needs debugging.")
        import traceback
        traceback.print_exc()

else:
    print("⚠️ Skipping model training due to validation issues.")
    print("Focus on fixing data quality issues first.")

print("\n🎉 PIPELINE EXECUTION COMPLETED!")


=== STAGE 5: MODEL TRAINING ===
Encoding 4 categorical features with consistent mapping...
Encoding 4 categorical features with consistent mapping...
Training set: (616131, 100)
Validation set: (154033, 100)
Training Random Forest model...
Training set: (616131, 100)
Validation set: (154033, 100)
Training Random Forest model...

✅ Model training completed!
Validation AUC: 0.9241

Top 10 most important features:
               feature  importance
85                f366    0.093896
87                f132    0.077656
96                f134    0.054268
75              f219_y    0.047381
91  ctr_merchant_offer    0.046218
89                f138    0.043298
98                f354    0.038760
86                f137    0.037122
45                f206    0.036689
44                f207    0.035889

=== FINAL PREDICTION ON TEST DATA ===

✅ Model training completed!
Validation AUC: 0.9241

Top 10 most important features:
               feature  importance
85                f366    0.093896
87   

In [12]:
# --- GENERATE SUBMISSION FILE ---
print("\n=== GENERATING SUBMISSION FILE ===")

def validate_submission_format(submission_df, required_cols=['id1', 'id2', 'id3', 'id5', 'pred']):
    """Check a submission frame for required columns, nulls and prediction range.

    Args:
        submission_df: DataFrame about to be written as a submission.
        required_cols: Column names that must be present.

    Returns:
        True when no problem was found; False otherwise (each problem is printed).
    """
    print("Validating submission format...")

    problems = []

    # Required columns that are absent from the frame.
    absent = [name for name in required_cols if name not in submission_df.columns]
    if absent:
        problems.append(f"Missing columns: {absent}")

    # Null counts per column; any non-zero total is reported with the breakdown.
    nulls_per_column = submission_df.isnull().sum()
    if nulls_per_column.sum() > 0:
        problems.append(f"Null values found: {nulls_per_column.to_dict()}")

    # Predictions must lie inside [0, 1].
    if 'pred' in submission_df.columns:
        lowest = submission_df['pred'].min()
        highest = submission_df['pred'].max()
        if lowest < 0 or highest > 1:
            problems.append(f"Predictions out of range [0,1]: min={lowest:.4f}, max={highest:.4f}")

    if not problems:
        print("✅ Submission format validation passed")
        return True

    print("⚠️ Submission validation issues:")
    for text in problems:
        print(f"  - {text}")
    return False

def generate_map7_submission(test_data, predictions, output_path="submission.csv"):
    """
    Build, validate and save the submission file.

    The id columns are taken from ``test_data`` when present; a missing id
    column is filled with a placeholder (a running integer, or the current
    timestamp for 'id5'). Rows are sorted by customer ('id2') ascending and
    prediction descending before being written.

    Args:
        test_data: Object exposing ``.get(column, default)`` (e.g. a DataFrame)
            that holds the id columns.
        predictions: One score per row, in the same row order as ``test_data``.
            Any array-like is accepted; a pandas Series is used positionally,
            its index is ignored.
        output_path: Destination CSV path.

    Returns:
        The sorted submission DataFrame that was written to ``output_path``.

    Raises:
        ValueError: Raised by pandas when the number of predictions does not
            match the number of rows taken from ``test_data``.
    """
    print("Generating MAP@7 optimized submission...")

    # Strip any pandas index from the predictions. Assigning a Series to a
    # column aligns on index labels, so a Series whose index differs from the
    # test frame's would yield NaN scores -- which the repair step below would
    # then silently replace with 0.
    pred_values = np.asarray(predictions)
    n_rows = len(pred_values)

    # Create submission DataFrame with required columns
    submission = pd.DataFrame()

    # Map columns according to submission template
    # Use test data columns if they exist, otherwise create defaults
    submission['id1'] = test_data.get('id1', range(n_rows))
    submission['id2'] = test_data.get('id2', range(n_rows))
    submission['id3'] = test_data.get('id3', range(n_rows))
    submission['id5'] = test_data.get('id5', pd.Timestamp.now())
    submission['pred'] = pred_values

    # Sort by customer (id2) and prediction score for MAP@7 optimization
    submission = submission.sort_values(['id2', 'pred'], ascending=[True, False])

    # Validate submission format
    is_valid = validate_submission_format(submission)

    if not is_valid:
        print("⚠️ Attempting to fix submission format issues...")
        # Best-effort repair: nulls become 0 and scores are clipped into [0, 1].
        submission = submission.fillna(0)
        submission['pred'] = np.clip(submission['pred'], 0, 1)

    # Save submission
    submission.to_csv(output_path, index=False)
    print(f"✅ MAP@7 submission saved as {output_path}")
    print(f"Submission shape: {submission.shape}")

    # Display sample
    print("\nSample submission (top 10 rows):")
    print(submission.head(10))

    return submission

# Build the submission only when both predictions and a fitted model exist in the namespace
if not ('test_pred' in locals() and 'model' in locals()):
    print("❌ No predictions available. Model training may have failed.")
    print("Please check the model training section above.")

else:
    try:
        # Write submission.csv from the engineered test frame and its scores.
        submission = generate_map7_submission(
            test_data=test_engineered,
            predictions=test_pred,
            output_path="submission.csv",
        )

        print("\n✅ Submission file created successfully!")
        print(f"Total predictions: {len(test_pred)}")
        print(f"Unique customers: {submission['id2'].nunique()}")
        print(f"Average predictions per customer: {len(submission) / submission['id2'].nunique():.2f}")

        # Per-customer offer counts: how many customers reach the 7-offer cutoff.
        print("\n=== MAP@7 OPTIMIZATION REPORT ===")
        customer_offer_counts = submission.groupby('id2').size()
        print(f"Customers with offers: {customer_offer_counts.shape[0]}")
        print(f"Max offers per customer: {customer_offer_counts.max()}")
        print(f"Min offers per customer: {customer_offer_counts.min()}")
        print(f"Customers with >=7 offers: {customer_offer_counts.ge(7).sum()}")

    except Exception as e:
        print(f"❌ Submission generation failed: {e}")
        print("Creating emergency submission...")
        import traceback
        traceback.print_exc()

        # Last resort: placeholder ids paired with the real scores, so the
        # predictions are at least persisted to disk.
        try:
            emergency_submission = pd.DataFrame({
                'id1': ["emergency_%d" % position for position in range(len(test_pred))],
                'id2': range(len(test_pred)),
                'id3': range(len(test_pred)),
                'id5': pd.Timestamp.now(),
                'pred': test_pred,
            })

            emergency_submission.to_csv("emergency_submission.csv", index=False)
            print("✅ Emergency submission saved as emergency_submission.csv")

        except Exception as emergency_error:
            print(f"❌ Emergency submission also failed: {emergency_error}")



=== GENERATING SUBMISSION FILE ===
Generating MAP@7 optimized submission...
Validating submission format...
✅ Submission format validation passed
✅ MAP@7 submission saved as submission.csv
Submission shape: (369301, 5)

Sample submission (top 10 rows):
                                                   id1      id2       id3  \
82037      1000061_62395_16-23_2023-11-05 09:28:07.805  1000061     62395   
153767   1000061_5420674_16-23_2023-11-05 09:28:04.153  1000061   5420674   
52697     1000061_430736_16-23_2023-11-05 09:28:12.807  1000061    430736   
126858    1000061_803982_16-23_2023-11-05 09:47:52.857  1000061    803982   
167476  1000061_80107221_16-23_2023-11-05 09:28:12.577  1000061  80107221   
240303     1000061_97081_16-23_2023-11-05 09:28:08.795  1000061     97081   
279821  1000061_97904824_16-23_2023-11-05 09:28:08.601  1000061  97904824   
288480     1000061_31794_16-23_2023-11-05 09:47:54.852  1000061     31794   
8135      1000061_403431_16-23_2023-11-05 09:28:10.59

In [13]:
# Additional utility functions for better submission handling

def optimize_for_map7(predictions_df, customer_col='id2', offer_col='id3', pred_col='pred', top_k=7):
    """
    Keep each customer's ``top_k`` highest-scoring rows.

    Args:
        predictions_df: DataFrame with one row per (customer, offer) score.
        customer_col: Column identifying the customer; rows are grouped by it.
        offer_col: Column identifying the offer. Accepted for interface
            compatibility; the ranking itself does not read it.
        pred_col: Column holding the prediction score.
        top_k: Maximum number of rows kept per customer.

    Returns:
        A new DataFrame sorted by customer (ascending) then score (descending),
        holding at most ``top_k`` rows per customer, with a fresh 0..n-1 index.
        An empty input yields an empty result.
    """
    print(f"Optimizing predictions for MAP@{top_k}...")

    # Sort by customer and prediction score
    ranked = predictions_df.sort_values([customer_col, pred_col], ascending=[True, False])

    # Keep only top-k predictions per customer
    optimized_df = ranked.groupby(customer_col).head(top_k).reset_index(drop=True)

    # An empty frame has zero customers; report 0.00 instead of dividing by zero.
    n_customers = predictions_df[customer_col].nunique()
    average_offers = len(optimized_df) / n_customers if n_customers else 0.0

    print(f"Original predictions: {len(predictions_df)}")
    print(f"Optimized predictions: {len(optimized_df)}")
    print(f"Average offers per customer: {average_offers:.2f}")

    return optimized_df

def create_sample_submission_check():
    """Build and print a two-row example of the submission layout.

    Returns:
        DataFrame with the columns id1, id2, id3, id5 and pred.
    """
    column_names = ['id1', 'id2', 'id3', 'id5', 'pred']
    example_rows = [
        ('1000061_31794_16-23_2023-11-05 09:47:54.852', 1000061, 31794, '11/5/2023', 0.85),
        ('1000061_16099_16-23_2023-11-05 09:28:11.514', 1000061, 16099, '11/5/2023', 0.72),
    ]

    sample_df = pd.DataFrame(example_rows, columns=column_names)
    print("Sample submission format:")
    print(sample_df)
    return sample_df

# Print the example layout for reference
sample = create_sample_submission_check()


Sample submission format:
                                           id1      id2    id3        id5  \
0  1000061_31794_16-23_2023-11-05 09:47:54.852  1000061  31794  11/5/2023   
1  1000061_16099_16-23_2023-11-05 09:28:11.514  1000061  16099  11/5/2023   

   pred  
0  0.85  
1  0.72  
