In [1]:
# Run this in your Python console to debug the model file directly

import os
import pickle
import numpy as np

# Check if model file exists and get info
model_path = "models/top_model_2_ExtraTreesRegressor.pkl"
print(f"1. Model file exists: {os.path.exists(model_path)}")

if os.path.exists(model_path):
    file_size = os.path.getsize(model_path)
    print(f"2. File size: {file_size} bytes")
    
    # Try to load the model step by step
    print("3. Attempting to load model...")
    try:
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        
        print(f"4. ✅ Model loaded successfully!")
        print(f"5. Model type: {type(model)}")
        print(f"6. Has predict method: {hasattr(model, 'predict')}")
        
        # Check if it's a PyCaret model
        if hasattr(model, 'named_steps'):
            print("7. This appears to be a Pipeline model")
            print(f"   Pipeline steps: {list(model.named_steps.keys())}")
        elif hasattr(model, '_final_estimator'):
            print("7. This appears to be a PyCaret model")
            print(f"   Final estimator: {type(model._final_estimator)}")
        else:
            print("7. This appears to be a direct sklearn model")
        
        # Try a simple prediction
        print("8. Testing prediction...")
        # Create dummy input with the right number of features
        n_features = 27  # Adjust based on your model
        dummy_input = np.random.rand(1, n_features)
        
        try:
            prediction = model.predict(dummy_input)
            print(f"9. ✅ Prediction successful: {prediction}")
        except Exception as pred_error:
            print(f"9. ❌ Prediction failed: {pred_error}")
            
    except Exception as e:
        print(f"4. ❌ Model loading failed: {e}")
        print(f"   Error type: {type(e).__name__}")
        
        # Check for specific error patterns
        error_str = str(e)
        if "cannot import name" in error_str:
            print("   → This looks like an import/module issue")
        elif "protocol" in error_str.lower():
            print("   → This looks like a pickle protocol issue")
        elif "sklearn" in error_str.lower():
            print("   → This looks like a sklearn-related issue")
        
else:
    print("Model file not found!")
    print("Available files in models folder:")
    if os.path.exists("models"):
        for file in os.listdir("models"):
            print(f"  - {file}")
    else:
        print("  Models folder doesn't exist!")

1. Model file exists: True
2. File size: 100440399 bytes
3. Attempting to load model...
4. ❌ Model loading failed: cannot import name '_print_elapsed_time' from 'sklearn.utils' (C:\Users\jdche\.conda\envs\pycaret311\Lib\site-packages\sklearn\utils\__init__.py)
   Error type: ImportError
   → This looks like an import/module issue


In [2]:
# Run this to check your environment details

import sys
import sklearn
print(f"Python version: {sys.version}")
print(f"Scikit-learn version: {sklearn.__version__}")

try:
    import pycaret
    print(f"PyCaret version: {pycaret.__version__}")
    print("PyCaret is available")
except ImportError as e:
    print(f"PyCaret not available: {e}")

try:
    import joblib
    print(f"Joblib version: {joblib.__version__}")
except ImportError:
    print("Joblib not available")

# Check if the specific sklearn utility exists
try:
    from sklearn.utils import _print_elapsed_time
    print("✅ _print_elapsed_time is available")
except ImportError as e:
    print(f"❌ _print_elapsed_time not available: {e}")
    print("This might be the issue!")

# Check sklearn.utils contents
print("\nAvailable in sklearn.utils:")
import sklearn.utils
attrs = [attr for attr in dir(sklearn.utils) if not attr.startswith('_')]
print(attrs[:10])  # Show first 10 attributes

Python version: 3.11.11 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:34:19) [MSC v.1929 64 bit (AMD64)]
Scikit-learn version: 1.6.1
PyCaret version: 3.3.2
PyCaret is available
Joblib version: 1.3.2
❌ _print_elapsed_time not available: cannot import name '_print_elapsed_time' from 'sklearn.utils' (C:\Users\jdche\.conda\envs\pycaret311\Lib\site-packages\sklearn\utils\__init__.py)
This might be the issue!

Available in sklearn.utils:


In [3]:
# Run this to investigate all files in your models folder

import os
import pickle
import numpy as np

def investigate_models_folder():
    """Investigate all files in the models folder to find the actual model"""
    
    models_dir = "models"
    
    if not os.path.exists(models_dir):
        print("❌ Models folder doesn't exist!")
        return
    
    print(f"📁 Investigating models folder: {models_dir}")
    print("=" * 60)
    
    files = os.listdir(models_dir)
    print(f"Found {len(files)} files:")
    
    for file in files:
        file_path = os.path.join(models_dir, file)
        file_size = os.path.getsize(file_path)
        
        print(f"\n📄 File: {file}")
        print(f"   Size: {file_size:,} bytes ({file_size/1024/1024:.1f} MB)")
        
        # Try to determine what's in each file
        if file.endswith('.pkl'):
            try:
                with open(file_path, 'rb') as f:
                    obj = pickle.load(f)
                
                obj_type = type(obj)
                print(f"   Type: {obj_type}")
                
                # Analyze based on type
                if isinstance(obj, np.ndarray):
                    print(f"   Array shape: {obj.shape}")
                    print(f"   Array dtype: {obj.dtype}")
                    if obj.size < 20:
                        print(f"   Sample values: {obj.flatten()[:10]}")
                    
                elif hasattr(obj, 'predict'):
                    print(f"   ✅ This looks like a model! Has predict method")
                    if hasattr(obj, 'fit'):
                        print(f"   ✅ Also has fit method")
                    if hasattr(obj, '_final_estimator'):
                        print(f"   🎯 PyCaret model detected")
                        print(f"       Final estimator: {type(obj._final_estimator)}")
                    if hasattr(obj, 'named_steps'):
                        print(f"   🎯 Pipeline detected")
                        print(f"       Steps: {list(obj.named_steps.keys())}")
                    
                elif isinstance(obj, dict):
                    print(f"   Dictionary with {len(obj)} keys")
                    print(f"   Keys: {list(obj.keys())[:5]}...")
                    
                elif isinstance(obj, (list, tuple)):
                    print(f"   {obj_type.__name__} with {len(obj)} items")
                    if len(obj) > 0:
                        print(f"   First item type: {type(obj[0])}")
                        
                else:
                    print(f"   Other object: {obj_type}")
                    if hasattr(obj, '__dict__'):
                        attrs = [attr for attr in dir(obj) if not attr.startswith('_')]
                        print(f"   Attributes: {attrs[:10]}...")
                
            except Exception as e:
                print(f"   ❌ Error loading: {e}")
        
        elif file.endswith('.joblib'):
            try:
                import joblib
                obj = joblib.load(file_path)
                print(f"   Type: {type(obj)}")
                if hasattr(obj, 'predict'):
                    print(f"   ✅ This looks like a model!")
            except Exception as e:
                print(f"   ❌ Error loading joblib: {e}")
        
        else:
            print(f"   Not a pickle/joblib file")
    
    print("\n" + "=" * 60)
    print("🔍 Analysis Summary:")
    print("Looking for files that contain actual models (objects with 'predict' method)...")

if __name__ == "__main__":
    investigate_models_folder()

📁 Investigating models folder: models
Found 7 files:

📄 File: others
   Size: 4,096 bytes (0.0 MB)
   Not a pickle/joblib file

📄 File: pycaret_processed_features_before_model_training.csv
   Size: 33,520 bytes (0.0 MB)
   Not a pickle/joblib file

📄 File: pycaret_processed_target_before_model_training.csv
   Size: 416 bytes (0.0 MB)
   Not a pickle/joblib file

📄 File: standard_scaler.pkl
   Size: 14,119 bytes (0.0 MB)
   Type: <class 'numpy.ndarray'>
   Array shape: (182,)
   Array dtype: object

📄 File: top_model_1_GradientBoostingRegressor.pkl
   Size: 275,322 bytes (0.3 MB)
   ❌ Error loading: cannot import name '_print_elapsed_time' from 'sklearn.utils' (C:\Users\jdche\.conda\envs\pycaret311\Lib\site-packages\sklearn\utils\__init__.py)

📄 File: top_model_2_ExtraTreesRegressor.pkl
   Size: 100,440,399 bytes (95.8 MB)
   ❌ Error loading: cannot import name '_print_elapsed_time' from 'sklearn.utils' (C:\Users\jdche\.conda\envs\pycaret311\Lib\site-packages\sklearn\utils\__init__.py)


In [4]:
# fix_all_models.py - Extract all your models with compatibility fix

import pickle
import numpy as np
import sklearn.utils
import os
import sys

def setup_sklearn_compatibility():
    """Setup compatibility shim for sklearn 1.6.1"""
    
    def mock_print_elapsed_time(func):
        """Mock version of the removed sklearn function"""
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)
        return wrapper
    
    # Inject into multiple possible locations
    sklearn.utils._print_elapsed_time = mock_print_elapsed_time
    
    # Also add to sys.modules to catch all imports
    if 'sklearn.utils' in sys.modules:
        sys.modules['sklearn.utils']._print_elapsed_time = mock_print_elapsed_time
    
    print("✅ Sklearn compatibility shim installed")

def extract_model(file_path, model_name):
    """Extract a single model with error handling"""
    
    print(f"\n🔄 Processing: {model_name}")
    print(f"   File: {file_path}")
    
    file_size = os.path.getsize(file_path)
    print(f"   Size: {file_size:,} bytes ({file_size/1024/1024:.1f} MB)")
    
    try:
        # Load the model
        with open(file_path, 'rb') as f:
            model = pickle.load(f)
        
        print(f"   ✅ Loaded successfully!")
        print(f"   Type: {type(model)}")
        
        # Extract core model if needed
        core_model = model
        extraction_info = "Direct model"
        
        if hasattr(model, '_final_estimator'):
            core_model = model._final_estimator
            extraction_info = f"Extracted from PyCaret wrapper: {type(core_model)}"
        elif hasattr(model, 'named_steps'):
            # Find the main estimator in pipeline
            for step_name, step in model.named_steps.items():
                if hasattr(step, 'predict') and hasattr(step, 'fit'):
                    if any(keyword in str(type(step)).lower() for keyword in ['tree', 'forest', 'boost', 'regressor']):
                        core_model = step
                        extraction_info = f"Extracted from pipeline step '{step_name}': {type(core_model)}"
                        break
        
        print(f"   🎯 {extraction_info}")
        
        # Test the model
        print(f"   🧪 Testing model...")
        
        # Determine number of features
        n_features = 27  # Default
        if hasattr(core_model, 'n_features_in_'):
            n_features = core_model.n_features_in_
        elif hasattr(core_model, 'feature_importances_'):
            n_features = len(core_model.feature_importances_)
        
        print(f"   Features expected: {n_features}")
        
        # Test prediction
        test_input = np.random.rand(1, n_features)
        prediction = core_model.predict(test_input)
        print(f"   ✅ Test prediction: {prediction[0]:.2f}")
        
        # Save the fixed model
        safe_name = model_name.lower().replace(' ', '_').replace('.pkl', '')
        new_file_path = f"models/fixed_{safe_name}.pkl"
        
        with open(new_file_path, 'wb') as f:
            pickle.dump(core_model, f)
        
        print(f"   💾 Saved to: {new_file_path}")
        
        # Verify the saved model
        with open(new_file_path, 'rb') as f:
            test_model = pickle.load(f)
        
        test_pred = test_model.predict(test_input)
        print(f"   ✅ Verification successful: {test_pred[0]:.2f}")
        
        return {
            'original_name': model_name,
            'new_path': new_file_path,
            'model_type': str(type(core_model)),
            'features': n_features,
            'test_prediction': prediction[0],
            'success': True
        }
        
    except Exception as e:
        print(f"   ❌ Failed: {e}")
        return {
            'original_name': model_name,
            'success': False,
            'error': str(e)
        }

def fix_all_models():
    """Fix all models in the models folder"""
    
    print("🚀 Starting batch model extraction...")
    print("=" * 70)
    
    # Setup compatibility
    setup_sklearn_compatibility()
    
    # Define models to process
    models_to_process = [
        ("top_model_1_GradientBoostingRegressor.pkl", "Gradient Boosting Regressor"),
        ("top_model_2_ExtraTreesRegressor.pkl", "Extra Trees Regressor"),
        ("top_model_3_RandomForestRegressor.pkl", "Random Forest Regressor")
    ]
    
    results = []
    
    for file_name, display_name in models_to_process:
        file_path = os.path.join("models", file_name)
        
        if os.path.exists(file_path):
            result = extract_model(file_path, display_name)
            results.append(result)
        else:
            print(f"\n❌ File not found: {file_path}")
            results.append({
                'original_name': display_name,
                'success': False,
                'error': 'File not found'
            })
    
    # Summary
    print("\n" + "=" * 70)
    print("📊 EXTRACTION SUMMARY")
    print("=" * 70)
    
    successful = [r for r in results if r['success']]
    failed = [r for r in results if not r['success']]
    
    print(f"✅ Successfully extracted: {len(successful)} models")
    print(f"❌ Failed: {len(failed)} models")
    
    if successful:
        print(f"\n🎉 SUCCESSFULLY EXTRACTED MODELS:")
        for result in successful:
            print(f"   📄 {result['original_name']}")
            print(f"      → {result['new_path']}")
            print(f"      → Type: {result['model_type']}")
            print(f"      → Features: {result['features']}")
            print(f"      → Test prediction: {result['test_prediction']:.2f}")
    
    if failed:
        print(f"\n❌ FAILED EXTRACTIONS:")
        for result in failed:
            print(f"   📄 {result['original_name']}: {result['error']}")
    
    print(f"\n🎯 NEXT STEPS:")
    print(f"1. Refresh your Streamlit app")
    print(f"2. Look for these new models in the dropdown:")
    for result in successful:
        model_name = os.path.basename(result['new_path']).replace('.pkl', '').replace('_', ' ').title()
        print(f"   - '{model_name}'")
    print(f"3. Select a model and try making predictions!")
    
    return successful, failed

if __name__ == "__main__":
    successful, failed = fix_all_models()
    
    if successful:
        print(f"\n🌟 SUCCESS! Extracted {len(successful)} models ready for use!")
    else:
        print(f"\n💔 No models were successfully extracted.")
        print(f"   You may need to retrain your models with sklearn 1.6.1")

🚀 Starting batch model extraction...
✅ Sklearn compatibility shim installed

🔄 Processing: Gradient Boosting Regressor
   File: models\top_model_1_GradientBoostingRegressor.pkl
   Size: 275,322 bytes (0.3 MB)
   ✅ Loaded successfully!
   Type: <class 'numpy.ndarray'>
   🎯 Direct model
   🧪 Testing model...
   Features expected: 27
   ❌ Failed: 'numpy.ndarray' object has no attribute 'predict'

🔄 Processing: Extra Trees Regressor
   File: models\top_model_2_ExtraTreesRegressor.pkl
   Size: 100,440,399 bytes (95.8 MB)
   ✅ Loaded successfully!
   Type: <class 'numpy.ndarray'>
   🎯 Direct model
   🧪 Testing model...
   Features expected: 27
   ❌ Failed: 'numpy.ndarray' object has no attribute 'predict'

🔄 Processing: Random Forest Regressor
   File: models\top_model_3_RandomForestRegressor.pkl
   Size: 63,474,591 bytes (60.5 MB)
   ✅ Loaded successfully!
   Type: <class 'numpy.ndarray'>
   🎯 Direct model
   🧪 Testing model...
   Features expected: 27
   ❌ Failed: 'numpy.ndarray' object ha