<a href="https://colab.research.google.com/github/ericyoc/nn_model_gestalt_reporter_poc/blob/main/model_gestalt_report_poc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
#!pip install huggingface_hub requests

# Optional but recommended for the full script
#!pip install pathlib2

In [8]:
"""
Hugging Face Model Downloader for Reverse Engineering
Downloads different types of models to /content/ for analysis
"""

import os
import requests
import json
from pathlib import Path
from huggingface_hub import hf_hub_download, snapshot_download
import warnings

# Install a blanket "ignore" filter so warnings raised through the
# `warnings` module are not shown while this cell runs.
warnings.filterwarnings('ignore')

def install_requirements():
    """Make sure the downloader's third-party packages can be imported.

    Each package is imported by name (dashes mapped to underscores); when the
    import fails, pip is invoked quietly through the running interpreter.

    Raises:
        subprocess.CalledProcessError: If a pip invocation exits non-zero.
    """
    import subprocess
    import sys

    for package in ('huggingface_hub', 'requests'):
        module_name = package.replace('-', '_')
        try:
            __import__(module_name)
        except ImportError:
            print(f"[INSTALL] Installing {package}...")
            pip_command = [sys.executable, '-m', 'pip', 'install', package, '-q']
            subprocess.check_call(pip_command)
        else:
            print(f"[OK] {package} already installed")

class HuggingFaceModelDownloader:
    """Interactive helper that collects sample ML model files in one directory.

    Models come from three places: the curated Hugging Face catalog built in
    ``__init__``, a short list of raw ONNX download URLs, and small
    scikit-learn estimators that are trained and pickled locally.

    Attributes:
        download_dir: Directory (``pathlib.Path``) every model file is written to.
        model_catalog: Mapping of category key -> ``{'description', 'models'}``
            where each model entry carries ``repo_id``, ``filename``,
            ``local_name``, ``format``, ``dataset`` and ``size``.
    """

    # File extensions that show_download_summary() reports as model files.
    MODEL_EXTENSIONS = ('.onnx', '.h5', '.hdf5', '.bin', '.pkl', '.pt', '.pth')

    # Seconds a raw HTTP download may stall before it is abandoned.
    HTTP_TIMEOUT = 60

    def __init__(self, download_dir="/content"):
        """Create the download directory (including parents) and the catalog.

        Args:
            download_dir: Target directory for all downloaded/created files.
        """
        self.download_dir = Path(download_dir)
        # parents=True so a nested, not-yet-existing target directory works.
        self.download_dir.mkdir(parents=True, exist_ok=True)

        # Curated list of different model types for reverse engineering
        self.model_catalog = {
            'image_classification': {
                'description': 'Image Classification Models',
                'models': [
                    {
                        'repo_id': 'microsoft/resnet-50',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'resnet50_imagenet.bin',
                        'format': 'pytorch',
                        'dataset': 'ImageNet',
                        'size': 'Medium'
                    },
                    {
                        'repo_id': 'google/vit-base-patch16-224',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'vit_base_imagenet.bin',
                        'format': 'pytorch',
                        'dataset': 'ImageNet',
                        'size': 'Large'
                    }
                ]
            },
            'text_classification': {
                'description': 'Text Classification Models',
                'models': [
                    {
                        'repo_id': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'twitter_sentiment.bin',
                        'format': 'pytorch',
                        'dataset': 'Twitter',
                        'size': 'Medium'
                    },
                    {
                        'repo_id': 'distilbert-base-uncased-finetuned-sst-2-english',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'distilbert_sst2.bin',
                        'format': 'pytorch',
                        'dataset': 'SST-2',
                        'size': 'Small'
                    }
                ]
            },
            'object_detection': {
                'description': 'Object Detection Models',
                'models': [
                    {
                        'repo_id': 'microsoft/table-transformer-detection',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'table_detection.bin',
                        'format': 'pytorch',
                        'dataset': 'Custom',
                        'size': 'Medium'
                    }
                ]
            },
            'onnx_models': {
                'description': 'ONNX Format Models',
                'models': [
                    {
                        'repo_id': 'onnx/models',
                        'filename': 'vision/classification/mnist/model/mnist-8.onnx',
                        'local_name': 'mnist_digit_classifier.onnx',
                        'format': 'onnx',
                        'dataset': 'MNIST',
                        'size': 'Small'
                    },
                    {
                        'repo_id': 'onnx/models',
                        'filename': 'vision/classification/resnet/model/resnet50-v1-7.onnx',
                        'local_name': 'resnet50_imagenet.onnx',
                        'format': 'onnx',
                        'dataset': 'ImageNet',
                        'size': 'Large'
                    }
                ]
            },
            'audio_models': {
                'description': 'Audio Processing Models',
                'models': [
                    {
                        'repo_id': 'facebook/wav2vec2-base-960h',
                        'filename': 'pytorch_model.bin',
                        'local_name': 'wav2vec2_speech.bin',
                        'format': 'pytorch',
                        'dataset': 'LibriSpeech',
                        'size': 'Large'
                    }
                ]
            },
            'tensorflow_models': {
                'description': 'TensorFlow/Keras Models',
                'models': [
                    {
                        'repo_id': 'tensorflow/mobilenet_v2_1.4_224',
                        'filename': 'tf_model.h5',
                        'local_name': 'mobilenet_v2.h5',
                        'format': 'tensorflow',
                        'dataset': 'ImageNet',
                        'size': 'Medium'
                    }
                ]
            }
        }

    def list_available_models(self):
        """Print every catalog entry, numbered consecutively across categories.

        Returns:
            int: Total number of models listed.
        """
        print("="*80)
        print("AVAILABLE MODELS FOR DOWNLOAD")
        print("="*80)

        total_models = 0
        for info in self.model_catalog.values():
            print(f"\n{info['description']}:")
            print("-" * 50)

            for model in info['models']:
                total_models += 1
                print(f"  {total_models}. {model['local_name']}")
                print(f"     Format: {model['format'].upper()}")
                print(f"     Dataset: {model['dataset']}")
                print(f"     Size: {model['size']}")
                print(f"     Repository: {model['repo_id']}")
                print()

        return total_models

    def download_model(self, repo_id, filename, local_name, format_type):
        """Download one file from a Hugging Face repository.

        The file is fetched into ``download_dir`` and then moved to
        ``download_dir / local_name``.

        Args:
            repo_id: Hugging Face repository identifier.
            filename: Path of the file inside the repository.
            local_name: File name to store the download under.
            format_type: Format label, used for logging only.

        Returns:
            bool: True on success, False if anything went wrong (the error is
            printed, never raised).
        """
        local_path = self.download_dir / local_name

        print(f"[DOWNLOAD] Starting download of {local_name}...")
        print(f"[INFO] Repository: {repo_id}")
        print(f"[INFO] File: {filename}")
        print(f"[INFO] Format: {format_type}")

        try:
            # Download the file
            downloaded_path = Path(hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=str(self.download_dir),
                local_dir_use_symlinks=False
            ))

            # Move to our preferred name. replace() overwrites a file left by
            # an earlier run, whereas rename() fails on Windows in that case.
            if downloaded_path != local_path:
                downloaded_path.replace(local_path)

            # Get file size
            file_size = local_path.stat().st_size / (1024 * 1024)  # MB

            print(f"[SUCCESS] Downloaded: {local_name} ({file_size:.2f} MB)")
            return True

        except Exception as e:
            # Best-effort by design: callers count successes via the return value.
            print(f"[ERROR] Failed to download {local_name}: {str(e)}")
            return False

    def download_custom_onnx_models(self):
        """Download a fixed set of ONNX files over plain HTTP.

        Each file is streamed to a ``.part`` sibling first and only moved into
        place once complete, so a failed download never clobbers a good file.
        Failures are printed and the loop continues with the next model.
        """
        print("[INFO] Downloading basic ONNX models...")

        # MNIST model from ONNX model zoo
        onnx_models = [
            {
                'url': 'https://github.com/onnx/models/raw/main/vision/classification/mnist/model/mnist-8.onnx',
                'filename': 'mnist_handwritten_digits.onnx'
            },
            {
                'url': 'https://github.com/onnx/models/raw/main/vision/classification/mnist/model/mnist-1.onnx',
                'filename': 'mnist_simple.onnx'
            }
        ]

        for model in onnx_models:
            local_path = self.download_dir / model['filename']
            part_path = local_path.with_name(local_path.name + '.part')

            try:
                print(f"[DOWNLOAD] {model['filename']}...")
                # The context manager releases the connection; the timeout
                # keeps a stalled server from hanging the notebook forever.
                with requests.get(model['url'], stream=True,
                                  timeout=self.HTTP_TIMEOUT) as response:
                    response.raise_for_status()
                    with open(part_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                part_path.replace(local_path)

                file_size = local_path.stat().st_size / (1024 * 1024)
                print(f"[SUCCESS] Downloaded: {model['filename']} ({file_size:.2f} MB)")

            except Exception as e:
                print(f"[ERROR] Failed to download {model['filename']}: {e}")
                # Drop the incomplete temp file, if one was created.
                try:
                    part_path.unlink()
                except OSError:
                    pass

    def create_sample_sklearn_models(self):
        """Train three small scikit-learn classifiers and pickle them.

        The estimators are fitted on synthetic data from
        ``make_classification`` and written to ``download_dir``. If
        scikit-learn is not installed the step is skipped with a warning.
        """
        print("[INFO] Creating sample scikit-learn models...")

        try:
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.linear_model import LogisticRegression
            from sklearn.svm import SVC
            from sklearn.datasets import make_classification
            import pickle

            # Generate sample data
            X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

            # Create different model types
            models = [
                ('random_forest_classifier.pkl', RandomForestClassifier(n_estimators=100, random_state=42)),
                ('logistic_regression.pkl', LogisticRegression(random_state=42)),
                ('svm_classifier.pkl', SVC(kernel='rbf', random_state=42))
            ]

            for filename, model in models:
                # Train the model
                model.fit(X, y)

                # Save to pickle
                model_path = self.download_dir / filename
                with open(model_path, 'wb') as f:
                    pickle.dump(model, f)

                file_size = model_path.stat().st_size / 1024  # KB
                print(f"[SUCCESS] Created: {filename} ({file_size:.2f} KB)")

        except ImportError:
            print("[WARNING] scikit-learn not available, skipping sklearn models")
        except Exception as e:
            print(f"[ERROR] Failed to create sklearn models: {e}")

    def download_interactive(self):
        """Show the catalog and the option menu, then run the chosen action.

        An unrecognised choice falls back to the quick start pack. Ctrl-C at
        the prompt cancels without downloading anything.
        """
        self.list_available_models()

        print("="*80)
        print("DOWNLOAD OPTIONS")
        print("="*80)
        print("1. Download ALL models (may take a while)")
        print("2. Download by category")
        print("3. Download specific models")
        print("4. Download basic ONNX models only")
        print("5. Create sample sklearn models only")
        print("6. Quick start pack (small models)")

        try:
            choice = input("\nSelect option (1-6): ").strip()
        except KeyboardInterrupt:
            print("\nDownload cancelled.")
            return

        if choice == '1':
            self.download_all_models()
        elif choice == '2':
            self.download_by_category()
        elif choice == '3':
            self.download_specific_models()
        elif choice == '4':
            self.download_custom_onnx_models()
        elif choice == '5':
            self.create_sample_sklearn_models()
        elif choice == '6':
            self.download_quick_start()
        else:
            print("Invalid choice. Downloading quick start pack...")
            self.download_quick_start()

    def download_specific_models(self):
        """Download catalog models picked by number.

        The numbers are the ones printed by ``list_available_models`` (1-based,
        counted across all categories). Several may be given, separated by
        commas; invalid or duplicate entries are reported and skipped.
        """
        catalog_models = [
            model
            for info in self.model_catalog.values()
            for model in info['models']
        ]

        raw = input(f"\nEnter model numbers separated by commas (1-{len(catalog_models)}): ")

        selected = []
        for token in raw.split(','):
            token = token.strip()
            if not token:
                continue
            try:
                index = int(token) - 1
            except ValueError:
                print(f"[WARNING] Ignoring invalid selection: {token}")
                continue
            if not 0 <= index < len(catalog_models):
                print(f"[WARNING] Ignoring out-of-range selection: {token}")
                continue
            if index not in selected:
                selected.append(index)

        if not selected:
            print("Invalid selection.")
            return

        success_count = 0
        for index in selected:
            model = catalog_models[index]
            if self.download_model(
                model['repo_id'],
                model['filename'],
                model['local_name'],
                model['format']
            ):
                success_count += 1

        print(f"\n[COMPLETE] Downloaded {success_count}/{len(selected)} selected models")

    def download_quick_start(self):
        """Fetch a small starter set: ONNX files, sklearn pickles, one PyTorch model."""
        print("[INFO] Downloading quick start pack...")

        # Download basic ONNX models
        self.download_custom_onnx_models()

        # Create sklearn models
        self.create_sample_sklearn_models()

        # Try to download one small PyTorch model. download_model() reports
        # failure through its return value rather than by raising.
        downloaded = self.download_model(
            'distilbert-base-uncased-finetuned-sst-2-english',
            'pytorch_model.bin',
            'distilbert_sentiment.bin',
            'pytorch'
        )
        if not downloaded:
            print("[WARNING] Could not download PyTorch model")

        print("\n[SUCCESS] Quick start pack downloaded!")

    def download_all_models(self):
        """Download every catalog model, then the ONNX and sklearn extras."""
        print("[INFO] Downloading ALL models... This may take a while!")

        success_count = 0
        total_count = 0

        for info in self.model_catalog.values():
            print(f"\n[CATEGORY] {info['description']}")
            for model in info['models']:
                total_count += 1
                if self.download_model(
                    model['repo_id'],
                    model['filename'],
                    model['local_name'],
                    model['format']
                ):
                    success_count += 1

        # Also download custom models
        self.download_custom_onnx_models()
        self.create_sample_sklearn_models()

        print(f"\n[COMPLETE] Downloaded {success_count}/{total_count} models from catalog")

    def download_by_category(self):
        """Prompt for a category number and download all models in it."""
        print("\nAvailable Categories:")
        categories = list(self.model_catalog.keys())

        for i, info in enumerate(self.model_catalog.values()):
            print(f"  {i+1}. {info['description']} ({len(info['models'])} models)")

        try:
            choice = int(input(f"\nSelect category (1-{len(categories)}): ")) - 1
        except ValueError:
            print("Invalid selection.")
            return

        # Explicit range check: a negative index (e.g. the user typing "0")
        # would otherwise silently select a category from the end of the list.
        if not 0 <= choice < len(categories):
            print("Invalid selection.")
            return

        selected_category = categories[choice]
        print(f"\n[INFO] Downloading {self.model_catalog[selected_category]['description']}...")

        for model in self.model_catalog[selected_category]['models']:
            self.download_model(
                model['repo_id'],
                model['filename'],
                model['local_name'],
                model['format']
            )

    def show_download_summary(self):
        """Print name and size of every model file found in ``download_dir``.

        Only files directly inside the directory whose extension is listed in
        ``MODEL_EXTENSIONS`` are reported.
        """
        print("\n" + "="*80)
        print("DOWNLOAD SUMMARY")
        print("="*80)

        # List all model files in download directory
        found_models = []
        for ext in self.MODEL_EXTENSIONS:
            found_models.extend(self.download_dir.glob(f"*{ext}"))

        if found_models:
            print(f"Found {len(found_models)} model files in {self.download_dir}:")
            print()

            total_size = 0
            for model_file in found_models:
                size_mb = model_file.stat().st_size / (1024 * 1024)
                total_size += size_mb
                print(f"  {model_file.name:<40} {size_mb:>8.2f} MB")

            print(f"\nTotal size: {total_size:.2f} MB")
            print(f"\nYou can now use the reverse engineering script to analyze these models!")
        else:
            print("No model files found. Try downloading some models first.")

def main():
    """Run the interactive downloader from start to finish.

    Installs missing requirements, reports model files already present in the
    downloader's directory, asks whether to continue, runs the interactive
    menu and finally prints a summary plus next steps. Interrupts and
    unexpected errors during the download phase are reported, not raised.
    """
    print("HUGGING FACE MODEL DOWNLOADER")
    print("Downloads various ML models for reverse engineering analysis")
    print("="*60)

    # Install requirements
    install_requirements()

    # Create downloader
    downloader = HuggingFaceModelDownloader()

    # Check if models already exist. Look in the downloader's own directory
    # so the check keeps matching if the download location ever changes.
    existing_models = [
        path
        for pattern in ("*.onnx", "*.h5", "*.pkl")
        for path in downloader.download_dir.glob(pattern)
    ]

    if existing_models:
        print(f"\n[INFO] Found {len(existing_models)} existing model files:")
        for model in existing_models[:5]:  # Show first 5
            print(f"  - {model.name}")
        if len(existing_models) > 5:
            print(f"  ... and {len(existing_models) - 5} more")

        try:
            answer = input("\nDownload additional models? (y/n): ").lower().strip()
        except (KeyboardInterrupt, EOFError):
            # Treat an aborted prompt like a "no" instead of a traceback.
            print("\nExiting...")
            return
        if answer != 'y':
            print("Exiting...")
            return

    # Run interactive download
    try:
        downloader.download_interactive()

        # Show summary
        downloader.show_download_summary()

        print("\n" + "="*60)
        print("NEXT STEPS:")
        print("1. Run the model analysis script")
        print("2. Select a model to analyze")
        print("3. View the reverse engineering results!")
        print("="*60)

    except KeyboardInterrupt:
        print("\n\nDownload interrupted by user.")
    except Exception as e:
        print(f"\nError during download: {e}")

if __name__ == "__main__":
    main()

HUGGING FACE MODEL DOWNLOADER
Downloads various ML models for reverse engineering analysis
[OK] huggingface_hub already installed
[OK] requests already installed

[INFO] Found 2 existing model files:
  - mnist-8.onnx
  - mnist-8-enhanced.onnx

Download additional models? (y/n): y
AVAILABLE MODELS FOR DOWNLOAD

Image Classification Models:
--------------------------------------------------
  1. resnet50_imagenet.bin
     Format: PYTORCH
     Dataset: ImageNet
     Size: Medium
     Repository: microsoft/resnet-50

  2. vit_base_imagenet.bin
     Format: PYTORCH
     Dataset: ImageNet
     Size: Large
     Repository: google/vit-base-patch16-224


Text Classification Models:
--------------------------------------------------
  3. twitter_sentiment.bin
     Format: PYTORCH
     Dataset: Twitter
     Size: Medium
     Repository: cardiffnlp/twitter-roberta-base-sentiment-latest

  4. distilbert_sst2.bin
     Format: PYTORCH
     Dataset: SST-2
     Size: Small
     Repository: distilbert-b

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

[SUCCESS] Downloaded: distilbert_sentiment.bin (255.44 MB)

[SUCCESS] Quick start pack downloaded!

DOWNLOAD SUMMARY
Found 8 model files in /content:

  mnist-8.onnx                                 0.03 MB
  mnist-8-enhanced.onnx                        1.65 MB
  distilbert_sentiment.bin                   255.44 MB
  vit_base_imagenet.bin                      330.31 MB
  resnet50_imagenet.bin                       97.82 MB
  logistic_regression.pkl                      0.00 MB
  svm_classifier.pkl                           0.08 MB
  random_forest_classifier.pkl                 1.26 MB

Total size: 686.58 MB

You can now use the reverse engineering script to analyze these models!

NEXT STEPS:
1. Run the model analysis script
2. Select a model to analyze
3. View the reverse engineering results!


In [17]:
#!pip install psutil

In [13]:
import os
import warnings
from pathlib import Path
import datetime
import time
import psutil
import numpy as np
import json

# Install a blanket "ignore" filter so warnings raised through the
# `warnings` module are not shown while the analysis cell runs.
warnings.filterwarnings('ignore')

def analyze_model_file(model_path):
    """Print basic file facts for a model and run the format-specific analyzer.

    The analyzer is chosen by file extension (.onnx, .h5/.hdf5,
    .pt/.pth/.bin, .pkl/.joblib). Errors raised by an analyzer are printed
    rather than propagated.

    Args:
        model_path: Path to the model file (str or path-like).

    Returns:
        None. All results are written to stdout.
    """
    print("="*80)
    print("COMPREHENSIVE AI MODEL ANALYSIS")
    print("="*80)

    # Basic file info
    file_path = Path(model_path)

    # stat() below would raise for a missing path; report it readably instead.
    if not file_path.is_file():
        print(f"Error: model file not found: {file_path}")
        return

    file_stat = file_path.stat()
    file_size_mb = file_stat.st_size / (1024 * 1024)

    print(f"File Name:           {file_path.name}")
    print(f"File Size:           {file_size_mb:.2f} MB")
    print(f"File Extension:      {file_path.suffix}")
    print(f"File Type:           {file_path.suffix.upper()[1:]} Model")
    print(f"Modified:            {datetime.datetime.fromtimestamp(file_stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S')}")
    # The 1.2 factor is a rough rule of thumb, not a measurement.
    print(f"Memory Footprint:    ~{file_size_mb * 1.2:.2f} MB (estimated in RAM)")
    print("-" * 80)

    # Try to load and analyze based on extension
    ext = file_path.suffix.lower()

    try:
        if ext == '.onnx':
            analyze_onnx_model(model_path)
        elif ext in ['.h5', '.hdf5']:
            analyze_keras_model(model_path)
        elif ext in ['.pt', '.pth', '.bin']:
            analyze_pytorch_model(model_path)
        elif ext in ['.pkl', '.joblib']:
            analyze_sklearn_model(model_path)
        else:
            print(f"Model type {ext} not yet supported")
            print("Supported formats: .onnx, .h5, .hdf5, .pt, .pth, .bin, .pkl, .joblib")

    except Exception as e:
        print(f"Error analyzing model: {e}")
        print("Basic file analysis completed.")

def calculate_flops_estimate(model_info):
    """Return a very rough FLOPs estimate for a model description.

    Two input shapes are understood:

    * ``{'layers': [{'type': str, 'params': int}, ...]}`` - each conv/dense/
      linear layer counts 2 FLOPs per parameter, attention layers 4.
    * ``{'operation_types': {op_name: count}}`` (ONNX) - each operator
      contributes a fixed per-op cost; unknown operators count as 1000.
      A dict that only carries the legacy ``'operations'`` key is accepted
      as well.

    Args:
        model_info: Dict describing the model as outlined above.

    Returns:
        int: Estimated FLOPs, 0 when the dict has neither shape.
    """
    flops = 0

    if 'layers' in model_info:
        for layer in model_info['layers']:
            # Tolerate missing/None values rather than failing on .lower().
            layer_type = str(layer.get('type') or '').lower()
            params = layer.get('params') or 0

            if 'conv' in layer_type:
                # Rough estimate: 2 * params for conv layers
                flops += params * 2
            elif 'dense' in layer_type or 'linear' in layer_type:
                # Dense layer: 2 * input_size * output_size
                flops += params * 2
            elif 'attention' in layer_type:
                # Attention is computationally expensive
                flops += params * 4

    elif 'operations' in model_info or 'operation_types' in model_info:
        # ONNX model estimation. The per-op counts live under
        # 'operation_types'; gating only on 'operations' made this branch
        # unreachable for callers that pass just the counts.
        op_flops = {
            'Conv': 1000000,  # ~1M FLOPS per conv
            'MatMul': 500000,  # ~500K FLOPS per matmul
            'Add': 1000,
            'Relu': 1000,
            'Softmax': 10000
        }

        for op_type, count in (model_info.get('operation_types') or {}).items():
            flops += op_flops.get(op_type, 1000) * count

    return flops

def estimate_hardware_requirements(model_size_mb, total_params, model_type):
    """Estimate hardware needs from model size, parameter count and type.

    The numbers are heuristics (size multiplied by a safety factor, with a
    fixed floor), not measurements.

    Args:
        model_size_mb: Model file size in megabytes.
        total_params: Total number of parameters.
        model_type: Type label; 'transformer', 'bert' and 'gpt' get higher
            RAM/GPU floors and multipliers.

    Returns:
        dict: ``minimum_ram``, ``recommended_ram`` and ``gpu_memory`` as
        "<n> GB" strings, ``cpu_cores`` as int, ``storage`` as "<n> MB".
    """
    # The memory figures are reported in GB, so convert before scaling;
    # using the MB value directly turned a 255 MB model into "510 GB".
    model_size_gb = model_size_mb / 1024

    requirements = {
        'minimum_ram': f"{max(4, model_size_gb * 2):.0f} GB",
        'recommended_ram': f"{max(8, model_size_gb * 4):.0f} GB",
        'gpu_memory': f"{max(2, model_size_gb * 1.5):.0f} GB",
        'cpu_cores': 4 if total_params > 100000000 else 2,
        'storage': f"{model_size_mb * 1.2:.0f} MB"
    }

    # Adjust based on model type
    if model_type in ['transformer', 'bert', 'gpt']:
        requirements['gpu_memory'] = f"{max(6, model_size_gb * 2):.0f} GB"
        requirements['minimum_ram'] = f"{max(16, model_size_gb * 3):.0f} GB"

    return requirements

def analyze_security_risks(model_info, filename):
    """Derive heuristic risk notes and recommendations for a model.

    Risks are inferred from the parameter count (``model_info['total_params']``
    above one billion) and from keywords found in the lower-cased file name.
    Three generic recommendations are always appended last.

    Args:
        model_info: Dict that may contain ``'total_params'``.
        filename: Model file name used for keyword matching.

    Returns:
        tuple[list[str], list[str]]: ``(risks, recommendations)``.
    """
    risks = []
    recommendations = []

    # Size-based check: only applies when a parameter count was supplied.
    if 'total_params' in model_info and model_info['total_params'] > 1000000000:
        risks.append("Large model - potential for adversarial attacks")
        recommendations.append("Implement input validation and adversarial training")

    # Keyword-based checks, evaluated in a fixed order.
    name = filename.lower()
    keyword_rules = (
        (
            ('bert', 'gpt'),
            "Language model - risk of generating harmful content",
            "Implement content filtering and safety checks",
        ),
        (
            ('vision', 'image'),
            "Vision model - susceptible to image-based attacks",
            "Add input preprocessing and validation",
        ),
    )
    for keywords, risk, advice in keyword_rules:
        if any(keyword in name for keyword in keywords):
            risks.append(risk)
            recommendations.append(advice)

    # Advice that applies to every model.
    recommendations += [
        "Monitor model outputs for unexpected behavior",
        "Implement confidence thresholds",
        "Log all inference requests for audit trail",
    ]

    return risks, recommendations

def generate_deployment_checklist(model_info, filename):
    """Return the static deployment-readiness checklist.

    Both arguments are accepted for interface symmetry with the other
    analysis helpers; the returned checklist does not depend on them.

    Args:
        model_info: Model description dict (not consulted).
        filename: Model file name (not consulted).

    Returns:
        dict[str, list[str]]: Section key -> list of checklist items, in the
        order model_validation, infrastructure, security, performance.
    """
    sections = (
        ('model_validation', (
            "Test model loading and inference",
            "Validate input/output shapes",
            "Check numerical stability",
            "Verify expected performance",
        )),
        ('infrastructure', (
            "Set up monitoring and logging",
            "Configure auto-scaling",
            "Test backup and recovery",
            "Set up health checks",
        )),
        ('security', (
            "Input validation implementation",
            "Rate limiting configuration",
            "Authentication setup",
            "Audit logging enabled",
        )),
        ('performance', (
            "Benchmark inference speed",
            "Memory usage profiling",
            "Load testing completed",
            "Optimization applied (if needed)",
        )),
    )

    # Fresh lists on every call so callers may mutate the result freely.
    return {section: list(items) for section, items in sections}

def analyze_onnx_model(model_path):
    """Load an ONNX model and print a multi-section analysis report.

    Sections: producer metadata, parameter and memory figures, operator
    breakdown, I/O shapes, performance and hardware estimates, deployment
    notes, security notes, dataset detection and a deployment checklist.

    Args:
        model_path: Path to the ``.onnx`` file (str or path-like).

    Returns:
        None. Output goes to stdout; a missing ``onnx`` package or any
        loading/analysis error is reported as a message, not raised.
    """
    print("ONNX MODEL ANALYSIS")
    print("-" * 80)

    try:
        import onnx
        model = onnx.load(model_path)

        # Normalise once so both str and Path arguments work; the later
        # sections need the bare file name, which a plain str does not have.
        model_file = Path(model_path)
        file_name = model_file.name

        # Basic model info
        producer = getattr(model, 'producer_name', 'Unknown')
        producer_version = getattr(model, 'producer_version', 'Unknown')
        # NOTE(review): this reads `model_version` (the model's own version
        # field) but is printed under the label "ONNX Version" - confirm
        # whether `ir_version` was intended.
        onnx_version = getattr(model, 'model_version', 'Unknown')

        print(f"Producer:            {producer}")
        print(f"Producer Version:    {producer_version}")
        print(f"ONNX Version:        {onnx_version}")
        print(f"Training Framework:  {producer} {producer_version}")

        # Graph analysis
        graph = model.graph
        graph_name = getattr(graph, 'name', 'Unnamed')
        print(f"Graph Name:          {graph_name}")

        # Parameter analysis: every initializer tensor counts as weights.
        total_params = 0
        weight_tensors = 0

        for init in getattr(graph, 'initializer', []):
            if hasattr(init, 'dims'):
                param_count = 1
                for dim in init.dims:
                    param_count *= dim
                total_params += param_count
                weight_tensors += 1

        print(f"Total Parameters:    {total_params:,}")
        print(f"Weight Tensors:      {weight_tensors}")

        # Memory analysis
        model_size_mb = model_file.stat().st_size / (1024 * 1024)
        params_memory_mb = (total_params * 4) / (1024 * 1024)  # Assuming float32
        overhead_mb = model_size_mb - params_memory_mb

        print(f"Parameters Memory:   {params_memory_mb:.2f} MB")
        print(f"Model Overhead:      {overhead_mb:.2f} MB")

        # Operations analysis
        nodes = getattr(graph, 'node', [])
        num_operations = len(nodes)
        print(f"Total Operations:    {num_operations}")

        op_types = {}
        computational_ops = 0

        if nodes:
            for node in nodes:
                op_type = getattr(node, 'op_type', 'Unknown')
                op_types[op_type] = op_types.get(op_type, 0) + 1

                # Count computationally intensive operations
                if op_type in ['Conv', 'MatMul', 'Gemm', 'ConvTranspose']:
                    computational_ops += 1

            print(f"Compute-heavy Ops:   {computational_ops}")

            print("\nOperation Breakdown:")
            for op_type, count in sorted(op_types.items()):
                percentage = (count / num_operations) * 100
                print(f"  {op_type:<20} {count:>3} ({percentage:>5.1f}%)")

        # I/O Analysis with detailed shapes
        analyze_model_io(graph)

        # Performance estimates. Pass the node count under 'operations' as
        # well: the estimator selects its ONNX branch on that key, and
        # without it the estimate stays at 0.
        flops = calculate_flops_estimate({
            'operations': num_operations,
            'operation_types': op_types,
        })
        print(f"\nPERFORMANCE ESTIMATES")
        print("-" * 80)
        print(f"Estimated FLOPS:     {flops:,}")
        # The max() floor keeps the division finite for tiny FLOP counts.
        print(f"Inference Speed:     ~{1000/max(flops/1e9, 0.001):.1f} FPS (estimated)")

        # Hardware requirements
        requirements = estimate_hardware_requirements(model_size_mb, total_params, 'onnx')
        print(f"\nHARDWARE REQUIREMENTS")
        print("-" * 80)
        for req, value in requirements.items():
            print(f"{req.replace('_', ' ').title():<20} {value}")

        # Deployment information
        print_deployment_info(model_size_mb, total_params, op_types)

        # Security analysis
        risks, recommendations = analyze_security_risks(
            {'total_params': total_params, 'operation_types': op_types},
            file_name
        )
        print_security_analysis(risks, recommendations)

        # Dataset detection with enhanced info
        detect_dataset_comprehensive(file_name, op_types, graph)

        # Generate deployment checklist
        checklist = generate_deployment_checklist(
            {'total_params': total_params},
            file_name
        )
        print_deployment_checklist(checklist)

    except ImportError:
        print("ONNX not installed. Install with: !pip install onnx")
    except Exception as e:
        print(f"Error loading ONNX model: {e}")

# ONNX TensorProto.DataType code -> (display name, bytes per element).
# Types without a fixed element width (e.g. strings) are left out on purpose.
_ONNX_ELEM_TYPES = {
    1: ('float32', 4),
    2: ('uint8', 1),
    3: ('int8', 1),
    4: ('uint16', 2),
    5: ('int16', 2),
    6: ('int32', 4),
    7: ('int64', 8),
    9: ('bool', 1),
    10: ('float16', 2),
    11: ('double', 8),
    12: ('uint32', 4),
    13: ('uint64', 8),
    16: ('bfloat16', 2),
}


def _describe_shape(tensor_type):
    """Return ``(dims, total_elements)`` for a tensor type.

    ``dims`` is a list of printable dimension labels. ``total_elements`` is
    the product of all dimensions, or None when any dimension is symbolic or
    unset (or when there are no dimensions at all).
    """
    shape = getattr(tensor_type, 'shape', None)
    dims = []
    total = 1

    for dim in getattr(shape, 'dim', None) or []:
        # Protobuf messages expose every field, so hasattr() cannot tell a
        # fixed size from a symbolic one; an unset dim_value reads as 0.
        value = getattr(dim, 'dim_value', 0)
        if value and value > 0:
            dims.append(str(value))
            if total is not None:
                total *= value
        else:
            dims.append(getattr(dim, 'dim_param', '') or '?')
            total = None  # Dynamic

    if not dims:
        total = None
    return dims, total


def analyze_model_io(graph):
    """Print names, shapes, types and size estimates of graph inputs/outputs.

    Args:
        graph: Object exposing ``input`` and ``output`` sequences whose items
            carry ``name`` and ``type.tensor_type`` (``elem_type``,
            ``shape.dim``), as an ONNX ``GraphProto`` does.

    Returns:
        None. The report is written to stdout.
    """
    print(f"\nINPUT/OUTPUT ANALYSIS")
    print("-" * 80)

    # Input analysis
    inputs = getattr(graph, 'input', None)
    if inputs:
        print(f"Inputs ({len(inputs)}):")
        for i, inp in enumerate(inputs):
            input_name = getattr(inp, 'name', f'input_{i}')

            shape_info = "Unknown shape"
            data_type = "Unknown type"
            estimated_size = "Unknown size"

            tensor_type = getattr(getattr(inp, 'type', None), 'tensor_type', None)
            if tensor_type is not None:
                # Get data type and its element width (None if not fixed/known)
                elem_bytes = None
                elem_type = getattr(tensor_type, 'elem_type', None)
                if elem_type is not None:
                    data_type, elem_bytes = _ONNX_ELEM_TYPES.get(
                        elem_type, (f'type_{elem_type}', None)
                    )

                # Get shape
                dims, total_elements = _describe_shape(tensor_type)
                if dims:
                    shape_info = f"[{', '.join(dims)}]"
                    if total_elements is not None and elem_bytes is not None:
                        estimated_size = f"{(total_elements * elem_bytes / 1024):.2f} KB"

            print(f"  {i+1}. {input_name}")
            print(f"     Shape: {shape_info}")
            print(f"     Type:  {data_type}")
            print(f"     Size:  {estimated_size}")

            # Suggest preprocessing based on shape
            if '[1, 1, 28, 28]' in shape_info:
                print(f"     Preprocessing: Normalize to [0,1], grayscale, 28x28 resize")
            elif '[1, 3, 224, 224]' in shape_info:
                print(f"     Preprocessing: ImageNet normalization, RGB, 224x224 resize")

    # Output analysis
    outputs = getattr(graph, 'output', None)
    if outputs:
        print(f"\nOutputs ({len(outputs)}):")
        for i, out in enumerate(outputs):
            output_name = getattr(out, 'name', f'output_{i}')

            shape_info = "Unknown shape"
            interpretation = ""

            tensor_type = getattr(getattr(out, 'type', None), 'tensor_type', None)
            if tensor_type is not None:
                dims, _ = _describe_shape(tensor_type)
                if dims:
                    shape_info = f"[{', '.join(dims)}]"

                    # Interpret output shape
                    if len(dims) == 2 and dims[1] in ['10', '1000', '21']:
                        classes = dims[1]
                        interpretation = f"Classification probabilities ({classes} classes)"
                    elif 'regression' in output_name.lower():
                        interpretation = "Regression output"

            print(f"  {i+1}. {output_name}")
            print(f"     Shape: {shape_info}")
            if interpretation:
                print(f"     Type:  {interpretation}")

def print_deployment_info(model_size_mb, total_params, op_types=None):
    """Print quantization advice, platform support and optimization hints.

    Args:
        model_size_mb: Model size in MB; half of it is reported as the
            estimated quantized size.
        total_params: Parameter count. Above 1,000,000 quantization is
            reported as recommended, otherwise as optional.
        op_types: Optional container of operator names; when it contains
            'Conv' the TensorRT/OpenVINO suggestions are listed as well.
    """
    def emit(*lines):
        # One print per line, in order, exactly like individual print calls.
        for line in lines:
            print(line)

    emit("\nDEPLOYMENT INFORMATION", "-" * 80)

    # Quantization potential (threshold: more than 1M parameters)
    small_model = not (total_params > 1000000)
    if small_model:
        emit("Quantization:        Optional")
    else:
        emit("Quantization:        Recommended (INT8/FP16)")
        emit(f"Quantized Size:      ~{model_size_mb/2:.1f} MB (estimated)")

    # Platform compatibility (static table)
    emit(
        "Compatibility:",
        "  CPU Inference:     Supported",
        "  GPU Inference:     Supported (CUDA/OpenCL)",
        "  Mobile (iOS):      Supported (Core ML conversion)",
        "  Mobile (Android):  Supported (TensorFlow Lite)",
        "  Web Browser:       Supported (ONNX.js)",
        "  Edge Devices:      Supported (optimized versions)",
    )

    # Optimization suggestions; the first two only apply to conv models
    emit("\nOptimization Options:")
    if op_types and 'Conv' in op_types:
        emit(
            "  - Convert to TensorRT for NVIDIA GPUs",
            "  - Use OpenVINO for Intel hardware",
        )
    emit(
        "  - Apply dynamic quantization",
        "  - Batch processing for throughput",
        "  - Model pruning for size reduction",
    )

def print_security_analysis(risks, recommendations):
    """Print the security section: identified risks, then recommendations.

    Args:
        risks: Iterable of risk descriptions; when empty/falsy a single
            "Low" line is printed instead of a list.
        recommendations: Iterable of recommendation strings, always listed.
    """
    banner = ("\nSECURITY & ROBUSTNESS ANALYSIS", "-" * 80)
    for text in banner:
        print(text)

    if not risks:
        print("Potential Risks:     Low (based on analysis)")
    else:
        print("Potential Risks:")
        for entry in risks:
            print(f"  ⚠ {entry}")

    print("\nSecurity Recommendations:")
    for advice in recommendations:
        print(f"  ✓ {advice}")

def detect_dataset_comprehensive(filename, operations, graph=None):
    """Print a dataset/use-case guess for a model plus compliance reminders.

    The dataset is guessed purely from keywords in the file name (first
    matching keyword wins, in the order the table below is written). When no
    keyword matches, a generic "Unknown" block is printed and the operation
    names are scanned for convolutions to hint at a computer-vision model.

    Args:
        filename: Model file name; matched case-insensitively.
        operations: Container of operator / layer type names (list, set, or
            dict keyed by name), a single name as a string, or empty/None.
        graph: Unused; accepted so existing callers that pass a third
            argument keep working.
    """
    print(f"\nDATASET & USE CASE ANALYSIS")
    print("-" * 80)

    filename_lower = filename.lower()

    # Enhanced detection with business context
    dataset_info = {
        'mnist': {
            'dataset': 'MNIST Handwritten Digits',
            'confidence': 'High',
            'task': 'Digit Recognition',
            'business_use': 'Document digitization, form processing',
            'data_source': '28x28 grayscale images',
            'accuracy_expectation': '99%+',
            'licensing': 'Public domain'
        },
        'cifar': {
            'dataset': 'CIFAR-10/100 Object Recognition',
            'confidence': 'High',
            'task': 'Object Classification',
            'business_use': 'Image categorization, content moderation',
            'data_source': '32x32 color images',
            'accuracy_expectation': '95%+',
            'licensing': 'Academic use'
        },
        'imagenet': {
            'dataset': 'ImageNet Large Scale Visual Recognition',
            'confidence': 'High',
            'task': 'Object Classification/Detection',
            'business_use': 'E-commerce, content tagging, surveillance',
            'data_source': '224x224+ color images',
            'accuracy_expectation': '80%+ top-5',
            'licensing': 'Research use'
        },
        'bert': {
            'dataset': 'Large Text Corpora (Books, Wikipedia)',
            'confidence': 'High',
            'task': 'Natural Language Understanding',
            'business_use': 'Chatbots, document analysis, search',
            'data_source': 'Text sequences',
            'accuracy_expectation': 'Task-dependent',
            'licensing': 'Apache 2.0'
        },
        'sentiment': {
            'dataset': 'Sentiment Analysis Dataset',
            'confidence': 'High',
            'task': 'Sentiment Classification',
            'business_use': 'Social media monitoring, review analysis',
            'data_source': 'Text reviews/comments',
            'accuracy_expectation': '85%+',
            'licensing': 'Varies'
        }
    }

    # First keyword found in the file name wins (dict order is the priority).
    detected_info = next(
        (info for keyword, info in dataset_info.items() if keyword in filename_lower),
        None,
    )

    if detected_info:
        for key, value in detected_info.items():
            print(f"{key.replace('_', ' ').title():<20} {value}")
    else:
        # Fallback analysis
        print("Dataset Type:        Unknown")
        print("Confidence:          Low")
        print("Business Use:        Requires domain analysis")

        # Try to infer the domain from the operations. Match 'Conv' as a
        # substring of each name so that layer class names such as 'Conv2D'
        # or 'DepthwiseConv2D' count, not only an operator called exactly
        # 'Conv'.
        if operations:
            op_names = [operations] if isinstance(operations, str) else operations
            if any('Conv' in str(op) for op in op_names):
                print("Likely Domain:       Computer Vision")
                print("Suggested Use:       Image processing applications")

    # Add compliance information
    print(f"\nCOMPLIANCE CONSIDERATIONS")
    print("-" * 40)
    print("GDPR Compliance:     Depends on training data")
    print("Bias Assessment:     Recommended before deployment")
    print("Fairness Testing:    Required for production use")
    print("Data Lineage:        Document training data sources")

def print_deployment_checklist(checklist):
    """Print the deployment readiness checklist grouped by category.

    Args:
        checklist: Mapping of category key (snake_case) to an iterable of
            checklist items. Category keys are shown in title case with
            underscores replaced by spaces.
    """
    print(f"\nDEPLOYMENT READINESS CHECKLIST")
    print("-" * 80)

    for section_key in checklist:
        heading = section_key.replace('_', ' ').title()
        print(f"\n{heading}:")
        for entry in checklist[section_key]:
            print(f"  ☐ {entry}")

def analyze_keras_model(model_path):
    """Load a Keras model file and print a multi-section analysis report.

    Sections printed: basic model info and parameter counts, size/memory
    estimates, layer architecture (first 15 layers listed, all layers
    counted), a FLOPS estimate, an optional inference benchmark, hardware
    requirements, deployment options, training configuration (if the model
    exposes an optimizer), security analysis, dataset detection and a
    deployment checklist.

    Args:
        model_path: Location of the saved model, as ``str`` or
            ``pathlib.Path``.

    Errors are reported on stdout rather than propagated: a missing
    TensorFlow install prints an install hint, any other exception prints
    "Error loading Keras model: ...".
    """
    print("TENSORFLOW/KERAS MODEL ANALYSIS")
    print("-" * 80)

    max_layers_shown = 15

    try:
        import tensorflow as tf

        # .stat() and .name are needed below, so accept str paths as well.
        model_file = Path(model_path)

        # Load model and measure loading time
        start_time = time.time()
        model = tf.keras.models.load_model(model_path)
        load_time = time.time() - start_time

        print(f"Model Type:          {type(model).__name__}")
        print(f"Loading Time:        {load_time:.2f} seconds")
        print(f"TensorFlow Version:  {tf.__version__}")

        # Parameter analysis
        total_params = model.count_params()
        trainable_params = sum(
            tf.keras.backend.count_params(w) for w in model.trainable_weights
        )
        non_trainable_params = sum(
            tf.keras.backend.count_params(w) for w in model.non_trainable_weights
        )

        def share(count):
            # A model without parameters must not abort the whole report.
            return count / total_params * 100 if total_params else 0.0

        print(f"Total Parameters:    {total_params:,}")
        print(f"Trainable Params:    {trainable_params:,} ({share(trainable_params):.1f}%)")
        print(f"Non-trainable:       {non_trainable_params:,} ({share(non_trainable_params):.1f}%)")
        print(f"Total Layers:        {len(model.layers)}")

        # Memory analysis
        model_size_mb = model_file.stat().st_size / (1024 * 1024)
        estimated_inference_memory = model_size_mb * 2.5  # Rough estimate

        print(f"Model Size:          {model_size_mb:.2f} MB")
        print(f"Inference Memory:    ~{estimated_inference_memory:.0f} MB (estimated)")

        # Architecture analysis
        input_shape = getattr(model, 'input_shape', None)
        output_shape = getattr(model, 'output_shape', None)

        if input_shape:
            print(f"Input Shape:         {input_shape}")
            # Calculate input size
            if isinstance(input_shape, tuple) and len(input_shape) > 1:
                input_size = 1
                for dim in input_shape[1:]:  # Skip batch dimension
                    if dim:  # None (dynamic) dimensions are ignored
                        input_size *= dim
                input_mb = (input_size * 4) / (1024 * 1024)  # float32
                print(f"Input Size:          {input_size:,} elements ({input_mb:.2f} MB)")

        if output_shape:
            print(f"Output Shape:        {output_shape}")

        # Layer analysis with performance insights
        layer_types = {}
        layer_details = []

        print(f"\nLAYER ARCHITECTURE")
        print("-" * 80)

        # Record every layer: the helpers further down receive layer_details,
        # so the display cap must not truncate the data they analyse.
        for i, layer in enumerate(model.layers):
            layer_type = type(layer).__name__
            layer_types[layer_type] = layer_types.get(layer_type, 0) + 1
            layer_details.append({
                'index': i,
                'name': layer.name,
                'type': layer_type,
                'params': layer.count_params(),
                'output_shape': str(getattr(layer, 'output_shape', 'Unknown')),
                'trainable': layer.trainable
            })

        # Print layer details (display is capped, the data is not)
        for layer in layer_details[:max_layers_shown]:
            trainable_mark = "✓" if layer['trainable'] else "✗"
            print(f"  {layer['index']+1:2}. {layer['name']:<25} {layer['type']:<15} {layer['params']:>8,} {trainable_mark}")

        if len(layer_details) > max_layers_shown:
            print(f"       ... and {len(layer_details) - max_layers_shown} more layers")

        print(f"\nLayer Type Summary:")
        for layer_type, count in sorted(layer_types.items()):
            print(f"  {layer_type:<20} {count:>3}")

        # Performance estimation
        flops = calculate_flops_estimate({'layers': layer_details})

        print(f"\nPERFORMANCE ANALYSIS")
        print("-" * 80)
        print(f"Estimated FLOPS:     {flops:,}")

        # Try to do a simple benchmark (best effort: dynamic or multi-input
        # shapes cannot be turned into a dummy tensor and are reported below)
        try:
            if input_shape and len(input_shape) > 1:
                # Create dummy input
                dummy_input = tf.random.normal([1] + list(input_shape[1:]))

                # Warmup
                for _ in range(3):
                    _ = model(dummy_input, training=False)

                # Benchmark
                start_time = time.time()
                for _ in range(10):
                    _ = model(dummy_input, training=False)
                avg_time = (time.time() - start_time) / 10

                print(f"Inference Time:      {avg_time*1000:.2f} ms (average of 10)")
                if avg_time > 0:  # coarse clocks can report 0 elapsed time
                    print(f"Throughput:          {1/avg_time:.1f} FPS")
        except Exception:
            print("Inference Time:      Could not benchmark")

        # Hardware requirements
        requirements = estimate_hardware_requirements(model_size_mb, total_params, 'keras')
        print(f"\nHARDWARE REQUIREMENTS")
        print("-" * 80)
        for req, value in requirements.items():
            print(f"{req.replace('_', ' ').title():<20} {value}")

        # Deployment information
        print(f"\nDEPLOYMENT OPTIONS")
        print("-" * 80)
        print("TensorFlow Serving:  Ready")
        print("TensorFlow Lite:     Convertible")
        print("TensorFlow.js:       Convertible")
        print("ONNX Export:         Supported")
        print("SavedModel Format:   Native")

        # Model compilation info (optional: skipped when no optimizer is
        # available or it cannot be read)
        try:
            optimizer = getattr(model, 'optimizer', None)
        except Exception:
            optimizer = None
        if optimizer:
            print(f"\nTRAINING CONFIGURATION")
            print("-" * 80)
            print(f"Optimizer:           {type(optimizer).__name__}")
            try:
                lr = optimizer.learning_rate
                if hasattr(lr, 'numpy'):
                    lr = lr.numpy()
                print(f"Learning Rate:       {lr}")
            except Exception:
                print("Learning Rate:       Not available")

        # Security analysis
        risks, recommendations = analyze_security_risks(
            {'total_params': total_params, 'layers': layer_details},
            model_file.name
        )
        print_security_analysis(risks, recommendations)

        # Dataset detection
        input_shape_str = str(input_shape) if input_shape else ""
        detect_dataset_comprehensive(model_file.name, layer_types, input_shape_str)

        # Deployment checklist
        checklist = generate_deployment_checklist(
            {'total_params': total_params, 'layers': layer_details},
            model_file.name
        )
        print_deployment_checklist(checklist)

    except ImportError:
        print("TensorFlow not installed. Install with: !pip install tensorflow")
    except Exception as e:
        print(f"Error loading Keras model: {e}")

def analyze_pytorch_model(model_path):
    """Load a PyTorch file on the CPU and dispatch to the matching analyzer.

    A loaded ``dict`` is treated as a state dictionary, an object exposing
    ``parameters`` as a full model; anything else is reported as an unknown
    format.

    Args:
        model_path: Location of the file, as ``str`` or ``pathlib.Path``.

    Errors are reported on stdout rather than propagated: a missing PyTorch
    install prints an install hint, any other exception prints
    "Error loading PyTorch model: ...".
    """
    print("PYTORCH MODEL ANALYSIS")
    print("-" * 80)

    try:
        import torch

        # The analyzers use .stat() and .name, so hand them a Path even when
        # the caller supplied a plain string.
        model_file = Path(model_path)

        # SECURITY: torch.load unpickles the file, which can execute
        # arbitrary code. Only analyze files from trusted sources.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # in which case fully pickled modules are rejected and end up in the
        # generic error branch below - confirm the intended behavior.
        start_time = time.time()
        model = torch.load(model_path, map_location='cpu')
        load_time = time.time() - start_time

        print(f"PyTorch Version:     {torch.__version__}")
        print(f"Loading Time:        {load_time:.2f} seconds")

        if isinstance(model, dict):
            analyze_pytorch_state_dict(model, model_file)
        elif hasattr(model, 'parameters'):
            analyze_pytorch_model_object(model, model_file)
        else:
            print(f"Model Type:          {type(model).__name__}")
            print(f"Model Content:       Unknown format")

    except ImportError:
        print("PyTorch not installed. Install with: !pip install torch")
    except Exception as e:
        print(f"Error loading PyTorch model: {e}")

def analyze_pytorch_state_dict(model, model_path):
    """Print an analysis report for a PyTorch state dictionary.

    Only tensor values are counted; other entries in the dict are ignored.
    Tensors are grouped into "layers" by the part of their key before the
    first dot.

    Args:
        model: Mapping of parameter name to value, as returned by
            ``torch.load`` for a state-dict checkpoint.
        model_path: Location of the file the dict came from (``str`` or
            ``pathlib.Path``); used for the on-disk size and the file name.
    """
    # Imported here because the caller only imports torch in its own scope.
    import torch

    model_path = Path(model_path)

    print("Model Type:          State Dictionary")

    # Parameter analysis
    total_params = 0
    param_bytes = 0
    param_details = []
    layer_info = {}

    for key, param in model.items():
        if not torch.is_tensor(param):
            continue

        name = str(key)  # keys are not guaranteed to be strings
        param_count = param.numel()
        total_params += param_count
        # Use the tensor's real element width instead of assuming float32.
        param_bytes += param_count * param.element_size()

        # Extract layer name (text before the first dot, or the whole key)
        layer_name = name.partition('.')[0]
        layer_stats = layer_info.setdefault(layer_name, {'params': 0, 'tensors': 0})
        layer_stats['params'] += param_count
        layer_stats['tensors'] += 1

        param_details.append({
            'name': name,
            'shape': list(param.shape),
            'count': param_count,
            'dtype': str(param.dtype),
            'layer': layer_name
        })

    tensor_count = len(param_details)

    print(f"Total Parameters:    {total_params:,}")
    print(f"Parameter Tensors:   {tensor_count}")
    print(f"Unique Layers:       {len(layer_info)}")

    # Memory analysis
    model_size_mb = model_path.stat().st_size / (1024 * 1024)
    param_memory_mb = param_bytes / (1024 * 1024)

    print(f"Model Size:          {model_size_mb:.2f} MB")
    print(f"Parameters Memory:   {param_memory_mb:.2f} MB")
    print(f"Overhead:            {model_size_mb - param_memory_mb:.2f} MB")

    # Layer breakdown (first 10 layers)
    print(f"\nLAYER BREAKDOWN")
    print("-" * 80)
    for layer_name, info in list(layer_info.items())[:10]:
        # Guard against dicts that only hold empty tensors.
        param_pct = (info['params'] / total_params) * 100 if total_params else 0.0
        print(f"  {layer_name:<30} {info['params']:>10,} params ({param_pct:>5.1f}%)")

    if len(layer_info) > 10:
        print(f"       ... and {len(layer_info) - 10} more layers")

    # Parameter details
    print(f"\nPARAMETER DETAILS (First 10)")
    print("-" * 80)
    for i, param in enumerate(param_details[:10]):
        shape_str = str(param['shape'])
        print(f"  {i+1:2}. {param['name']:<35} {shape_str:<20} {param['count']:>8,}")

    if len(param_details) > 10:
        print(f"       ... and {len(param_details) - 10} more parameters")

    # Try to infer architecture type: the first key that carries any known
    # marker decides, and only one hint is reported.
    architecture_hints = []
    for key in model:
        key_lower = str(key).lower()
        if 'conv' in key_lower:
            architecture_hints.append('Convolutional Neural Network')
            break
        elif 'transformer' in key_lower or 'attention' in key_lower:
            architecture_hints.append('Transformer Architecture')
            break
        elif 'lstm' in key_lower or 'gru' in key_lower:
            architecture_hints.append('Recurrent Neural Network')
            break

    if architecture_hints:
        print(f"\nARCHITECTURE TYPE")
        print("-" * 80)
        for hint in architecture_hints:
            print(f"Detected:            {hint}")

    # Performance estimates
    print(f"\nPERFORMANCE ESTIMATES")
    print("-" * 80)
    estimated_flops = total_params * 2  # Rough estimate
    print(f"Estimated FLOPS:     {estimated_flops:,}")
    print(f"Memory Bandwidth:    ~{param_memory_mb * 2:.0f} MB/s required")

    # Hardware requirements
    requirements = estimate_hardware_requirements(model_size_mb, total_params, 'pytorch')
    print(f"\nHARDWARE REQUIREMENTS")
    print("-" * 80)
    for req, value in requirements.items():
        print(f"{req.replace('_', ' ').title():<20} {value}")

    # Deployment options
    print(f"\nDEPLOYMENT OPTIONS")
    print("-" * 80)
    print("TorchServe:          Ready")
    print("ONNX Export:         torch.onnx.export()")
    print("TorchScript:         torch.jit.trace()")
    print("Mobile (iOS):        PyTorch Mobile")
    print("Mobile (Android):    PyTorch Mobile")
    print("C++ Deployment:      LibTorch")

    # Security analysis
    risks, recommendations = analyze_security_risks(
        {'total_params': total_params},
        model_path.name
    )
    print_security_analysis(risks, recommendations)

    # Dataset detection
    detect_dataset_comprehensive(model_path.name, {})

def analyze_pytorch_model_object(model, model_path):
    """Print an analysis report for a fully loaded PyTorch module.

    Reports parameter counts, the module type histogram, the first 15
    sub-modules and, when an input shape can be guessed from the first
    child (``in_features`` for linear layers, ``in_channels`` with an
    assumed 224x224 image for conv layers), a small CPU inference benchmark.

    Args:
        model: Object exposing ``parameters()``, ``named_modules()`` and
            ``children()`` (a ``torch.nn.Module``).
        model_path: Unused here; kept so both PyTorch analyzers share the
            same signature.
    """
    # Imported here because the caller only imports torch in its own scope.
    import torch

    print(f"Model Type:          {type(model).__name__}")

    # Parameter analysis
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    non_trainable_params = total_params - trainable_params

    def share(count):
        # Parameter-free modules (e.g. only activations) must not divide by 0.
        return count / total_params * 100 if total_params else 0.0

    print(f"Total Parameters:    {total_params:,}")
    print(f"Trainable Params:    {trainable_params:,} ({share(trainable_params):.1f}%)")
    print(f"Non-trainable:       {non_trainable_params:,} ({share(non_trainable_params):.1f}%)")

    # Module analysis
    modules = list(model.named_modules())
    print(f"Total Modules:       {len(modules) - 1}")  # Exclude root

    # Module breakdown
    module_types = {}
    module_details = []

    for name, module in modules:
        if not name:  # Skip root module (empty name)
            continue
        module_type = type(module).__name__
        module_types[module_type] = module_types.get(module_type, 0) + 1
        module_details.append({
            'name': name,
            'type': module_type,
            # Includes the parameters of nested sub-modules.
            'parameters': sum(p.numel() for p in module.parameters())
        })

    print(f"\nMODULE TYPES")
    print("-" * 80)
    for module_type, count in sorted(module_types.items()):
        print(f"  {module_type:<25} {count:>3}")

    # Show module hierarchy (first 15)
    print(f"\nMODULE HIERARCHY (First 15)")
    print("-" * 80)
    for i, module in enumerate(module_details[:15]):
        print(f"  {i+1:2}. {module['name']:<35} {module['type']:<15} {module['parameters']:>8,}")

    if len(module_details) > 15:
        print(f"       ... and {len(module_details) - 15} more modules")

    def report_unavailable(with_header=True):
        if with_header:
            print(f"\nPERFORMANCE BENCHMARK")
            print("-" * 80)
        print("Benchmark:           Could not run (unknown input shape)")

    # Guess an input tensor from the first child layer.
    try:
        model.eval()
        first_layer = next(model.children())
        if hasattr(first_layer, 'in_features'):
            # Linear layer
            dummy_input = torch.randn(1, first_layer.in_features)
        elif hasattr(first_layer, 'in_channels'):
            # Conv layer - assume common image size
            dummy_input = torch.randn(1, first_layer.in_channels, 224, 224)
        else:
            dummy_input = None
    except Exception:  # includes StopIteration for a module without children
        report_unavailable()
        return

    if dummy_input is None:
        # No basis for guessing an input; the benchmark section is omitted.
        return

    print(f"\nPERFORMANCE BENCHMARK")
    print("-" * 80)

    try:
        with torch.no_grad():
            # Warmup
            for _ in range(3):
                model(dummy_input)

            # Actual benchmark
            start_time = time.time()
            for _ in range(10):
                model(dummy_input)
            avg_time = (time.time() - start_time) / 10
    except Exception:
        # The guessed input does not fit the model's forward pass.
        report_unavailable(with_header=False)
        return

    print(f"Inference Time:      {avg_time*1000:.2f} ms (average)")
    if avg_time > 0:  # coarse clocks can report 0 elapsed time
        print(f"Throughput:          {1/avg_time:.1f} FPS")
    print(f"Input Shape:         {list(dummy_input.shape)}")

def analyze_sklearn_model(model_path):
    """Unpickle a scikit-learn model and print a multi-section report.

    Sections printed: model identity, fitted properties, selected
    hyperparameters, complexity, performance characteristics, business
    applications, deployment recommendations, feature importances or
    coefficients (when present), security analysis and dataset detection.

    Args:
        model_path: Location of the pickled model, as ``str`` or
            ``pathlib.Path``.

    Errors (including a missing scikit-learn install or an unreadable file)
    are reported on stdout as "Error analyzing sklearn model: ..." rather
    than propagated.
    """
    print("SCIKIT-LEARN MODEL ANALYSIS")
    print("-" * 80)

    try:
        import pickle
        import sklearn
        from sklearn.base import is_classifier, is_regressor

        # .name is needed below, so accept str paths as well.
        model_file = Path(model_path)

        # SECURITY: unpickling executes code embedded in the file. Only
        # analyze model files that come from a trusted source.
        with open(model_file, 'rb') as f:
            model = pickle.load(f)

        module_name = type(model).__module__
        print(f"Model Type:          {type(model).__name__}")
        print(f"Module:              {module_name}")
        print(f"Scikit-learn Ver:    {sklearn.__version__}")

        # Model properties analysis
        properties = []

        # Basic properties
        if hasattr(model, 'n_features_in_'):
            properties.append(f"Input Features: {model.n_features_in_}")
        if hasattr(model, 'n_classes_'):
            properties.append(f"Output Classes: {model.n_classes_}")
        if hasattr(model, 'classes_'):
            classes = getattr(model, 'classes_')
            properties.append(f"Class Labels: {list(classes)[:5]}{'...' if len(classes) > 5 else ''}")

        # Model-specific properties
        if hasattr(model, 'n_estimators'):
            properties.append(f"Estimators: {model.n_estimators}")
        if hasattr(model, 'max_depth'):
            properties.append(f"Max Depth: {model.max_depth}")
        if hasattr(model, 'C'):
            properties.append(f"Regularization C: {model.C}")
        if hasattr(model, 'kernel'):
            properties.append(f"Kernel: {model.kernel}")

        print(f"\nMODEL PROPERTIES")
        print("-" * 80)
        for prop in properties:
            print(f"  {prop}")

        # Hyperparameters (best effort: the unpickled object may not be an
        # estimator with get_params)
        try:
            params = model.get_params()
            print(f"\nHYPERPARAMETERS ({len(params)} total)")
            print("-" * 80)

            # Show most important parameters
            important_params = ['n_estimators', 'max_depth', 'learning_rate', 'C', 'gamma',
                                'kernel', 'random_state', 'max_features', 'min_samples_split']

            shown_params = 0
            for param_name in important_params:
                if param_name in params:
                    print(f"  {param_name:<20} {params[param_name]}")
                    shown_params += 1

            if len(params) > shown_params:
                print(f"  ... and {len(params) - shown_params} more parameters")

        except Exception:
            print("Hyperparameters:     Could not extract")

        # Model complexity analysis
        model_complexity = analyze_sklearn_complexity(model)
        print(f"\nCOMPLEXITY ANALYSIS")
        print("-" * 80)
        for metric, value in model_complexity.items():
            print(f"{metric:<20} {value}")

        # Performance characteristics
        print(f"\nPERFORMANCE CHARACTERISTICS")
        print("-" * 80)

        model_type = type(model).__name__.lower()
        # scikit-learn's SVM estimators are named SVC/SVR/NuSVC/..., so the
        # class name rarely contains "svm"; the defining module does.
        # Linear SVMs are left to the linear branch below.
        is_kernel_svm = 'svm' in model_type or (
            '.svm' in module_name and 'linear' not in model_type
        )
        if 'forest' in model_type or 'tree' in model_type:
            print("Training Speed:      Fast to Medium")
            print("Inference Speed:     Fast")
            print("Memory Usage:        Medium")
            print("Interpretability:    High (tree-based)")
            print("Handles Missing:     Yes")
            print("Feature Importance:  Available")
        elif is_kernel_svm:
            print("Training Speed:      Slow")
            print("Inference Speed:     Medium")
            print("Memory Usage:        Medium to High")
            print("Interpretability:    Low")
            print("Handles Missing:     No")
            print("Kernel Trick:        Yes")
        elif 'linear' in model_type or 'logistic' in model_type:
            print("Training Speed:      Fast")
            print("Inference Speed:     Very Fast")
            print("Memory Usage:        Low")
            print("Interpretability:    High (coefficients)")
            print("Handles Missing:     No")
            print("Regularization:      Available")

        # Business applications
        print(f"\nBUSINESS APPLICATIONS")
        print("-" * 80)

        def estimator_is(predicate):
            # The unpickled object may not be a scikit-learn estimator at
            # all; treat any failure as "not of this kind".
            try:
                return bool(predicate(model))
            except Exception:
                return False

        # Ask scikit-learn for the estimator kind first: name matching alone
        # files LogisticRegression under regression and misses classifiers
        # such as SVC. The name checks remain as a fallback.
        if estimator_is(is_classifier) or 'classifier' in model_type:
            print("Use Case:            Classification problems")
            print("Output:              Class probabilities/labels")
            print("Applications:        Fraud detection, spam filtering, diagnosis")
        elif estimator_is(is_regressor) or 'regressor' in model_type or 'regression' in model_type:
            print("Use Case:            Regression problems")
            print("Output:              Continuous values")
            print("Applications:        Price prediction, demand forecasting")
        elif 'cluster' in model_type or '.cluster' in module_name:
            print("Use Case:            Unsupervised clustering")
            print("Output:              Cluster assignments")
            print("Applications:        Customer segmentation, anomaly detection")

        # Deployment recommendations
        print(f"\nDEPLOYMENT RECOMMENDATIONS")
        print("-" * 80)
        print("Serialization:       Pickle (current), Joblib (recommended)")
        print("Production:          Flask/FastAPI REST API")
        print("Scaling:             Stateless, easily parallelizable")
        print("Monitoring:          Track prediction distribution drift")
        print("Updates:             Retrain periodically with new data")

        # Feature importance if available
        if hasattr(model, 'feature_importances_'):
            importances = model.feature_importances_
            print(f"\nFEATURE IMPORTANCE (Top 10)")
            print("-" * 80)
            top_features = sorted(enumerate(importances), key=lambda x: x[1], reverse=True)[:10]
            for feature_idx, importance in top_features:
                print(f"  Feature {feature_idx:<3} {importance:>8.4f}")

        # Model coefficients if available
        elif hasattr(model, 'coef_'):
            coef = model.coef_
            print(f"\nMODEL COEFFICIENTS")
            print("-" * 80)
            if len(coef.shape) == 1:
                print(f"Coefficients Shape:  {coef.shape}")
                print(f"Top Coefficients:    {coef[:5]} ...")
            else:
                print(f"Coefficients Shape:  {coef.shape}")
                print(f"Classes x Features:  {coef.shape[0]} x {coef.shape[1]}")

        # Security and robustness
        risks, recommendations = analyze_security_risks({'model_type': model_type}, model_file.name)
        print_security_analysis(risks, recommendations)

        # Dataset detection
        detect_dataset_comprehensive(model_file.name, {})

    except Exception as e:
        print(f"Error analyzing sklearn model: {e}")

def analyze_sklearn_complexity(model):
    """Summarize size, inference complexity and scaling needs of a model.

    All heuristics are based on the model's class name and on whether it
    has an ``n_estimators`` attribute.

    Args:
        model: The (unpickled) model object.

    Returns:
        dict with the keys 'Model Size', 'Time Complexity' and
        'Feature Scaling', each mapped to a display string. 'Model Size' is
        "Unknown" when the model cannot be pickled.
    """
    # Imported here: the sibling analyzer only imports pickle in its own scope.
    import pickle

    complexity = {}
    model_type = type(model).__name__

    # Memory complexity: size of the pickled representation in KB
    try:
        model_size = len(pickle.dumps(model)) / 1024
        complexity['Model Size'] = f"{model_size:.1f} KB"
    except (pickle.PicklingError, TypeError, AttributeError, RecursionError):
        complexity['Model Size'] = "Unknown"

    # scikit-learn's SVM estimators are called SVC / SVR / NuSVC /
    # OneClassSVM, so look for those tags rather than only for "SVM".
    is_svm = any(tag in model_type for tag in ('SVM', 'SVC', 'SVR'))
    is_linear = 'Linear' in model_type

    # Time complexity estimates
    if hasattr(model, 'n_estimators'):
        n_est = model.n_estimators
        complexity['Time Complexity'] = f"O(n_estimators * tree_depth) ≈ O({n_est} * depth)"
    elif is_svm and not is_linear:
        complexity['Time Complexity'] = "O(n_support_vectors * n_features)"
    elif is_linear:
        complexity['Time Complexity'] = "O(n_features)"
    else:
        complexity['Time Complexity'] = "Model-dependent"

    # Feature scaling requirements
    if is_svm or 'Logistic' in model_type:
        complexity['Feature Scaling'] = "Required"
    elif 'Tree' in model_type or 'Forest' in model_type:
        complexity['Feature Scaling'] = "Not required"
    else:
        complexity['Feature Scaling'] = "Recommended"

    return complexity

def main():
    """Interactive entry point: report system info, find model files, analyze.

    Steps:
      1. Print the Python version, RAM, free disk space and (when GPUtil is
         importable) the first GPU.
      2. Scan ``/content`` (or the current directory when ``/content`` does
         not exist) for files with a supported model extension.
      3. List the files found, largest first, and let the user pick one, or
         ``0`` to analyze all of them.
      4. Hand the chosen file(s) to ``analyze_model_file``.

    Returns:
        None. All results are printed to stdout.
    """
    print("COMPREHENSIVE AI MODEL ANALYZER")
    print("Advanced analysis for AI developers")
    print("=" * 80)

    # System information
    gib = 1024 ** 3
    print("System Info:")
    print(f"  Python Version:    {sys.version.split()[0]}")
    # NOTE: despite the label, this prints the `.total` field, not `.available`.
    print(f"  Available RAM:     {psutil.virtual_memory().total / gib:.1f} GB")
    print(f"  Available Disk:    {psutil.disk_usage('/').free / gib:.1f} GB")

    try:
        import GPUtil
        gpus = GPUtil.getGPUs()
        if gpus:
            print(f"  GPU Available:     {gpus[0].name} ({gpus[0].memoryTotal}MB)")
        else:
            print("  GPU Available:     None detected")
    except Exception:
        # GPUtil is optional: a missing package or failing query must not
        # abort the run, but Ctrl-C / SystemExit should still propagate.
        print("  GPU Available:     Unknown")

    print()

    # Find models
    content_dir = Path("/content")
    if not content_dir.exists():
        content_dir = Path(".")
        print("Using current directory (not in Colab environment)")

    extensions = ['.onnx', '.h5', '.hdf5', '.pt', '.pth', '.bin', '.pkl', '.joblib']
    models = []

    print(f"Scanning {content_dir} for AI models...")
    for ext in extensions:
        found = list(content_dir.glob(f"*{ext}"))
        models.extend(found)
        if found:
            print(f"  Found {len(found)} {ext} files")

    if not models:
        print("\nNo model files found!")
        print(f"Supported formats: {', '.join(extensions)}")
        print("\nTip: Use the Hugging Face downloader script first")
        return

    # Remove duplicates and sort by size, largest first
    unique_models = sorted(set(models), key=lambda p: p.stat().st_size, reverse=True)

    print(f"\nFound {len(unique_models)} model file(s) (sorted by size):")
    total_size = 0
    for idx, model in enumerate(unique_models, start=1):
        info = model.stat()  # one stat() call supplies both size and mtime
        size_mb = info.st_size / (1024 * 1024)
        total_size += size_mb
        age_days = (time.time() - info.st_mtime) / (24 * 3600)
        print(f"  {idx:2}. {model.name:<40} {size_mb:>8.2f} MB  ({age_days:>3.0f} days old)")

    print(f"\nTotal model storage: {total_size:.2f} MB")

    # Select model
    if len(unique_models) == 1:
        selected = unique_models[0]
        print(f"\nAuto-selected: {selected.name}")
    else:
        print("\nAnalysis options:")
        print("  0. Analyze ALL models")
        for idx, model in enumerate(unique_models, start=1):
            print(f"  {idx}. {model.name}")

        # Only reading and parsing the answer is guarded. The analysis itself
        # runs outside the try so an interrupt or error raised there is not
        # mistaken for a bad menu choice.
        try:
            choice = input(f"\nSelect option (0-{len(unique_models)}): ").strip()
            # None marks the "analyze all" option; otherwise a 0-based index.
            index = None if choice == '0' else int(choice) - 1
        except (ValueError, KeyboardInterrupt, EOFError):
            # EOFError: stdin is closed / non-interactive, so fall back too.
            selected = unique_models[0]
            print(f"Using first model: {selected.name}")
        else:
            if index is None:
                # Analyze all models
                for model in unique_models:
                    print("\n" + "=" * 100)
                    print(f"ANALYZING: {model.name}")
                    print("=" * 100)
                    analyze_model_file(model)
                return

            if 0 <= index < len(unique_models):
                selected = unique_models[index]
            else:
                selected = unique_models[0]
                print(f"Invalid choice, using: {selected.name}")

    print(f"\nStarting comprehensive analysis of: {selected.name}")
    print("=" * 80)

    # Analyze the selected model
    analyze_model_file(selected)

    print("\n" + "=" * 80)
    print("ANALYSIS COMPLETE - Ready for production deployment!")
    print("=" * 80)

    # Final recommendations
    print("\nNEXT STEPS FOR AI DEVELOPERS:")
    print("1. Review security recommendations above")
    print("2. Test model with sample inputs")
    print("3. Benchmark performance on target hardware")
    print("4. Implement monitoring and logging")
    print("5. Plan model versioning strategy")
    print("6. Document API interfaces and data formats")

# Additional imports for enhanced functionality.
# `sys` is imported first and unconditionally: the fallback installer below
# needs `sys.executable`, so it must be bound even when `import psutil` fails.
import sys

try:
    import psutil
except ImportError:
    print("Installing additional dependencies...")
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'psutil', '-q'])
    import psutil

# Run the enhanced analyzer
if __name__ == "__main__":
    main()


COMPREHENSIVE AI MODEL ANALYZER
Advanced analysis for AI developers
System Info:
  Python Version:    3.12.11
  Available RAM:     12.7 GB
  Available Disk:    185.4 GB
  GPU Available:     Unknown

Scanning /content for AI models...
  Found 2 .onnx files
  Found 3 .bin files
  Found 3 .pkl files

Found 8 model file(s) (sorted by size):
   1. vit_base_imagenet.bin                      330.31 MB  (  0 days old)
   2. distilbert_sentiment.bin                   255.44 MB  (  0 days old)
   3. resnet50_imagenet.bin                       97.82 MB  (  0 days old)
   4. mnist-8-enhanced.onnx                        1.65 MB  (  0 days old)
   5. random_forest_classifier.pkl                 1.26 MB  (  0 days old)
   6. svm_classifier.pkl                           0.08 MB  (  0 days old)
   7. mnist-8.onnx                                 0.03 MB  (  0 days old)
   8. logistic_regression.pkl                      0.00 MB  (  0 days old)

Total model storage: 686.58 MB

Analysis options:
  0. Analy