## Set up environment

In [23]:
import torch
import numpy as np
import random

def set_seed(seed=42):
    """Set seed for reproducibility"""
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
    print(f"Seed set to {seed} for future runs")

set_seed(42)

Seed set to 42 for future runs


In [16]:
cd "/kaggle/working/"

/kaggle/working


In [18]:

!pwd
!git clone https://github.com/ShiqiYu/OpenGait.git


/kaggle/working
Cloning into 'OpenGait'...
remote: Enumerating objects: 2100, done.[K
remote: Counting objects: 100% (804/804), done.[K
remote: Compressing objects: 100% (290/290), done.[K
remote: Total 2100 (delta 675), reused 514 (delta 514), pack-reused 1296 (from 6)[K
Receiving objects: 100% (2100/2100), 20.36 MiB | 35.33 MiB/s, done.
Resolving deltas: 100% (1294/1294), done.


In [24]:
import sys
sys.path.append('/kaggle/working/OpenGait/')

In [5]:
import torch
print(torch.__version__)


2.6.0+cu124


## Prepare dataset- Casia B - Merge the frams in the dataset into pkl format

In [3]:
from pathlib import Path
src = Path("/kaggle/input/casia-b")

if src.exists():
    print("Input directory is exist")

Input directory is exist


In [19]:
import shutil 
src = "/kaggle/input/casia-b" 
dst = "/kaggle/working/casia-b" 
     
shutil.copytree(src, dst)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/casia-b'

In [10]:
!python3 OpenGait/datasets/pretreatment.py --input_path "/kaggle/working/casia-b/output" --output_path "/kaggle/working/CASIA-B-pkl"

Pretreating: 100%|████████████████████| 13592/13592 [07:57<00:00, 28.47folder/s]


## Training the Model with CASIA B

In [10]:
cd OpenGait

/kaggle/working/OpenGait


In [3]:
import yaml

# Load the existing config
with open("/kaggle/working/OpenGait/configs/gaitgl/gaitgl.yaml", "r") as f:
    config = yaml.safe_load(f)

# Make key changes
config["data_cfg"]["dataset_root"] = "/kaggle/working/CASIA-B-pkl"
config["evaluator_cfg"]["restore_hint"] = 80000
config["evaluator_cfg"]["sampler"]["batch_size"] = 2
config["trainer_cfg"]["with_test"] = False
config["trainer_cfg"]["restore_hint"] = 70000

# Save the modified config
output_path = "/kaggle/working/OpenGait/configs/gaitgl/gaitgl.yaml"
with open(output_path, "w") as f:
    yaml.dump(config, f, default_flow_style=False)

# Reload and print the updated config
with open(output_path, "r") as f:
    updated_config = f.read()

print("✅ Configuration updated and saved to gaitgl_geta.yaml\n")
print("📄 Updated YAML contents:\n")
print(updated_config)


✅ Configuration updated and saved to gaitgl_geta.yaml

📄 Updated YAML contents:

data_cfg:
  dataset_name: CASIA-B
  dataset_partition: ./datasets/CASIA-B/CASIA-B.json
  dataset_root: /kaggle/working/CASIA-B-pkl
  num_workers: 1
  remove_no_gallery: false
  test_dataset_name: CASIA-B
evaluator_cfg:
  enable_float16: false
  restore_ckpt_strict: true
  restore_hint: 80000
  sampler:
    batch_size: 2
    sample_type: all_ordered
    type: InferenceSampler
  save_name: GaitGL
loss_cfg:
- log_prefix: triplet
  loss_term_weight: 1.0
  margin: 0.2
  type: TripletLoss
- label_smooth: false
  log_accuracy: true
  log_prefix: softmax
  loss_term_weight: 1.0
  scale: 1
  type: CrossEntropyLoss
model_cfg:
  channels:
  - 32
  - 64
  - 128
  class_num: 74
  model: GaitGL
optimizer_cfg:
  lr: 0.0001
  solver: Adam
  weight_decay: 0.0005
scheduler_cfg:
  gamma: 0.1
  milestones:
  - 70000
  scheduler: MultiStepLR
trainer_cfg:
  enable_float16: true
  log_iter: 100
  restore_ckpt_strict: true
  rest

In [59]:
import yaml

# Load the existing config
with open("/kaggle/working/geta_gaitGL/OpenGait/configs/gaitgl/gaitgl_geta.yaml", "r") as f:
    config = yaml.safe_load(f)

# Make key changes
config["data_cfg"]["dataset_root"] = "/kaggle/working/CASIA-B-pkl"
config["evaluator_cfg"]["restore_hint"] = 80000
config["evaluator_cfg"]["sampler"]["batch_size"] = 2
config["trainer_cfg"]["with_test"] = False
config["trainer_cfg"]["restore_hint"] = 0

# Save the modified config
output_path = "/kaggle/working/geta_gaitGL/OpenGait/configs/gaitgl/gaitgl_geta.yaml"
with open(output_path, "w") as f:
    yaml.dump(config, f, default_flow_style=False)

# Reload and print the updated config
with open(output_path, "r") as f:
    updated_config = f.read()

print("✅ Configuration updated and saved to gaitgl_geta.yaml\n")
print("📄 Updated YAML contents:\n")
print(updated_config)


✅ Configuration updated and saved to gaitgl_geta.yaml

📄 Updated YAML contents:

compression_optimizer: geta
data_cfg:
  dataset_name: CASIA-B
  dataset_partition: ./datasets/CASIA-B/CASIA-B.json
  dataset_root: /kaggle/working/CASIA-B-pkl
  num_workers: 1
  remove_no_gallery: false
  test_dataset_name: CASIA-B
evaluator_cfg:
  enable_float16: false
  restore_ckpt_strict: true
  restore_hint: 80000
  sampler:
    batch_size: 2
    sample_type: all_ordered
    type: InferenceSampler
  save_name: GaitGL_GETA
geta_optimizer_cfg:
  first_momentum: 0.9
  lr: 0.0001
  lr_quant: 0.001
  pruning_periods: 10
  pruning_steps: 20000
  start_pruning_step: 15000
  target_group_sparsity: 0.3
  variant: adam
  weight_decay: 0.0005
hesso_optimizer_cfg:
  lr: 0.0001
  pruning_periods: 10
  pruning_steps: 20000
  start_pruning_step: 10000
  target_group_sparsity: 0.5
  variant: adam
  weight_decay: 0.0005
loss_cfg:
- log_prefix: triplet
  loss_term_weight: 1.0
  margin: 0.2
  type: TripletLoss
- label_s

In [60]:
cd /kaggle/working/geta_gaitGL/OpenGait

/kaggle/working/geta_gaitGL/OpenGait


In [45]:
!python -m torch.distributed.launch --nproc_per_node=2 opengait/main_geta.py --cfgs ./configs/gaitgl/gaitgl.yaml --phase train

and will be removed in future. Use torchrun.
Note that --use-env is set by default in torchrun.
If your script expects `--local-rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See 
https://pytorch.org/docs/stable/distributed.html#launch-utility for 
further instructions

  main()
W1001 17:52:43.116000 165 torch/distributed/run.py:792] 
W1001 17:52:43.116000 165 torch/distributed/run.py:792] *****************************************
W1001 17:52:43.116000 165 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1001 17:52:43.116000 165 torch/distributed/run.py:792] *****************************************
/usr/bin/python3: can't open file '/kaggle/working/opengait/main.py': [Errno 2] No such file or directory
/usr/bin/python3: can't open file '/kaggle/working

In [58]:
!python -m torch.distributed.launch --nproc_per_node=2 opengait/main.py --cfgs ./configs/gaitgl/gaitgl.yaml --phase test

and will be removed in future. Use torchrun.
Note that --use-env is set by default in torchrun.
If your script expects `--local-rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See 
https://pytorch.org/docs/stable/distributed.html#launch-utility for 
further instructions

  main()
W1001 18:08:41.179000 190 torch/distributed/run.py:792] 
W1001 18:08:41.179000 190 torch/distributed/run.py:792] *****************************************
W1001 18:08:41.179000 190 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1001 18:08:41.179000 190 torch/distributed/run.py:792] *****************************************
2025-10-01 18:08:44.585313: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory 

In [7]:
!python -m torch.distributed.launch --nproc_per_node=2 opengait/main.py --cfgs ./configs/gaitgl/gaitgl.yaml --phase train

and will be removed in future. Use torchrun.
Note that --use-env is set by default in torchrun.
If your script expects `--local-rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See 
https://pytorch.org/docs/stable/distributed.html#launch-utility for 
further instructions

  main()
W1001 13:39:40.621000 84 torch/distributed/run.py:792] 
W1001 13:39:40.621000 84 torch/distributed/run.py:792] *****************************************
W1001 13:39:40.621000 84 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1001 13:39:40.621000 84 torch/distributed/run.py:792] *****************************************
2025-10-01 13:39:48.763723: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for 

In [61]:
!python -m torch.distributed.launch --nproc_per_node=2 opengait/main_geta.py --cfgs ./configs/gaitgl/gaitgl_geta.yaml --phase test

and will be removed in future. Use torchrun.
Note that --use-env is set by default in torchrun.
If your script expects `--local-rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See 
https://pytorch.org/docs/stable/distributed.html#launch-utility for 
further instructions

  main()
W1001 18:13:57.110000 231 torch/distributed/run.py:792] 
W1001 18:13:57.110000 231 torch/distributed/run.py:792] *****************************************
W1001 18:13:57.110000 231 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1001 18:13:57.110000 231 torch/distributed/run.py:792] *****************************************
2025-10-01 18:14:00.520325: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory 

In [62]:
import os

def print_tree(startpath):
    for root, dirs, files in os.walk(startpath):
        level = root.replace(startpath, '').count(os.sep)
        indent = '│   ' * level + '├── '
        print(f"{indent}{os.path.basename(root)}/")
        subindent = '│   ' * (level + 1) + '├── '
        for f in files:
            print(f"{subindent}{f}")

print_tree('/kaggle/working/OpenGait/output/CASIA-B/GaitGL/GaitGL/checkpoints/GaitGL-80000.pt')



├── /
├── GaitGL/
│   ├── summary/
│   │   ├── events.out.tfevents.1759068014.1a0949e6b7b1.237.0
│   │   ├── events.out.tfevents.1759067821.1a0949e6b7b1.198.0
│   │   ├── events.out.tfevents.1759326004.fd0f06dcee30.87.0
│   │   ├── events.out.tfevents.1759280164.c67e4998cba5.80.0
│   ├── checkpoints/
│   │   ├── GaitGL-60000.pt
│   │   ├── GaitGL-70000.pt
│   │   ├── GaitGL-10000.pt
│   │   ├── GaitGL-30000.pt
│   │   ├── GaitGL-80000.pt
│   │   ├── GaitGL-50000.pt
│   │   ├── GaitGL-40000.pt
│   │   ├── GaitGL-20000.pt


# Analyze GAIT GL Model

In [14]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import json
import time
from collections import OrderedDict
import sys


def analyze_gait_model(model_path, config_path=None, test_data_path=None, device='cuda'):
    """
    Comprehensive analysis of gait model with detailed metrics table.
    
    Args:
        model_path (str): Path to the saved model checkpoint
        config_path (str, optional): Path to the YAML config file
        test_data_path (str, optional): Path to test dataset for accuracy evaluation
        device (str): Device to load model on
    
    Returns:
        pd.DataFrame: Detailed analysis table
    """
    
    print("🔍 Starting comprehensive gait model analysis...")
    
    # Initialize results dictionary
    analysis_results = OrderedDict()
    
    # =========================
    # 1. BASIC MODEL INFO
    # =========================
    print("📊 Analyzing basic model information...")
    
    try:
        # Load checkpoint
        checkpoint = torch.load(model_path, map_location=device)
        
        # Extract model information
        analysis_results['Model Path'] = model_path
        analysis_results['Checkpoint Keys'] = list(checkpoint.keys())
        
        if 'iteration' in checkpoint:
            analysis_results['Training Iteration'] = checkpoint['iteration']
        
        if 'model' in checkpoint:
            model_state = checkpoint['model']
        elif 'model_state_dict' in checkpoint:
            model_state = checkpoint['model_state_dict']
        else:
            model_state = checkpoint
            
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return pd.DataFrame()
    
    # =========================
    # 2. PARAMETER ANALYSIS
    # =========================
    print("🔢 Analyzing model parameters...")
    
    def analyze_parameters(state_dict):
        total_params = 0
        trainable_params = 0
        layer_info = {}
        
        for name, param in state_dict.items():
            if isinstance(param, torch.Tensor):
                param_count = param.numel()
                total_params += param_count
                trainable_params += param_count  # Assume all loaded params are trainable
                
                # Categorize layers
                if 'conv' in name.lower():
                    layer_info['Conv Layers'] = layer_info.get('Conv Layers', 0) + param_count
                elif 'bn' in name.lower() or 'batchnorm' in name.lower():
                    layer_info['BatchNorm Layers'] = layer_info.get('BatchNorm Layers', 0) + param_count
                elif 'fc' in name.lower() or 'linear' in name.lower():
                    layer_info['Linear Layers'] = layer_info.get('Linear Layers', 0) + param_count
                elif 'embed' in name.lower():
                    layer_info['Embedding Layers'] = layer_info.get('Embedding Layers', 0) + param_count
                else:
                    layer_info['Other Layers'] = layer_info.get('Other Layers', 0) + param_count
        
        return total_params, trainable_params, layer_info
    
    total_params, trainable_params, layer_breakdown = analyze_parameters(model_state)
    
    analysis_results['Total Parameters'] = f"{total_params:,}"
    analysis_results['Trainable Parameters'] = f"{trainable_params:,}"
    analysis_results['Model Size (MB)'] = f"{total_params * 4 / (1024**2):.2f}"  # Assuming float32
    
    # Add layer breakdown
    for layer_type, param_count in layer_breakdown.items():
        analysis_results[f'{layer_type} Parameters'] = f"{param_count:,}"
    
    # =========================
    # 3. MODEL ARCHITECTURE ANALYSIS
    # =========================
    print("🏗️ Analyzing model architecture...")
    
    try:
        # Try to load config if provided
        if config_path and os.path.exists(config_path):
            import yaml
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
            
            analysis_results['Model Type'] = config.get('model_cfg', {}).get('model', 'Unknown')
            analysis_results['Backbone'] = config.get('model_cfg', {}).get('backbone_cfg', {}).get('type', 'Unknown')
            analysis_results['Dataset'] = config.get('data_cfg', {}).get('dataset_name', 'Unknown')
            
            # Training configuration
            analysis_results['Learning Rate'] = config.get('optimizer_cfg', {}).get('lr', 'Unknown')
            analysis_results['Batch Size'] = config.get('trainer_cfg', {}).get('sampler', {}).get('batch_size', 'Unknown')
            analysis_results['Total Iterations'] = config.get('trainer_cfg', {}).get('total_iter', 'Unknown')
            
    except Exception as e:
        print(f"⚠️ Could not load config: {e}")
    
    # Analyze layer structure from state dict
    layer_names = list(model_state.keys())
    analysis_results['Total Layers'] = len(layer_names)
    
    # Count different layer types
    conv_layers = len([name for name in layer_names if 'conv' in name.lower() and 'weight' in name])
    bn_layers = len([name for name in layer_names if ('bn' in name.lower() or 'batchnorm' in name.lower()) and 'weight' in name])
    fc_layers = len([name for name in layer_names if ('fc' in name.lower() or 'linear' in name.lower()) and 'weight' in name])
    
    analysis_results['Conv Layers Count'] = conv_layers
    analysis_results['BatchNorm Layers Count'] = bn_layers
    analysis_results['Linear Layers Count'] = fc_layers
    
    # =========================
    # 4. MEMORY ANALYSIS
    # =========================
    print("💾 Analyzing memory requirements...")
    
    # Calculate memory footprint
    param_memory = sum(param.numel() * param.element_size() for param in model_state.values() if isinstance(param, torch.Tensor))
    analysis_results['Parameter Memory (MB)'] = f"{param_memory / (1024**2):.2f}"
    
    # Estimate activation memory (rough estimate for typical gait input)
    # Assuming input size: (batch=8, frames=30, channels=1, height=64, width=44)
    estimated_activation_memory = 8 * 30 * 1 * 64 * 44 * 4  # 4 bytes per float32
    analysis_results['Estimated Activation Memory (MB)'] = f"{estimated_activation_memory / (1024**2):.2f}"
    
    # =========================
    # 5. PERFORMANCE METRICS
    # =========================
    print("⚡ Analyzing performance characteristics...")
    
    # Try to estimate FLOPs (simplified)
    def estimate_flops(state_dict):
        total_flops = 0
        for name, param in state_dict.items():
            if 'conv' in name.lower() and 'weight' in name:
                # Rough FLOP estimation for conv layers
                if param.dim() == 4:  # Conv2D
                    out_channels, in_channels, kh, kw = param.shape
                    # Assuming typical gait input resolution
                    total_flops += out_channels * in_channels * kh * kw * 64 * 44  # Rough estimate
            elif ('fc' in name.lower() or 'linear' in name.lower()) and 'weight' in name:
                # FLOP estimation for linear layers
                if param.dim() == 2:
                    total_flops += param.shape[0] * param.shape[1]
        return total_flops
    
    estimated_flops = estimate_flops(model_state)
    analysis_results['Estimated FLOPs'] = f"{estimated_flops:,}"
    analysis_results['Estimated GFLOPs'] = f"{estimated_flops / (10**9):.2f}"
    
    # =========================
    # 6. FILE ANALYSIS
    # =========================
    print("📁 Analyzing checkpoint file...")
    
    file_size = os.path.getsize(model_path)
    analysis_results['Checkpoint Size (MB)'] = f"{file_size / (1024**2):.2f}"
    analysis_results['File Extension'] = Path(model_path).suffix
    
    # Check compression ratio
    try:
        import gzip
        with open(model_path, 'rb') as f:
            original_size = len(f.read())
        
        compressed_size = len(gzip.compress(open(model_path, 'rb').read()))
        compression_ratio = compressed_size / original_size
        analysis_results['Compression Ratio'] = f"{compression_ratio:.3f}"
    except:
        analysis_results['Compression Ratio'] = "N/A"
    
    # =========================
    # 7. LAYER DISTRIBUTION ANALYSIS
    # =========================
    print("📊 Analyzing layer distribution...")
    
    # Analyze parameter distribution across layers
    layer_sizes = {}
    for name, param in model_state.items():
        if isinstance(param, torch.Tensor) and 'weight' in name:
            layer_name = name.replace('.weight', '')
            layer_sizes[layer_name] = param.numel()
    
    if layer_sizes:
        # Find largest and smallest layers
        largest_layer = max(layer_sizes, key=layer_sizes.get)
        smallest_layer = min(layer_sizes, key=layer_sizes.get)
        
        analysis_results['Largest Layer'] = f"{largest_layer} ({layer_sizes[largest_layer]:,} params)"
        analysis_results['Smallest Layer'] = f"{smallest_layer} ({layer_sizes[smallest_layer]:,} params)"
        analysis_results['Avg Layer Size'] = f"{np.mean(list(layer_sizes.values())):.0f}"
    
    # =========================
    # 8. TRAINING INFORMATION
    # =========================
    print("🎯 Extracting training information...")
    
    if 'optimizer' in checkpoint:
        analysis_results['Optimizer State Available'] = "Yes"
    else:
        analysis_results['Optimizer State Available'] = "No"
    
    if 'scheduler' in checkpoint:
        analysis_results['Scheduler State Available'] = "Yes"
    else:
        analysis_results['Scheduler State Available'] = "No"
    
    # Extract any loss information
    if 'loss' in checkpoint:
        analysis_results['Final Loss'] = f"{checkpoint['loss']:.6f}"
    
    # =========================
    # 9. COMPATIBILITY ANALYSIS
    # =========================
    print("🔧 Analyzing compatibility...")
    
    analysis_results['PyTorch Version Compatible'] = f"PyTorch {torch.__version__}"
    analysis_results['CUDA Available'] = torch.cuda.is_available()
    
    if torch.cuda.is_available():
        analysis_results['CUDA Version'] = torch.version.cuda
        analysis_results['GPU Count'] = torch.cuda.device_count()
    
    # =========================
    # 10. CREATE SUMMARY TABLE
    # =========================
    print("📋 Creating summary table...")
    
    # Convert to DataFrame for nice display
    df = pd.DataFrame.from_dict(analysis_results, orient='index', columns=['Value'])
    df.index.name = 'Metric'
    
    # Add categories for better organization
    categories = []
    for metric in df.index:
        if metric in ['Model Path', 'Model Type', 'Backbone', 'Dataset']:
            categories.append('Basic Info')
        elif 'Parameters' in metric or 'Size' in metric:
            categories.append('Architecture')
        elif 'Memory' in metric or 'FLOP' in metric:
            categories.append('Performance')
        elif 'Layer' in metric:
            categories.append('Structure')
        elif any(word in metric for word in ['Iteration', 'Loss', 'Learning', 'Batch', 'Optimizer']):
            categories.append('Training')
        elif any(word in metric for word in ['File', 'Checkpoint', 'Compression']):
            categories.append('Storage')
        else:
            categories.append('Other')
    
    df['Category'] = categories
    
    # Reorder columns
    df = df[['Category', 'Value']]
    
    print("✅ Analysis complete!")
    
    return df

def save_analysis_table(df, output_path="model_analysis.csv", format_type="csv"):
    """Save the analysis table in various formats"""
    
    if format_type.lower() == "csv":
        df.to_csv(output_path)
    elif format_type.lower() == "excel":
        df.to_excel(output_path.replace('.csv', '.xlsx'))
    elif format_type.lower() == "html":
        df.to_html(output_path.replace('.csv', '.html'))
    
    print(f"💾 Analysis saved to {output_path}")

def display_analysis(df, group_by_category=True):
    """Display the analysis table in a formatted way"""
    
    print("=" * 80)
    print("🚀 GAIT MODEL ANALYSIS REPORT")
    print("=" * 80)
    
    if group_by_category:
        # Group by category for better readability
        for category in df['Category'].unique():
            print(f"\n📊 {category.upper()}")
            print("-" * 40)
            category_df = df[df['Category'] == category]
            for metric, row in category_df.iterrows():
                print(f"{metric:<30}: {row['Value']}")
    else:
        # Display all at once
        for metric, row in df.iterrows():
            print(f"{metric:<30}: {row['Value']}")
    
    print("=" * 80)



def analyze_my_gait_model():
    ''' # Usage '''
    # Define paths 
    model_path = "/kaggle/working/geta_gaitGL/OpenGait/output/CASIA-B/GaitGLGeta/GaitGL_GETA/checkpoints/GaitGL_GETA-80000.pt"
    config_path = "/kaggle/working/geta_gaitGL/OpenGait/configs/gaitgl/gaitgl_geta.yaml"
    
    # Check if model exists
    if not os.path.exists(model_path):
        print("❌ Model checkpoint not found!")
        print("Available checkpoints:")
        checkpoint_dir = "/kaggle/working/geta_gaitGL/OpenGait/output/CASIA-B/GaitGLGeta/GaitGL_GETA/checkpoints/"
        if os.path.exists(checkpoint_dir):
            for file in os.listdir(checkpoint_dir):
                if file.endswith('.pt'):
                    print(f"  📁 {file}")
        return None
    
    # Run analysis
    analysis_df = analyze_gait_model(
        model_path=model_path,
        config_path=config_path,
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )

    
    
    # Display results
    display_analysis(analysis_df)
    
    # Save results
    save_analysis_table(analysis_df, "geta_gait_analysis-1.csv")
    save_analysis_table(analysis_df, "geta_gait_analysis-1.xlsx", "excel")
    
    return analysis_df


# Run the analysis
if __name__ == "__main__":
    df = analyze_my_gait_model()

🔍 Starting comprehensive gait model analysis...
📊 Analyzing basic model information...
🔢 Analyzing model parameters...
🏗️ Analyzing model architecture...
💾 Analyzing memory requirements...
⚡ Analyzing performance characteristics...
📁 Analyzing checkpoint file...
📊 Analyzing layer distribution...
🎯 Extracting training information...
🔧 Analyzing compatibility...
📋 Creating summary table...
✅ Analysis complete!
🚀 GAIT MODEL ANALYSIS REPORT

📊 BASIC INFO
----------------------------------------
Model Path                    : /kaggle/working/geta_gaitGL/OpenGait/output/CASIA-B/GaitGLGeta/GaitGL_GETA/checkpoints/GaitGL_GETA-80000.pt
Model Type                    : GaitGLGeta
Backbone                      : Unknown
Dataset                       : CASIA-B

📊 STORAGE
----------------------------------------
Checkpoint Keys               : ['model', 'optimizer', 'scheduler', 'iteration']
File Extension                : .pt
Compression Ratio             : 0.935

📊 TRAINING
----------------------

In [63]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import json
import time
from collections import OrderedDict
import sys


def analyze_gait_model(model_path, config_path=None, test_data_path=None, device='cuda'):
    """
    Comprehensive analysis of gait model with detailed metrics table.
    
    Args:
        model_path (str): Path to the saved model checkpoint
        config_path (str, optional): Path to the YAML config file
        test_data_path (str, optional): Path to test dataset for accuracy evaluation
        device (str): Device to load model on
    
    Returns:
        pd.DataFrame: Detailed analysis table
    """
    
    print("🔍 Starting comprehensive gait model analysis...")
    
    # Initialize results dictionary
    analysis_results = OrderedDict()
    
    # =========================
    # 1. BASIC MODEL INFO
    # =========================
    print("📊 Analyzing basic model information...")
    
    try:
        # Load checkpoint
        checkpoint = torch.load(model_path, map_location=device)
        
        # Extract model information
        analysis_results['Model Path'] = model_path
        analysis_results['Checkpoint Keys'] = list(checkpoint.keys())
        
        if 'iteration' in checkpoint:
            analysis_results['Training Iteration'] = checkpoint['iteration']
        
        if 'model' in checkpoint:
            model_state = checkpoint['model']
        elif 'model_state_dict' in checkpoint:
            model_state = checkpoint['model_state_dict']
        else:
            model_state = checkpoint
            
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return pd.DataFrame()
    
    # =========================
    # 2. PARAMETER ANALYSIS
    # =========================
    print("🔢 Analyzing model parameters...")
    
    def analyze_parameters(state_dict):
        total_params = 0
        trainable_params = 0
        layer_info = {}
        
        for name, param in state_dict.items():
            if isinstance(param, torch.Tensor):
                param_count = param.numel()
                total_params += param_count
                trainable_params += param_count  # Assume all loaded params are trainable
                
                # Categorize layers
                if 'conv' in name.lower():
                    layer_info['Conv Layers'] = layer_info.get('Conv Layers', 0) + param_count
                elif 'bn' in name.lower() or 'batchnorm' in name.lower():
                    layer_info['BatchNorm Layers'] = layer_info.get('BatchNorm Layers', 0) + param_count
                elif 'fc' in name.lower() or 'linear' in name.lower():
                    layer_info['Linear Layers'] = layer_info.get('Linear Layers', 0) + param_count
                elif 'embed' in name.lower():
                    layer_info['Embedding Layers'] = layer_info.get('Embedding Layers', 0) + param_count
                else:
                    layer_info['Other Layers'] = layer_info.get('Other Layers', 0) + param_count
        
        return total_params, trainable_params, layer_info
    
    total_params, trainable_params, layer_breakdown = analyze_parameters(model_state)
    
    analysis_results['Total Parameters'] = f"{total_params:,}"
    analysis_results['Trainable Parameters'] = f"{trainable_params:,}"
    analysis_results['Model Size (MB)'] = f"{total_params * 4 / (1024**2):.2f}"  # Assuming float32
    
    # Add layer breakdown
    for layer_type, param_count in layer_breakdown.items():
        analysis_results[f'{layer_type} Parameters'] = f"{param_count:,}"
    
    # =========================
    # 3. MODEL ARCHITECTURE ANALYSIS
    # =========================
    print("🏗️ Analyzing model architecture...")
    
    try:
        # Try to load config if provided
        if config_path and os.path.exists(config_path):
            import yaml
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
            
            analysis_results['Model Type'] = config.get('model_cfg', {}).get('model', 'Unknown')
            analysis_results['Backbone'] = config.get('model_cfg', {}).get('backbone_cfg', {}).get('type', 'Unknown')
            analysis_results['Dataset'] = config.get('data_cfg', {}).get('dataset_name', 'Unknown')
            
            # Training configuration
            analysis_results['Learning Rate'] = config.get('optimizer_cfg', {}).get('lr', 'Unknown')
            analysis_results['Batch Size'] = config.get('trainer_cfg', {}).get('sampler', {}).get('batch_size', 'Unknown')
            analysis_results['Total Iterations'] = config.get('trainer_cfg', {}).get('total_iter', 'Unknown')
            
    except Exception as e:
        print(f"⚠️ Could not load config: {e}")
    
    # Analyze layer structure from state dict
    layer_names = list(model_state.keys())
    analysis_results['Total Layers'] = len(layer_names)
    
    # Count different layer types
    conv_layers = len([name for name in layer_names if 'conv' in name.lower() and 'weight' in name])
    bn_layers = len([name for name in layer_names if ('bn' in name.lower() or 'batchnorm' in name.lower()) and 'weight' in name])
    fc_layers = len([name for name in layer_names if ('fc' in name.lower() or 'linear' in name.lower()) and 'weight' in name])
    
    analysis_results['Conv Layers Count'] = conv_layers
    analysis_results['BatchNorm Layers Count'] = bn_layers
    analysis_results['Linear Layers Count'] = fc_layers
    
    # =========================
    # 4. MEMORY ANALYSIS
    # =========================
    print("💾 Analyzing memory requirements...")
    
    # Calculate memory footprint
    param_memory = sum(param.numel() * param.element_size() for param in model_state.values() if isinstance(param, torch.Tensor))
    analysis_results['Parameter Memory (MB)'] = f"{param_memory / (1024**2):.2f}"
    
    # Estimate activation memory (rough estimate for typical gait input)
    # Assuming input size: (batch=8, frames=30, channels=1, height=64, width=44)
    estimated_activation_memory = 8 * 30 * 1 * 64 * 44 * 4  # 4 bytes per float32
    analysis_results['Estimated Activation Memory (MB)'] = f"{estimated_activation_memory / (1024**2):.2f}"
    
    # =========================
    # 5. PERFORMANCE METRICS
    # =========================
    print("⚡ Analyzing performance characteristics...")
    
    # Try to estimate FLOPs (simplified)
    def estimate_flops(state_dict):
        total_flops = 0
        for name, param in state_dict.items():
            if 'conv' in name.lower() and 'weight' in name:
                # Rough FLOP estimation for conv layers
                if param.dim() == 4:  # Conv2D
                    out_channels, in_channels, kh, kw = param.shape
                    # Assuming typical gait input resolution
                    total_flops += out_channels * in_channels * kh * kw * 64 * 44  # Rough estimate
            elif ('fc' in name.lower() or 'linear' in name.lower()) and 'weight' in name:
                # FLOP estimation for linear layers
                if param.dim() == 2:
                    total_flops += param.shape[0] * param.shape[1]
        return total_flops
    
    estimated_flops = estimate_flops(model_state)
    analysis_results['Estimated FLOPs'] = f"{estimated_flops:,}"
    analysis_results['Estimated GFLOPs'] = f"{estimated_flops / (10**9):.2f}"
    
    # =========================
    # 6. FILE ANALYSIS
    # =========================
    print("📁 Analyzing checkpoint file...")
    
    file_size = os.path.getsize(model_path)
    analysis_results['Checkpoint Size (MB)'] = f"{file_size / (1024**2):.2f}"
    analysis_results['File Extension'] = Path(model_path).suffix
    
    # Check compression ratio
    try:
        import gzip
        with open(model_path, 'rb') as f:
            original_size = len(f.read())
        
        compressed_size = len(gzip.compress(open(model_path, 'rb').read()))
        compression_ratio = compressed_size / original_size
        analysis_results['Compression Ratio'] = f"{compression_ratio:.3f}"
    except:
        analysis_results['Compression Ratio'] = "N/A"
    
    # =========================
    # 7. LAYER DISTRIBUTION ANALYSIS
    # =========================
    print("📊 Analyzing layer distribution...")
    
    # Analyze parameter distribution across layers
    layer_sizes = {}
    for name, param in model_state.items():
        if isinstance(param, torch.Tensor) and 'weight' in name:
            layer_name = name.replace('.weight', '')
            layer_sizes[layer_name] = param.numel()
    
    if layer_sizes:
        # Find largest and smallest layers
        largest_layer = max(layer_sizes, key=layer_sizes.get)
        smallest_layer = min(layer_sizes, key=layer_sizes.get)
        
        analysis_results['Largest Layer'] = f"{largest_layer} ({layer_sizes[largest_layer]:,} params)"
        analysis_results['Smallest Layer'] = f"{smallest_layer} ({layer_sizes[smallest_layer]:,} params)"
        analysis_results['Avg Layer Size'] = f"{np.mean(list(layer_sizes.values())):.0f}"
    
    # =========================
    # 8. TRAINING INFORMATION
    # =========================
    print("🎯 Extracting training information...")
    
    if 'optimizer' in checkpoint:
        analysis_results['Optimizer State Available'] = "Yes"
    else:
        analysis_results['Optimizer State Available'] = "No"
    
    if 'scheduler' in checkpoint:
        analysis_results['Scheduler State Available'] = "Yes"
    else:
        analysis_results['Scheduler State Available'] = "No"
    
    # Extract any loss information
    if 'loss' in checkpoint:
        analysis_results['Final Loss'] = f"{checkpoint['loss']:.6f}"
    
    # =========================
    # 9. COMPATIBILITY ANALYSIS
    # =========================
    print("🔧 Analyzing compatibility...")
    
    analysis_results['PyTorch Version Compatible'] = f"PyTorch {torch.__version__}"
    analysis_results['CUDA Available'] = torch.cuda.is_available()
    
    if torch.cuda.is_available():
        analysis_results['CUDA Version'] = torch.version.cuda
        analysis_results['GPU Count'] = torch.cuda.device_count()
    
    # =========================
    # 10. CREATE SUMMARY TABLE
    # =========================
    print("📋 Creating summary table...")
    
    # Convert to DataFrame for nice display
    df = pd.DataFrame.from_dict(analysis_results, orient='index', columns=['Value'])
    df.index.name = 'Metric'
    
    # Add categories for better organization
    categories = []
    for metric in df.index:
        if metric in ['Model Path', 'Model Type', 'Backbone', 'Dataset']:
            categories.append('Basic Info')
        elif 'Parameters' in metric or 'Size' in metric:
            categories.append('Architecture')
        elif 'Memory' in metric or 'FLOP' in metric:
            categories.append('Performance')
        elif 'Layer' in metric:
            categories.append('Structure')
        elif any(word in metric for word in ['Iteration', 'Loss', 'Learning', 'Batch', 'Optimizer']):
            categories.append('Training')
        elif any(word in metric for word in ['File', 'Checkpoint', 'Compression']):
            categories.append('Storage')
        else:
            categories.append('Other')
    
    df['Category'] = categories
    
    # Reorder columns
    df = df[['Category', 'Value']]
    
    print("✅ Analysis complete!")
    
    return df

def save_analysis_table(df, output_path="model_analysis.csv", format_type="csv"):
    """Save the analysis table in various formats"""
    
    if format_type.lower() == "csv":
        df.to_csv(output_path)
    elif format_type.lower() == "excel":
        df.to_excel(output_path.replace('.csv', '.xlsx'))
    elif format_type.lower() == "html":
        df.to_html(output_path.replace('.csv', '.html'))
    
    print(f"💾 Analysis saved to {output_path}")

def display_analysis(df, group_by_category=True):
    """Display the analysis table in a formatted way"""
    
    print("=" * 80)
    print("🚀 GAIT MODEL ANALYSIS REPORT")
    print("=" * 80)
    
    if group_by_category:
        # Group by category for better readability
        for category in df['Category'].unique():
            print(f"\n📊 {category.upper()}")
            print("-" * 40)
            category_df = df[df['Category'] == category]
            for metric, row in category_df.iterrows():
                print(f"{metric:<30}: {row['Value']}")
    else:
        # Display all at once
        for metric, row in df.iterrows():
            print(f"{metric:<30}: {row['Value']}")
    
    print("=" * 80)



def analyze_my_gait_model():
    ''' # Usage '''
    # Define paths 
    model_path = "/kaggle/working/OpenGait/output/CASIA-B/GaitGL/GaitGL/checkpoints/GaitGL-80000.pt"
    config_path = "/kaggle/working/OpenGait/configs/gaitgl/gaitgl.yaml"
    
    # Check if model exists
    if not os.path.exists(model_path):
        print("❌ Model checkpoint not found!")
        print("Available checkpoints:")
        checkpoint_dir = "/kaggle/working/OpenGait/output/CASIA-B/GaitGL/GaitGL/checkpoints/"
        if os.path.exists(checkpoint_dir):
            for file in os.listdir(checkpoint_dir):
                if file.endswith('.pt'):
                    print(f"  📁 {file}")
        return None
    
    # Run analysis
    analysis_df = analyze_gait_model(
        model_path=model_path,
        config_path=config_path,
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )

    
    
    # Display results
    display_analysis(analysis_df)
    
    # Save results
    save_analysis_table(analysis_df, "original_gaitGL_analysis-1.csv")
    save_analysis_table(analysis_df, "original_gaitGL_-1.xlsx", "excel")
    
    return analysis_df


# Run the analysis
if __name__ == "__main__":
    df = analyze_my_gait_model()

🔍 Starting comprehensive gait model analysis...
📊 Analyzing basic model information...
🔢 Analyzing model parameters...
🏗️ Analyzing model architecture...
💾 Analyzing memory requirements...
⚡ Analyzing performance characteristics...
📁 Analyzing checkpoint file...
📊 Analyzing layer distribution...
🎯 Extracting training information...
🔧 Analyzing compatibility...
📋 Creating summary table...
✅ Analysis complete!
🚀 GAIT MODEL ANALYSIS REPORT

📊 BASIC INFO
----------------------------------------
Model Path                    : /kaggle/working/OpenGait/output/CASIA-B/GaitGL/GaitGL/checkpoints/GaitGL-80000.pt
Model Type                    : GaitGL
Backbone                      : Unknown
Dataset                       : CASIA-B

📊 STORAGE
----------------------------------------
Checkpoint Keys               : ['model', 'optimizer', 'scheduler', 'iteration']
File Extension                : .pt
Compression Ratio             : 0.916

📊 TRAINING
----------------------------------------
Training It