In [1]:
import time
notebook_start = time.time()

In [2]:
# Cell 1: Complete Environment Setup
# ========================================================

# 1. First, clean up everything - more thorough cleanup
import sys
import shutil
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
import json

# Clean up problematic installations
!pip uninstall -y numpy torch torchvision torchaudio transformers peft bitsandbytes 2>/dev/null || echo "No packages to uninstall"

# Remove problematic directories manually
problematic_path = "/usr/local/lib/python3.11/dist-packages/~vidia-cudnn-cu12"
if os.path.exists(problematic_path):
    shutil.rmtree(problematic_path)

# Clear pip cache
!pip cache purge

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: torch 2.2.1+cu121
Uninstalling torch-2.2.1+cu121:
  Successfully uninstalled torch-2.2.1+cu121
Found existing installation: torchvision 0.17.1+cu121
Uninstalling torchvision-0.17.1+cu121:
  Successfully uninstalled torchvision-0.17.1+cu121
Found existing installation: torchaudio 2.2.1+cu121
Uninstalling torchaudio-2.2.1+cu121:
  Successfully uninstalled torchaudio-2.2.1+cu121
Found existing installation: transformers 4.41.2
Uninstalling transformers-4.41.2:
  Successfully uninstalled transformers-4.41.2
Found existing installation: peft 0.10.0
Uninstalling peft-0.10.0:
  Successfully uninstalled peft-0.10.0
Found existing installation: bitsandbytes 0.43.0
Uninstalling bitsandbytes-0.43.0:
  Successfully uninstalled bitsandbytes-0.43.0
Files removed: 138


In [3]:
pip install -U langchain-community

Collecting numpy>=1.26.2 (from langchain-community)
  Downloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Downloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
accelerate 0.29.1 requires torch>=1.10.0, which is not installed.
easyocr 1.7.2 requires torch, which is not installed.
easyocr 1.7.2 requires torchvision>=0.5, which is not installed.
torchmetrics 1.7.1 requires torch>=2.0.0, which is not installed.
pytorch-lightning 2.5

In [6]:
# Create a virtual environment to isolate our dependencies
!python -m venv myenv
!source myenv/bin/activate

# Upgrade pip and setuptools first
!pip install --upgrade pip setuptools wheel

# Install core numerical packages with pinned versions
!pip install numpy==1.26.4 scipy==1.11.4

# Install PyTorch CPU version
!pip install torch==2.2.1+cpu torchvision==0.17.1+cpu torchaudio==2.2.1+cpu --index-url https://download.pytorch.org/whl/cpu

# Install transformer ecosystem with compatible versions
!pip install \
    transformers==4.41.2 \
    peft==0.10.0 \
    datasets==2.18.0 \
    accelerate==0.29.1 \
    einops==0.7.0 \
    tokenizers==0.19.1 \
    sentencepiece==0.2.0

# Install required ML utilities
!pip install scikit-learn==1.2.2 matplotlib==3.7.2

# Install project-specific dependencies
!pip install \
    langchain==0.1.16 \
    faiss-cpu==1.7.4 \
    tqdm==4.66.2 \
    pandas==2.2.2 \
    google-auth==2.38.0

# Verify installations
import sys
import numpy as np
import scipy
import torch
import transformers

print("\n=== Core Package Versions ===")
print(f"Python: {sys.version}")
print(f"NumPy: {np.__version__}")
print(f"SciPy: {scipy.__version__}")
print(f"PyTorch: {torch.__version__}")
print(f"Transformers: {transformers.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Now import your project packages
from datasets import Dataset, load_dataset, DatasetDict
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling
)
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import re

print("\n=== All Packages Imported Successfully ===")

Error: Command '['/kaggle/working/myenv/bin/python3', '-m', 'ensurepip', '--upgrade', '--default-pip']' returned non-zero exit status 1.
/bin/bash: line 1: myenv/bin/activate: No such file or directory
Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Collecting setuptools
  Downloading setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading setuptools-80.9.0-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: setuptools, pip
  Attempting uninstall: setuptools
    Found existing installation: setuptools 75.2.0
    Uninstalling setuptools-75.2.0:
      Successfully uninstalled setuptools-75.2.0
  Attempting uninstall: pip
    Found 

In [7]:
# Model Loading
# Define MODEL_NAME at the top of the cell (should match what is used in Cell 1)
MODEL_NAME = "gpt2"  # Change to "meta-llama/Llama-2-7b-chat-hf" for Llama

def print_memory():
    """Memory usage diagnostics for the environment"""
    ram = psutil.virtual_memory()
    print(f"RAM: {ram.percent:.1f}% ({ram.used/1024**3:.1f}/{ram.total/1024**3:.1f}GB)")

def load_model(model_name):
    """CPU-compatible model loading with safe error handling"""
    print(f"\n=== Loading Model: {model_name} ===")
    print_memory()
    
    # CPU-specific settings
    device = "cpu"
    torch_dtype = torch.float32
    
    # Check if we should trust remote code
    trust_remote_code = "phi" in model_name.lower()
    
    try:
        # Attempt 1: Try standard CPU load
        print("Attempting standard CPU load...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            device_map=None,
            torch_dtype=torch_dtype
        ).to(device)
        
        print("\n✅ Model loaded successfully on CPU!")
        print_memory()
        return model
        
    except Exception as e:
        print(f"\n❌ Standard load failed: {str(e)}")
        
        try:
            # Attempt 2: Try with safe tensors
            print("Attempting safe tensors load...")
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                trust_remote_code=trust_remote_code,
                device_map=None,
                torch_dtype=torch_dtype,
                use_safetensors=True
            ).to(device)
            
            print("\n✅ Model loaded with safe tensors!")
            print_memory()
            return model
            
        except Exception as e:
            print(f"\n❌ Safe tensors load failed: {str(e)}")
            
            try:
                # Attempt 3: Try FP16 fallback
                print("Attempting FP16 fallback...")
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    trust_remote_code=trust_remote_code,
                    device_map=None,
                    torch_dtype=torch.float16
                ).to(device).float()  # Convert to FP32 after loading
                
                print("\n✅ Model loaded with FP16 fallback!")
                print_memory()
                return model
                
            except Exception as e:
                print(f"\n❌ All loading attempts failed: {str(e)}")
                raise RuntimeError("Unable to load model on CPU")

model = load_model(MODEL_NAME)


=== Loading Model: gpt2 ===
RAM: 5.0% (1.1/31.4GB)
Attempting standard CPU load...

✅ Model loaded successfully on CPU!
RAM: 6.6% (1.6/31.4GB)


In [8]:
# Tokenizer Setup
def load_tokenizer(model_name):
    """Load and configure tokenizer"""
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            padding_side="right"
        )
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        print("Tokenizer loaded successfully")
        return tokenizer
    except Exception as e:
        print(f"Tokenizer loading failed: {str(e)}")
        raise

tokenizer = load_tokenizer(MODEL_NAME)

Tokenizer loaded successfully


In [9]:
# Robust Data Preparation
# 1. Dataset preparation with multiple fallbacks
def prepare_dataset(file_path="/kaggle/input/database4", max_samples=1000):
    """Prepare dataset with robust error handling"""
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"❌ Path not found: {file_path}")
            
        # Load JSONL file directly using datasets library
        dataset = load_dataset('json', data_files=file_path, split='train[:{}]'.format(max_samples))
        
        # If the dataset has multiple columns, we just want the text
        if 'text' not in dataset.features:
            # Try to find the first text-like column
            text_columns = [col for col in dataset.features if any(t in col.lower() for t in ['text', 'content', 'body'])]
            if text_columns:
                dataset = dataset.rename_column(text_columns[0], 'text')
            else:
                # If no text column found, combine all string columns
                string_columns = [col for col in dataset.features if dataset.features[col].dtype == 'string']
                if string_columns:
                    def combine_columns(examples):
                        return {'text': ' '.join(str(examples[col]) for col in string_columns)}
                    dataset = dataset.map(combine_columns)
                else:
                    raise ValueError("No text columns found in dataset")
        
        print(f"✅ Loaded dataset with {len(dataset)} samples")
        return dataset
        
    except Exception as e:
        print(f"\n❌ Dataset preparation failed: {str(e)}")
        print("Creating minimal fallback dataset...")
        return Dataset.from_dict({"text": ["Sample text " + str(i) for i in range(10)]})

# 2. Tokenization function
def safe_tokenize(examples):
    """Tokenization with explicit numpy workarounds"""
    try:
        tokenized = tokenizer(
            examples["text"],
            truncation=True,
            max_length=160,
            padding="max_length",
            return_tensors="pt"
        )
        # Convert to lists explicitly
        return {
            "input_ids": tokenized["input_ids"].tolist(),
            "attention_mask": tokenized["attention_mask"].tolist(),
            "labels": tokenized["input_ids"].tolist()
        }
    except RuntimeError as e:
        if "Numpy is not available" in str(e):
            # Fallback using pure Python
            return {
                "input_ids": [[0]*160],
                "attention_mask": [[1]*160],
                "labels": [[0]*160]
            }
        raise

print("\n=== Starting Processing ===")
try:
    dataset = prepare_dataset()
    
    # Small batch test first
    test_batch = dataset.select(range(2))
    test_tokenized = test_batch.map(safe_tokenize, batched=True)
    
    # If test succeeds, process full dataset
    tokenized_dataset = dataset.map(safe_tokenize, batched=True, batch_size=4)
    
    # Set format for PyTorch
    tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    
    print("✅ Processing completed successfully!")
    
except Exception as e:
    print(f"\n❌ Error during processing: {str(e)}")
    print("Creating minimal fallback tokenized dataset...")
    # Create a Dataset instead of DatasetDict for consistency
    tokenized_dataset = Dataset.from_dict({
        "input_ids": [[0,1,2,3]],
        "attention_mask": [[1,1,1,1]],
        "labels": [[0,1,2,3]]
    })
    tokenized_dataset.set_format(type='torch')

# Split dataset
if not isinstance(tokenized_dataset, DatasetDict):
    try:
        # Ensure we have at least 2 samples to split
        if len(tokenized_dataset) < 2:
            # Create dummy data for splitting
            dummy_data = {
                "input_ids": [[0,1,2,3] for _ in range(2)],
                "attention_mask": [[1,1,1,1] for _ in range(2)],
                "labels": [[0,1,2,3] for _ in range(2)]
            }
            tokenized_dataset = Dataset.from_dict(dummy_data)
        
        split_datasets = tokenized_dataset.train_test_split(test_size=0.1)
        tokenized_dataset = DatasetDict({
            "train": split_datasets["train"],
            "test": split_datasets["test"]
        })
        print(f"✅ Dataset split: {len(tokenized_dataset['train'])} train, {len(tokenized_dataset['test'])} test")
    except Exception as e:
        print(f"❌ Dataset splitting failed: {str(e)}")
        print("Creating minimal DatasetDict...")
        tokenized_dataset = DatasetDict({
            "train": Dataset.from_dict({
                "input_ids": [[0,1,2,3]],
                "attention_mask": [[1,1,1,1]],
                "labels": [[0,1,2,3]]
            }),
            "test": Dataset.from_dict({
                "input_ids": [[4,5,6,7]],
                "attention_mask": [[1,1,1,1]],
                "labels": [[4,5,6,7]]
            })
        })
else:
    # Ensure we have both train and test splits
    if "train" not in tokenized_dataset or "test" not in tokenized_dataset:
        print("⚠️ DatasetDict missing train/test splits. Creating from existing data...")
        if "train" in tokenized_dataset:
            train_dataset = tokenized_dataset["train"]
            test_size = min(10, len(train_dataset) // 10)
            split_datasets = train_dataset.train_test_split(test_size=test_size)
            tokenized_dataset = DatasetDict({
                "train": split_datasets["train"],
                "test": split_datasets["test"]
            })
        else:
            print("Creating minimal DatasetDict...")
            tokenized_dataset = DatasetDict({
                "train": Dataset.from_dict({
                    "input_ids": [[0,1,2,3]],
                    "attention_mask": [[1,1,1,1]],
                    "labels": [[0,1,2,3]]
                }),
                "test": Dataset.from_dict({
                    "input_ids": [[4,5,6,7]],
                    "attention_mask": [[1,1,1,1]],
                    "labels": [[4,5,6,7]]
                })
            })

print(f"Final dataset structure: {type(tokenized_dataset)}")
if isinstance(tokenized_dataset, DatasetDict):
    print(f"- Train samples: {len(tokenized_dataset['train'])}")
    print(f"- Test samples: {len(tokenized_dataset['test'])}")
else:
    print(f"- Total samples: {len(tokenized_dataset)}")


=== Starting Processing ===

❌ Dataset preparation failed: Unable to find '/kaggle/input/database4'
Creating minimal fallback dataset...


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

✅ Processing completed successfully!
✅ Dataset split: 9 train, 1 test
Final dataset structure: <class 'datasets.dataset_dict.DatasetDict'>
- Train samples: 9
- Test samples: 1


In [11]:
# Training Configuration
# Enable gradient checkpointing to save memory
model.gradient_checkpointing_enable()

# LoRA configuration
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,  
    lora_alpha=32,
    target_modules = ["attn.c_attn", "attn.c_proj", "mlp.c_fc", "mlp.c_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    fan_in_fan_out=False
)

# Training arguments optimized for Kaggle
training_args = TrainingArguments(
    output_dir=f"./{MODEL_NAME.split('/')[-1]}-crypto-expert",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-5,
    optim="adamw_torch",
    logging_steps=20,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_steps=500,
    fp16=torch.cuda.is_available(),
    max_grad_norm=0.3,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    report_to="none",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

# Prepare model for training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# Print trainable parameters
model.print_trainable_parameters()

trainable params: 2,359,296 || all params: 126,799,104 || trainable%: 1.8606566809809635




In [12]:
!pip install memory_profiler
from memory_profiler import profile

# 2. Tokenization function with memory optimizations
def safe_tokenize(examples):
    """Tokenization with explicit numpy workarounds and memory efficiency"""
    try:
        # Process text in chunks to avoid OOM
        tokenized = tokenizer(
            examples["text"],
            truncation=True,
            max_length=160,
            padding="max_length",
            return_tensors="pt",
            add_special_tokens=True
        )
        # Convert to lists and immediately free GPU memory
        result = {
            "input_ids": tokenized["input_ids"].cpu().tolist(),
            "attention_mask": tokenized["attention_mask"].cpu().tolist(),
            "labels": tokenized["input_ids"].cpu().tolist()
        }
        del tokenized  # Explicitly delete to free memory
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        return result
        
    except RuntimeError as e:
        if "out of memory" in str(e).lower():
            print("⚠️ OOM during tokenization. Using fallback")
            return empty_tokenization(160)
        elif "Numpy is not available" in str(e):
            print("⚠️ Numpy error. Using fallback")
            return empty_tokenization(160)
        raise

def empty_tokenization(length):
    """Create empty tokenization to prevent crashes"""
    return {
        "input_ids": [[0]*length],
        "attention_mask": [[1]*length],
        "labels": [[0]*length]
    }

# 3. Main processing function with memory profiling
@profile
def process_dataset():
    try:
        print("\n=== Starting Processing ===")
        dataset = prepare_dataset()
        
        # Small batch test first
        test_batch = dataset.select(range(2))
        test_tokenized = test_batch.map(safe_tokenize, batched=True)
        
        # Full processing with memory optimizations
        tokenized_dataset = dataset.map(
            safe_tokenize,
            batched=True,
            batch_size=8,  # Increased for better efficiency
            remove_columns=dataset.column_names,  # Critical for memory savings
            load_from_cache_file=False  # Prevents cache buildup
        )
        
        tokenized_dataset.set_format(type='torch')
        print(f"✅ Processing completed! Final size: {len(tokenized_dataset)} samples")
        return tokenized_dataset
        
    except Exception as e:
        print(f"\n❌ Critical Error: {str(e)}")
        print("Creating minimal fallback dataset...")
        return Dataset.from_dict({
            "input_ids": [torch.tensor([0]*160)],
            "attention_mask": [torch.tensor([1]*160)],
            "labels": [torch.tensor([0]*160)]
        })

# 4. Execute processing
final_dataset = process_dataset()

# 5. Verify output
print("\nDataset sample:", final_dataset[0])
print("Memory profiling complete!")

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0
ERROR: Could not find file /tmp/ipykernel_239/496354247.py

=== Starting Processing ===

❌ Dataset preparation failed: Unable to find '/kaggle/input/database4'
Creating minimal fallback dataset...


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

✅ Processing completed! Final size: 10 samples

Dataset sample: {'input_ids': tensor([36674,  2420,   657, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 

In [18]:
# Cell 6: Training Execution
# =========================

def train_model(model, tokenized_dataset, training_args):
    """Execute the training process"""
    # Disable cache if gradient checkpointing is enabled
    if training_args.gradient_checkpointing:
        model.config.use_cache = False
    
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
        eval_dataset=tokenized_dataset["test"]
    )
    
    print("Starting training...")
    print_memory()
    trainer.train()
    print("Training completed!")
    return trainer

trainer = train_model(model, tokenized_dataset, training_args)

RecursionError: maximum recursion depth exceeded

In [None]:
# Cell 7: Enhanced Model Saving with Shard Support
# ===============================================

def save_model_artifacts(
    model, 
    tokenizer, 
    training_args: Optional[TrainingArguments] = None, 
    output_dir: str = "/kaggle/working/gpt2-lora-trained"
) -> str:
    """
    Save all model artifacts with comprehensive verification.
    Handles both single-file and sharded model formats.
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    print(f"\n💾 Saving model artifacts to: {output_dir}")
    
    # For LoRA models - DON'T merge adapters before saving
    # We want to save the adapter separately
    print("💽 Saving model and adapter...")
    
    # Save the entire model (base model + adapter)
    model.save_pretrained(
        output_dir,
        safe_serialization=True,
        state_dict=model.state_dict()  # Save the complete state including LoRA
    )
    
    # Save tokenizer
    print("🔤 Saving tokenizer...")
    tokenizer.save_pretrained(output_dir)
    
    # Save training arguments if provided
    if training_args is not None:
        print("📝 Saving training arguments...")
        try:
            args_path = os.path.join(output_dir, "training_args.json")
            if hasattr(training_args, 'to_dict'):
                with open(args_path, "w") as f:
                    json.dump(training_args.to_dict(), f, indent=2)
            elif hasattr(training_args, 'to_json_string'):
                with open(args_path, "w") as f:
                    f.write(training_args.to_json_string())
            else:
                print("⚠️ Warning: TrainingArguments has no serialization method")
        except Exception as e:
            print(f"⚠️ Warning: Failed to save training args - {str(e)}")
    
    # Verify the adapter files were saved
    required_files = ['adapter_config.json', 'adapter_model.safetensors']
    missing_files = []
    for file in required_files:
        if not os.path.exists(os.path.join(output_dir, file)):
            missing_files.append(file)
    
    if missing_files:
        print(f"\n⚠️ Warning: Missing adapter files: {missing_files}")
        print("Trying alternative save method...")
        # Explicitly save the adapter
        model.save_pretrained(
            output_dir,
            safe_serialization=True,
            adapter_only=True  # This ensures adapter files are saved
        )
    
    print("\n🔍 Verifying saved files:")
    for file in os.listdir(output_dir):
        size = os.path.getsize(os.path.join(output_dir, file)) / 1024
        print(f"- {file} ({size:.2f} KB)")
    
    return output_dir

In [None]:
# Cell 8: Robust Model Loading and Testing with PEFT support
# ========================================================
def load_and_test_model(
    model_path: str = "/kaggle/working/gpt2-lora-trained", 
    max_length: int = 160,
    test_prompts: Optional[list] = None,
    is_peft_model: bool = True
):
    """
    Load and test a saved model with comprehensive error handling
    """
    print(f"\n🔍 Preparing to load model from: {model_path}")
    
    # Verify model directory exists
    if not os.path.exists(model_path):
        raise ValueError(f"Model directory {model_path} does not exist")
    
    # Show directory contents for debugging
    print("\n📂 Model directory contents:")
    for f in sorted(os.listdir(model_path)):
        size = os.path.getsize(os.path.join(model_path, f)) / 1024
        print(f"- {f} ({size:.2f} KB)")
    
    try:
        print("\n🔄 Loading tokenizer...")

        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            local_files_only=True
        )
        
        print("\n🔄 Loading model...")
        if is_peft_model:
            # First check if we have adapter files
            adapter_files = [
                f for f in os.listdir(model_path) 
                if f.startswith('adapter_') or f == 'adapter_config.json'
            ]
            
            if not adapter_files:
                print("⚠️ No adapter files found. Loading as regular model.")
                model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    local_files_only=True
                )
            else:
                print(f"Found adapter files: {adapter_files}")
                # Load base model first
                base_model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    local_files_only=True
                )
                
                # Then load the PEFT adapter
                model = PeftModel.from_pretrained(
                    base_model,
                    model_path,
                    local_files_only=True
                )
                
                # Merge and unload for inference
                model = model.merge_and_unload()
        else:
            # For regular models
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                device_map="auto",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                local_files_only=True
            )
            
        print("\n🎉 Model loaded successfully!")
        
        # Default test prompts if none provided
        if test_prompts is None:
            test_prompts = [
                "What is hardware wallet?? ",
                "What is Proof of Work (PoW)?? ",
                "What is cryptography?? ",
                "What is Peer-to-Peer (P2P)?? ",
                "What is block chain?? ",
                "What is private key?? "
            ]
        
        # Create pipeline - REMOVED device parameter since we're using device_map="auto"
        print("\n🚀 Creating text generation pipeline...")
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        
        # Run tests
        print("\n🧪 Running generation tests...")
        for i, prompt in enumerate(test_prompts, 1):
            print(f"\n🔹 Test {i}: {prompt}")
            output = pipe(
                prompt,
                max_length=max_length,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                num_return_sequences=1,
                repetition_penalty=1.2
            )
            print("💬 Response:", output[0]['generated_text'])
            
        return model, tokenizer
        
    except Exception as e:
        print(f"\n❌ Critical error loading model: {str(e)}")
        print("\n🛠️ Debugging info:")
        print(f"- Path: {os.path.abspath(model_path)}")
        print(f"- Directory exists: {os.path.exists(model_path)}")
        if os.path.exists(model_path):
            print("- Contents:", os.listdir(model_path))
        raise

In [None]:
# Main execution
if __name__ == "__main__":
    model_path = "/kaggle/working/gpt2-lora-trained"
    
    # Save model artifacts
    save_model_artifacts(model, tokenizer, training_args)
    
    # Load with explicit path and PEFT flag
    load_and_test_model(model_path, is_peft_model=True)
    
    # Test with custom prompts
    custom_prompts = [
        "What is software wallet, and what's the difference between hardware and software wallet? ",
        "What is PoW? ",
        "Explain PoW in 1 sentence. ",
        "Describe the key features of PoW using 3 words. ",
        "What is PoM? Is it something related to cryptography? ",
        "What is a cryptographic product? ",
        "What is P2P? ",
        "What is block chain? ",
        "What is public key, and what's the difference between private and public key? "
    ]
    load_and_test_model(model_path, test_prompts=custom_prompts, is_peft_model=True)

In [None]:

notebook_end = time.time()
print(f"Total notebook execution time: {notebook_end - notebook_start:.2f} seconds")