In [11]:
import time
notebook_start = time.time()

In [12]:
# Kaggle Environment Setup
import os
import sys
import torch
import psutil

print("=== Initializing Environment ===")

# Verify GPU availability
if torch.cuda.is_available():
    print(f"GPU detected: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
else:
    print("No GPU detected - running in CPU mode")

# Memory diagnostics
def print_memory():
    if torch.cuda.is_available():
        gpu_mem = torch.cuda.memory_allocated() / 1024**3
        print(f"GPU Memory: {gpu_mem:.2f}GB", end=" | ")
    ram = psutil.virtual_memory()
    print(f"RAM: {ram.percent}% ({ram.used/1024**3:.1f}/{ram.total/1024**3:.1f}GB)")

print("\nInitial system status:")
print_memory()

=== Initializing Environment ===
No GPU detected - running in CPU mode

Initial system status:
RAM: 5.2% (1.2/31.4GB)


In [13]:
# Install required packages with version pinning
!pip install -q torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121
!pip install -q transformers==4.41.2 datasets==2.18.0 peft==0.10.0 accelerate==0.29.1 bitsandbytes==0.43.0

# Verify installations
import importlib
for pkg in ['torch', 'transformers', 'datasets', 'peft', 'bitsandbytes']:
    try:
        importlib.import_module(pkg)
        print(f"✅ {pkg} installed successfully")
    except ImportError:
        print(f"❌ {pkg} not installed")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m✅ torch installed successfully
✅ transformers installed successfully
✅ datasets installed successfully
✅ peft installed successfully
✅ bitsandbytes installed successfully


In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import numpy as np
from collections import defaultdict
from typing import Dict, List, Tuple, Any
from datasets import Dataset, load_dataset
import json
import matplotlib.pyplot as plt
from IPython.display import clear_output
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import TrainingArguments
import shutil

In [15]:
MODEL_NAME = "gpt2"  # Using GPT-2 for Kaggle compatibility

def load_model(model_name: str):
    """Robust model loading with fallbacks"""
    print(f"\n=== Loading {model_name} ===")
    
    # Configure tokenizer first
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.pad_token = tokenizer.eos_token
        print("✅ Tokenizer loaded successfully")
    except Exception as e:
        print(f"❌ Tokenizer loading failed: {e}")
        raise

    # Configure quantization if GPU available
    if torch.cuda.is_available():
        print("Configuring for GPU with 4-bit quantization")
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16
        )
    else:
        print("Configuring for CPU without quantization")
        bnb_config = None

    # Attempt model loading with progressive fallbacks
    try:
        # First try with full configuration
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        print("✅ Model loaded with configured settings")
    except Exception as e:
        print(f"⚠️ Primary load failed: {e}")
        print("Attempting fallback to basic CPU loading...")
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="cpu",
                torch_dtype=torch.float32
            )
            print("✅ Model loaded on CPU")
        except Exception as e:
            print(f"❌ All loading attempts failed: {e}")
            raise

    print("\nFinal memory status:")
    print_memory()
    return model, tokenizer

# Load model and tokenizer
try:
    model, tokenizer = load_model(MODEL_NAME)
except Exception as e:
    print(f"\n❌ Critical error loading model: {e}")
    raise


=== Loading gpt2 ===
✅ Tokenizer loaded successfully
Configuring for CPU without quantization
✅ Model loaded with configured settings

Final memory status:
RAM: 5.2% (1.2/31.4GB)


In [16]:
def analyze_vocab_coverage(tokenizer, domain="cryptocurrency"):
    """Comprehensive vocabulary coverage analysis for specific domains"""
    # Define domain-specific term lists
    domains = {
        "cryptocurrency": [
            "blockchain", "cryptocurrency", "decentralized", "ledger", "mining", 
            "wallet", "hardware wallet", "software wallet", "private key", "public key", 
            "address", "transaction", "consensus", "proof of work", "proof of stake",
            "smart contract", "token", "exchange", "hash", "digital signature",
            "crypto", "bitcoin", "ethereum", "altcoin", "defi", "nft", "dao",
            "gas fee", "block reward", "halving", "fork", "node", "validator",
            "staking", "liquidity pool", "yield farming", "oracle", "zk-snark",
            "segwit", "taproot", "lightning network", "sidechain", "atomic swap"
        ],
        "finance": [
            "stock", "bond", "dividend", "portfolio", "asset allocation",
            "market cap", "liquidity", "volatility", "yield", "interest rate",
            "inflation", "deflation", "recession", "bull market", "bear market"
        ],
        "technology": [
            "algorithm", "encryption", "database", "cloud computing", "artificial intelligence",
            "machine learning", "neural network", "quantum computing", "cybersecurity", "blockchain"
        ]
    }
    
    # Select terms based on domain
    domain_terms = domains.get(domain, domains["cryptocurrency"])
    
    # Analyze coverage
    covered = []
    partially_covered = []
    missing = []
    
    for term in domain_terms:
        tokens = tokenizer.tokenize(term)
        token_presence = [token in tokenizer.vocab for token in tokens]
        
        if all(token_presence):
            covered.append(term)
        elif any(token_presence):
            partially_covered.append(term)
        else:
            missing.append(term)
    
    coverage_score = len(covered) / len(domain_terms)
    partial_score = len(partially_covered) / len(domain_terms)
    
    return {
        "domain": domain,
        "coverage_score": coverage_score,
        "partial_coverage_score": partial_score,
        "covered_terms": covered,
        "partially_covered_terms": partially_covered,
        "missing_terms": missing,
        "vocab_size": len(tokenizer.vocab),
        "domain_term_count": len(domain_terms)
    }

def load_and_analyze_tokenizer(model_name="gpt2", domain="cryptocurrency"):
    """Advanced tokenizer loading with domain-specific vocabulary analysis"""
    print(f"\n=== Loading and Analyzing Tokenizer: {model_name} ===")
    print(f"Domain: {domain.capitalize()}")
    
    try:
        # 1. Load tokenizer with error handling
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
        except OSError:
            print(f"⚠️ Model {model_name} not found, using GPT-2 as fallback")
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
        
        # 2. Configure special tokens
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token or "<pad>"
        
        if tokenizer.eos_token is None:
            tokenizer.eos_token = "</s>"
        
        if tokenizer.bos_token is None:
            tokenizer.bos_token = "<s>"
        
        if tokenizer.unk_token is None:
            tokenizer.unk_token = "<unk>"
        
        print(f"✅ Tokenizer loaded successfully | Vocab size: {len(tokenizer.vocab):,}")
        
        # 3. Perform vocabulary analysis
        vocab_report = analyze_vocab_coverage(tokenizer, domain)
        
        # 4. Print detailed report
        print(f"\n📊 Vocabulary Coverage Report ({domain}):")
        print(f"- Full coverage: {vocab_report['coverage_score']:.1%} ({len(vocab_report['covered_terms'])} terms)")
        print(f"- Partial coverage: {vocab_report['partial_coverage_score']:.1%} ({len(vocab_report['partially_covered_terms'])} terms)")
        print(f"- Missing coverage: {1 - vocab_report['coverage_score'] - vocab_report['partial_coverage_score']:.1%} ({len(vocab_report['missing_terms'])} terms)")
        
        if vocab_report['missing_terms']:
            print(f"\n⚠️ Top missing terms:")
            for term in vocab_report['missing_terms'][:5]:
                tokens = tokenizer.tokenize(term)
                missing_tokens = [t for t in tokens if t not in tokenizer.vocab]
                print(f"  - '{term}': Missing tokens - {', '.join(missing_tokens)}")
        
        # 5. Special token verification
        print("\n🔍 Special Token Verification:")
        special_tokens = ["pad_token", "eos_token", "bos_token", "unk_token"]
        for token in special_tokens:
            value = getattr(tokenizer, token, None)
            print(f"- {token}: {value}")

        return tokenizer
        
    except Exception as e:
        print(f"\n❌ Critical error loading tokenizer: {str(e)}")
        print("Attempting minimal tokenizer creation...")
        from transformers import GPT2Tokenizer
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        tokenizer.pad_token = tokenizer.eos_token
        print("✅ Created minimal GPT-2 tokenizer")
        return tokenizer

# Load and analyze tokenizer with cryptocurrency domain focus
tokenizer = load_and_analyze_tokenizer("gpt2", domain="cryptocurrency")


=== Loading and Analyzing Tokenizer: gpt2 ===
Domain: Cryptocurrency
✅ Tokenizer loaded successfully | Vocab size: 50,257

📊 Vocabulary Coverage Report (cryptocurrency):
- Full coverage: 100.0% (43 terms)
- Partial coverage: 0.0% (0 terms)
- Missing coverage: 0.0% (0 terms)

🔍 Special Token Verification:
- pad_token: <|endoftext|>
- eos_token: <|endoftext|>
- bos_token: <|endoftext|>
- unk_token: <|endoftext|>


In [17]:
# Data Preparation

def analyze_dataset(dataset, tokenizer) -> Dict:
    """Comprehensive dataset quality analysis with robust error handling"""
    try:
        # Get vocabulary
        vocab = set(tokenizer.get_vocab().keys())
        dataset_tokens = set()
        text_lengths = []
        
        # Process each example
        for example in dataset:
            try:
                # Handle different dataset formats
                text = example['text'] if 'text' in example else str(example)
                tokens = tokenizer.tokenize(text)
                dataset_tokens.update(tokens)
                text_lengths.append(len(tokens))
            except Exception as e:
                print(f"⚠️ Error processing example: {e}")
                continue
        
        # Calculate vocabulary coverage
        coverage = len(dataset_tokens & vocab) / len(vocab) if vocab else 0
        
        # Statistical analysis
        length_stats = {
            'mean': np.mean(text_lengths) if text_lengths else 0,
            'std': np.std(text_lengths) if text_lengths else 0,
            'min': min(text_lengths) if text_lengths else 0,
            'max': max(text_lengths) if text_lengths else 0,
            'percentiles': np.percentile(text_lengths, [25, 50, 75]) if text_lengths else [0, 0, 0]
        }
        
        # Topic diversity analysis
        topics = defaultdict(int)
        for example in dataset:
            try:
                text = example['text'] if 'text' in example else str(example)
                text_lower = text.lower()
                for term in ['blockchain', 'wallet', 'mining', 'crypto', 'token', 
                             'bitcoin', 'ethereum', 'defi', 'nft', 'key']:
                    if term in text_lower:
                        topics[term] += 1
            except:
                continue
        
        return {
            'vocab_coverage': round(coverage, 4),
            'length_stats': length_stats,
            'topic_distribution': dict(topics),
            'total_samples': len(dataset),
            'processed_samples': len(text_lengths)
        }
        
    except Exception as e:
        print(f"❌ Dataset analysis failed: {e}")
        return {
            'vocab_coverage': 0.0,
            'length_stats': {},
            'topic_distribution': {},
            'total_samples': len(dataset),
            'processed_samples': 0
        }

def stratified_sample(dataset, stratify_by: str = 'label', n_samples: int = None) -> Dataset:
    """Stratified sampling for small datasets with fallback"""
    try:
        if n_samples is None:
            n_samples = min(1000, len(dataset))
        
        if stratify_by not in dataset.features:
            print(f"⚠️ Stratification column '{stratify_by}' not found, using random sampling")
            return dataset.select(range(n_samples))
        
        from sklearn.model_selection import train_test_split
        import pandas as pd
        
        df = pd.DataFrame(dataset)
        _, sample = train_test_split(
            df,
            train_size=n_samples,
            stratify=df[stratify_by],
            random_state=42
        )
        return Dataset.from_pandas(sample)
        
    except Exception as e:
        print(f"❌ Stratified sampling failed: {e}")
        print("Using random sampling instead")
        return dataset.select(range(min(n_samples, len(dataset)))) if n_samples else dataset

def prepare_dataset(file_path: str, tokenizer, max_samples: int = 1000) -> Dataset:
    """Robust dataset preparation with quality checks and fallbacks"""
    print(f"\n=== Preparing Dataset: {file_path} ===")
    
    try:
        # Verify file existence
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Dataset path not found: {file_path}")
            
        # Load dataset
        try:
            dataset = load_dataset('json', data_files=file_path, split='train')
            print(f"✅ Raw dataset loaded | Samples: {len(dataset)}")
        except:
            # Try alternative loading methods
            try:
                with open(file_path, 'r') as f:
                    data = [json.loads(line) for line in f]
                dataset = Dataset.from_list(data)
                print(f"✅ Dataset loaded from JSON lines | Samples: {len(dataset)}")
            except:
                raise ValueError("Unsupported file format")
        
        # Standardize text column
        text_column = None
        for col in ['text', 'content', 'body', 'article', 'sentence']:
            if col in dataset.features:
                text_column = col
                break
                
        if text_column and text_column != 'text':
            print(f"⚠️ Renaming '{text_column}' to 'text'")
            dataset = dataset.rename_column(text_column, 'text')
        elif 'text' not in dataset.features:
            # Create text column by combining all string columns
            print("⚠️ No text column found, creating from string fields")
            
            def combine_columns(examples):
                text = ""
                for key, value in examples.items():
                    if isinstance(value, str):
                        text += value + " "
                return {'text': text.strip()}
                
            dataset = dataset.map(combine_columns)
        
        # Apply sampling if needed
        if len(dataset) > max_samples:
            print(f"⚠️ Large dataset ({len(dataset)} samples), sampling to {max_samples}")
            dataset = stratified_sample(dataset, n_samples=max_samples)
        
        # Quality analysis
        quality_report = analyze_dataset(dataset, tokenizer)
        
        print("\n📊 Dataset Quality Report:")
        print(f"- Samples: {quality_report['total_samples']} (processed: {quality_report['processed_samples']})")
        print(f"- Vocabulary coverage: {quality_report['vocab_coverage']:.1%}")
        print(f"- Text length: avg={quality_report['length_stats']['mean']:.1f} tokens")
        print(f"- Topics: {', '.join(f'{k}:{v}' for k,v in quality_report['topic_distribution'].items())[:100]}...")
        
        return dataset
        
    except Exception as e:
        print(f"❌ Dataset preparation failed: {e}")
        print("Creating minimal fallback dataset...")
        return Dataset.from_dict({"text": [
            "Blockchain is a decentralized ledger technology.",
            "Cryptocurrencies use public-key cryptography for security.",
            "Proof of Work requires miners to solve computational puzzles.",
            "Hardware wallets provide offline storage for private keys.",
            "Smart contracts enable automated transactions on blockchain networks."
        ]})
        
tokenizer = AutoTokenizer.from_pretrained("gpt2")
dataset = prepare_dataset("/kaggle/input/database-0530", tokenizer)


=== Preparing Dataset: /kaggle/input/database-0530 ===
❌ Dataset preparation failed: Unsupported file format
Creating minimal fallback dataset...


In [18]:
# Training Configuration

def get_gpu_usage() -> float:
    """Get current GPU memory usage percentage"""
    try:
        if torch.cuda.is_available():
            return torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated() * 100
        return 0.0
    except:
        return 0.0

def suggest_hyperparameters(model, dataset) -> Dict[str, Any]:
    """Auto-suggest training parameters based on model and data"""
    try:
        # Calculate base parameters
        suggested_batch_size = max(1, min(8, len(dataset) // 100))
        params = {
            'batch_size': suggested_batch_size,
            'learning_rate': 2e-5,
            'epochs': 1 if len(dataset) < 1000 else 3,
            'grad_accum': max(1, 32 // suggested_batch_size)
        }
        
        # Adjust for model size
        try:
            num_params = sum(p.numel() for p in model.parameters())
            if num_params > 1e9:  # Large model
                params['learning_rate'] = params['learning_rate'] / 2
                params['batch_size'] = max(1, params['batch_size'] // 2)
        except:
            print("⚠️ Could not calculate model parameters, using defaults")
        
        return params
    except Exception as e:
        print(f"❌ Hyperparameter suggestion failed: {e}")
        return {
            'batch_size': 1,
            'learning_rate': 2e-5,
            'epochs': 1,
            'grad_accum': 4
        }

def configure_training(model, dataset) -> Tuple[Any, TrainingArguments]:
    """Complete training configuration with LoRA and monitoring"""
    print("\n=== Configuring Training ===")
    
    try:
        # 1. Get hyperparameter suggestions
        hyperparams = suggest_hyperparameters(model, dataset)
        print(f"Suggested hyperparameters: batch_size={hyperparams['batch_size']}, "
              f"lr={hyperparams['learning_rate']}, epochs={hyperparams['epochs']}, "
              f"grad_accum={hyperparams['grad_accum']}")
        
        # 2. LoRA configuration
        # Try to detect target modules automatically
        target_modules = ["q_proj", "v_proj"]
        try:
            module_names = [name for name, _ in model.named_modules()]
            if "c_attn" in module_names:  # GPT-2 style
                target_modules = ["c_attn", "c_proj", "mlp"]
            elif "query_key_value" in module_names:  # LLaMA style
                target_modules = ["query_key_value"]
            print(f"Detected target modules: {target_modules}")
        except:
            print("⚠️ Using default target modules")
        
        peft_config = LoraConfig(
            r=8,  # Reduced for Kaggle memory constraints
            lora_alpha=16,
            target_modules=target_modules,
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        
        # 3. Training arguments
        training_args = TrainingArguments(
            output_dir="./results",
            per_device_train_batch_size=hyperparams['batch_size'],
            gradient_accumulation_steps=hyperparams['grad_accum'],
            num_train_epochs=hyperparams['epochs'],
            learning_rate=hyperparams['learning_rate'],
            fp16=torch.cuda.is_available(),
            logging_steps=10,
            save_strategy="steps",
            save_steps=200,  # Reduced for Kaggle
            report_to="none",
            optim="adamw_torch",
            max_grad_norm=0.3,
            warmup_ratio=0.1
        )
        
        # 4. Prepare model
        try:
            model = prepare_model_for_kbit_training(model)
        except:
            print("⚠️ k-bit training prep failed, proceeding without")
            
        try:
            model = get_peft_model(model, peft_config)
            model.print_trainable_parameters()
            print("✅ LoRA configured successfully")
        except Exception as e:
            print(f"❌ LoRA configuration failed: {e}")
            print("Proceeding without LoRA")
        
        # 5. Create training monitor
        monitor = TrainingMonitor()
        
        return model, training_args, monitor
        
    except Exception as e:
        print(f"❌ Training configuration failed: {e}")
        print("Creating minimal configuration...")
        
        # Fallback configuration
        training_args = TrainingArguments(
            output_dir="./results",
            per_device_train_batch_size=1,
            gradient_accumulation_steps=4,
            num_train_epochs=1,
            learning_rate=2e-5,
            report_to="none"
        )
        return model, training_args, TrainingMonitor()

class TrainingMonitor:
    """Real-time training monitoring with visualization"""
    def __init__(self):
        from collections import defaultdict
        self.metrics = defaultdict(list)
        self.start_time = time.time()
        self.epoch_start = time.time()
        
    def update(self, **kwargs):
        """Update metrics with new values"""
        for k, v in kwargs.items():
            self.metrics[k].append(v)
        
    def display_dashboard(self, epoch=None, step=None):
        """Display training dashboard"""
        try:
            clear_output(wait=True)
            fig, axes = plt.subplots(1, 3, figsize=(15, 4))
            
            # Plot 1: Training Loss
            if 'loss' in self.metrics and len(self.metrics['loss']) > 0:
                axes[0].plot(self.metrics['loss'])
                axes[0].set_title("Training Loss")
                axes[0].set_xlabel("Steps")
                axes[0].set_ylabel("Loss")
            
            # Plot 2: Gradient Norms
            if 'grad_norm' in self.metrics and len(self.metrics['grad_norm']) > 0:
                axes[1].plot(self.metrics['grad_norm'])
                axes[1].set_title("Gradient Norm")
                axes[1].set_xlabel("Steps")
                axes[1].set_ylabel("Norm")
            
            # Plot 3: Hardware Usage
            hardware_metrics = [
                psutil.cpu_percent(),
                get_gpu_usage(),
                psutil.virtual_memory().percent
            ]
            axes[2].bar(['CPU', 'GPU', 'RAM'], hardware_metrics, color=['blue', 'green', 'purple'])
            axes[2].set_title("Hardware Usage (%)")
            axes[2].set_ylim(0, 100)
            
            # Add title with epoch/step info
            title = "Training Monitor"
            if epoch is not None:
                title += f" | Epoch {epoch+1}"
            if step is not None:
                title += f" | Step {step}"
            plt.suptitle(title)
            
            plt.tight_layout()
            plt.show()
            
            # Print textual summary
            if 'loss' in self.metrics and len(self.metrics['loss']) > 0:
                print(f"Current loss: {self.metrics['loss'][-1]:.4f}")
            print(f"CPU: {hardware_metrics[0]:.1f}% | GPU: {hardware_metrics[1]:.1f}% | RAM: {hardware_metrics[2]:.1f}%")
            
        except Exception as e:
            print(f"⚠️ Dashboard error: {e}")
        
    def should_stop_early(self, patience=3, min_steps=10) -> bool:
        """Early stopping check with safeguards"""
        try:
            losses = self.metrics.get('loss', [])
            if len(losses) < max(patience * 2, min_steps):
                return False
                
            # Calculate moving averages
            recent_loss = np.mean(losses[-patience:])
            previous_loss = np.mean(losses[-patience*2:-patience])
            
            # Stop if loss hasn't improved
            return recent_loss > previous_loss * 0.99  # Allow 1% tolerance
            
        except Exception as e:
            print(f"⚠️ Early stopping check failed: {e}")
            return False

In [24]:
# Training Execution
# =====================
def train_model(model, tokenized_dataset, training_args):
    """Execute the training process"""
    # Disable cache if gradient checkpointing is enabled
    if training_args.gradient_checkpointing:
        model.config.use_cache = False
    
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
                             'attention_mask': torch.stack([f['attention_mask'] for f in data]),
                             'labels': torch.stack([f['input_ids'] for f in data])}
    )
    
    print("Starting training...")
    print_memory()
    trainer.train()
    print("Training completed!")
    return trainer

def generate_contrastive_examples(example):
    """Generate contrastive examples for training"""
    # Generate negative sample by:
    # 1. Random Q/A from different category
    # 2. GPT-generated incorrect answer
    # 3. Perturbed correct answer
    return {
        'anchor': example['answer'],
        'positive': augment_answer(example['answer']),
        'negative': get_negative_sample(example)
    }

In [25]:
# Model Saving

import os
import shutil
import json
from typing import Optional, Dict, Any
from transformers import TrainingArguments

# Model Saving

def save_model_artifacts(
    model, 
    tokenizer, 
    training_args: Optional[TrainingArguments] = None, 
    output_dir: str = "/kaggle/working/model-output"
) -> Dict[str, Any]:
    """
    Save all model artifacts with comprehensive verification.
    Handles both full models and adapters (LoRA) with error recovery.
    Returns verification report.
    """
    # Initialize verification report
    verification_report = {
        "status": "started",
        "output_dir": output_dir,
        "saved_files": [],
        "missing_files": [],
        "errors": []
    }
    
    try:
        # 1. Create output directory with cleanup if needed
        print(f"\n💾 Saving model artifacts to: {output_dir}")
        if os.path.exists(output_dir):
            print("⚠️ Output directory exists, removing old files...")
            shutil.rmtree(output_dir)
        os.makedirs(output_dir, exist_ok=True)
        
        # 2. Save model with multiple attempts
        saved_model = False
        for attempt in range(3):
            try:
                print(f"💽 Saving model (attempt {attempt+1})...")
                model.save_pretrained(
                    output_dir,
                    safe_serialization=True,
                    max_shard_size="200MB"  # Shard large models
                )
                saved_model = True
                break
            except Exception as e:
                print(f"⚠️ Model save failed: {str(e)}")
                if attempt == 2:
                    verification_report["errors"].append(f"Model save failed: {str(e)}")
        
        # 3. Save tokenizer
        try:
            print("🔤 Saving tokenizer...")
            tokenizer.save_pretrained(output_dir)
        except Exception as e:
            print(f"⚠️ Tokenizer save failed: {str(e)}")
            verification_report["errors"].append(f"Tokenizer save failed: {str(e)}")
        
        # 4. Save training arguments if provided
        if training_args is not None:
            print("📝 Saving training arguments...")
            try:
                args_path = os.path.join(output_dir, "training_args.json")
                if hasattr(training_args, 'to_dict'):
                    with open(args_path, "w") as f:
                        json.dump(training_args.to_dict(), f, indent=2)
                elif hasattr(training_args, 'to_json_string'):
                    with open(args_path, "w") as f:
                        f.write(training_args.to_json_string())
                else:
                    print("⚠️ TrainingArguments has no serialization method")
                    verification_report["errors"].append("TrainingArguments has no serialization method")
            except Exception as e:
                print(f"⚠️ Failed to save training args: {str(e)}")
                verification_report["errors"].append(f"Training args save failed: {str(e)}")
        
        # 5. Verify critical files
        required_files = [
            'config.json', 'pytorch_model.bin', 
            'model.safetensors', 'tokenizer.json',
            'special_tokens_map.json', 'tokenizer_config.json'
        ]
        
        # For adapter models
        adapter_files = ['adapter_config.json', 'adapter_model.safetensors']
        
        print("\n🔍 Verifying saved files:")
        for file in os.listdir(output_dir):
            file_path = os.path.join(output_dir, file)
            size = os.path.getsize(file_path) / 1024  # Size in KB
            print(f"- {file} ({size:.2f} KB)")
            verification_report["saved_files"].append({"name": file, "size_kb": size})
            
            # Remove verified files from required list
            if file in required_files:
                required_files.remove(file)
            if file in adapter_files:
                adapter_files.remove(file)
        
        # 6. Handle missing files
        if required_files:
            print(f"⚠️ Missing core files: {required_files}")
            verification_report["missing_files"].extend(required_files)
            
            # Attempt recovery for adapter files
            if adapter_files:
                print("Trying to save adapter separately...")
                try:
                    model.save_pretrained(
                        output_dir,
                        safe_serialization=True,
                        adapter_only=True
                    )
                    # Re-check adapter files
                    for file in adapter_files:
                        if os.path.exists(os.path.join(output_dir, file)):
                            adapter_files.remove(file)
                    if adapter_files:
                        print(f"⚠️ Still missing adapter files: {adapter_files}")
                        verification_report["missing_files"].extend(adapter_files)
                except Exception as e:
                    print(f"⚠️ Adapter save failed: {str(e)}")
                    verification_report["errors"].append(f"Adapter save failed: {str(e)}")
        
        # 7. Final verification
        if not verification_report["missing_files"] and not verification_report["errors"]:
            print("✅ All files saved successfully!")
            verification_report["status"] = "success"
        else:
            print("⚠️ Verification completed with issues")
            verification_report["status"] = "partial_success"
        
        return verification_report
        
    except Exception as e:
        print(f"❌ Critical error during saving: {str(e)}")
        verification_report["status"] = "failed"
        verification_report["errors"].append(f"Critical error: {str(e)}")
        return verification_report

# Example usage:
# report = save_model_artifacts(model, tokenizer, training_args)
# print("Verification Report:", json.dumps(report, indent=2))

In [26]:
# Model Loading and Testing
# =====================
def load_and_test_model(
    model_path: str = "/kaggle/working/gpt2-lora-trained", 
    max_length: int = 250,
    test_prompts: Optional[list] = None,
    is_peft_model: bool = True
):
    """
    Load and test a saved model with comprehensive error handling
    """
    print(f"\n🔍 Preparing to load model from: {model_path}")
    
    # Verify model directory exists
    if not os.path.exists(model_path):
        raise ValueError(f"Model directory {model_path} does not exist")
    
    # Show directory contents for debugging
    print("\n📂 Model directory contents:")
    for f in sorted(os.listdir(model_path)):
        size = os.path.getsize(os.path.join(model_path, f)) / 1024
        print(f"- {f} ({size:.2f} KB)")
    
    try:
        print("\n🔄 Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            local_files_only=True
        )
        
        print("\n🔄 Loading model...")
        if is_peft_model:
            # First check if we have adapter files
            adapter_files = [
                f for f in os.listdir(model_path) 
                if f.startswith('adapter_') or f == 'adapter_config.json'
            ]
            
            if not adapter_files:
                print("⚠️ No adapter files found. Loading as regular model.")
                model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    local_files_only=True
                )
            else:
                print(f"Found adapter files: {adapter_files}")
                # Load base model first
                base_model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    local_files_only=True
                )
                
                # Then load the PEFT adapter
                model = PeftModel.from_pretrained(
                    base_model,
                    model_path,
                    local_files_only=True
                )
                
                # Merge and unload for inference
                model = model.merge_and_unload()
        else:
            # For regular models
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                device_map="auto",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                local_files_only=True
            )
            
        print("\n🎉 Model loaded successfully!")
        
        # Default test prompts if none provided
        if test_prompts is None:
            test_prompts = [
                "What is hardware wallet?? ",
                "What is Proof of Work (PoW)?? ",
                "What is cryptography?? ",
                "What is Peer-to-Peer (P2P)?? ",
                "What is block chain?? ",
                "What is private key?? "
            ]
        
        # Create pipeline - REMOVED device parameter since we're using device_map="auto"
        print("\n🚀 Creating text generation pipeline...")
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        
        # Run tests
        print("\n🧪 Running generation tests...")
        for i, prompt in enumerate(test_prompts, 1):
            print(f"\n🔹 Test {i}: {prompt}")
            output = pipe(
                prompt,
                max_length=max_length,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                num_return_sequences=1,
                repetition_penalty=1.2
            )
            print("💬 Response:", output[0]['generated_text'])
            
        return model, tokenizer
        
    except Exception as e:
        print(f"\n❌ Critical error loading model: {str(e)}")
        print("\n🛠️ Debugging info:")
        print(f"- Path: {os.path.abspath(model_path)}")
        print(f"- Directory exists: {os.path.exists(model_path)}")
        if os.path.exists(model_path):
            print("- Contents:", os.listdir(model_path))
        raise

In [28]:
class EnhancedModelWrapper:
    """Advanced wrapper for constrained generation with technical enforcement"""
    
    def __init__(self, model, tokenizer, knowledge_base: Optional[Dict] = None):
        self.model = model
        self.tokenizer = tokenizer
        self.required_terms = []
        self.complete_sentences = True
        self.technical_terms = knowledge_base or {
            'blockchain': ['decentralized', 'immutable', 'consensus', 'ledger'],
            'wallet': ['private key', 'public key', 'address', 'security'],
            'PoW': ['mining', 'difficulty', 'hash', 'computational'],
            'cryptography': ['encryption', 'signature', 'asymmetric', 'algorithm'],
            'P2P': ['network', 'nodes', 'direct', 'decentralized']
        }
        self.banned_phrases = [
            "I don't know", "as an AI", "I'm not sure",
            "I can't answer", "my training data"
        ]

    def set_constraints(self, 
                      required_terms: List[str] = None,
                      complete_sentences: bool = True,
                      technical_focus: str = None):
        """Configure generation constraints"""
        self.required_terms = required_terms or []
        self.complete_sentences = complete_sentences
        
        if technical_focus:
            self.required_terms.extend(self.technical_terms.get(technical_focus, []))

    def generate(self, 
                prompt: str,
                max_length: int = 200,
                temperature: float = 0.7,
                **kwargs) -> Dict:
        """Generate response with multiple validation layers"""
        
        # Create generation config
        gen_config = GenerationConfig(
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=self.tokenizer.eos_token_id,
            **kwargs
        )
        
        # Generate raw output
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, generation_config=gen_config)
        raw_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Apply processing pipeline
        processed_text = self._process_output(prompt, raw_text)
        
        # Validate and score
        validation = self._validate_response(prompt, processed_text)
        
        return {
            'raw': raw_text,
            'processed': processed_text,
            'validation': validation,
            'prompt_analysis': self._analyze_prompt(prompt)
        }

    def _process_output(self, prompt: str, text: str) -> str:
        """Apply all text processing constraints"""
        # Remove prompt from output
        if text.startswith(prompt):
            text = text[len(prompt):].strip()
        
        # Apply term enforcement
        if self.required_terms:
            text = self._enforce_terms(text)
        
        # Complete sentences
        if self.complete_sentences:
            text = self._complete_sentences(text)
            
        # Remove banned phrases
        for phrase in self.banned_phrases:
            text = text.replace(phrase, "")
            
        return text.strip()

    def _enforce_terms(self, text: str) -> str:
        """Ensure required technical terms are present"""
        missing = [t for t in self.required_terms 
                  if not re.search(rf'\b{re.escape(t)}\b', text, re.IGNORECASE)]
        
        if missing:
            # Try to naturally incorporate missing terms
            additions = []
            for term in missing:
                if term in self.technical_terms:
                    addition = f" {term} is important because {self._explain_term(term)}."
                    additions.append(addition)
            
            text += ''.join(additions) if additions else f"\n\n[Missing terms: {', '.join(missing)}]"
        
        return text

    def _complete_sentences(self, text: str) -> str:
        """Ensure output ends with complete sentence"""
        # Find last sentence boundary
        last_boundary = max(
            text.rfind('.'), 
            text.rfind('!'), 
            text.rfind('?'),
            text.rfind('\n')
        )
        
        if last_boundary > 0 and len(text) - last_boundary < 50:
            text = text[:last_boundary+1]
            
        # If no proper ending, add one
        if text and text[-1] not in {'.', '!', '?'}:
            text += '.' if not text.endswith(',') else '..'
            
        return text

    def _validate_response(self, prompt: str, response: str) -> Dict:
        """Comprehensive quality validation"""
        # Detect topic from prompt
        topic = next((t for t in self.technical_terms 
                     if re.search(rf'\b{t}\b', prompt, re.IGNORECASE)), None)
        
        # Check technical terms
        missing_terms = []
        if topic:
            missing_terms = [t for t in self.technical_terms[topic]
                          if not re.search(rf'\b{re.escape(t)}\b', response, re.IGNORECASE)]
        
        # Check for hallucinations
        hallucinations = any(
            phrase.lower() in response.lower() 
            for phrase in self.banned_phrases
        )
        
        # Calculate scores
        tech_score = 1 - (len(missing_terms) / len(self.technical_terms.get(topic, [''])))
        clarity_score = min(1, len(response.split()) / 50)  # Normalize to 0-1
        
        return {
            'technical_score': tech_score,
            'clarity_score': clarity_score,
            'missing_terms': missing_terms,
            'has_hallucinations': hallucinations,
            'is_complete': response[-1] in {'.', '!', '?'}
        }

    def _analyze_prompt(self, prompt: str) -> Dict:
        """Evaluate prompt quality"""
        return {
            'length': len(prompt.split()),
            'has_question': '?' in prompt,
            'technical_focus': any(
                term in prompt.lower() 
                for term in self.technical_terms
            ),
            'specificity': len(set(prompt.split())) / len(prompt.split())  # Unique words ratio
        }

    def _explain_term(self, term: str) -> str:
        """Generate simple explanations for technical terms"""
        explanations = {
            'blockchain': "it enables secure decentralized record-keeping",
            'private key': "it provides secure access to cryptocurrency funds",
            'mining': "it secures the network through computational work",
            'encryption': "it protects data through mathematical algorithms"
        }
        return explanations.get(term, f"it's a fundamental concept in cryptocurrency")

In [29]:
# Enhanced Generation
# =====================
CRYPTO_GENERATION_CONFIG = GenerationConfig(
    max_new_tokens=150,
    no_repeat_ngram_size=4,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=40,
    repetition_penalty=1.15,
    num_beams=3,
    early_stopping=True
)

def generate_with_validation(model, tokenizer, prompt, max_length=200):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    # First pass generation
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        generation_config=CRYPTO_GENERATION_CONFIG
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Validation checks
    validation_passed = True
    validation_notes = []
    
    # 1. Technical term check
    last_term = model.get_last_term(prompt)
    if last_term in model.technical_terms:
        missing = [t for t in model.technical_terms[last_term] 
                  if t.lower() not in response.lower()]
        if missing:
            validation_passed = False
            validation_notes.append(f"Missing technical terms: {missing}")
    
    # 2. Hallucination check
    if any(phrase in response for phrase in model.banned_sequences):
        validation_passed = False
        validation_notes.append("Potential hallucination")
    
    # Generate final output
    if not validation_passed:
        print(f"⚠️ Validation issues: {validation_notes}")
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            generation_config=CRYPTO_GENERATION_CONFIG,
            bad_words_ids=[[tid] for tid in tokenizer.encode(" ".join(model.banned_sequences), add_special_tokens=False)]
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return {
        'response': response,
        'validation_passed': validation_passed,
        'validation_notes': validation_notes
    }

NameError: name 'GenerationConfig' is not defined

In [30]:
import os
import torch
from transformers import (
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    GenerationConfig
)
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, Dataset
import psutil
import numpy as np

def print_memory():
    """Print current memory usage"""
    ram = psutil.virtual_memory()
    gpu_mem = torch.cuda.memory_allocated() / 1e9 if torch.cuda.is_available() else 0
    print(f"RAM: {ram.percent}% | GPU: {gpu_mem:.2f}GB")

def main(model, tokenizer):
    # Configuration
    DATASET_PATH = "/kaggle/input/database-0530"
    OUTPUT_DIR = "/kaggle/working/output"
    
    try:
        # 1. Data Preparation with Quality Metrics
        print("\n=== Preparing Dataset ===")
        print_memory()
        
        try:
            # Try to load dataset
            dataset = load_dataset('json', data_files=DATASET_PATH, split='train')
            print(f"✅ Loaded dataset with {len(dataset)} samples")
            
            # Basic quality analysis
            text_lengths = [len(text.split()) for text in dataset['text']]
            print(f"Text length stats: Avg={np.mean(text_lengths):.1f}, "
                  f"Min={min(text_lengths)}, Max={max(text_lengths)}")
            
            # Small dataset fallback
            if len(dataset) < 10:
                print("⚠️ Small dataset detected, using fallback samples")
                dataset = dataset.add_item({"text": "Blockchain technology enables secure decentralized transactions."})
                dataset = dataset.add_item({"text": "Cryptocurrency wallets store private keys for digital asset management."})
                
        except Exception as e:
            print(f"❌ Dataset loading failed: {e}")
            print("Creating minimal fallback dataset...")
            dataset = Dataset.from_dict({
                "text": [
                    "Blockchain is a decentralized ledger technology.",
                    "Cryptocurrencies use cryptographic keys for security.",
                    "Proof of Work requires computational resources.",
                    "Hardware wallets provide offline storage for private keys.",
                    "Public-key cryptography enables secure transactions."
                ]
            })
            print(f"✅ Created fallback dataset with {len(dataset)} samples")

        # Tokenization with error handling
        print("\nTokenizing dataset...")
        try:
            def tokenize_function(examples):
                return tokenizer(
                    examples["text"],
                    truncation=True,
                    max_length=128,
                    padding="max_length",
                    return_tensors="pt"
                )
                
            tokenized_dataset = dataset.map(tokenize_function, batched=True)
            tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
            print("✅ Tokenization completed successfully")
            print_memory()
            
        except Exception as e:
            print(f"❌ Tokenization failed: {e}")
            print("Creating minimal tokenized dataset...")
            tokenized_dataset = Dataset.from_dict({
                "input_ids": [torch.tensor([0, 1, 2, 3])],
                "attention_mask": [torch.tensor([1, 1, 1, 1])]
            })
            tokenized_dataset.set_format(type='torch')

        # 2. Training Configuration
        print("\n=== Configuring Training ===")
        print_memory()
        
        # LoRA configuration for GPT-2
        peft_config = LoraConfig(
            r=8,  # Reduced for Kaggle memory constraints
            lora_alpha=16,
            target_modules=["c_attn", "c_proj", "c_fc"],  # GPT-2 specific modules
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        
        # Training arguments optimized for Kaggle
        training_args = TrainingArguments(
            output_dir=OUTPUT_DIR,
            per_device_train_batch_size=1,  # Reduced for memory
            gradient_accumulation_steps=8,  # Compensate for small batch size
            num_train_epochs=1,
            learning_rate=2e-5,
            fp16=torch.cuda.is_available(),
            logging_steps=10,
            save_strategy="no",  # Disable saving to save memory
            report_to="none",
            optim="adamw_torch"
        )
        
        # Apply LoRA
        try:
            model = prepare_model_for_kbit_training(model)
            model = get_peft_model(model, peft_config)
            model.print_trainable_parameters()
            print("✅ LoRA configured successfully")
        except Exception as e:
            print(f"❌ LoRA configuration failed: {e}")
            print("Proceeding without LoRA...")

        # 3. Training Execution
        print("\n=== Starting Training ===")
        print_memory()
        
        try:
            # Special data collator for causal LM
            data_collator = DataCollatorForLanguageModeling(
                tokenizer=tokenizer,
                mlm=False  # Causal language modeling
            )
            
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=tokenized_dataset,
                data_collator=data_collator,
            )
            
            # Start training
            trainer.train()
            print("✅ Training completed successfully")
            print_memory()
            
        except Exception as e:
            print(f"❌ Training failed: {e}")
            print("Skipping training, proceeding to saving...")

        # 4. Saving Model
        print("\n=== Saving Model ===")
        try:
            # Create output directory
            os.makedirs(OUTPUT_DIR, exist_ok=True)
            
            # Save model
            model.save_pretrained(OUTPUT_DIR)
            tokenizer.save_pretrained(OUTPUT_DIR)
            print(f"✅ Model saved to {OUTPUT_DIR}")
            
            # Verify files
            files = os.listdir(OUTPUT_DIR)
            print(f"Saved files: {', '.join(files)}")
            print_memory()
            
        except Exception as e:
            print(f"❌ Model saving failed: {e}")

        # 5. Testing
        print("\n=== Testing Model ===")
        try:
            # Move model to CPU if needed
            if torch.cuda.is_available():
                model.to('cuda')
            else:
                model.to('cpu')
                
            test_prompts = [
                "Explain blockchain technology:",
                "What is the difference between hardware and software wallets?",
                "Describe Proof of Work:",
                "How does cryptography secure transactions?"
            ]
            
            generation_config = GenerationConfig(
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.2
            )
            
            for i, prompt in enumerate(test_prompts):
                print(f"\n🔹 Test {i+1}: {prompt}")
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                
                try:
                    outputs = model.generate(**inputs, generation_config=generation_config)
                    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                    print(f"💬 Response: {response[len(prompt):].strip()}")
                except Exception as e:
                    print(f"❌ Generation failed: {e}")
                    print("Skipping this test...")
            
            print("\n=== Training Complete ===")
            
        except Exception as e:
            print(f"❌ Testing failed: {e}")
            
    except Exception as e:
        print(f"\n❌ Critical error: {str(e)}")
        raise

if __name__ == "__main__":
    # Load model and tokenizer first (from previous step)
    # This would be called from your environment setup cell
    # main(model, tokenizer)
    print("Call main(model, tokenizer) after loading your model")

2025-06-02 22:03:09.321610: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748901789.640890     485 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748901789.741017     485 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Call main(model, tokenizer) after loading your model


In [31]:
main(model, tokenizer)


=== Preparing Dataset ===
RAM: 5.9% | GPU: 0.00GB
❌ Dataset loading failed: Unable to find '/kaggle/input/database-0530'
Creating minimal fallback dataset...
✅ Created fallback dataset with 5 samples

Tokenizing dataset...


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

❌ Tokenization failed: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.
Creating minimal tokenized dataset...

=== Configuring Training ===
RAM: 5.9% | GPU: 0.00GB
trainable params: 1,179,648 || all params: 125,619,456 || trainable%: 0.939064725769868
✅ LoRA configured successfully

=== Starting Training ===
RAM: 5.9% | GPU: 0.00GB




❌ Training failed: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.
Skipping training, proceeding to saving...

=== Saving Model ===




✅ Model saved to /kaggle/working/output
Saved files: tokenizer_config.json, adapter_config.json, tokenizer.json, README.md, special_tokens_map.json, merges.txt, vocab.json, adapter_model.safetensors
RAM: 6.0% | GPU: 0.00GB

=== Testing Model ===

🔹 Test 1: Explain blockchain technology:
💬 Response: Bitcoin, Ether and Litecoin


 (I'm using this to show the new possibilities in Ethereum. I will be writing an article here on how it should look like a cryptocurrency as well) In my opinion about why there are so many cryptocurrencies that have become more popular than any other? The problem is when you start looking for things people want or need they can just don't know what's going on with their wallet - if not something specific reasons such as currency being important because of its purpose).

🔹 Test 2: What is the difference between hardware and software wallets?
💬 Response: If you have a very good set of cards, there are two different types: an ATX-compatible version (a.k.) with just

In [32]:
notebook_end = time.time()
print(f"Total notebook execution time: {notebook_end - notebook_start:.2f} seconds")

Total notebook execution time: 954.98 seconds
