# Textbooks Are All You Need - Implementation with Alternative Datasets

This notebook follows the exact approach from the paper but uses different datasets.

In [1]:
# Install required packages
!pip install datasets transformers torch beautifulsoup4 requests tqdm

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

## Model Implementation

In [2]:
import torch
import torch.nn as nn
from torch.nn import functional as F

class Head(nn.Module):
    """One head of self-attention"""
    def __init__(self, head_size: int, n_embd: int, block_size: int, dropout: float):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, C = x.shape
        k = self.key(x)
        q = self.query(x)
        # Compute attention scores
        wei = q @ k.transpose(-2, -1) * C**-0.5
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        # Weighted aggregation of values
        v = self.value(x)
        out = wei @ v
        return out

class MultiHeadAttention(nn.Module):
    """Multiple heads of self-attention in parallel"""
    def __init__(self, config):
        super().__init__()
        head_size = config['n_embd'] // config['n_head']
        self.heads = nn.ModuleList([
            Head(head_size, config['n_embd'], config['block_size'], config['dropout'])
            for _ in range(config['n_head'])
        ])
        self.proj = nn.Linear(config['n_embd'], config['n_embd'])
        self.dropout = nn.Dropout(config['dropout'])

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out

class FeedForward(nn.Module):
    """Simple linear layer followed by non-linearity"""
    def __init__(self, config):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(config['n_embd'], 4 * config['n_embd']),
            nn.ReLU(),
            nn.Linear(4 * config['n_embd'], config['n_embd']),
            nn.Dropout(config['dropout']),
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    """Transformer block: communication followed by computation"""
    def __init__(self, config):
        super().__init__()
        self.sa = MultiHeadAttention(config)
        self.ffwd = FeedForward(config)
        self.ln1 = nn.LayerNorm(config['n_embd'])
        self.ln2 = nn.LayerNorm(config['n_embd'])

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

class GPT(nn.Module):
    """The main GPT language model"""
    def __init__(self, config, vocab_size):
        super().__init__()
        self.config = config

        self.token_embedding_table = nn.Embedding(vocab_size, config['n_embd'])
        self.position_embedding_table = nn.Embedding(config['block_size'], config['n_embd'])
        self.blocks = nn.Sequential(*[Block(config) for _ in range(config['n_layer'])])
        self.ln_f = nn.LayerNorm(config['n_embd'])
        self.lm_head = nn.Linear(config['n_embd'], vocab_size)

    def forward(self, idx, targets=None):
        B, T = idx.shape

        # Get token and position embeddings
        tok_emb = self.token_embedding_table(idx)
        pos_emb = self.position_embedding_table(torch.arange(T, device=idx.device))
        x = tok_emb + pos_emb

        # Apply transformer blocks and final layer norm
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_head(x)

        # Compute loss if targets provided
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        """Generate new tokens from the model"""
        for _ in range(max_new_tokens):
            # Crop context to block_size
            idx_cond = idx[:, -self.config['block_size']:]
            # Get predictions
            logits, _ = self(idx_cond)
            # Focus on last time step
            logits = logits[:, -1, :]
            # Apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1)
            # Sample from distribution
            idx_next = torch.multinomial(probs, num_samples=1)
            # Append sampled index to running sequence
            idx = torch.cat((idx, idx_next), dim=1)
        return idx

## 1. Main Code Dataset - CodeParrot (Optimized)

In [3]:
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm.notebook import tqdm
import numpy as np

print("Loading CodeParrot dataset...")
# Load more examples to ensure enough data
code_dataset = load_dataset("codeparrot/codeparrot-clean", split="train[:50000]")
print(f"Loaded {len(code_dataset)} examples")

# Load quality classifier
print("Loading CodeBERT classifier...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
classifier = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base").to(device)
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")

def filter_quality_code_batch(codes: list, batch_size: int = 32) -> list:
    """Filter code based on quality using CodeBERT classifier with batch processing"""
    results = []

    # Process in batches
    for i in tqdm(range(0, len(codes), batch_size), desc="Filtering code"):
        batch = codes[i:i + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, max_length=512, padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = classifier(**inputs)
            predictions = outputs.logits[:, 1] > 0.8
            results.extend(predictions.cpu().tolist())

    return results

# Filter dataset with batch processing
codes = code_dataset['content']
quality_mask = filter_quality_code_batch(codes)
filtered_code = [code for code, is_quality in zip(codes, quality_mask) if is_quality]
print(f"Filtered dataset size: {len(filtered_code)} examples")

# Save some memory
del code_dataset
del classifier
torch.cuda.empty_cache()

Loading CodeParrot dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.30k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/54 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/54 [00:00<?, ?files/s]

file-000000000001.json.gz:   0%|          | 0.00/246M [00:00<?, ?B/s]

file-000000000002.json.gz:   0%|          | 0.00/248M [00:00<?, ?B/s]

file-000000000003.json.gz:   0%|          | 0.00/247M [00:00<?, ?B/s]

file-000000000004.json.gz:   0%|          | 0.00/247M [00:00<?, ?B/s]

file-000000000005.json.gz:   0%|          | 0.00/247M [00:00<?, ?B/s]

file-000000000006.json.gz:   0%|          | 0.00/246M [00:00<?, ?B/s]

file-000000000007.json.gz:   0%|          | 0.00/246M [00:00<?, ?B/s]

file-000000000008.json.gz:   0%|          | 0.00/248M [00:00<?, ?B/s]

file-000000000009.json.gz:   0%|          | 0.00/245M [00:00<?, ?B/s]

file-000000000010.json.gz:   0%|          | 0.00/245M [00:00<?, ?B/s]

file-000000000011.json.gz:   0%|          | 0.00/244M [00:00<?, ?B/s]

file-000000000012.json.gz:   0%|          | 0.00/243M [00:00<?, ?B/s]

file-000000000013.json.gz:   0%|          | 0.00/245M [00:00<?, ?B/s]

file-000000000014.json.gz:   0%|          | 0.00/242M [00:00<?, ?B/s]

file-000000000015.json.gz:   0%|          | 0.00/243M [00:00<?, ?B/s]

file-000000000016.json.gz:   0%|          | 0.00/240M [00:00<?, ?B/s]

file-000000000017.json.gz:   0%|          | 0.00/242M [00:00<?, ?B/s]

file-000000000018.json.gz:   0%|          | 0.00/242M [00:00<?, ?B/s]

file-000000000019.json.gz:   0%|          | 0.00/241M [00:00<?, ?B/s]

file-000000000020.json.gz:   0%|          | 0.00/242M [00:00<?, ?B/s]

file-000000000021.json.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

file-000000000022.json.gz:   0%|          | 0.00/238M [00:00<?, ?B/s]

file-000000000023.json.gz:   0%|          | 0.00/240M [00:00<?, ?B/s]

file-000000000024.json.gz:   0%|          | 0.00/237M [00:00<?, ?B/s]

file-000000000025.json.gz:   0%|          | 0.00/238M [00:00<?, ?B/s]

file-000000000026.json.gz:   0%|          | 0.00/237M [00:00<?, ?B/s]

file-000000000027.json.gz:   0%|          | 0.00/238M [00:00<?, ?B/s]

file-000000000028.json.gz:   0%|          | 0.00/239M [00:00<?, ?B/s]

file-000000000029.json.gz:   0%|          | 0.00/238M [00:00<?, ?B/s]

file-000000000030.json.gz:   0%|          | 0.00/239M [00:00<?, ?B/s]

file-000000000031.json.gz:   0%|          | 0.00/237M [00:00<?, ?B/s]

file-000000000032.json.gz:   0%|          | 0.00/239M [00:00<?, ?B/s]

file-000000000033.json.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

file-000000000034.json.gz:   0%|          | 0.00/237M [00:00<?, ?B/s]

file-000000000035.json.gz:   0%|          | 0.00/235M [00:00<?, ?B/s]

file-000000000036.json.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

file-000000000037.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000038.json.gz:   0%|          | 0.00/235M [00:00<?, ?B/s]

file-000000000039.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000040.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000041.json.gz:   0%|          | 0.00/235M [00:00<?, ?B/s]

file-000000000042.json.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

file-000000000043.json.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

file-000000000044.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000045.json.gz:   0%|          | 0.00/237M [00:00<?, ?B/s]

file-000000000046.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000047.json.gz:   0%|          | 0.00/232M [00:00<?, ?B/s]

file-000000000048.json.gz:   0%|          | 0.00/232M [00:00<?, ?B/s]

file-000000000049.json.gz:   0%|          | 0.00/233M [00:00<?, ?B/s]

file-000000000050.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000051.json.gz:   0%|          | 0.00/233M [00:00<?, ?B/s]

file-000000000052.json.gz:   0%|          | 0.00/234M [00:00<?, ?B/s]

file-000000000053.json.gz:   0%|          | 0.00/230M [00:00<?, ?B/s]

file-000000000054.json.gz:   0%|          | 0.00/142M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Loaded 50000 examples
Loading CodeBERT classifier...


config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Filtering code:   0%|          | 0/1563 [00:00<?, ?it/s]

Filtered dataset size: 0 examples


## 2. Textbook Dataset - Python Documentation

In [4]:
import requests
from bs4 import BeautifulSoup

def get_python_docs():
    """Get Python documentation content"""
    base_url = "https://docs.python.org/3/"
    sections = [
        "tutorial/",
        "reference/",
        "library/"
    ]

    content = []
    print("Downloading Python documentation...")
    for section in tqdm(sections):
        url = base_url + section
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Get main content
        main_content = soup.find('div', {'class': 'document'})
        if main_content:
            # Remove navigation elements
            for nav in main_content.find_all(['div', 'nav'], {'class': ['navigation', 'sidebar']}):
                nav.decompose()
            content.append(main_content.get_text())

    return '\n'.join(content)

textbook_content = get_python_docs()
print(f"Textbook content size: {len(textbook_content)} characters")

Downloading Python documentation...


  0%|          | 0/3 [00:00<?, ?it/s]

Textbook content size: 22846 characters


## 3. Exercises Dataset - LeetCode Problems

In [5]:
def get_leetcode_problems():
    """Get LeetCode problems and solutions"""
    # Using LeetCode API to get problems
    api_url = "https://leetcode.com/api/problems/all/"
    response = requests.get(api_url)
    problems = response.json()

    exercises = []
    print("Processing LeetCode problems...")
    for problem in tqdm(problems['stat_status_pairs'][:500]):  # Increased to 500 problems
        if not problem['paid_only']:
            title_slug = problem['stat']['question__title_slug']
            problem_url = f"https://leetcode.com/problems/{title_slug}/description/"

            try:
                response = requests.get(problem_url)
                soup = BeautifulSoup(response.text, 'html.parser')
                content = soup.find('div', {'class': 'content__u3I1 question-content__JfgR'})
                if content:
                    exercises.append({
                        'title': problem['stat']['question__title'],
                        'content': content.get_text(),
                        'difficulty': problem['difficulty']['level']
                    })
            except Exception as e:
                print(f"Error processing problem {title_slug}: {e}")
                continue

    return exercises

exercises = get_leetcode_problems()
print(f"Exercise dataset size: {len(exercises)} problems")

Processing LeetCode problems...


  0%|          | 0/500 [00:00<?, ?it/s]

Exercise dataset size: 0 problems


## Combine Datasets

In [6]:
def prepare_combined_dataset():
    """Combine and prepare all datasets for training"""
    # Combine all data sources
    all_text = []

    # Add filtered code
    all_text.extend(filtered_code)

    # Add textbook content
    all_text.append(textbook_content)

    # Add exercises
    for exercise in exercises:
        all_text.append(f"Problem: {exercise['title']}\n{exercise['content']}")

    # Create vocabulary
    text = '\n'.join(all_text)
    chars = sorted(list(set(text)))
    vocab_size = len(chars)

    # Create encoding maps
    stoi = {ch: i for i, ch in enumerate(chars)}
    itos = {i: ch for i, ch in enumerate(chars)}

    return text, vocab_size, stoi, itos

text, vocab_size, stoi, itos = prepare_combined_dataset()
print(f"Combined text size: {len(text)} characters")
print(f"Vocabulary size: {vocab_size}")

Combined text size: 22846 characters
Vocabulary size: 85


## Training Process

In [7]:
# Training configuration with adjusted block size
config = {
    'batch_size': 32,      # Reduced batch size
    'block_size': 128,     # Reduced block size
    'max_iters': 5000,
    'learning_rate': 1e-4,
    'n_embd': 512,
    'n_head': 8,
    'n_layer': 8,
    'dropout': 0.2
}

def encode(s: str, stoi: dict) -> list:
    """Encode string to list of integers"""
    return [stoi[c] for c in s]

def decode(l: list, itos: dict) -> str:
    """Decode list of integers to string"""
    return ''.join([itos[i] for i in l])

# Encode text
data = torch.tensor(encode(text, stoi), dtype=torch.long)

# Split into train and validation
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]
print(f"Train size: {len(train_data)}, Validation size: {len(val_data)}")

# Verify data size is sufficient
assert len(train_data) > config['block_size'], "Training data must be larger than block_size"
assert len(val_data) > config['block_size'], "Validation data must be larger than block_size"

Train size: 20561, Validation size: 2285


## Model Training

In [8]:
def get_batch(split):
    """Generate a small batch of data"""
    data = train_data if split == 'train' else val_data
    # Add safety check
    max_ix = len(data) - config['block_size']
    if max_ix <= 0:
        raise ValueError(f"Data length ({len(data)}) must be greater than block_size ({config['block_size']})")

    ix = torch.randint(max_ix, (config['batch_size'],))
    x = torch.stack([data[i:i+config['block_size']] for i in ix])
    y = torch.stack([data[i+1:i+config['block_size']+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

@torch.no_grad()
def estimate_loss():
    """Estimate loss on train and validation sets"""
    out = {}
    model.eval()
    eval_iters = 200
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

# Initialize model and optimizer
model = GPT(config, vocab_size).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'])

# Training loop with error handling
try:
    for iter in tqdm(range(config['max_iters'])):
        if iter % 100 == 0:
            losses = estimate_loss()
            print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

        xb, yb = get_batch('train')
        logits, loss = model(xb, yb)
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
except Exception as e:
    print(f"Error during training: {e}")
    raise

  0%|          | 0/5000 [00:00<?, ?it/s]

step 0: train loss 4.5481, val loss 4.5536
step 100: train loss 2.4571, val loss 2.5254
step 200: train loss 2.3514, val loss 2.4911
step 300: train loss 2.2600, val loss 2.4506
step 400: train loss 2.1451, val loss 2.3835
step 500: train loss 1.9905, val loss 2.3109
step 600: train loss 1.7378, val loss 2.1940
step 700: train loss 1.4842, val loss 2.1312
step 800: train loss 1.2501, val loss 2.0711
step 900: train loss 1.0265, val loss 2.0951
step 1000: train loss 0.8277, val loss 2.1422
step 1100: train loss 0.6390, val loss 2.2503
step 1200: train loss 0.4923, val loss 2.3714
step 1300: train loss 0.3852, val loss 2.4638
step 1400: train loss 0.2984, val loss 2.5642
step 1500: train loss 0.2442, val loss 2.7080
step 1600: train loss 0.1997, val loss 2.8156
step 1700: train loss 0.1722, val loss 2.9014
step 1800: train loss 0.1552, val loss 2.9565
step 1900: train loss 0.1410, val loss 3.0513
step 2000: train loss 0.1320, val loss 3.1179
step 2100: train loss 0.1237, val loss 3.1843


## Generate Code

In [9]:
# Test the model
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(model.generate(context, max_new_tokens=500)[0].tolist(), itos))


tkinter.dnd â Drag and drop support
tkinter.ttk â Tk themed widgets
IDLE


Development Tools
typing â Support for type hints
pydoc â Documentation generator and online help system
Python Development Mode
doctestest â Test interactives byte ase clated, of The semantics

The Python Standard Library. For an informal intitroduction to lable many of
all code pointers and oftentional manuals expressions
6.4.5. paramp partorting
4.9.3. parsesword Argument Lists
4.9.5. Unpacking Argument Lists


# Testing the Trained Model

This notebook contains various test prompts to evaluate our trained model from textbook_study.ipynb. We'll test different types of coding tasks to assess the model's capabilities.

In [None]:
import torch
from typing import List, Dict, Any

# Load the trained model and tokenization functions
# Note: Run textbook_study.ipynb first to train the model

def test_model(prompt: str, max_tokens: int = 500) -> str:
    """Generate code from a prompt using the trained model"""
    context = torch.tensor([encode(prompt, stoi)], dtype=torch.long, device=device)
    generated = model.generate(context, max_new_tokens=max_tokens)[0].tolist()
    return decode(generated, itos)

## 1. Basic Algorithm Implementation

In [None]:
# Test 1: Binary Search
prompt1 = """
def binary_search(arr: List[int], target: int) -> int:
    """Implement binary search to find target in sorted array.
    Returns index of target if found, -1 otherwise.

    Example:
    >>> binary_search([1, 3, 5, 7, 9], 5)
    2
    """
"""

print("Binary Search Implementation:")
print(test_model(prompt1))

## 2. Data Structure Operations

In [None]:
# Test 2: Binary Tree Implementation
prompt2 = """
class TreeNode:
    def __init__(self, val=0):
        self.val = val
        self.left = None
        self.right = None

def insert_into_bst(root: TreeNode, val: int) -> TreeNode:
    """Insert a value into a Binary Search Tree.
    If root is None, create a new node and return it.
    Otherwise, insert into the appropriate subtree.
    """
"""

print("BST Insertion Implementation:")
print(test_model(prompt2))

## 3. String Manipulation

In [None]:
# Test 3: String Processing
prompt3 = """
def is_palindrome(s: str) -> bool:
    """Check if a string is a palindrome.
    Ignore case and non-alphanumeric characters.

    Example:
    >>> is_palindrome("A man, a plan, a canal: Panama")
    True
    """
"""

print("Palindrome Checker Implementation:")
print(test_model(prompt3))

## 4. Dynamic Programming

In [None]:
# Test 4: Dynamic Programming Problem
prompt4 = """
def longest_increasing_subsequence(nums: List[int]) -> int:
    """Find the length of the longest strictly increasing subsequence.

    Example:
    >>> longest_increasing_subsequence([10,9,2,5,3,7,101,18])
    4  # The longest increasing subsequence is [2,3,7,101]
    """
"""

print("LIS Implementation:")
print(test_model(prompt4))

## 5. Object-Oriented Design

In [None]:
# Test 5: Class Implementation
prompt5 = """
class ParkingLot:
    """Design a parking lot system with the following requirements:
    1. Multiple floors
    2. Multiple spots per floor
    3. Three types of vehicles: motorcycle, car, bus
    4. Three types of spots: small, medium, large

    Implement methods:
    - park_vehicle(vehicle)
    - remove_vehicle(vehicle)
    - get_available_spots()
    """
"""

print("Parking Lot System Implementation:")
print(test_model(prompt5))

## 6. Graph Algorithms

In [None]:
# Test 6: Graph Algorithm
prompt6 = """
from collections import defaultdict, deque

def shortest_path(graph: Dict[int, List[int]], start: int, end: int) -> List[int]:
    """Find shortest path between start and end nodes in an unweighted graph.
    Return the path as a list of nodes, or empty list if no path exists.

    Example:
    >>> graph = {0: [1, 2], 1: [2, 3], 2: [3], 3: []}
    >>> shortest_path(graph, 0, 3)
    [0, 1, 3]
    """
"""

print("Shortest Path Implementation:")
print(test_model(prompt6))

## 7. Advanced Python Features

In [None]:
# Test 7: Decorator Implementation
prompt7 = """
from functools import wraps
from typing import Callable, Any

def retry(max_attempts: int, delay: float = 1.0) -> Callable:
    """Create a decorator that retries a function on failure.

    Args:
        max_attempts: Maximum number of retry attempts
        delay: Delay between retries in seconds

    Example:
    @retry(max_attempts=3)
    def api_call():
        # Some API call that might fail
        pass
    """
"""

print("Retry Decorator Implementation:")
print(test_model(prompt7))

## 8. System Design

In [None]:
# Test 8: Cache Implementation
prompt8 = """
class LRUCache:
    """Implement a Least Recently Used (LRU) cache.

    Methods:
        - get(key): Get value for key, return -1 if not found
        - put(key, value): Set value for key, removing least recently used item if cache is full

    Example:
    >>> cache = LRUCache(2)  # capacity = 2
    >>> cache.put(1, 1)
    >>> cache.put(2, 2)
    >>> cache.get(1)  # returns 1
    >>> cache.put(3, 3)  # evicts key 2
    >>> cache.get(2)  # returns -1 (not found)
    """
"""

print("LRU Cache Implementation:")
print(test_model(prompt8))