## 1. Setup & Installation

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, describe device 0.
import torch

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
print(f"CUDA version: {torch.version.cuda}")
if has_cuda:
    # total_memory is reported in bytes; divide by 1e9 to show decimal gigabytes.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {gpu_props.total_memory / 1e9:.2f} GB")

In [None]:
# Install dependencies (only needed on a fresh environment).
# Prefer the %pip magic over !pip so packages are installed into this kernel's environment:
# %pip install -r ../requirements.txt

## 2. Prepare Training Dataset

In [None]:
import sys

# Make the parent directory importable so the `app` package resolves from this notebook.
# Guarded so that re-running the cell does not stack duplicate entries on sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from app.prepare_dataset import prepare_dataset, validate_dataset

# Build the JSONL training file from the processed inputs and keep the returned samples.
samples = prepare_dataset(
    input_dir="../data/processed",
    output_file="../data/ielts_training_data.jsonl",
)

# Report how many samples came back.
sample_count = len(samples)
print(f"\nCreated {sample_count} training samples")

In [None]:
# Run the project's validator on the generated JSONL file.
# The call stays the last expression so the notebook displays whatever it returns.
training_data_file = "../data/ielts_training_data.jsonl"
validate_dataset(training_data_file)

## 3. Initialize Trainer

In [None]:
from app.train_qlora import IELTSModelTrainer

# Configure the trainer: base-model key, output directory for the run,
# and the JSONL dataset to train on.
trainer = IELTSModelTrainer(
    model_name="qwen",  # Options: qwen, phi2, gemma
    output_dir="../models/ielts-demo",
    dataset_path="../data/ielts_training_data.jsonl"
)

# Status marker restored from mis-encoded text (was mojibake for U+2713).
print("✓ Trainer initialized")

## 4. Train Model with QLoRA

**Note**: Training có thể mất 10-30 phút tùy vào dataset size

In [None]:
# Train model with the hyperparameters below; the value returned by train() is kept
# in `trained_model`.
trained_model = trainer.train(
    num_epochs=3,
    batch_size=1,  # Small batch for 4GB GPU
    gradient_accumulation_steps=4,  # presumably an effective batch of 4 — confirm in trainer
    learning_rate=2e-4,
    max_seq_length=512,
)

# Status marker restored from mis-encoded text (was mojibake for U+2713).
print("\n✓ Training complete!")

## 5. Test Inference

In [None]:
from app.inference import AutotrainInferenceServer
import json

# Load trained model from the same directory the trainer was configured to write to.
server = AutotrainInferenceServer(model_path="../models/ielts-demo")

# Status marker restored from mis-encoded text (was mojibake for U+2713).
print("✓ Model loaded for inference")

In [None]:
# Test with sample transcript
# The literal is kept exactly as authored (including line-end spaces); only the
# outer whitespace is stripped before scoring.
test_transcript = """
Well, I think that technology has greatly influenced our daily lives. 
For example, smartphones allow us to stay connected with friends and family. 
Moreover, the internet provides us with access to vast amounts of information.
""".strip()

# Emoji restored from mis-encoded text (were mojibake for U+1F4DD and U+1F3AF).
print("📝 Test Transcript:")
print(test_transcript)
print("\n🎯 Scoring...")

# Score the transcript and pretty-print the result as JSON.
scores = server.score_transcript(test_transcript)
print(json.dumps(scores, indent=2))

## 6. Convert to GGUF (Optional)

Convert model to GGUF format for llama.cpp CPU inference

In [None]:
from app.convert_model import convert_to_gguf

# Convert to GGUF
gguf_path = convert_to_gguf(
    model_path="../models/ielts-demo",
    output_path="../models/ielts-demo/model.gguf",
    quantization="Q4_K_M"  # 4-bit quantization
)

# A falsy return value is reported instead of passing silently, so the reader
# can tell whether the optional conversion actually produced a file.
if gguf_path:
    print(f"✓ Model converted to {gguf_path}")
else:
    print("✗ GGUF conversion returned no output path")

## 7. Benchmark Performance

In [None]:
import time

# Benchmark inference speed on three transcripts of increasing length.
test_cases = [
    "I like technology.",
    "Well, I think education is important for personal development.",
    "In my opinion, environmental protection requires collective effort from all members of society."
]

# Emoji restored from mis-encoded text (was mojibake for U+1F4CA).
print("📊 Benchmarking inference speed...\n")

for i, transcript in enumerate(test_cases, 1):
    # perf_counter() is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() is wall-clock and can jump if the system clock changes.
    start = time.perf_counter()
    scores = server.score_transcript(transcript)
    elapsed = time.perf_counter() - start

    print(f"Test {i}: {elapsed*1000:.2f}ms")
    print(f"  Transcript length: {len(transcript.split())} words")
    print()

## 8. Compare Different Models

In [None]:
# Train and compare different base models
models_to_test = ["qwen", "phi2", "gemma"]

# Training every base model takes a long time, so the comparison is opt-in.
# Set to True to actually train; when False the cell only reports what was skipped.
RUN_MODEL_COMPARISON = False

# Maps model key -> output directory of the trained run (filled only when training runs).
results = {}

for model_name in models_to_test:
    print(f"\n{'='*50}")
    print(f"Testing {model_name.upper()}")
    print('='*50)

    if not RUN_MODEL_COMPARISON:
        # Do not report success for training that was never performed.
        print(f"- {model_name} training skipped (set RUN_MODEL_COMPARISON = True to run)")
        continue

    # Note: This will take a while!
    # A separate variable is used so the main `trainer` from the earlier cell is not replaced.
    comparison_output_dir = f"../models/ielts-{model_name}"
    comparison_trainer = IELTSModelTrainer(
        model_name=model_name,
        output_dir=comparison_output_dir,
        dataset_path="../data/ielts_training_data.jsonl",
    )
    comparison_trainer.train(num_epochs=1, batch_size=1)
    results[model_name] = comparison_output_dir

    print(f"✓ {model_name} training complete")

## 9. Export for Production

Final checklist before deployment

In [None]:
import os
from pathlib import Path

model_dir = Path("../models/ielts-demo")

# Check required files
required_files = [
    "config.json",
    "adapter_config.json",
    "adapter_model.safetensors",
    "tokenizer.json",
    "tokenizer_config.json",
]

# Emoji restored from mis-encoded text (was mojibake for U+1F4E6).
print("📦 Deployment Checklist:\n")

# Track what is missing so the final verdict reflects the actual check results
# instead of unconditionally declaring the model ready.
missing_files = []
for file in required_files:
    exists = (model_dir / file).exists()
    status = "✓" if exists else "✗"
    print(f"{status} {file}")
    if not exists:
        missing_files.append(file)

if missing_files:
    print(f"\n✗ Model NOT ready for deployment: {len(missing_files)} required file(s) missing")
else:
    print("\n✓ Model ready for deployment!")

## Next Steps

1. ✅ Model đã được train với QLoRA
2. ✅ Test inference thành công
3. 🔄 Deploy với vLLM hoặc llama.cpp
4. 🔄 Integrate vào production API
5. 🔄 Monitor performance và fine-tune thêm

Xem thêm:
- [Training Guide](../docs/TRAINING_DEPLOYMENT.md)
- [Quick Start](../docs/QUICKSTART.md)