In [None]:
# CTI Fine-tuning Backend with Hugging Face Hub Integration
# This notebook runs in Colab with GPU access using shared training functions
!pip install transformers datasets torch accelerate huggingface_hub
import torch
import json
import sys
import os

# Add shared training module to path
# Guarded so that re-running this cell does not stack duplicate entries on
# sys.path (the original appended unconditionally on every execution).
SHARED_MODULE_DIR = '/content'
if SHARED_MODULE_DIR not in sys.path:
    sys.path.append(SHARED_MODULE_DIR)

# Report which accelerator the runtime exposes so a missing GPU is noticed
# before any training is started.
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    # total_memory is reported in bytes; divide by 1e9 to display gigabytes.
    print(f'GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')
else:
    print('Using CPU')

In [None]:
# Import shared training module
from shared_training import FineTuningTrainer, AVAILABLE_MODELS, create_training_data_from_csv, setup_huggingface_hub

# Initialize trainer for Colab GPU environment
# Use the GPU when the runtime has one; otherwise fall back to CPU instead of
# requesting a CUDA device that does not exist.
# NOTE(review): assumes FineTuningTrainer accepts device="cpu" - confirm in shared_training.
trainer_device = "cuda" if torch.cuda.is_available() else "cpu"
if trainer_device != "cuda":
    print("⚠️ CUDA not available - initializing trainer on CPU; switch to a GPU runtime for practical training speed")
trainer = FineTuningTrainer(device=trainer_device)

print("✅ Shared training module loaded")
print(f"Available models: {len(AVAILABLE_MODELS)}")
print("\nColab-compatible models:")
for model_id, info in AVAILABLE_MODELS.items():
    if info.get('location') == 'colab':
        # .get with fallbacks (matching the 'location' lookup above) so one
        # incomplete registry entry cannot abort the whole listing.
        print(f"- {info.get('name', model_id)}: {info.get('size', 'unknown size')}")

In [None]:
# Setup Hugging Face Hub authentication
# The status strings below previously contained mis-decoded emoji bytes
# (UTF-8 read as Mac Roman); they are restored to the intended characters.
print("🔐 Setting up Hugging Face Hub authentication...")
hf_ready = setup_huggingface_hub()

# hf_ready is only used as a truthy/falsy flag to pick the status message.
if hf_ready:
    print("✅ Hugging Face Hub ready - can push models")
else:
    print("⚠️ Hugging Face Hub not configured - models will be saved locally only")
    print("To enable Hub features, set HF_TOKEN environment variable")

In [None]:
# Training function using shared module with Hub support
def fine_tune_model_colab(model_name, training_data, epochs=3, learning_rate=5e-5, push_to_hub=False, hub_model_id=None):
    """Fine-tune a model through the notebook-level shared trainer.

    Thin wrapper around ``trainer.fine_tune_model`` (``trainer`` is the
    ``FineTuningTrainer`` instance created in an earlier cell). It validates
    the input up front, prints progress messages, and forwards every argument
    to the trainer with ``output_dir="./models"``.

    Args:
        model_name: Model identifier, forwarded unchanged to the trainer.
        training_data: Sized collection of training examples (must support
            ``len()``). The usage cell describes it as a list of
            ``{'input': ..., 'output': ...}`` dictionaries.
        epochs: Forwarded to the trainer as ``epochs``.
        learning_rate: Forwarded to the trainer as ``learning_rate``.
        push_to_hub: Forwarded to the trainer as ``push_to_hub``.
        hub_model_id: Forwarded to the trainer as ``hub_model_id``.

    Returns:
        The ``(trainer_result, output_dir)`` pair returned by
        ``trainer.fine_tune_model``.

    Raises:
        ValueError: If ``training_data`` contains no examples.
    """
    # Fail fast: reject an empty dataset before the trainer is invoked at all.
    # len() is used instead of truthiness so containers with an ambiguous
    # truth value still work.
    example_count = len(training_data)
    if example_count == 0:
        raise ValueError('training_data is empty - provide at least one training example')

    print(f'Starting Colab GPU fine-tuning for {model_name}')
    print(f'Training examples: {example_count}')

    hub_target_known = bool(push_to_hub and hub_model_id)
    if hub_target_known:
        print(f'Will push to Hugging Face Hub as: {hub_model_id}')
    elif push_to_hub:
        # Previously this combination was forwarded without any message.
        # The request is still forwarded unchanged; how the trainer handles a
        # missing id is not visible from this notebook.
        print('Warning: push_to_hub=True but no hub_model_id was given; forwarding the request to the trainer as-is')

    # Use shared trainer with GPU optimizations
    trainer_result, output_dir = trainer.fine_tune_model(
        model_name,
        training_data,
        epochs=epochs,
        learning_rate=learning_rate,
        output_dir="./models",
        push_to_hub=push_to_hub,
        hub_model_id=hub_model_id
    )

    print(f'Colab GPU training completed! Model saved to: {output_dir}')
    if hub_target_known:
        print(f'Model also available on Hugging Face Hub: {hub_model_id}')

    return trainer_result, output_dir

In [None]:
# Test function using shared module
def test_fine_tuned_model_colab(model_path, test_prompt):
    """Test the fine-tuned model using shared module"""
    return trainer.test_fine_tuned_model(model_path, test_prompt)

In [None]:
# Example usage
# Quick-reference text only: nothing here starts a training run. The banner
# previously contained mis-decoded emoji bytes; the intended character is
# restored. All lines are emitted by one print call, one per output line.
print("\n".join([
    "🚀 Colab Fine-tuning Backend Ready!",
    "\nUsage:",
    "1. Prepare training data as list of {'input': '...', 'output': '...'} dictionaries",
    "2. Call fine_tune_model_colab(model_name, training_data, epochs, learning_rate, push_to_hub, hub_model_id)",
    "3. Test with test_fine_tuned_model_colab(model_path, test_prompt)",
    "\nExample:",
    "training_data = [{'input': 'Attackers ran powershell.exe', 'output': 'SIGMA rule...'}]",
    "trainer_result, output_dir = fine_tune_model_colab('microsoft/Phi-3-mini-4k-instruct', training_data)",
    "\nWith Hub push:",
    "trainer_result, output_dir = fine_tune_model_colab('microsoft/Phi-3-mini-4k-instruct', training_data, push_to_hub=True, hub_model_id='username/my-cti-model')",
]))