# NLP Final Project: Dataset Cartography for Artifact Mitigation
## Fast GPU Training in Google Colab

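This notebook runs the complete training pipeline on a Colab GPU runtime: it sets up the environment, runs the training script, summarizes the baseline and cartography model results, and packages the outputs for download.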

## 1. Setup Environment

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, which one.
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if cuda_ready:
    # Device index 0 is the first visible GPU.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    # total_memory is divided by 1e9 to display decimal gigabytes.
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# Install required packages
!pip install datasets transformers torch evaluate matplotlib seaborn scipy

In [None]:
# Clone repository
!git clone https://github.com/agsilver108/nlp-fa25-final-project.git
%cd nlp-fa25-final-project

## 2. Run Fast Training

In [None]:
# Run the complete training pipeline.
# The script is executed in the notebook's own namespace (same as before), so
# anything it defines stays available to later cells.
# NOTE(review): this executes whatever code is in the cloned repository;
# only run it against a repository you trust.
TRAINING_SCRIPT = 'colab_training.py'

# Read through a context manager so the file handle is closed deterministically.
with open(TRAINING_SCRIPT, 'r', encoding='utf-8') as script_file:
    script_source = script_file.read()

# Compiling with the real filename makes tracebacks point at the script
# instead of the anonymous "<string>".
exec(compile(script_source, TRAINING_SCRIPT, 'exec'))

## 3. View Results

In [None]:
# Load the training metrics JSON and print a summary for both models.
# Presumably this file is written by colab_training.py - confirm against the script.
import json

RESULTS_PATH = '/content/colab_training_results.json'

# JSON is UTF-8 by specification; be explicit rather than rely on the locale.
with open(RESULTS_PATH, 'r', encoding='utf-8') as f:
    results = json.load(f)

# The banner emoji was previously stored as mojibake (UTF-8 bytes decoded as
# cp1252); it is restored here to the intended character.
print("🎯 Training Results Summary:")

# Both model sections expose the same three metric keys, so print them in a loop.
for section_key, section_title in (
    ("baseline", "Baseline Model"),
    ("cartography", "Cartography Model"),
):
    metrics = results[section_key]
    print(f"\n{section_title}:")
    print(f"  Exact Match: {metrics['exact_match']:.3f}")
    print(f"  F1 Score: {metrics['f1']:.3f}")
    print(f"  Training Time: {metrics['training_time']:.1f}s")

# The '+' format flag forces an explicit sign so regressions stand out.
improvement = results['improvement']
print("\nImprovement:")
print(f"  EM Diff: {improvement['em_diff']:+.3f}")
print(f"  F1 Diff: {improvement['f1_diff']:+.3f}")

## 4. Download Results

In [None]:
# Download trained models and results
from google.colab import files

# Zip results for download
!zip -r colab_results.zip /content/baseline_model /content/cartography_model /content/colab_training_results.json
files.download('colab_results.zip')