## 1. Setup Environment

In [None]:
# Mount Google Drive so trained models survive the end of the Colab session
from pathlib import Path

from google.colab import drive

drive.mount('/content/drive')

# Create the directory that the "Save Models" step copies into.
# Path.mkdir(parents=True, exist_ok=True) is the pure-Python equivalent of
# `mkdir -p`; unlike a `!` shell call it raises if the directory cannot be
# created, so a "Run All" stops here instead of failing later.
Path('/content/drive/MyDrive/sarcasm_models').mkdir(parents=True, exist_ok=True)

In [None]:
# Clone repository
!git clone https://github.com/carlo-scr/Sarcasm.git
%cd Sarcasm

In [None]:
# Install dependencies
!pip install -q transformers datasets peft trl torch accelerate scikit-learn

## 2. Verify GPU

In [None]:
import torch

# Query CUDA availability once and reuse the answer below.
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")

if cuda_ready:
    # Device 0 is the only GPU inspected here.
    device_name = torch.cuda.get_device_name(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Name: {device_name}")
    # Reported in decimal gigabytes (bytes / 1e9).
    print(f"GPU Memory: {total_bytes / 1e9:.2f} GB")

## 3. Create Data Splits

In [None]:
# Create train/test splits for iSarcasm.
# The script path is relative, so this relies on the working directory being
# the repository root (set by the `%cd Sarcasm` in the clone cell).
# NOTE(review): this script is invoked from the repo root while the later
# stages live under scripts/ — confirm the path is correct.
# `!` does not raise if the script fails; check the output before continuing.
!python create_splits.py

## 4. Phase 1: Supervised Fine-Tuning (SFT)

Train on the SARC dataset to learn general sarcasm patterns.

**Time:** ~5-10 minutes on T4 GPU

In [None]:
# Phase 1 (SFT): run the fine-tuning script as a separate Python process.
# The path is relative to the repository root.
# Presumably writes the model to models/sft/ (the directory copied to Drive
# in the save step) — confirm against the script.
# `!` does not raise if the script fails; check the output before continuing.
!python scripts/finetune_qwen.py

## 5. Phase 2: Direct Preference Optimization (DPO)

Refine with iSarcasm preferences for better accuracy.

**Time:** ~3-5 minutes on T4 GPU

In [None]:
# Phase 2 (DPO): run the preference-optimization script as a separate process.
# The path is relative to the repository root.
# Presumably builds on the Phase 1 output and writes models/dpo_enhanced/
# (the directory copied to Drive in the save step) — confirm against the script.
# `!` does not raise if the script fails; check the output before continuing.
!python scripts/dpo_train.py

## 6. Evaluate All Models

Compare Base → SFT → DPO on the held-out test set.

In [None]:
# Evaluate all stages by running the evaluation script as a separate process.
# The path is relative to the repository root.
# Presumably writes comparative_results.json to the working directory (the
# file loaded in the "View Results" cell) — confirm against the script.
# `!` does not raise if the script fails; check the output before continuing.
!python scripts/evaluate_all_stages.py

## 7. View Results

In [None]:
import json

# Load the evaluation summary from the current working directory.
with open('comparative_results.json', 'r') as results_file:
    results = json.load(results_file)

# Header banner.
banner = "=" * 70
print(f"\n{banner}")
print("COMPARATIVE RESULTS")
print(banner)

# (label, key) pairs in display order; labels are padded so values line up.
metric_rows = (
    ("Accuracy: ", 'accuracy'),
    ("Precision:", 'precision'),
    ("Recall:   ", 'recall'),
    ("F1 Score: ", 'f1_score'),
)

# One section per evaluated model, each metric rendered as a percentage.
for entry in results['models']:
    print(f"\n{entry['model_name']}:")
    for label, key in metric_rows:
        print(f"  {label} {entry[key]:.2%}")

## 8. Save Models to Google Drive

In [None]:
# Save models and evaluation results to Google Drive.
# shutil is used instead of `!cp` because a failed `!` command does not raise:
# the success message below would be printed even if nothing was copied.
import shutil
from pathlib import Path

drive_root = Path('/content/drive/MyDrive')
if not drive_root.is_dir():
    # Without this check the files would be written to the local Colab disk
    # and lost when the session ends.
    raise RuntimeError(
        "Google Drive is not mounted at /content/drive; run the setup cell first."
    )

drive_dir = drive_root / 'sarcasm_models'
drive_dir.mkdir(parents=True, exist_ok=True)

# Copy each model directory to <drive_dir>/<name>. dirs_exist_ok=True merges
# into a copy left by a previous run, matching `cp -r models/<name> <drive_dir>/`.
for model_dir in ('models/sft', 'models/dpo_enhanced'):
    shutil.copytree(model_dir, drive_dir / Path(model_dir).name, dirs_exist_ok=True)

shutil.copy('comparative_results.json', drive_dir)

# Reached only if every copy above succeeded.
print("✓ Models saved to Google Drive: MyDrive/sarcasm_models/")

## 9. Download Results (Optional)

In [None]:
# Colab-only helper module for transferring files to the browser
from google.colab import files

# Download results JSON through the browser.
# The path is relative to the current working directory.
files.download('comparative_results.json')

## Summary

**Training Complete!**

Models saved to:
- Google Drive: `MyDrive/sarcasm_models/`
- Local (Colab): `models/sft/` and `models/dpo_enhanced/`

**Expected Performance:**
- Base Model: ~49% accuracy
- SFT Model: ~63% accuracy (+14 pts over Base)
- DPO Model: ~68% accuracy (+5 pts over SFT)

**Next Steps:**
1. Download models from Google Drive
2. Use for inference in your application
3. Upload to Hugging Face Hub for sharing