# Dropout Experiments
This notebook runs experiments using:

- BPE dropout for XLM-R
- Word dropout for Glot500

In [None]:
import local_library.automation_util as automation
from huggingface_hub import login

## 1. XLM-R Experiments (BPE Dropout)

In [None]:
# XLM-R experiment grid: every (training treebank, test treebank) pair is
# combined with every dropout probability, pairs first and probabilities
# second, giving four configurations in total.
xlmr_parameters = [
    {
        'tuning_codes': [train_code],      # fresh list per configuration
        'test_code': test_code,
        'model_name': 'xlm-roberta-base',
        'tuned_model_name': f'xlmr-bpe-dropout-{prob}-{pair_tag}',
        'use_dropout': True,
        'dropout_prob': prob,
    }
    for train_code, test_code, pair_tag in (
        ('en_ewt', 'wo_wtb', 'en-wo'),     # English training data -> Wolof test data
        ('fr_gsd', 'ca_ancora', 'fr-ca'),  # French training data -> Catalan test data
    )
    for prob in (0.1, 0.2)                 # 10% and 20% dropout probability
]

In [None]:
# Run XLM-R experiments.
# The whole configuration list is handed to the project's batch helper in a
# single call, and whatever it returns is kept in `xlmr_results` for the
# results-analysis cells, which iterate over it.
# NOTE(review): batch_tune_eval presumably fine-tunes and evaluates one model
# per configuration dict -- confirm in local_library.automation_util.
print("Starting XLM-R experiments with BPE dropout...")
xlmr_results = automation.batch_tune_eval(xlmr_parameters)

## 2. Glot500 Experiments (Word Dropout)

In [None]:
# Glot500 experiment grid: the same (training treebank, test treebank) pairs
# and dropout probabilities as a pair-major cross product, giving four
# configurations in total.
glot500_parameters = [
    {
        'tuning_codes': [train_code],      # fresh list per configuration
        'test_code': test_code,
        'model_name': 'cis-lmu/glot500-base',
        'tuned_model_name': f'glot500-word-dropout-{prob}-{pair_tag}',
        'use_dropout': True,
        'dropout_prob': prob,
    }
    for train_code, test_code, pair_tag in (
        ('en_ewt', 'wo_wtb', 'en-wo'),     # English -> Wolof
        ('fr_gsd', 'ca_ancora', 'fr-ca'),  # French -> Catalan
    )
    for prob in (0.1, 0.2)                 # 10% and 20% word dropout
]

In [None]:
# Run GLOT500 experiments.
# The whole configuration list is handed to the project's batch helper in a
# single call, and whatever it returns is kept in `glot500_results` for the
# results-analysis cells, which iterate over it.
# NOTE(review): batch_tune_eval presumably fine-tunes and evaluates one model
# per configuration dict -- confirm in local_library.automation_util.
print("Starting GLOT500 experiments with word dropout...")
glot500_results = automation.batch_tune_eval(glot500_parameters)

## 3. Results Analysis

In [None]:
def print_experiment_results(results, experiment_name):
    """
    Write a plain-text report for one set of experiments to stdout.

    A titled header is printed first, followed by one section per entry in
    ``results``; each section ends with a dashed rule.

    Args:
        results: Iterable of mapping-like records. Each record must provide
            'model_name', 'tuning_codes', 'test_code' and 'result' (a
            missing one raises KeyError); 'dropout_type' and 'dropout_prob'
            are optional and are shown as 'N/A' when absent.
        experiment_name: Label placed in the report header.
    """
    rule_width = 80
    header_rule = "=" * rule_width
    section_rule = "-" * rule_width

    print(f"\n{experiment_name} Results:")
    print(header_rule)

    for record in results:
        # Required fields: indexed directly, in display order.
        print(f"\nModel: {record['model_name']}")
        print(f"Training Data: {record['tuning_codes']}")
        print(f"Test Data: {record['test_code']}")

        # Optional fields: fall back to a placeholder when absent.
        print(f"Dropout Type: {record.get('dropout_type', 'N/A')}")
        print(f"Dropout Probability: {record.get('dropout_prob', 'N/A')}")

        print("\nResults:")
        print(record['result'])
        print(section_rule)

In [None]:
# Print results for both experiment sets.
# An overall banner is printed first, then one report per experiment set.
# The XLM-R report is emitted before the GLOT500 one, so if `glot500_results`
# has not been produced yet the XLM-R report is still shown before the
# NameError is raised.
print("\nFinal Results Summary")
print("="*80)
print_experiment_results(xlmr_results, "XLM-R BPE Dropout")
print_experiment_results(glot500_results, "GLOT500 Word Dropout")