# Dropout Experiments
This notebook fine-tunes on one language and evaluates on another (English → Wolof, French → Catalan) using:

- BPE dropout for XLM-R
- Word dropout for Glot500

## Setup

In [4]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.3.0-py3-none-any.whl (484 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
!pip install conllu

Collecting conllu
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Downloading conllu-6.0.0-py3-none-any.whl (16 kB)
Installing collected packages: conllu
Successfully installed conllu-6.0.0


In [6]:
!pip install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=ca030275f9c68eff1e4816752a7d898d806411f9bb5d44851442645e123b4f94
  Stored in directory: /root/.cache/pip/wheels/bc/92/f0/243288f899c2eacdfa8c5f9aede4c71a9bad0ee26a01dc5ead
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [1]:
# Mount Google Drive inside the Colab runtime; the project directory this
# notebook changes into afterwards lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd '/content/drive/MyDrive/Lorraine/-Zero-Shot-for-Under-Resourced-Language'

/content/drive/MyDrive/Lorraine/-Zero-Shot-for-Under-Resourced-Language


In [7]:
import local_library.automation_util as automation
from huggingface_hub import login

## 1. XLM-R Experiments (BPE Dropout)

In [10]:
# One configuration per (language pair, dropout probability) combination.
# Language pairs vary slowest, so both runs of a pair stay adjacent.
xlmr_parameters = [
    {
        'tuning_codes': [train_code],
        'test_code': test_code,
        'model_name': 'xlm-roberta-base',
        'tuned_model_name': f'xlmr-bpe-dropout-{dropout_prob}-{pair_tag}',
        # Every run shares the same injection settings and sample threshold.
        'character_level_injection': False,
        'injection_vocab': '',
        'injection_prob': 0.0,
        'sample_threshold': 10000,
        'use_dropout': True,
        'dropout_prob': dropout_prob,
    }
    for train_code, test_code, pair_tag in (
        ('en_ewt', 'wo_wtb', 'en-wo'),       # English training data, Wolof test data
        ('fr_gsd', 'ca_ancora', 'fr-ca'),    # French training data, Catalan test data
    )
    for dropout_prob in (0.1, 0.2)           # 10% and 20% dropout probability
]

In [None]:
# Run XLM-R experiments
# Hands every configuration in `xlmr_parameters` to the project's batch helper
# and keeps the returned value for the results summary cell.
# NOTE(review): the recorded output shows an interactive
# "Do you wish to run the custom code? [y/N]" prompt while loading
# universal_dependencies, so an unattended run blocks here. Presumably the
# dataset loader inside local_library needs trust_remote_code=True; confirm.
print("Starting XLM-R experiments with BPE dropout...")
xlmr_results = automation.batch_tune_eval(xlmr_parameters)

Starting XLM-R experiments with BPE dropout...

Running experiment with BPE dropout on xlmr
Dropout probability: 0.1
Model: xlm-roberta-base
Training data: ['en_ewt']
Test data: wo_wtb


README.md:   0%|          | 0.00/191k [00:00<?, ?B/s]

universal_dependencies.py:   0%|          | 0.00/87.8k [00:00<?, ?B/s]

The repository for universal_dependencies contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/universal_dependencies.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/13.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.71M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.71M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12543 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2002 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2077 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Started Training on Data: en_ewt




<IPython.core.display.Javascript object>

## 2. GLOT500 Experiments (Word Dropout)

In [None]:
# Define parameters for GLOT500 experiments: one configuration per
# (language pair, dropout probability) combination, pairs varying slowest.
# NOTE(review): unlike the XLM-R configurations, these carry no injection or
# sample_threshold keys; confirm batch_tune_eval supplies defaults for them.
glot500_parameters = [
    {
        'tuning_codes': [train_code],
        'test_code': test_code,
        'model_name': 'cis-lmu/glot500-base',
        'tuned_model_name': f'glot500-word-dropout-{dropout_prob}-{pair_tag}',
        'use_dropout': True,
        'dropout_prob': dropout_prob,
    }
    for train_code, test_code, pair_tag in (
        ('en_ewt', 'wo_wtb', 'en-wo'),       # English -> Wolof experiments
        ('fr_gsd', 'ca_ancora', 'fr-ca'),    # French -> Catalan experiments
    )
    for dropout_prob in (0.1, 0.2)           # 10% and 20% word dropout
]

In [None]:
# Run GLOT500 experiments
# Hands every configuration in `glot500_parameters` to the project's batch
# helper and keeps the returned value for the results summary cell.
print("Starting GLOT500 experiments with word dropout...")
glot500_results = automation.batch_tune_eval(glot500_parameters)

## Results Analysis

In [None]:
def print_experiment_results(results, experiment_name):
    """
    Write a plain-text report for one set of experiment results to stdout.

    Args:
        results: Iterable of per-experiment mappings. Each entry must provide
            the keys 'model_name', 'tuning_codes', 'test_code' and 'result';
            'dropout_type' and 'dropout_prob' are optional and are shown as
            'N/A' when absent.
        experiment_name: Label placed in the report heading.
    """
    heading_rule = "=" * 80   # closes the report heading
    entry_rule = "-" * 80     # closes each individual experiment

    print("\n{} Results:".format(experiment_name))
    print(heading_rule)

    for entry in results:
        print("\nModel: {}".format(entry['model_name']))
        print("Training Data: {}".format(entry['tuning_codes']))
        print("Test Data: {}".format(entry['test_code']))
        # Optional fields fall back to a placeholder instead of raising.
        for label, key in (("Dropout Type", 'dropout_type'),
                           ("Dropout Probability", 'dropout_prob')):
            print("{}: {}".format(label, entry.get(key, 'N/A')))
        print("\nResults:")
        print(entry['result'])
        print(entry_rule)

In [None]:
# Emit the overall banner, then one report per experiment family.
# Both batch-run cells must have completed in this kernel session, because
# `xlmr_results` and `glot500_results` are only assigned there.
print("\nFinal Results Summary", "=" * 80, sep="\n")
print_experiment_results(xlmr_results, "XLM-R BPE Dropout")
print_experiment_results(glot500_results, "GLOT500 Word Dropout")