In [None]:
# Clone repository
!git clone https://github.com/dnanper/X-ALMA-Ablation-Experiment.git
%cd X-ALMA-Ablation-Experiment

In [None]:
# Install dependencies
# Runs the repository's install_alma.sh with bash. The relative path assumes the
# working directory is the repository root, i.e. that the clone cell (which ends
# with %cd into the checkout) has been run first.
# NOTE(review): the script's contents are not visible from this notebook — check
# it for pinned versions and whether a kernel restart is needed afterwards.
!bash install_alma.sh

## Experiment 1: Unpretrained Base (Measure pretrain contribution)

In [None]:
%%time
# Run with unpretrained base (Llama-2-13b-hf)
# Launches run_llmmt.py through `accelerate launch` in prediction mode
# (--do_predict) for the en-cs and cs-en pairs, with
# haoranxu/X-ALMA-13B-Group5 as --model_name_or_path and
# meta-llama/Llama-2-13b-hf as --custom_base_model. --output_dir is
# ./outputs/unpretrained, which is where the result cells below read from.
# Apart from --custom_base_model and --output_dir, every flag matches the
# Experiment 2 cell (seed 42, 5 beams, batch size 1, bf16, 256-token limits);
# keep the two cells in sync so the runs stay comparable.
# NOTE(review): presumably --custom_base_model swaps the base weights underneath
# the X-ALMA model — confirm against run_llmmt.py.
# NOTE(review): meta-llama checkpoints are typically gated on the Hugging Face
# Hub — confirm credentials are configured before running.
# %%time must remain the first line of the cell; it reports the wall time of the run.
!accelerate launch --config_file configs/deepspeed_eval_config_bf16.yaml \
    run_llmmt.py \
    --model_name_or_path haoranxu/X-ALMA-13B-Group5 \
    --custom_base_model meta-llama/Llama-2-13b-hf \
    --do_predict \
    --low_cpu_mem_usage \
    --language_pairs en-cs,cs-en \
    --mmt_data_path placeholder \
    --override_test_data_path haoranxu/WMT23-Test \
    --per_device_eval_batch_size 1 \
    --output_dir ./outputs/unpretrained \
    --predict_with_generate \
    --max_new_tokens 256 \
    --max_source_length 256 \
    --bf16 \
    --seed 42 \
    --num_beams 5 \
    --overwrite_cache \
    --overwrite_output_dir \
    --chat_style

## Experiment 2: Pretrained Base (Normal X-ALMA)

In [None]:
%%time
# Run with pretrained base (ALMA-13B-Pretrain)
# Launches run_llmmt.py through `accelerate launch` in prediction mode
# (--do_predict) for the en-cs and cs-en pairs, with
# haoranxu/X-ALMA-13B-Group5 as --model_name_or_path and
# haoranxu/ALMA-13B-Pretrain as --custom_base_model. --output_dir is
# ./outputs/pretrained, which is where the result cells below read from.
# Apart from --custom_base_model and --output_dir, every flag matches the
# Experiment 1 cell (seed 42, 5 beams, batch size 1, bf16, 256-token limits);
# keep the two cells in sync so the runs stay comparable.
# NOTE(review): presumably --custom_base_model swaps the base weights underneath
# the X-ALMA model — confirm against run_llmmt.py.
# %%time must remain the first line of the cell; it reports the wall time of the run.
!accelerate launch --config_file configs/deepspeed_eval_config_bf16.yaml \
    run_llmmt.py \
    --model_name_or_path haoranxu/X-ALMA-13B-Group5 \
    --custom_base_model haoranxu/ALMA-13B-Pretrain \
    --do_predict \
    --low_cpu_mem_usage \
    --language_pairs en-cs,cs-en \
    --mmt_data_path placeholder \
    --override_test_data_path haoranxu/WMT23-Test \
    --per_device_eval_batch_size 1 \
    --output_dir ./outputs/pretrained \
    --predict_with_generate \
    --max_new_tokens 256 \
    --max_source_length 256 \
    --bf16 \
    --seed 42 \
    --num_beams 5 \
    --overwrite_cache \
    --overwrite_output_dir \
    --chat_style

## View Results

In [None]:
# Display sample translations - Czech to English
# Prints the first SAMPLE_COUNT output lines of each experiment, one section
# after the other, so the two runs can be compared by eye.
# NOTE(review): assumes run_llmmt.py writes predictions to
# <output_dir>/test-<src>-<tgt>.txt — confirm the file name against the script.
SAMPLE_COUNT = 10
CS_EN_OUTPUTS = (
    ("UNPRETRAINED BASE", './outputs/unpretrained/test-cs-en.txt'),
    ("PRETRAINED BASE", './outputs/pretrained/test-cs-en.txt'),
)

for position, (label, path) in enumerate(CS_EN_OUTPUTS):
    if position:
        print()  # blank line between the two sections
    print("=" * 80)
    print(f"{label} - Czech to English (First {SAMPLE_COUNT})")
    print("=" * 80)
    try:
        with open(path, 'r', encoding='utf-8') as f:
            # zip() stops once the range is exhausted, so only the first
            # SAMPLE_COUNT lines are read instead of the whole file.
            for i, line in zip(range(1, SAMPLE_COUNT + 1), f):
                print(f"{i}. {line.strip()}")
    except FileNotFoundError:
        # One failed run should not hide the other run's samples.
        print(f"[missing] {path} not found - did this experiment finish?")

In [None]:
# Display sample translations - English to Czech
# Prints the first SAMPLE_COUNT output lines of each experiment, one section
# after the other, so the two runs can be compared by eye.
# NOTE(review): assumes run_llmmt.py writes predictions to
# <output_dir>/test-<src>-<tgt>.txt — confirm the file name against the script.
SAMPLE_COUNT = 10
EN_CS_OUTPUTS = (
    ("UNPRETRAINED BASE", './outputs/unpretrained/test-en-cs.txt'),
    ("PRETRAINED BASE", './outputs/pretrained/test-en-cs.txt'),
)

for position, (label, path) in enumerate(EN_CS_OUTPUTS):
    if position:
        print()  # blank line between the two sections
    print("=" * 80)
    print(f"{label} - English to Czech (First {SAMPLE_COUNT})")
    print("=" * 80)
    try:
        with open(path, 'r', encoding='utf-8') as f:
            # zip() stops once the range is exhausted, so only the first
            # SAMPLE_COUNT lines are read instead of the whole file.
            for i, line in zip(range(1, SAMPLE_COUNT + 1), f):
                print(f"{i}. {line.strip()}")
    except FileNotFoundError:
        # One failed run should not hide the other run's samples.
        print(f"[missing] {path} not found - did this experiment finish?")

## Download Results

In [None]:
# Create zip file with all results
# `zip -r` recurses into outputs/ (resolved against the current working
# directory) and writes ablation_results.zip into that same directory.
# NOTE(review): when the archive already exists, zip presumably updates it in
# place rather than recreating it — confirm, or delete the old archive first if
# stale entries matter.
!zip -r ablation_results.zip outputs/

# Download link will appear in output
# FileLink is the cell's last expression, so the notebook renders it as a
# clickable link to the archive.
from IPython.display import FileLink
FileLink('ablation_results.zip')

## Quick Comparison

In [None]:
# Count lines and show statistics
import os

def count_lines(filepath):
    """Return the number of lines in the UTF-8 text file at ``filepath``.

    The file is streamed line by line, so counting a large prediction file
    does not require holding all of its lines in memory at once.

    Args:
        filepath: Path of the file to read.

    Returns:
        int: Number of lines; 0 for an empty file. A final line without a
        trailing newline still counts as one line.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return sum(1 for _ in f)

# Print the line count of every output file (one line is reported as one
# translation), grouped by experiment, followed by the manual follow-up steps.
SUMMARY_RUNS = (
    ("Unpretrained Base", "meta-llama/Llama-2-13b-hf", "./outputs/unpretrained"),
    ("Pretrained Base", "haoranxu/ALMA-13B-Pretrain", "./outputs/pretrained"),
)
SUMMARY_DIRECTIONS = (
    ("cs→en", "test-cs-en.txt"),
    ("en→cs", "test-en-cs.txt"),
)

print("=" * 80)
print("EXPERIMENT SUMMARY")
print("=" * 80)
for run_label, base_model, output_dir in SUMMARY_RUNS:
    print(f"\n{run_label} ({base_model}):")
    for direction, filename in SUMMARY_DIRECTIONS:
        n_lines = count_lines(f"{output_dir}/{filename}")
        print(f"  - {direction} translations: {n_lines}")

print("\nNext steps:")
# The archive written by the download cell is ablation_results.zip; the message
# must name that file so readers look for the right download.
print("1. Download the ablation_results.zip file")
print("2. Calculate BLEU/COMET scores with reference translations")
print("3. Compare the scores to measure pretrain contribution")