# MediTranslator v3 En→Vi Training

This notebook trains the v3_en2vi model (278M parameters) for English→Vietnamese medical text translation.

In [None]:
# Cell 1: Install Dependencies
!pip install -q torch torchaudio torchvision
!pip install -q wandb pyyaml tqdm

In [None]:
# Cell 2: Clone Repository
import os
if not os.path.exists('MediTranslator'):
    !git clone https://github.com/MothMalone/MediTranslator.git
%cd MediTranslator
print('Repository cloned successfully')

In [None]:
# Cell 3: Setup Data Paths
# Copies the parallel train/test text files from the Kaggle input directory
# into the data directory inside the repository checkout.
import os
import shutil

# Setup paths
DATA_INPUT = '/kaggle/input/mediatranslator-training-data'
DATA_OUTPUT = './data/raw_opus100'

# Copy training data files
files = ['train.en.txt', 'train.vi.txt', 'public_test.en.txt', 'public_test.vi.txt']

# Fail fast, before creating or copying anything, if the dataset is not where
# we expect it; otherwise a missing file would surface mid-loop and leave a
# partially populated output directory.
missing = [name for name in files if not os.path.isfile(os.path.join(DATA_INPUT, name))]
if missing:
    raise FileNotFoundError(
        f'Missing input file(s) in {DATA_INPUT}: {missing}. '
        'Check that the dataset is attached to this notebook.'
    )

os.makedirs(DATA_OUTPUT, exist_ok=True)

for name in files:
    src = os.path.join(DATA_INPUT, name)
    dst = os.path.join(DATA_OUTPUT, name)
    shutil.copy(src, dst)
    size = os.path.getsize(dst) / 1024 / 1024  # bytes -> MiB
    print(f'✓ Copied {name} ({size:.2f} MB)')

print('\n✅ Data setup complete!')
print(f'Files in {DATA_OUTPUT}:')
# os.listdir returns entries in arbitrary order; sort for stable output
for entry in sorted(os.listdir(DATA_OUTPUT)):
    print(f'  - {entry}')

In [None]:
# Cell 4: Login to W&B (for tracking)
import os
from getpass import getpass

import wandb

# Never store the API key in the notebook. Read it from the environment and
# fall back to an interactive prompt whose input is not echoed.
# NOTE(review): any key that was ever committed in this notebook must be
# revoked and regenerated in the W&B account settings.
WANDB_API_KEY = (os.environ.get('WANDB_API_KEY') or getpass('W&B API key: ')).strip()
if not WANDB_API_KEY:
    raise ValueError('No W&B API key provided; set WANDB_API_KEY or enter it at the prompt.')

# Export the key so the training subprocess launched later inherits it
os.environ['WANDB_API_KEY'] = WANDB_API_KEY
wandb.login(key=WANDB_API_KEY, anonymous="never")
print('✅ Logged in to W&B! Loss will be tracked.')
print('View your training at: https://wandb.ai/joshuafoshua-university-of-engineering-and-technology-hanoi/nlp-transformer-mt')

In [None]:
# Cell 5: Train Model with v3_en2vi Config
# Runs the training script as a shell command, passing the v3_en2vi config file.
# Both paths are relative, so this cell must be executed from the repository
# root (Cell 2 changes the working directory into MediTranslator).
!python scripts/train.py --config experiments/v3_en2vi/config.yaml

## Training Outputs

Once the training cell above finishes, checkpoints are saved to: `experiments/v3_en2vi/checkpoints/`

Logs are available at: `experiments/v3_en2vi/logs/`