# Test Japanese to English Translation

This notebook tests the translation of Japanese text to English using a MarianMT model from Hugging Face, with local model caching.

In [7]:
from transformers import MarianMTModel, MarianTokenizer
import os

# 1. Notebook configuration: Hub model id, local save directory, sample input
model_name = 'Helsinki-NLP/opus-mt-ja-en'
# abspath resolves the relative location against the current working directory
custom_model_path = os.path.abspath('../models/translation/ja-en')
japanese_text = "これは日本語のテキストです。"

# Echo the configuration, one setting per line
print(
    f"Model: {model_name}",
    f"Custom model path: {custom_model_path}",
    f"Japanese text: {japanese_text}",
    sep="\n",
)

Model: Helsinki-NLP/opus-mt-ja-en
Custom model path: /Volumes/Work/Apps/harvey/harvey-sidecars/models/translation/ja-en
Japanese text: これは日本語のテキストです。


In [8]:
# 2. Check if the model exists, and if not, download it
os.makedirs(custom_model_path, exist_ok=True)

# config.json alone is not a reliable "already downloaded" marker: the model is
# saved before the tokenizer below, so a run interrupted in between leaves
# config.json behind in a directory the translation pipeline cannot load from.
# Require the model config, the tokenizer config and a weights file instead.
config_path = os.path.join(custom_model_path, 'config.json')
tokenizer_config_path = os.path.join(custom_model_path, 'tokenizer_config.json')
# Weights are expected as *.safetensors or *.bin, depending on the installed
# transformers version -- NOTE(review): extend if another format is used.
has_weights = any(
    file_name.endswith(('.safetensors', '.bin'))
    for file_name in os.listdir(custom_model_path)
)
model_is_cached = (
    os.path.exists(config_path)
    and os.path.exists(tokenizer_config_path)
    and has_weights
)

if not model_is_cached:
    print(f"Downloading model '{model_name}' to '{custom_model_path}'...")
    # Fetch tokenizer and model by Hub id, then write both into the custom
    # directory so later cells can load them from the local path.
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    model.save_pretrained(custom_model_path)
    tokenizer.save_pretrained(custom_model_path)
    print("Model downloaded successfully.")
else:
    print(f"Model '{model_name}' already exists at '{custom_model_path}'.")

Downloading model 'Helsinki-NLP/opus-mt-ja-en' to '/Volumes/Work/Apps/harvey/harvey-sidecars/models/translation/ja-en'...


Downloading source.spm:   0%|          | 0.00/782k [00:00<?, ?B/s]

Downloading target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading vocab.json: 0.00B [00:00, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading config.json: 0.00B [00:00, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/303M [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Model downloaded successfully.


In [11]:
from transformers import pipeline

# 3. Build the translation pipeline from the files saved at custom_model_path
print("Loading model from local path for translation...")
translator = pipeline(task="translation_ja_to_en", model=custom_model_path)

# 4. Translate the sample sentence, then show source and result together
print("Translating...")
english_translation = translator(japanese_text)

print(
    f"Japanese text: {japanese_text}",
    f"English translation: {english_translation[0]['translation_text']}",
    sep="\n",
)

Loading model from local path for translation...
Translating...
Japanese text: これは日本語のテキストです。
English translation: This is Japanese text.
