# Logging Pretrained Models to MLflow via DagsHub

This notebook logs the two pretrained models used in the API:
- EasyOCR for Japanese/English OCR (only a placeholder text file is logged for it, not model weights)
- MarianMT (Helsinki-NLP/opus-mt-ja-en) for Japanese-to-English translation

In [None]:
!pip install -q mlflow dagshub transformers torch easyocr

In [None]:
import mlflow
from dagshub import dagshub_logger
from transformers import MarianMTModel, MarianTokenizer
import easyocr
import os

In [None]:
# Point MLflow at the DagsHub tracking server (replace the URL with your own
# repository if needed) and select the experiment that runs are recorded under.
tracking_uri = "https://dagshub.com/hostephane/ML.mlflow"
experiment_name = "ocr_translation_baseline"
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

In [None]:
# Record both pretrained models inside a single MLflow run so they share the
# same tags and appear together in the experiment.
with mlflow.start_run(run_name="log_pretrained_models"):
    mlflow.set_tags({
        "project": "ocr-manga-translator",
        "type": "pretrained",
        "stage": "baseline",
    })

    # === Log EasyOCR ===
    # Instantiate the reader for Japanese and English. The object itself is not
    # logged below; only a placeholder note is uploaded for EasyOCR.
    print("Loading EasyOCR reader...")
    reader = easyocr.Reader(['ja', 'en'])
    dummy_model_path = "easyocr_dummy.txt"
    try:
        with open(dummy_model_path, "w", encoding="utf-8") as f:
            f.write("EasyOCR model is loaded dynamically; no static file to log.")
        mlflow.log_artifact(dummy_model_path, artifact_path="easyocr")
    finally:
        # Clean up the placeholder even when the upload fails, so a failed run
        # does not leave a stray file in the working directory.
        if os.path.exists(dummy_model_path):
            os.remove(dummy_model_path)

    # === Log MarianMT Model ===
    # Single source of truth for the checkpoint id used by model and tokenizer.
    marian_model_id = "Helsinki-NLP/opus-mt-ja-en"
    print("Downloading MarianMT model and tokenizer...")
    model = MarianMTModel.from_pretrained(marian_model_id)
    tokenizer = MarianTokenizer.from_pretrained(marian_model_id)

    # Model and tokenizer are written to the same directory so that the whole
    # directory can be uploaded as one artifact tree.
    print("Saving Marian model locally...")
    model_path = "marian_mt_model"
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)

    # NOTE: the local copy under model_path is left on disk after logging.
    mlflow.log_artifacts(model_path, artifact_path="marian_mt")

    print("✅ Models logged to MLflow")