In [None]:
# 0️⃣ Install dependencies
!pip install -qU sentence-transformers datasets sklearn datasets

# 1️⃣ Imports and data preparation
import gzip
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer, InputExample, losses
from sentence_transformers.evaluation import TranslationEvaluator
from torch.utils.data import DataLoader

def clean(text):
    """Return ``text`` with U+00A0 / U+202F turned into plain spaces and both ends trimmed."""
    normalized = text.translate({0x00A0: " ", 0x202F: " "})
    return normalized.strip()

# Read and clean training data
# The id stream is only walked in lockstep with the two text streams; its
# values are not stored.
with gzip.open("train.id.gz", "rt", encoding="utf-8") as idf, \
     gzip.open("train.src.gz", "rt", encoding="utf-8") as srcf, \
     gzip.open("train.trg.gz", "rt", encoding="utf-8") as trgf:
    train_rows = [
        {"translation": {"ru": clean(ru_line), "tk": clean(tk_line)}}
        for _id_line, ru_line, tk_line in zip(idf, srcf, trgf)
    ]

# Split into train/val/test
# 10% is held out first, then halved into validation and extra test rows
# (both splits use the same fixed seed).
train_data, temp = train_test_split(train_rows, random_state=42, test_size=0.10)
val_data, extra_test = train_test_split(temp, random_state=42, test_size=0.5)

# Read and clean original test data
with open("test.id", "r", encoding="utf-8") as idf, \
     open("test.src", "r", encoding="utf-8") as srcf, \
     open("test.trg", "r", encoding="utf-8") as trgf:
    test_rows = [
        {"translation": {"ru": clean(ru_line), "tk": clean(tk_line)}}
        for _id_line, ru_line, tk_line in zip(idf, srcf, trgf)
    ]

# Combine original + extra test
full_test = [*test_rows, *extra_test]

# ==== SIZE REPORT: RU–TK bitext ====
# Row counts of every list built above, printed as one small tree.
total_train_rows = len(train_rows)   # rows read from the train files, before splitting
n_train, n_val, n_extra_test = (len(part) for part in (train_data, val_data, extra_test))
n_test_orig = len(test_rows)         # rows read from the original test files
n_test_final = len(full_test)        # original test rows + extra_test rows

print("\n".join([
    "📊 RU–TK Bitext sizes",
    f"  original train file     : {total_train_rows}",
    f"    ├─ train (90%)        : {n_train}",
    f"    ├─ val (5%)           : {n_val}",
    f"    └─ extra_test (5%)    : {n_extra_test}",
    f"  original test file      : {n_test_orig}",
    f"  final test used         : {n_test_final}  (original test + extra_test)",
]))


# Build DatasetDict
dataset = DatasetDict({
    split_name: Dataset.from_list(split_rows)
    for split_name, split_rows in (
        ("train", train_data),
        ("validation", val_data),
        ("test", full_test),
    )
})


def _side(split_name, lang_code):
    """Collect one language side ("ru" or "tk") of every row in a split of ``dataset``."""
    return [row["translation"][lang_code] for row in dataset[split_name]]


# 2️⃣ Build InputExamples
# Training pairs become InputExample(texts=[ru, tk]); validation and test stay
# as two parallel lists of strings.
train_examples = [
    InputExample(texts=[ru_text, tk_text])
    for ru_text, tk_text in zip(_side("train", "ru"), _side("train", "tk"))
]
val_src = _side("validation", "ru")
val_tgt = _side("validation", "tk")
test_src = _side("test", "ru")
test_tgt = _side("test", "tk")

print(f"🧮 Dataset sizes → Train: {len(train_examples)}, Val: {len(val_src)}, Test: {len(test_src)}")


# 3️⃣ Setup model, dataloader, loss, evaluator
# Start from the pretrained LaBSE checkpoint; every (ru, tk) pair in
# train_examples is one training item, served in shuffled batches of 32.
model = SentenceTransformer('sentence-transformers/LaBSE')
train_loader = DataLoader(train_examples, shuffle=True, batch_size=32)
train_loss = losses.MultipleNegativesRankingLoss(model)
# Validation metric: translation matching between val_src and val_tgt
# (reported as src2trg / trg2src / mean accuracy in the training log below).
val_evaluator = TranslationEvaluator(val_src, val_tgt, name="val_rus-tuk")

# 4️⃣ Fine-tune LaBSE
# 3 epochs, 100 warm-up steps, validation requested every 500 steps; the
# training log below also shows a row at step 3279.
# NOTE(review): presumably fit() writes the best-scoring checkpoint to
# output_path — confirm against the installed sentence-transformers version.
model.fit(
    train_objectives=[(train_loader, train_loss)],
    epochs=3,
    warmup_steps=100,
    evaluator=val_evaluator,
    evaluation_steps=500,
    output_path="labse_finetuned_rustuk"
)

# 5️⃣ Final evaluation on test set
# Scores the in-memory `model` as fit() left it, on the combined test set
# (original test rows + extra_test). As the last expression of the cell, the
# returned scores are what the notebook displays.
test_evaluator = TranslationEvaluator(test_src, test_tgt, name="test_rus-tuk")
test_evaluator(model, output_path="labse_finetuned_rustuk")


  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
📊 RU–TK Bitext sizes
  original train file     : 116565
    ├─ train (90%)        : 104908
    ├─ val (5%)           : 5828
    └─ extra_test (5%)    : 5829
  original test file      : 9
  final test used         : 5838  (original test + extra_test)
🧮 Dataset sizes → Train: 104908, Val: 5828, Test: 5838


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/804 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/114 [00:00<?, ?B/s]

2_Dense/model.safetensors:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjelal[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Val Rus-tuk Src2trg Accuracy,Val Rus-tuk Trg2src Accuracy,Val Rus-tuk Mean Accuracy
500,0.326,No log,0.782086,0.774537,0.778312
1000,0.2913,No log,0.783116,0.782086,0.782601
1500,0.2612,No log,0.793583,0.792725,0.793154
2000,0.2608,No log,0.793926,0.789293,0.791609
2500,0.2533,No log,0.797014,0.794097,0.795556
3000,0.2292,No log,0.800103,0.788607,0.794355
3279,0.2292,No log,0.798216,0.794955,0.796585
3500,0.1954,No log,0.806966,0.800961,0.803964
4000,0.155,No log,0.805765,0.802334,0.804049
4500,0.1669,No log,0.806108,0.800103,0.803106


Step,Training Loss,Validation Loss,Val Rus-tuk Src2trg Accuracy,Val Rus-tuk Trg2src Accuracy,Val Rus-tuk Mean Accuracy
500,0.326,No log,0.782086,0.774537,0.778312
1000,0.2913,No log,0.783116,0.782086,0.782601
1500,0.2612,No log,0.793583,0.792725,0.793154
2000,0.2608,No log,0.793926,0.789293,0.791609
2500,0.2533,No log,0.797014,0.794097,0.795556
3000,0.2292,No log,0.800103,0.788607,0.794355
3279,0.2292,No log,0.798216,0.794955,0.796585
3500,0.1954,No log,0.806966,0.800961,0.803964
4000,0.155,No log,0.805765,0.802334,0.804049
4500,0.1669,No log,0.806108,0.800103,0.803106


{'test_rus-tuk_src2trg_accuracy': 0.8191161356628982,
 'test_rus-tuk_trg2src_accuracy': 0.8107228502911956,
 'test_rus-tuk_mean_accuracy': 0.8149194929770469}

In [None]:
# Re-display the split sizes held in the current notebook session.
_split_sizes = (len(train_examples), len(val_src), len(test_src))
print(f"🧮 Dataset sizes → Train: {_split_sizes[0]}, Val: {_split_sizes[1]}, Test: {_split_sizes[2]}")

🧮 Dataset sizes → Train: 104908, Val: 5828, Test: 5838


In [None]:
# ✅ Install dependencies
# Quiet install of the packages imported by this evaluation cell.
# NOTE(review): scipy is imported below but not listed here; presumably it is
# pulled in as a dependency of scikit-learn — confirm for your environment.
!pip install --quiet sentence-transformers datasets transformers torch numpy scikit-learn

import os, torch, numpy as np, logging
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
from datasets import load_dataset
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# 🔹 Setup logging
# Root logger at INFO with timestamped records sent to a StreamHandler.
_console_handler = logging.StreamHandler()
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[_console_handler],
    level=logging.INFO,
)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
logging.info(f"Using device: {device}")

# 🔹 Utility Functions
def get_embeddings(model, sentences, batch_size=32):
    """Encode ``sentences`` with ``model`` and return normalised embeddings as a NumPy array.

    The input is walked in slices of ``batch_size``. Each slice's embeddings
    are moved to the CPU before the next slice is encoded, and all slices are
    concatenated at the end, so row ``i`` belongs to ``sentences[i]``.

    Args:
        model: object exposing ``encode(...)``; the callers in this notebook
            pass SentenceTransformer instances.
        sentences: sliceable sequence of strings.
        batch_size: sentences per slice. It is also forwarded to
            ``model.encode`` so that a value larger than encode's own default
            is honoured instead of being silently re-split.

    Returns:
        Array with one embedding row per input sentence.

    Note:
        An empty ``sentences`` leaves nothing to concatenate, and ``torch.cat``
        rejects an empty list.
    """
    embs = []
    for i in range(0, len(sentences), batch_size):
        batch = sentences[i:i + batch_size]
        emb = model.encode(
            batch,
            batch_size=batch_size,
            convert_to_tensor=True,
            device=device,
            normalize_embeddings=True,
        )
        # Offload right away so only one slice of embeddings sits on the device.
        embs.append(emb.cpu())
    return torch.cat(embs).numpy()

def bitext_retrieval(src_emb, tgt_emb):
    """Score source→target retrieval where row ``i`` of both inputs is a gold pair.

    Returns:
        ``(p1, mrr)``: the share of rows whose highest-similarity column is
        their own index, and the mean reciprocal rank, where a row's rank is
        one plus the number of columns scoring strictly higher than its gold
        column.
    """
    scores = cosine_similarity(src_emb, tgt_emb)
    gold = np.arange(scores.shape[0])
    hit_at_1 = scores.argmax(axis=1) == gold
    gold_scores = scores[gold, gold]
    n_better = (scores > gold_scores[:, None]).sum(axis=1)
    reciprocal_ranks = 1.0 / (n_better + 1)
    return hit_at_1.mean(), reciprocal_ranks.mean()

def compute_sts_metrics(emb1, emb2, gold_scores):
    """Correlate row-wise dot products of two embedding matrices with gold scores.

    The callers pass embeddings produced with ``normalize_embeddings=True``, so
    the dot product of matching rows is used as their cosine similarity.

    Returns:
        ``(pearson, spearman)`` correlation coefficients.
    """
    row_dots = (emb1 * emb2).sum(axis=1)
    pearson = pearsonr(row_dots, gold_scores)[0]
    spearman = spearmanr(row_dots, gold_scores)[0]
    return pearson, spearman

# 🔹 Load test bitext for Rus-Tuk
SRC_FILE = "test.src"
TGT_FILE = "test.trg"
with open(SRC_FILE, encoding="utf-8") as f1, open(TGT_FILE, encoding="utf-8") as f2:
    src_sents = [line.strip() for line in f1]
    tgt_sents = [line.strip() for line in f2]
# Retrieval treats line i of both files as one translation pair, so a length
# mismatch must stop the run. Raised explicitly because `assert` statements are
# stripped when Python runs with -O.
if len(src_sents) != len(tgt_sents):
    raise ValueError(
        f"{SRC_FILE} has {len(src_sents)} lines but {TGT_FILE} has {len(tgt_sents)}; "
        "the two files must be line-aligned."
    )
logging.info(f"Loaded {len(src_sents)} test bitext pairs.")

# 🔹 Load the Russian STS Benchmark test split
# (the dataset id is ru-stsbenchmark-sts; earlier messages called it "STS17")
sts = load_dataset("ai-forever/ru-stsbenchmark-sts", split="test")
# Materialise the text columns as plain lists so they can be sliced per batch.
sts_sents1 = list(sts["sentence1"])
sts_sents2 = list(sts["sentence2"])
sts_scores = np.array(sts["score"])
logging.info(f"Loaded ru-STS-Benchmark test set: {len(sts_sents1)} sentence pairs.")

# 🔹 Load pretrained and fine-tuned LaBSE models
# "labse_finetuned_rustuk" is the output_path used by the fine-tuning cell, so
# that cell must have been run (or the folder restored) before this one.
logging.info("Loading pretrained and fine-tuned LaBSE models...")
pretrained_labse = SentenceTransformer("sentence-transformers/LaBSE").to(device)
finetuned_labse = SentenceTransformer("labse_finetuned_rustuk").to(device)

# 🔹 Encode embeddings for Bitext Retrieval
# Both models embed the same source/target sentences so the scores are comparable.
logging.info("Encoding embeddings for Bitext Retrieval...")
src_pre = get_embeddings(pretrained_labse, src_sents)
tgt_pre = get_embeddings(pretrained_labse, tgt_sents)
src_ft  = get_embeddings(finetuned_labse, src_sents)
tgt_ft  = get_embeddings(finetuned_labse, tgt_sents)

# 🔹 Evaluate Bitext Retrieval
# NOTE(review): src_sents/tgt_sents come from test.src/test.trg only. The size
# report in the training cell's output lists 9 rows for that file, so these
# scores move in steps of 1/9 and should be read as a smoke test.
print("\n🎯 Bitext Retrieval Results:")
for name, src, tgt in [
    ("Pretrained LaBSE", src_pre, tgt_pre),
    ("Fine-tuned LaBSE", src_ft, tgt_ft)
]:
    p1, mrr = bitext_retrieval(src, tgt)
    print(f"{name:20} → Precision@1: {p1:.4f}, MRR: {mrr:.4f}")

# 🔹 Encode embeddings for STS
# Each model embeds both sentence columns of the STS test split.
logging.info("Encoding embeddings for STS Evaluation...")
sts1_pre = get_embeddings(pretrained_labse, sts_sents1)
sts2_pre = get_embeddings(pretrained_labse, sts_sents2)
sts1_ft  = get_embeddings(finetuned_labse, sts_sents1)
sts2_ft  = get_embeddings(finetuned_labse, sts_sents2)

# 🔹 Evaluate STS
# Pearson and Spearman correlation between pairwise similarity and gold scores.
print("\n📐 STS Results:")
pearson_pre, spearman_pre = compute_sts_metrics(sts1_pre, sts2_pre, sts_scores)
pearson_ft,  spearman_ft  = compute_sts_metrics(sts1_ft, sts2_ft, sts_scores)
print(f"Pretrained LaBSE     → Pearson r = {pearson_pre:.4f}, Spearman ρ = {spearman_pre:.4f}")
print(f"Fine-tuned LaBSE     → Pearson r = {pearson_ft:.4f}, Spearman ρ = {spearman_ft:.4f}")



🎯 Bitext Retrieval Results:
Pretrained LaBSE     → Precision@1: 0.8889, MRR: 0.9444
Fine-tuned LaBSE     → Precision@1: 1.0000, MRR: 1.0000

📐 STS Results:
Pretrained LaBSE     → Pearson r = 0.7357, Spearman ρ = 0.7335
Fine-tuned LaBSE     → Pearson r = 0.6849, Spearman ρ = 0.6809


In [None]:
# ============================
# ⚙️ CONFIG
# ============================
# Base checkpoint and the folders the two LoRA adapters are written to.
BASE_MODEL_NAME = "sentence-transformers/LaBSE"
BITEXT_ADAPTER_DIR = "adapters/bitext"
STS_ADAPTER_DIR = "adapters/sts"

# Your Rus–Tuk files (as in your working code): ids, Russian source, Turkmen target.
TRAIN_ID, TRAIN_SRC, TRAIN_TRG = "train.id.gz", "train.src.gz", "train.trg.gz"
TEST_ID, TEST_SRC, TEST_TRG = "test.id", "test.src", "test.trg"

# Training settings, one pair per adapter, plus the encoder's sequence cap.
BITEXT_BATCH_SIZE = 32
STS_BATCH_SIZE = 32
BITEXT_EPOCHS = 3
STS_EPOCHS = 3
MAX_SEQ_LEN = 256

# ============================
# 0️⃣ Install deps
# ============================
!pip -qU install "sentence-transformers>=2.6.1" "transformers>=4.41.0" \
                  "datasets>=2.19.0" "peft>=0.11.1" "scikit-learn>=1.3.0" \
                  "tqdm>=4.66.0" "numpy>=1.24.0"

# ============================
# 1️⃣ Imports & helpers
# ============================
import os, gzip, random, numpy as np, torch
from datasets import Dataset, DatasetDict, load_dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import pearsonr
from torch.utils.data import DataLoader
from tqdm import tqdm

from sentence_transformers import SentenceTransformer, InputExample, losses
from sentence_transformers.evaluation import TranslationEvaluator, EmbeddingSimilarityEvaluator

from peft import LoraConfig, TaskType, get_peft_model, PeftModel

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seed the torch, stdlib and NumPy generators with the same value.
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

def clean(text: str) -> str:
    """Swap no-break spaces (U+00A0, U+202F) for ordinary spaces, then strip both ends."""
    for odd_space in ("\xa0", "\u202f"):
        text = text.replace(odd_space, " ")
    return text.strip()

def ensure_files_exist(paths):
    """Raise ``FileNotFoundError`` listing every entry of ``paths`` that does not exist.

    Returns ``None`` when all paths are present.
    """
    missing = []
    for candidate in paths:
        if os.path.exists(candidate):
            continue
        missing.append(candidate)
    if not missing:
        return
    raise FileNotFoundError(f"Missing files: {missing}\nUpload your Rus–Tuk files or fix paths in CONFIG.")

def cosine_pearson(emb1, emb2, gold):
    """Return Pearson's r between row-wise dot products of the embeddings and ``gold``.

    The callers pass normalised embeddings, so the row-wise dot product is
    their cosine similarity.
    """
    row_cosines = (emb1 * emb2).sum(axis=1)
    return pearsonr(row_cosines, gold)[0]

def encode_batches(st_model, texts, bsz=64):
    """Encode ``texts`` in slices of ``bsz`` and return one NumPy array of normalised embeddings.

    Each slice is encoded on ``device`` and its tensor is moved to the CPU
    before concatenation; rows keep the order of ``texts``.
    """
    encode_options = dict(
        batch_size=bsz,
        convert_to_tensor=True,
        device=device,
        normalize_embeddings=True,
    )
    pieces = [
        st_model.encode(texts[start:start + bsz], **encode_options).cpu()
        for start in range(0, len(texts), bsz)
    ]
    return torch.cat(pieces).numpy()

# ============================
# 2️⃣ Load Bitext data (Rus–Tuk)
# ============================
ensure_files_exist([TRAIN_ID, TRAIN_SRC, TRAIN_TRG, TEST_ID, TEST_SRC, TEST_TRG])


def _read_pairs(opener, id_path, src_path, trg_path):
    """Read three line-parallel files and return cleaned {"translation": {"ru", "tk"}} rows.

    ``opener`` is ``gzip.open`` or ``open``. The id file is only iterated in
    lockstep with the other two; its content is not kept.
    """
    with opener(id_path, "rt", encoding="utf-8") as idf, \
         opener(src_path, "rt", encoding="utf-8") as srcf, \
         opener(trg_path, "rt", encoding="utf-8") as trgf:
        return [
            {"translation": {"ru": clean(ru_line), "tk": clean(tk_line)}}
            for _id_line, ru_line, tk_line in zip(idf, srcf, trgf)
        ]


train_rows = _read_pairs(gzip.open, TRAIN_ID, TRAIN_SRC, TRAIN_TRG)

# 90/5/5 split from your train → train/val/extra_test
train_data, temp = train_test_split(train_rows, random_state=42, test_size=0.10)
val_data, extra_test = train_test_split(temp, random_state=42, test_size=0.5)

test_rows = _read_pairs(open, TEST_ID, TEST_SRC, TEST_TRG)

# Final test set = original test rows followed by the held-out extra rows.
full_test = [*test_rows, *extra_test]

train_examples = []
for _row in train_data:
    _pair = _row["translation"]
    train_examples.append(InputExample(texts=[_pair["ru"], _pair["tk"]]))

val_src, val_tgt = (
    [row["translation"][lang] for row in val_data] for lang in ("ru", "tk")
)
test_src, test_tgt = (
    [row["translation"][lang] for row in full_test] for lang in ("ru", "tk")
)

# ============================
# 3️⃣ Load STS data (train on ru-STS Benchmark, eval on STS17 ru-ru)
# ============================
ru_stsb = load_dataset("ai-forever/ru-stsbenchmark-sts")
# Splits: train / validation / test
sts_train, sts_val, sts_test = (
    ru_stsb[split_name] for split_name in ("train", "validation", "test")
)

def to_examples(dsplit):
    """Convert one STS split into InputExamples labelled with ``score / 5``.

    Reads the ``sentence1``, ``sentence2`` and ``score`` columns of ``dsplit``;
    both sentences go through ``clean`` first.
    """
    examples = []
    columns = zip(dsplit["sentence1"], dsplit["sentence2"], dsplit["score"])
    for first, second, raw_score in columns:
        texts = [clean(first), clean(second)]
        examples.append(InputExample(texts=texts, label=float(raw_score) / 5.0))
    return examples

sts_train_examples = to_examples(sts_train)

# Dev evaluator on the ru-STS validation split, with gold scores divided by 5
# in the same way as the training labels.
_dev_gold = [float(raw) / 5.0 for raw in sts_val["score"]]
sts_val_eval = EmbeddingSimilarityEvaluator(
    sts_val["sentence1"],
    sts_val["sentence2"],
    _dev_gold,
    name="dev_ru_stsb",
)

# Try to load MTEB STS17 and extract ru-ru; if absent/empty, fallback to ru-STS test
def load_sts17_ru_ru():
    """Return ``(s1, s2, scores01, evaluator)`` for STS17 ru-ru pairs, or ``None``.

    ``None`` is returned when the dataset has none of the recognised language
    columns, when no ru-ru rows remain after filtering, or when anything in
    the process raises (the error is printed in that case).
    """
    def _both_ru(first_key, second_key):
        # Row filter for schemas that keep the two language codes in separate columns.
        return lambda ex: ex[first_key] == "ru" and ex[second_key] == "ru"

    def _pair_tag_is_ru(ex):
        # Row filter for a single "language" column, e.g. 'ru-ru' or 'ru_en' like formats.
        tag = ex["language"]
        if not isinstance(tag, str):
            return False
        pieces = tag.replace('_', '-').split('-')
        if len(pieces) != 2:
            return False
        return pieces[0] == "ru" and pieces[1] == "ru"

    try:
        ds = load_dataset("mteb/sts17-crosslingual-sts", split="test")
        available = set(ds.column_names)
        if {"language1", "language2"} <= available:
            keep_row = _both_ru("language1", "language2")
        elif {"lang1", "lang2"} <= available:
            keep_row = _both_ru("lang1", "lang2")
        elif "language" in available:
            keep_row = _pair_tag_is_ru
        else:
            return None
        ds_ru = ds.filter(keep_row)
        if len(ds_ru) == 0:
            return None
        s1 = list(map(clean, ds_ru["sentence1"]))
        s2 = list(map(clean, ds_ru["sentence2"]))
        scores01 = [float(raw) / 5.0 for raw in ds_ru["score"]]
        evalr = EmbeddingSimilarityEvaluator(s1, s2, scores01, name="test_sts17_ru")
        return (s1, s2, scores01, evalr)
    except Exception as e:
        print(f"⚠️ Could not load STS17 ru-ru: {e}")
        return None

sts17_payload = load_sts17_ru_ru()
if sts17_payload is not None:
    sts17_s1, sts17_s2, sts17_scores01, sts17_eval = sts17_payload
else:
    # No STS17 ru-ru data available: use the ru-STS test split as the final STS set.
    print("🔁 Falling back to ru-STS test split for final STS evaluation.")
    sts17_s1 = list(map(clean, sts_test["sentence1"]))
    sts17_s2 = list(map(clean, sts_test["sentence2"]))
    sts17_scores01 = [float(raw) / 5.0 for raw in sts_test["score"]]
    sts17_eval = EmbeddingSimilarityEvaluator(
        sts17_s1, sts17_s2, sts17_scores01, name="test_ru_stsb"
    )

# ============================
# 4️⃣ Build SentenceTransformer + LoRA
# ============================
base = SentenceTransformer(BASE_MODEL_NAME, device=device)
base.max_seq_length = MAX_SEQ_LEN

# Get the underlying HF encoder used by Sentence-Transformers
hf_encoder = base._first_module().auto_model

# LoRA config for BERT-like encoders
lora_cfg = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value", "dense"],  # attention + output dense
    inference_mode=False
)

# Wrap encoder with PEFT; create first adapter "bitext"
peft_model = get_peft_model(hf_encoder, lora_cfg, adapter_name="bitext")
base._first_module().auto_model = peft_model  # reattach to ST pipeline

# Freeze everything that is not a LoRA weight. Only the wrapped encoder goes
# through PEFT; the other modules of the SentenceTransformer pipeline (LaBSE
# ships a separate Dense head — see the 2_Dense download in the first cell's
# output) would otherwise keep training with both objectives. Those changes are
# not part of the saved adapters, so a reloaded base + adapter would not
# reproduce the in-session model.
# NOTE(review): presumably this is why the comparison cell reports slightly
# different scores (P@1 0.7934 vs 0.7955 in-session) — confirm after re-training.
for _param_name, _param in base.named_parameters():
    if "lora_" not in _param_name:
        _param.requires_grad_(False)

# ============================
# 5️⃣ Train LoRA "bitext" adapter
# ============================
# Shuffled (ru, tk) pairs; validation is translation matching on the val split.
train_loader = DataLoader(train_examples, shuffle=True, batch_size=BITEXT_BATCH_SIZE)
bitext_loss = losses.MultipleNegativesRankingLoss(base)
val_evaluator = TranslationEvaluator(val_src, val_tgt, name="val_rus_tuk")

print("🚀 Training 'bitext' adapter…")
base.fit(
    train_objectives=[(train_loader, bitext_loss)],
    epochs=BITEXT_EPOCHS,
    warmup_steps=100,
    evaluator=val_evaluator,
    evaluation_steps=500,
    output_path=None,  # we save adapter below
    use_amp=True
)

os.makedirs(BITEXT_ADAPTER_DIR, exist_ok=True)
# Save ONLY this adapter
# (the comparison cell finds the files one level down, in adapters/bitext/bitext)
peft_model.save_pretrained(BITEXT_ADAPTER_DIR, selected_adapters=["bitext"])
print(f"✅ Saved bitext adapter → {BITEXT_ADAPTER_DIR}")

print("\n🔎 Evaluating bitext retrieval (Rus↔Tuk) with 'bitext' adapter…")
test_eval = TranslationEvaluator(test_src, test_tgt, name="test_rus_tuk")
# The evaluator returns its scores instead of printing them, and this call is
# not the last expression of the cell, so the result has to be printed here.
bitext_test_scores = test_eval(base)
print(bitext_test_scores)

# ============================
# 6️⃣ Add+train LoRA "sts" adapter (ru STS)
# ============================
# A second adapter is created from the same LoRA config and made the active
# one before training starts.
peft_model.add_adapter("sts", lora_cfg)
peft_model.set_adapter("sts")  # activate new adapter
print("\n🔁 Switched active adapter → 'sts'")

# Training items are the ru-STS train pairs, labelled with score / 5.
sts_loader = DataLoader(sts_train_examples, shuffle=True, batch_size=STS_BATCH_SIZE)
sts_loss = losses.CosineSimilarityLoss(base)  # labels in [0,1]

print("🚀 Training 'sts' adapter…")
# NOTE: the recorded log for this run ends at step 492, so the 500-step
# evaluation interval is never reached; the logged rows are at 164/328/492.
base.fit(
    train_objectives=[(sts_loader, sts_loss)],
    epochs=STS_EPOCHS,
    warmup_steps=100,
    evaluator=sts_val_eval,         # monitor ru-STS validation
    evaluation_steps=500,
    output_path=None,
    use_amp=True
)

# Persist only the "sts" adapter; output_path=None above means fit() itself
# was given no save location.
os.makedirs(STS_ADAPTER_DIR, exist_ok=True)
peft_model.save_pretrained(STS_ADAPTER_DIR, selected_adapters=["sts"])
print(f"✅ Saved sts adapter → {STS_ADAPTER_DIR}")

# ============================
# 7️⃣ Swap adapters at inference & evaluate
# ============================
def bitext_metrics(st_model, src_list, tgt_list):
    """Embed both sides with ``st_model`` and return ``(P@1, MRR)`` for source→target retrieval.

    Item ``i`` of ``src_list`` is expected to match item ``i`` of ``tgt_list``.
    A row's rank is one plus the number of targets scoring strictly higher
    than its own target.
    """
    embedded = [encode_batches(st_model, side, bsz=64) for side in (src_list, tgt_list)]
    sim = cosine_similarity(*embedded)
    diag = np.arange(sim.shape[0])
    hits = sim.argmax(axis=1) == diag
    own_score = sim[diag, diag][:, None]
    ranks = 1 + (sim > own_score).sum(axis=1)
    return hits.mean(), (1.0 / ranks).mean()

# Evaluate bitext adapter
peft_model.set_adapter("bitext")
print("\n🧪 Active adapter: 'bitext'")
p1, mrr = bitext_metrics(base, test_src, test_tgt)
print(f"Rus↔Tuk Retrieval → P@1: {p1:.4f}, MRR: {mrr:.4f}")

# Evaluate STS adapter on STS17 ru-ru (or ru-STS fallback)
peft_model.set_adapter("sts")
print("\n🧪 Active adapter: 'sts'")
# The evaluators return their scores rather than printing them. Previously the
# results were assigned to `_`, so this section of the output stayed empty.
sts_dev_scores = sts_val_eval(base)
sts_final_scores = sts17_eval(base)
print(f"ru-STS validation → {sts_dev_scores}")
print(f"Final STS set     → {sts_final_scores}")

# Also compute explicit Pearson for the final STS set
print("\n📐 Final STS set (manual Pearson on cosine sim):")
s1 = encode_batches(base, sts17_s1)
s2 = encode_batches(base, sts17_s2)
pearson = cosine_pearson(s1, s2, np.array(sts17_scores01))
print(f"Pearson r = {pearson:.4f}")

print("\n✅ Done. Two swappable adapters saved:")
print(f"  • Bitext adapter dir: {BITEXT_ADAPTER_DIR}")
print(f"  • STS    adapter dir: {STS_ADAPTER_DIR}")




Usage:   
  pip3 <command> [options]

no such option: -U


README.md: 0.00B [00:00, ?B/s]

test/ar-ar.jsonl.gz:   0%|          | 0.00/10.8k [00:00<?, ?B/s]

test/en-ar.jsonl.gz:   0%|          | 0.00/11.8k [00:00<?, ?B/s]

test/en-de.jsonl.gz:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

test/en-en.jsonl.gz:   0%|          | 0.00/8.86k [00:00<?, ?B/s]

test/en-tr.jsonl.gz:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

test/es-en.jsonl.gz:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

test/es-es.jsonl.gz:   0%|          | 0.00/9.65k [00:00<?, ?B/s]

test/fr-en.jsonl.gz:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

test/it-en.jsonl.gz:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

test/ko-ko.jsonl.gz:   0%|          | 0.00/165k [00:00<?, ?B/s]

test/nl-en.jsonl.gz:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/5346 [00:00<?, ? examples/s]

🔁 Falling back to ru-STS test split for final STS evaluation.
🚀 Training 'bitext' adapter…


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Val Rus Tuk Src2trg Accuracy,Val Rus Tuk Trg2src Accuracy,Val Rus Tuk Mean Accuracy
500,0.3989,No log,0.768016,0.77059,0.769303
1000,0.3193,No log,0.77059,0.773507,0.772049
1500,0.3041,No log,0.775395,0.777111,0.776253
2000,0.296,No log,0.778312,0.777797,0.778054
2500,0.2879,No log,0.780714,0.779341,0.780027
3000,0.2693,No log,0.78466,0.781229,0.782944
3279,0.2693,No log,0.78466,0.780027,0.782344
3500,0.272,No log,0.783631,0.782601,0.783116
4000,0.2559,No log,0.784832,0.781057,0.782944
4500,0.2603,No log,0.787406,0.78243,0.784918


✅ Saved bitext adapter → adapters/bitext

🔎 Evaluating bitext retrieval (Rus↔Tuk) with 'bitext' adapter…

🔁 Switched active adapter → 'sts'
🚀 Training 'sts' adapter…


Step,Training Loss,Validation Loss,Dev Ru Stsb Pearson Cosine,Dev Ru Stsb Spearman Cosine
164,No log,No log,0.838651,0.836796
328,No log,No log,0.855384,0.853721
492,No log,No log,0.858408,0.856785


✅ Saved sts adapter → adapters/sts

🧪 Active adapter: 'bitext'
Rus↔Tuk Retrieval → P@1: 0.7955, MRR: 0.8281

🧪 Active adapter: 'sts'

📐 Final STS set (manual Pearson on cosine sim):
Pearson r = 0.8147

✅ Done. Two swappable adapters saved:
  • Bitext adapter dir: adapters/bitext
  • STS    adapter dir: adapters/sts


In [None]:
# ============================================
# 📊 BEFORE vs AFTER — baseline vs adapters
#  (handles nested paths like /content/adapters/bitext/bitext)
# ============================================
import os, numpy as np, torch, pandas as pd
from datasets import load_dataset
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from peft import PeftModel

# Inference device: CUDA when torch reports it, otherwise CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# ---- Paths (parent dirs that contain the nested adapter folder) ----
BITEXT_ADAPTER_PARENT = "/content/adapters/bitext"
STS_ADAPTER_PARENT = "/content/adapters/sts"

# Base checkpoint and the sequence cap applied when loading it.
BASE_MODEL_NAME = "sentence-transformers/LaBSE"
MAX_SEQ_LEN = 256

# ---- Helper: resolve folder that actually has adapter_config.json ----
def resolve_adapter_dir(parent_dir: str, default_name: str):
    """Return the directory under ``parent_dir`` that holds ``adapter_config.json``.

    Lookup order:
      1. ``parent_dir`` itself;
      2. ``parent_dir/default_name`` (e.g. /bitext/bitext);
      3. the immediate sub-directories of ``parent_dir``, accepted only when
         exactly one of them contains the file.

    Raises:
        FileNotFoundError: when none of the above yields a single directory.
    """
    def _has_config(folder):
        return os.path.exists(os.path.join(folder, "adapter_config.json"))

    candidate = os.path.join(parent_dir, default_name)
    for location in (parent_dir, candidate):
        if _has_config(location):
            return location

    matches = []
    if os.path.isdir(parent_dir):
        children = (os.path.join(parent_dir, entry) for entry in os.listdir(parent_dir))
        matches = [child for child in children if os.path.isdir(child) and _has_config(child)]
    if len(matches) == 1:
        return matches[0]
    raise FileNotFoundError(
        f"Could not find adapter_config.json under {parent_dir}. "
        f"Tried '{parent_dir}', '{candidate}', and immediate children."
    )

# Locate both adapter folders first, then report where they were found.
bitext_path, sts_path = (
    resolve_adapter_dir(parent, adapter_name)
    for parent, adapter_name in (
        (BITEXT_ADAPTER_PARENT, "bitext"),
        (STS_ADAPTER_PARENT, "sts"),
    )
)
print(f"🔎 Resolved bitext adapter dir: {bitext_path}")
print(f"🔎 Resolved sts    adapter dir: {sts_path}")

# ---- Load / reuse your bitext test data ----
def clean(t):
    """Map U+00A0 and U+202F to plain spaces and trim surrounding whitespace."""
    space_table = str.maketrans({"\xa0": " ", "\u202f": " "})
    return t.translate(space_table).strip()

# Reuse test_src/test_tgt when an earlier cell already defined them; otherwise
# read the plain-text test files from the working directory.
try:
    test_src, test_tgt  # noqa
except NameError:
    if not os.path.exists("test.src") or not os.path.exists("test.trg"):
        raise FileNotFoundError("test.src/test.trg not found. Re-run training or adjust paths.")
    with open("test.src", "r", encoding="utf-8") as src_file:
        test_src = [clean(line.strip()) for line in src_file]
    with open("test.trg", "r", encoding="utf-8") as tgt_file:
        test_tgt = [clean(line.strip()) for line in tgt_file]

# ---- Load STS17 ru-ru (fallback to ru-STS test) ----
def load_sts17_ru_ru():
    """Return ``(s1, s2, y, "STS17 ru-ru")`` for the ru-ru pairs of STS17, or ``None``.

    ``s1``/``s2`` are cleaned sentence lists and ``y`` holds the scores divided
    by 5. ``None`` is returned when the dataset exposes none of the recognised
    language columns, when filtering leaves no ru-ru rows, or when loading or
    filtering raises. In the last case the error is printed, so the caller's
    fallback is no longer taken silently.
    """
    try:
        ds = load_dataset("mteb/sts17-crosslingual-sts", split="test")
        cols = set(ds.column_names)
        if "language1" in cols and "language2" in cols:
            ds_ru = ds.filter(lambda ex: ex["language1"]=="ru" and ex["language2"]=="ru")
        elif "lang1" in cols and "lang2" in cols:
            ds_ru = ds.filter(lambda ex: ex["lang1"]=="ru" and ex["lang2"]=="ru")
        elif "language" in cols:
            # Single column holding a pair tag such as 'ru-ru' or 'ru_en'.
            def is_ruru(ex):
                val = ex["language"]
                if isinstance(val, str):
                    a = val.replace('_','-').split('-')
                    return len(a)==2 and a[0]=="ru" and a[1]=="ru"
                return False
            ds_ru = ds.filter(is_ruru)
        else:
            ds_ru = None
        if ds_ru is None or len(ds_ru)==0:
            return None
        s1 = [clean(x) for x in ds_ru["sentence1"]]
        s2 = [clean(x) for x in ds_ru["sentence2"]]
        y  = [float(x)/5.0 for x in ds_ru["score"]]
        return s1, s2, y, "STS17 ru-ru"
    except Exception as e:
        # Best-effort loader: keep the fallback behaviour but report why it was
        # taken, using the same message as the training cell.
        print(f"⚠️ Could not load STS17 ru-ru: {e}")
        return None

payload = load_sts17_ru_ru()
if payload is not None:
    s1, s2, y, sts_name = payload
else:
    # STS17 ru-ru unavailable: evaluate on the ru-STS test split instead.
    ru_stsb = load_dataset("ai-forever/ru-stsbenchmark-sts")
    s1 = list(map(clean, ru_stsb["test"]["sentence1"]))
    s2 = list(map(clean, ru_stsb["test"]["sentence2"]))
    y = [float(raw) / 5.0 for raw in ru_stsb["test"]["score"]]
    sts_name = "ru-STS test"

# ---- Helpers ----
def encode_batches(st_model, texts, bsz=64):
    """Return normalised embeddings for ``texts`` as one NumPy array.

    ``texts`` is encoded slice by slice (``bsz`` items each) on ``device``;
    every slice is moved to the CPU before all slices are concatenated in
    input order.
    """
    def _cpu_slices():
        for lo in range(0, len(texts), bsz):
            encoded = st_model.encode(
                texts[lo:lo + bsz],
                batch_size=bsz,
                convert_to_tensor=True,
                device=device,
                normalize_embeddings=True,
            )
            yield encoded.cpu()

    return torch.cat(list(_cpu_slices())).numpy()

def bitext_metrics(st_model, src_list, tgt_list):
    """Return ``(P@1, MRR)`` as Python floats for source→target retrieval.

    Item ``i`` of ``src_list`` is expected to match item ``i`` of ``tgt_list``.
    A source's rank is one plus the number of targets whose similarity is
    strictly higher than that of its own target.
    """
    sim = cosine_similarity(
        encode_batches(st_model, src_list),
        encode_batches(st_model, tgt_list),
    )
    idx = np.arange(sim.shape[0])
    precision_at_1 = np.mean(sim.argmax(axis=1) == idx)
    gold_column = sim[idx, idx].reshape(-1, 1)
    n_better = np.sum(sim > gold_column, axis=1)
    mean_reciprocal_rank = np.mean(1.0 / (n_better + 1))
    return float(precision_at_1), float(mean_reciprocal_rank)

def sts_metrics(st_model, s1, s2, y):
    """Return ``(pearson, spearman)`` between pairwise similarity and the gold scores ``y``.

    Both sentence lists are embedded with ``encode_batches`` (normalised
    output), so the row-wise dot product serves as the cosine similarity.
    """
    left = encode_batches(st_model, s1)
    right = encode_batches(st_model, s2)
    sims = (left * right).sum(axis=1)
    pearson_r, _ = pearsonr(sims, y)
    spearman_rho, _ = spearmanr(sims, y)
    return float(pearson_r), float(spearman_rho)

# ---- Build three evaluable models: baseline, +bitext, +sts ----
baseline = SentenceTransformer(BASE_MODEL_NAME, device=device)
baseline.max_seq_length = MAX_SEQ_LEN

with_adapters = SentenceTransformer(BASE_MODEL_NAME, device=device)
# Same sequence cap as the baseline, so the before/after comparison differs
# only in the adapters (previously this was set on `baseline` alone).
with_adapters.max_seq_length = MAX_SEQ_LEN
auto = with_adapters._first_module().auto_model

# Load first adapter exactly from the resolved subdir; name it explicitly so we can switch
auto = PeftModel.from_pretrained(auto, bitext_path, adapter_name="bitext")
# Load second one and give it the 'sts' name
auto.load_adapter(sts_path, adapter_name="sts")

# Re-attach into Sentence-Transformers pipeline
with_adapters._first_module().auto_model = auto

# ---- Evaluate all variants on both tasks ----
# Each variant is scored on bitext retrieval (P@1, MRR) and on the STS set
# (Pearson, Spearman); the active adapter is switched between runs.

# 1) Baseline
bp1, bmrr = bitext_metrics(baseline, test_src, test_tgt)
bpr, bsr = sts_metrics(baseline, s1, s2, y)

# 2) Bitext adapter active
auto.set_adapter("bitext")
ap1, amrr = bitext_metrics(with_adapters, test_src, test_tgt)
apr, asr = sts_metrics(with_adapters, s1, s2, y)

# 3) STS adapter active
auto.set_adapter("sts")
sp1, smrr = bitext_metrics(with_adapters, test_src, test_tgt)
spr, ssr = sts_metrics(with_adapters, s1, s2, y)

rows = [
    ["Baseline LaBSE", "none", bp1, bmrr, bpr, bsr],
    ["LaBSE + LoRA(bitext)", "bitext", ap1, amrr, apr, asr],
    ["LaBSE + LoRA(sts)", "sts", sp1, smrr, spr, ssr],
]

_metric_columns = [
    "Bitext P@1", "Bitext MRR",
    f"{sts_name} Pearson", f"{sts_name} Spearman",
]
df = pd.DataFrame(rows, columns=["Variant", "Active Adapter", *_metric_columns])

# Show every metric column with four decimals.
with pd.option_context('display.max_colwidth', None, 'display.precision', 4):
    display(df.style.format(dict.fromkeys(_metric_columns, "{:.4f}")))


🔎 Resolved bitext adapter dir: /content/adapters/bitext/bitext
🔎 Resolved sts    adapter dir: /content/adapters/sts/sts


Unnamed: 0,Variant,Active Adapter,Bitext P@1,Bitext MRR,ru-STS test Pearson,ru-STS test Spearman
0,Baseline LaBSE,none,0.698,0.7354,0.7357,0.7334
1,LaBSE + LoRA(bitext),bitext,0.7934,0.8269,0.7375,0.7302
2,LaBSE + LoRA(sts),sts,0.7348,0.7685,0.8095,0.8046
