# For creating predictions for various fine-tuning configurations

In [22]:
import os, json
import torch
import torch.nn as nn
from datasets import load_from_disk, concatenate_datasets
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
from collections import defaultdict

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [18]:
def predict_align_model(model, input_folders, output_dir, tokenizer, max_length=128, batch_size=None):
    """
    Predict sentence-pair changes and align with truth files.

    For every ``<problem_id>.txt`` in each input folder, consecutive non-empty
    lines are paired, each pair is classified by ``model``, and the predicted
    labels are written to ``<output_dir>/<key>/solution-<problem_id>.json``.
    The prediction list is truncated or zero-padded to the length of
    ``truth["changes"]`` read from the matching ``truth-<problem_id>.json``.

    input_folders: dict of difficulty/language -> folder path, e.g.,
        {"easy": "Data/easy/validation", "DANISH": "Data/reddit_new_DANISH", ...}
    output_dir: root folder for JSON predictions
    model: trained model
    tokenizer: tokenizer
    max_length: maximum token length of an encoded sentence pair
    batch_size: number of sentence pairs per forward pass; None (default)
        sends all pairs of a document through the model in one batch

    Raises ValueError if ``batch_size`` is given but smaller than 1. A missing
    or malformed truth file is not handled here and propagates to the caller.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 or None, got {batch_size}")

    model.eval()
    model.to(device)
    os.makedirs(output_dir, exist_ok=True)

    for diff, folder in input_folders.items():
        diff_dir = os.path.join(output_dir, diff)
        os.makedirs(diff_dir, exist_ok=True)

        txt_files = sorted(f for f in os.listdir(folder) if f.endswith(".txt"))
        for fname in txt_files:
            problem_id = fname[:-4]  # e.g., 'problem-1'

            # Load sentences (one per line, blank lines dropped)
            with open(os.path.join(folder, fname), "r", encoding="utf8") as f:
                sentences = [s.strip() for s in f if s.strip()]

            # Load truth
            truth_path = os.path.join(folder, f"truth-{problem_id}.json")
            with open(truth_path, "r", encoding="utf8") as f:
                truth = json.load(f)
            num_changes = len(truth["changes"])

            # Build sentence pairs: (s0, s1), (s1, s2), ...
            first_sents = sentences[:-1]
            second_sents = sentences[1:]

            # Predict. With fewer than two sentences there are no pairs, the
            # loop body never runs, and the tokenizer is never handed an
            # empty batch.
            step = batch_size or max(len(first_sents), 1)
            preds = []
            for start in range(0, len(first_sents), step):
                encodings = tokenizer(
                    first_sents[start:start + step],
                    second_sents[start:start + step],
                    padding=True,
                    truncation=True,
                    max_length=max_length,
                    return_tensors="pt"
                ).to(device)

                with torch.no_grad():
                    outputs = model(**encodings)
                preds.extend(torch.argmax(outputs.logits, dim=-1).cpu().tolist())

            # Align predictions to truth length
            if len(preds) > num_changes:
                preds = preds[:num_changes]
            elif len(preds) < num_changes:
                preds += [0] * (num_changes - len(preds))  # pad with 0

            # Build prediction JSON; "authors" is copied from the truth file,
            # it is not predicted by the model.
            pred_json = {
                "authors": truth.get("authors", 0),
                "changes": preds
            }

            # Save
            out_file = os.path.join(diff_dir, f"solution-{problem_id}.json")
            with open(out_file, "w", encoding="utf8") as f:
                json.dump(pred_json, f)

    print(f"Predictions written to {output_dir}")


In [19]:
# PAN validation splits, keyed by difficulty.
input_folders = {
    level: f"Data/{level}/validation"
    for level in ("easy", "medium", "hard")
}

# Tokenizer and base checkpoint reused by the prediction cells below.
tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/mmBERT-base", use_fast=True)
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at jhu-clsp/mmBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Prediction generation:

### Baseline Test:

In [None]:
model_name = "jhu-clsp/mmBERT-base"

# Baseline: the pretrained encoder with an untrained classification head
# (loading this checkpoint reports classifier.weight/bias as newly
# initialized). Seed the RNG first so the random head, and therefore the
# baseline predictions, are identical on every run.
torch.manual_seed(42)

# The tokenizer from the setup cell is reused. `trust_remote_code` is not
# passed: the other cells load this checkpoint without it.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2
)

model.to(device)
model.eval()

predict_align_model(
    model=model,
    input_folders=input_folders,
    output_dir="predictions/predictions_base",
    tokenizer=tokenizer,
    max_length=128
)


### LoRA Test

In [None]:
# Wrap a fresh base checkpoint with the LoRA task adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")
model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_lora_mawsa_adapters",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/predictions_lora",
    max_length=128,
)


### LoRA 2LCLS Test

In [None]:
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from peft import PeftModel

base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base", num_labels=2)

# Rebuild the two-layer classification head before attaching the adapter.
# NOTE(review): these layers start out randomly initialised; presumably the
# adapter checkpoint carries the trained head weights - confirm.
hidden = base_model.config.hidden_size
head_layers = [
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2),
]
base_model.classifier = nn.Sequential(*head_layers)

model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_lora_mawsa_adapters_2layercls",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/predictions_lora_2lcls",
    max_length=128,
)

### QLoRA Test

In [None]:
# Wrap a fresh base checkpoint with the QLoRA task adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")
model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_Qlora_mawsa_adapters",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/predictions_Qlora",
    max_length=128,
)


### QLoRA 2LCLS Test

In [None]:
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from peft import PeftModel

base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base", num_labels=2)

# Rebuild the two-layer classification head before attaching the adapter.
# NOTE(review): these layers start out randomly initialised; presumably the
# adapter checkpoint carries the trained head weights - confirm.
hidden = base_model.config.hidden_size
head_layers = [
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2),
]
base_model.classifier = nn.Sequential(*head_layers)

model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_Qlora_mawsa_adapters_2layercls",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/predictions_Qlora_2layercls",
    max_length=128,
)

### QLoRA + Language adapter

In [None]:
# QLoRA task adapter that was trained together with a language adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")
model = PeftModel.from_pretrained(base_model, "trained_adapters/Qlora_lang_mawsa")
model.to(device)
model.eval()

# Write to a directory of its own: this cell previously reused
# "predictions/predictions_Qlora" and overwrote the plain QLoRA predictions.
predict_align_model(
    model=model,
    input_folders=input_folders,
    output_dir="predictions/predictions_Qlora_lang_adapter",
    tokenizer=tokenizer,
    max_length=128
)

## Multilingual predictions

New input folders (scraped, language-specific Reddit data).

In [None]:
# From here on `input_folders` points at the per-language Reddit datasets
# instead of the PAN validation splits.
input_folders = {
    lang: f"Data/reddit_dataset_new_{lang}"
    for lang in ("DANISH", "ENGLISH", "ITALIAN", "POLISH")
}


### Baseline

In [None]:
model_name = "jhu-clsp/mmBERT-base"

# Baseline: the pretrained encoder with an untrained classification head.
# Seed the RNG first so the randomly initialised head, and therefore the
# baseline predictions, are identical on every run.
torch.manual_seed(42)

# `trust_remote_code` is not passed: the other cells load this checkpoint
# without it.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2
)

model.to(device)
model.eval()

predict_align_model(
    model=model,
    input_folders=input_folders,
    output_dir="predictions/multilingual_predictions_base",
    tokenizer=tokenizer,
    max_length=128
)

### LoRA

In [None]:
tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/mmBERT-base", use_fast=True)

# Wrap a fresh base checkpoint with the LoRA task adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")
model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_lora_mawsa_adapters",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/multilingual_predictions_lora",
    max_length=128,
)


### LoRA 2LCLS

In [None]:
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from peft import PeftModel
tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/mmBERT-base", use_fast=True)

base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base", num_labels=2)

# Rebuild the two-layer classification head before attaching the adapter.
# NOTE(review): these layers start out randomly initialised; presumably the
# adapter checkpoint carries the trained head weights - confirm.
hidden = base_model.config.hidden_size
head_layers = [
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2),
]
base_model.classifier = nn.Sequential(*head_layers)

model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_lora_mawsa_adapters_2layercls",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/multilingual_predictions_lora_2lcls",
    max_length=128,
)

### QLoRA

In [None]:
tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/mmBERT-base", use_fast=True)

# Wrap a fresh base checkpoint with the QLoRA task adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("jhu-clsp/mmBERT-base")
model = PeftModel.from_pretrained(
    model=base_model,
    model_id="trained_adapters/mmbert_Qlora_mawsa_adapters",
)
model.to(device).eval()

predict_align_model(
    model=model,
    tokenizer=tokenizer,
    input_folders=input_folders,
    output_dir="predictions/multilingual_predictions_Qlora",
    max_length=128,
)


### QLoRA 2LCLS

In [None]:
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained(
        "jhu-clsp/mmBERT-base",
        use_fast=True
    )

base_model = AutoModelForSequenceClassification.from_pretrained(
    "jhu-clsp/mmBERT-base",
    num_labels=2
)

# Apply same 2 layer classifier as used for training
# NOTE(review): these layers start out randomly initialised; presumably the
# adapter checkpoint carries the trained head weights - confirm.
hidden = base_model.config.hidden_size
base_model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)

model = PeftModel.from_pretrained(
    base_model,
    "trained_adapters/mmbert_Qlora_mawsa_adapters_2layercls"
)

model.to(device)
model.eval()

# Directory name fixed from "mutlilingual_..." so it matches the other
# "multilingual_predictions_*" output folders.
predict_align_model(
    model=model,
    input_folders=input_folders,
    output_dir="predictions/multilingual_predictions_Qlora_2layercls",
    tokenizer=tokenizer,
    max_length=128
)

## Language Adapter setup LoRA

In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    ModernBertForSequenceClassification
)
from peft import PeftModel
import torch

def load_model_for_language(lang_adapter_path, task_adapter_path, device):
    """
    Build a classifier whose encoder includes a language LoRA adapter and
    whose task behaviour comes from a separate task adapter.

    lang_adapter_path: PEFT adapter trained on the masked-LM model
    task_adapter_path: PEFT adapter trained for sequence classification
    device: device the finished model is moved to

    Returns (model, tokenizer); the model is in eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "jhu-clsp/mmBERT-base",
        use_fast=True
    )

    # Load MLM base
    mlm_model = AutoModelForMaskedLM.from_pretrained(
        "jhu-clsp/mmBERT-base"
    )

    # Load LANGUAGE adapter on MLM
    mlm_model = PeftModel.from_pretrained(
        mlm_model,
        lang_adapter_path,
        is_trainable=False
    )

    # Fold the language LoRA weights into the base weights. This leaves a
    # plain ModernBERT encoder, so the task adapter loaded below cannot
    # collide with (or overwrite) the language adapter's LoRA layers.
    merged_mlm = mlm_model.merge_and_unload()

    # Create classification model
    cls_model = ModernBertForSequenceClassification.from_pretrained(
        "jhu-clsp/mmBERT-base",
        num_labels=2
    )

    # Transplant the encoder. ModernBERT task models keep their encoder under
    # `.model`; the previous assignment to `.modernbert` only attached an
    # unused attribute, so the language adapter never reached the forward pass.
    cls_model.model = merged_mlm.model

    # Load TASK adapter ONLY
    cls_model = PeftModel.from_pretrained(
        cls_model,
        task_adapter_path,
        is_trainable=False
    )

    cls_model.to(device)
    cls_model.eval()

    return cls_model, tokenizer


In [None]:
task_adapter = "trained_adapters/mmbert_Qlora_lang_mawsa_adapters_peft"

# One language LoRA adapter per evaluated language.
language_adapters = {
    lang: f"trained_adapters/language_adapters/language_lora_{lang.lower()}"
    for lang in ("DANISH", "ENGLISH", "ITALIAN", "POLISH")
}

# Each language gets its own freshly assembled model; all languages write
# below the same root, one sub-folder per language key.
for lang, lang_adapter_path in language_adapters.items():
    model, tokenizer = load_model_for_language(lang_adapter_path, task_adapter, device)

    predict_align_model(
        model=model,
        tokenizer=tokenizer,
        input_folders={lang: "Data/reddit_dataset_new_" + lang},
        output_dir="predictions/LoRA_language_adapter_predictions",
    )


In [None]:
# Build the model explicitly instead of reusing whatever `model`/`tokenizer`
# the per-language loop left behind (its last iteration is POLISH) and
# whatever `input_folders` currently points at.
# NOTE(review): assumes this run is meant to be the English language adapter
# on the PAN validation splits, as the output directory name suggests - confirm.
model, tokenizer = load_model_for_language(
    "trained_adapters/language_adapters/language_lora_english",
    "trained_adapters/mmbert_Qlora_lang_mawsa_adapters_peft",
    device
)

predict_align_model(
    model=model,
    input_folders={
        "easy": "Data/easy/validation",
        "medium": "Data/medium/validation",
        "hard": "Data/hard/validation"
    },
    output_dir="predictions/predictions_lora_lang",
    tokenizer=tokenizer,
    max_length=128
)

### QLoRA 2LCLS + English language adapter

In [None]:
# Load base model. It must be bound to `base_model`: the head swap and the
# PEFT wrapping below operate on that name, and previously picked up a stale
# `base_model` left over from an earlier cell.
base_model = AutoModelForSequenceClassification.from_pretrained(
    "jhu-clsp/mmBERT-base",
    num_labels=2
)

# Apply same 2 layer classifier as used for training
# NOTE(review): these layers start out randomly initialised; presumably the
# adapter checkpoint carries the trained head weights - confirm.
hidden = base_model.config.hidden_size
base_model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)

model = PeftModel.from_pretrained(
    base_model,
    "trained_adapters/mmbert_Qlora_lang_mawsa_adapters_2layercls"
)

model.to(device)
model.eval()

# NOTE(review): the PAN folders are pinned here because `input_folders` has
# been re-pointed at the multilingual data by this stage of the notebook, while
# the output directory follows the PAN naming scheme - confirm the intent.
predict_align_model(
    model=model,
    input_folders={
        "easy": "Data/easy/validation",
        "medium": "Data/medium/validation",
        "hard": "Data/hard/validation"
    },
    output_dir="predictions/predictions_Qlora_lang",
    tokenizer=tokenizer,
    max_length=128
)


## Parallel Bottleneck Adapter Evaluation

### Adapter loading and setup

In [32]:
# Tokenizer used by every bottleneck-adapter prediction cell below.
tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/mmBERT-base", use_fast=True)
from transformers import AutoModelForSequenceClassification
import adapters
# NOTE(review): SeqBnInvConfig is imported but not referenced in the visible cells.
from adapters import AdapterModelInterface, SeqBnInvConfig
import torch.nn as nn
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load base model
# NOTE(review): AutoAdapterModel is imported but the model below is created
# with AutoModelForSequenceClassification.
from adapters import AutoAdapterModel

model = AutoModelForSequenceClassification.from_pretrained(
    "jhu-clsp/mmBERT-base",
    num_labels=2,
    trust_remote_code=True,
)


# Interface passed to adapters.init(); presumably it tells the adapters
# library, by attribute name, where this architecture keeps its embeddings,
# layers, attention and MLP projections - verify against the library docs.
plugin_interface = AdapterModelInterface(
    adapter_methods=["bottleneck", "invertible"],
    model_embeddings="embeddings",
    model_layers="layers",
    layer_self_attn="attn",
    layer_cross_attn=None,
    attn_qkv_proj="Wqkv",
    attn_o_proj="Wo",
    layer_intermediate_proj="mlp.Wi",
    layer_output_proj="mlp.Wo",
    layer_pre_self_attn="attn",
    layer_pre_cross_attn=None,
    layer_pre_ffn="mlp",
    layer_ln_1="mlp_norm",
    layer_ln_2=None,
)

# Must run before any load_adapter() call below.
adapters.init(model, interface=plugin_interface)

# loading all trained adapters for use in each evaluation
# Every adapter is loaded inactive (set_active=False); the prediction cells
# activate the ones they need by the `load_as` name.
# NOTE(review): the recorded output shows four "Model class
# 'ModernBertForMaskedLM' of found prediction head does not match current
# model class." messages - presumably one per language adapter; confirm.

lang_adapter_english = model.load_adapter(
    "trained_adapters/language_adapters/English_adapter_loss_20.47",
    load_as="english_adapter",
    set_active=False
)

lang_adapter_danish = model.load_adapter(
    "trained_adapters/language_adapters/Danish_adapter_loss_14.64",
    load_as="danish_adapter",
    set_active=False
)

lang_adapter_italian = model.load_adapter(
    "trained_adapters/language_adapters/Italian_adapter_loss_14.97",
    load_as="italian_adapter",
    set_active=False
)

lang_adapter_polish = model.load_adapter(
    "trained_adapters/language_adapters/Polish_adapter_loss_12.18",
    load_as="polish_adapter",
    set_active=False
)

# Task adapters: plain, two-layer-head, and the two variants trained
# alongside a language adapter (naming taken from the checkpoint paths).
task_adapter= model.load_adapter(
    "trained_adapters/mmbert_parallelseqbn",
    load_as="task",
    set_active=False
)

task_adapter_2lcls = model.load_adapter(
    "trained_adapters/mmbert_parallelseqbn_2lcls",
    load_as="task_2lcls",
    set_active=False
)

task_adapter_lang= model.load_adapter(
    "trained_adapters/mmbert_parallelseqbn_lang",
    load_as="task_lang",
    set_active=False
)

task_adapter_lang_2lcls= model.load_adapter(
    "trained_adapters/mmbert_parallelseqbn_lang_2lcls",
    load_as="task_lang_2lcls",
    set_active=False
)


# Sanity check: all eight adapters should be listed, none active yet.
print(model.adapter_summary())


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at jhu-clsp/mmBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Model class 'ModernBertForMaskedLM' of found prediction head does not match current model class.
Model class 'ModernBertForMaskedLM' of found prediction head does not match current model class.
Model class 'ModernBertForMaskedLM' of found prediction head does not match current model class.
Model class 'ModernBertForMaskedLM' of found prediction head does not match current model class.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
english_adapter          bottleneck        1,936,032       0.631       0       1
danish_adapter           bottleneck        1,936,032       0.631       0       1
italian_adapter          bottleneck        1,936,032       0.631       0       1
polish_adapter           bottleneck        1,936,032       0.631       0       1
task                     bottleneck        1,639,968       0.534       0       1
task_2lcls               bottleneck        1,639,968       0.534       0       1
task_lang                bottleneck        1,639,968       0.534       0       1
task_lang_2lcls          bottleneck        1,639,968       0.534       0       1
--------------------------------------------------------------------------------
Full model                               306,939,648     100.000               1


In [33]:
# PAN validation splits, keyed by difficulty.
input_pan = {
    level: f"Data/{level}/validation"
    for level in ("easy", "medium", "hard")
}

In [34]:
# Per-language Reddit datasets, keyed by language name.
input_multilingual = {
    lang: f"Data/reddit_dataset_new_{lang}"
    for lang in ("DANISH", "ENGLISH", "ITALIAN", "POLISH")
}


## Single layer head predictions

### ParallelSeqBN Task test

In [47]:
# Clear whatever adapter setup an earlier cell left active, then activate
# only the plain task adapter.
model.set_active_adapters(None)
model.set_active_adapters("task")
model.eval()

# NOTE(review): the recorded output of this cell contains "There are adapters
# available but none are activated for the forward pass." - confirm the task
# adapter really is active while predicting.
predict_align_model(
    model=model,
    input_folders=input_pan,
    output_dir="predictions/predictions_parallelseqbn",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Predictions written to predictions/predictions_parallelseqbn


### ParallelSeqBn + English adapter

In [36]:
model.set_active_adapters(None)
# Language adapter first, task adapter second - the same order the
# per-language multilingual loop uses. The adapters are applied in list
# order, so the previous ["task_lang", "english_adapter"] ran them in the
# opposite order to that loop.
# NOTE(review): confirm this matches the order used when "task_lang" was trained.
model.set_active_adapters(["english_adapter", "task_lang"])
print(model.adapter_summary())
model.eval()

predict_align_model(
    model=model,
    input_folders=input_pan,
    output_dir="predictions/predictions_parallelseqbn_lang",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
english_adapter          bottleneck        1,936,032       0.631       1       1
danish_adapter           bottleneck        1,936,032       0.631       0       1
italian_adapter          bottleneck        1,936,032       0.631       0       1
polish_adapter           bottleneck        1,936,032       0.631       0       1
task                     bottleneck        1,639,968       0.534       0       1
task_2lcls               bottleneck        1,639,968       0.534       0       1
task_lang                bottleneck        1,639,968       0.534       1       1
task_lang_2lcls          bottleneck        1,639,968       0.534       0       1
--------------------------------------------------------------------------------
Full model                               306,939,648     100.000               1
Predictions written to predi

### Multilingual ParallelSeqBN

In [46]:
# Clear any previously active setup, then activate only the plain task adapter.
model.set_active_adapters(None)
model.set_active_adapters("task")
model.eval()

# Directory name fixed from "mutlilingual_..." so it matches the other
# "multilingual_predictions_*" output folders.
predict_align_model(
    model=model,
    input_folders=input_multilingual,
    output_dir="predictions/multilingual_predictions_parallelseqbn",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Predictions written to predictions/mutlilingual_predictions_parallelseqbn


### Multilingual ParallelSeqBN + language adapters

In [None]:
# Dataset language -> name the language adapter was loaded under.
language_adapters = dict(
    DANISH="danish_adapter",
    ENGLISH="english_adapter",
    ITALIAN="italian_adapter",
    POLISH="polish_adapter",
)

# Per language: reset the active setup, activate the matching language
# adapter together with the task adapter, then predict on that language only.
for lang, adapter_name in language_adapters.items():
    model.set_active_adapters(None)
    model.set_active_adapters([adapter_name, "task_lang"])
    model.eval()

    predict_align_model(
        model=model,
        tokenizer=tokenizer,
        input_folders={lang: "Data/reddit_dataset_new_" + lang},
        output_dir="predictions/parallelseqbn_lang_adapter_predictions",
    )


There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_lang_adapter_predictions/DANISH


There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_lang_adapter_predictions/ENGLISH


There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_lang_adapter_predictions/ITALIAN
Predictions written to predictions/parallelseqbn_lang_adapter_predictions/POLISH


## ParallelSeqBN 2 layer head predictions 

### ParallelSeqBn 2lcls

In [39]:
# Clear any previously active setup, then activate the task adapter that was
# trained with the two-layer head.
model.set_active_adapters(None)
model.set_active_adapters("task_2lcls")

# Replace the classifier with a two-layer head of the same shape.
# NOTE(review): these layers are created fresh here and no statement in this
# cell loads trained weights into them before predicting - confirm the head
# is not left randomly initialised.
hidden = model.config.hidden_size
model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)

model.eval()

predict_align_model(
    model=model,
    input_folders=input_pan,
    output_dir="predictions/predictions_parallelseqbn_2lcls",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Predictions written to predictions/predictions_parallelseqbn_2lcls


### ParallelSeqBn 2lcls + English adapter

In [40]:
model.set_active_adapters(None)
# Language adapter first, task adapter second - the same order the
# per-language multilingual loop uses. The adapters are applied in list
# order, so the previous ["task_lang_2lcls", "english_adapter"] ran them in
# the opposite order to that loop.
# NOTE(review): confirm this matches the order used in training.
model.set_active_adapters(["english_adapter", "task_lang_2lcls"])
print(model.adapter_summary())

# Replace the classifier with a two-layer head of the same shape.
# NOTE(review): these layers are created fresh here and no statement in this
# cell loads trained weights into them before predicting - confirm the head
# is not left randomly initialised.
hidden = model.config.hidden_size
model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)

model.eval()

predict_align_model(
    model=model,
    input_folders=input_pan,
    output_dir="predictions/predictions_parallelseqbn_lang_2lcls",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
english_adapter          bottleneck        1,936,032       0.631       1       1
danish_adapter           bottleneck        1,936,032       0.631       0       1
italian_adapter          bottleneck        1,936,032       0.631       0       1
polish_adapter           bottleneck        1,936,032       0.631       0       1
task                     bottleneck        1,639,968       0.534       0       1
task_2lcls               bottleneck        1,639,968       0.534       0       1
task_lang                bottleneck        1,639,968       0.534       0       1
task_lang_2lcls          bottleneck        1,639,968       0.534       1       1
--------------------------------------------------------------------------------
Full model                               306,939,648     100.000               1
Predictions written to predi

### ParallelSeqBn 2lcls multilingual


In [41]:
model.set_active_adapters(None)
# Language adapter first, task adapter second - the same order the
# per-language multilingual loop uses. The adapters are applied in list
# order, so the previous ["task_lang_2lcls", "english_adapter"] ran them in
# the opposite order to that loop.
# NOTE(review): the English adapter is applied to every language in this
# cell - confirm that is intended and that the order matches training.
model.set_active_adapters(["english_adapter", "task_lang_2lcls"])
print(model.adapter_summary())

# Replace the classifier with a two-layer head of the same shape.
# NOTE(review): these layers are created fresh here and no statement in this
# cell loads trained weights into them before predicting - confirm the head
# is not left randomly initialised.
hidden = model.config.hidden_size
model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)

model.eval()

predict_align_model(
    model=model,
    input_folders=input_multilingual,
    output_dir="predictions/multilingual_predictions_parallelseqbn_lang_2lcls",
    tokenizer=tokenizer,
    max_length=128
)

There are adapters available but none are activated for the forward pass.


Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
english_adapter          bottleneck        1,936,032       0.631       1       1
danish_adapter           bottleneck        1,936,032       0.631       0       1
italian_adapter          bottleneck        1,936,032       0.631       0       1
polish_adapter           bottleneck        1,936,032       0.631       0       1
task                     bottleneck        1,639,968       0.534       0       1
task_2lcls               bottleneck        1,639,968       0.534       0       1
task_lang                bottleneck        1,639,968       0.534       0       1
task_lang_2lcls          bottleneck        1,639,968       0.534       1       1
--------------------------------------------------------------------------------
Full model                               306,939,648     100.000               1
Predictions written to predi

### ParallelSeqBn 2lcls multilingual + language adapters

In [None]:
# Dataset language -> name the language adapter was loaded under.
language_adapters = {
    "DANISH": "danish_adapter",
    "ENGLISH": "english_adapter",
    "ITALIAN": "italian_adapter",
    "POLISH": "polish_adapter"
}

# Replace the classifier with a two-layer head once, before the loop.
# NOTE(review): these layers are created fresh here and no statement in this
# cell loads trained weights into them before predicting - confirm the head
# is not left randomly initialised.
hidden = model.config.hidden_size
model.classifier = nn.Sequential(
    nn.Linear(hidden, hidden),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(hidden, 2)
)


# Per language: reset the active setup, activate the matching language
# adapter together with the two-layer-head task adapter, then predict on
# that language's folder only.
for lang, adapter_name in language_adapters.items():
    model.set_active_adapters(None)
    model.set_active_adapters([adapter_name, "task_lang_2lcls"])

    model.eval()

    # max_length is not passed, so predict_align_model's default applies.
    predict_align_model(
        model=model,
        input_folders={lang: f"Data/reddit_dataset_new_{lang}"},
        output_dir=f"predictions/parallelseqbn_2lcls_lang_adapter_predictions",
        tokenizer=tokenizer
    )

There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_2lcls_lang_adapter_predictions/DANISH


There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_2lcls_lang_adapter_predictions/ENGLISH


There are adapters available but none are activated for the forward pass.


Predictions written to predictions/parallelseqbn_2lcls_lang_adapter_predictions/ITALIAN
Predictions written to predictions/parallelseqbn_2lcls_lang_adapter_predictions/POLISH
