In [1]:
from common import *
import os
import json
import string
from dataclasses import dataclass
from enum import Enum
from datetime import datetime
import wandb
from wandb import AlertLevel
import numpy as np
import pandas as pd
import evaluate
import datasets
from datasets import Dataset
from transformers import (
    BartTokenizerFast,
    BartForConditionalGeneration,
    LEDForConditionalGeneration,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
)
from lsg_converter import (
    LSGBartForConditionalGeneration,
    LSGConverter,
)

class ModelSize(Enum):
    """Size variant of the checkpoint under test ("base" or "large")."""
    BASE = "base"
    LARGE = "large"

class ModelType(Enum):
    """Architecture of the model under test; each comment names the class it is loaded with."""
    BART = "BART"  # BartForConditionalGeneration
    LED = "LED"    # LEDForConditionalGeneration
    LSG = "LSG"    # LSGBartForConditionalGeneration

class DomainAdaptation(Enum):
    """Which domain-adaptation variant, if any, the checkpoint under test went through."""
    NO = "no"
    SHORT = "short"  # on BART
    LONG = "long"  # on LED/LSG

class FinetuningTrainSetType(Enum):
    """Dataset the checkpoint under test was finetuned on (NONE means not finetuned)."""
    NONE = "none"
    STANDARD = "standard"  # "e-caste/mitocw_yale-lectures_summarization"
    EXTENDED = "extended"  # "e-caste/mitocw_yale-lectures_summarization_extended"

class ConsiderAsTestSet(Enum):
    """Which splits are pooled as evaluation data when the test set is grouped (non-default)."""
    # NOTE(review): the value mentions "dev", but the notebook only concatenates the
    # 'train' and 'test' splits for ALL — confirm whether a dev split is expected.
    ALL = "dev+test+train"
    TEST_ONLY = "test-only"

class TestSetType(Enum):
    """How the evaluation data is split into test sets.

    DEFAULT uses the 'test' split as a single test set; every other member
    produces one test set per group of a metadata column.
    """
    DEFAULT = "default"
    COURSES = "courses"  # grouped by "course_title"
    CATEGORIES = "categories"  # grouped by "course_category"
    MACROCATEGORIES = "macrocategories"  # grouped by "course_macrocategory"
    WRITING_PERCENTILES = "writingpercentiles"  # grouped by "courses_writing_frequency"


@dataclass
class TestModel:
    """Description of the checkpoint to evaluate; used to select which model is loaded."""
    model_size: ModelSize  # base or large
    model_type: ModelType  # BART, LED or LSG
    domain_adaptation: DomainAdaptation  # none, short or long
    finetuning_train_set: FinetuningTrainSetType  # none, standard or extended

@dataclass
class TestRunConfig:
    """Full configuration of one test run: the model plus how the test data is built."""
    test_model: TestModel  # checkpoint under test
    consider_as_test_set: ConsiderAsTestSet  # splits pooled for the grouped test sets
    test_set: TestSetType  # default test split or a per-group partition


# Used as the W&B project name and as the local directory for saved predictions.
current_notebook_name = "BART_LBARTO_models_test"
# Padding/truncation length of the tokenized inputs (lowered to 1024 for plain BART in a later cell).
max_input_length = 16384
# Padding/truncation length of the tokenized targets; also the maximum generation length.
max_output_length = 384

### Define test run configuration

In [2]:
# Checkpoint to evaluate.
test_model = TestModel(
    model_size=ModelSize.BASE,
    model_type=ModelType.LSG,
    domain_adaptation=DomainAdaptation.LONG,
    finetuning_train_set=FinetuningTrainSetType.STANDARD,
)

# Data the checkpoint is evaluated on.
test_run_config = TestRunConfig(
    test_model=test_model,
    test_set=TestSetType.DEFAULT,
    consider_as_test_set=ConsiderAsTestSet.TEST_ONLY,
)

# Run name: model description, test data description and start time (second resolution).
run_name = "_".join([
    f"{test_model.model_type.value}-{test_model.model_size.value}",
    f"DA={test_model.domain_adaptation.value}",
    f"trainDS={test_model.finetuning_train_set.value}",
    f"considerTestDS={test_run_config.consider_as_test_set.value}",
    f"testDS={test_run_config.test_set.value}",
    datetime.now().strftime("%Y-%m-%d_%H:%M:%S"),
])
run_name

'LSG-base_DA=long_trainDS=standard_considerTestDS=test-only_testDS=default_2023-02-19_18:27:24'

In [3]:
def get_model_and_tokenizer_from_wandb(wandb_run, model_id: str, model_class):
    """Download a W&B model artifact and load the model and tokenizer stored in it.

    Args:
        wandb_run: active W&B run used to declare and fetch the artifact.
        model_id: artifact identifier, e.g. "entity/project/model-xxxx:v0".
        model_class: class whose ``from_pretrained`` loads the model weights.

    Returns:
        A ``(model, tokenizer, artifact_dir)`` tuple, where ``artifact_dir`` is
        the local directory the artifact was downloaded to.
    """
    artifact_dir = wandb_run.use_artifact(model_id, type="model").download()
    # the tokenizer is always loaded as a BART fast tokenizer, whatever the model class
    loaded_tokenizer = BartTokenizerFast.from_pretrained(artifact_dir)
    loaded_model = model_class.from_pretrained(artifact_dir)
    return loaded_model, loaded_tokenizer, artifact_dir

In [4]:
# Plain BART is evaluated on 1024-token inputs; every other model type keeps the value set earlier.
max_input_length = 1024 if test_model.model_type == ModelType.BART else max_input_length

In [5]:
def get_bart_converted_to_lsg(model_name_or_path):
    """Convert a BART checkpoint to its LSG counterpart.

    Args:
        model_name_or_path: checkpoint name or local directory handed to the converter.

    Returns:
        The ``(model, tokenizer)`` pair produced by the conversion.
    """
    converter_options = dict(
        max_sequence_length=max_input_length,
        random_global_init=False,
        global_positional_stride=64,
        keep_first_global_token=False,
        resize_lsg=False,
        use_token_ids=True,
        use_auth_token=False,
        seed=42,
    )
    # these params are from: https://github.com/ccdv-ai/convert_checkpoint_to_lsg/blob/main/lsg_converter/bart/modeling_lsg_bart.py
    lsg_options = dict(
        adaptive=True,
        base_model_prefix="lsg",
        block_size=128,
        lsh_num_pre_rounds=1,
        mask_first_token=False,
        num_global_tokens=512,  # this is the only non-default param, to "improve the flow of information inside the model"
        pass_global_tokens_to_decoder=True,
        pool_with_global=True,
        sparse_block_size=128,
        sparsity_factor=2,
        sparsity_type="norm",
    )

    lsg_converter = LSGConverter(**converter_options)
    converted_model, converted_tokenizer = lsg_converter.convert_from_pretrained(
        model_name_or_path=model_name_or_path,
        architecture="BartForConditionalGeneration",
        use_auth_token=False,
        **lsg_options,
    )
    return converted_model, converted_tokenizer

Choose the appropriate trained model from the Hugging Face Hub or Weights & Biases based on the `TestModel` configuration.

In [6]:
# Short aliases that keep the checkpoint table readable.
_S, _T, _DA, _FT = ModelSize, ModelType, DomainAdaptation, FinetuningTrainSetType
_DA_PROJECT = "e-caste/BART_DLM_domain_adaptation"
_FT_PROJECT = "e-caste/LBARTO_summarization_finetuning"

# Checkpoints stored as W&B artifacts, keyed by the full TestModel configuration:
#   (size, type, domain adaptation, finetuning set)
#       -> (artifact id, class used to load it, convert the loaded BART to LSG afterwards)
WANDB_CHECKPOINTS = {
    # BART with domain adaptation, not finetuned
    (_S.BASE, _T.BART, _DA.SHORT, _FT.NONE): (f"{_DA_PROJECT}/model-2evbbo65:v0", BartForConditionalGeneration, False),
    (_S.LARGE, _T.BART, _DA.SHORT, _FT.NONE): (f"{_DA_PROJECT}/model-30s865ap:v0", BartForConditionalGeneration, False),
    # LSG with domain adaptation, not finetuned: domain-adapted BART converted to LSG
    (_S.BASE, _T.LSG, _DA.SHORT, _FT.NONE): (f"{_DA_PROJECT}/model-2evbbo65:v0", BartForConditionalGeneration, True),
    (_S.LARGE, _T.LSG, _DA.SHORT, _FT.NONE): (f"{_DA_PROJECT}/model-30s865ap:v0", BartForConditionalGeneration, True),
    # BART with domain adaptation, finetuned
    (_S.BASE, _T.BART, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-20nyyrmg:v0", BartForConditionalGeneration, False),
    (_S.LARGE, _T.BART, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-2lsncict:v0", BartForConditionalGeneration, False),
    # LSG without domain adaptation, finetuned on standard
    (_S.BASE, _T.LSG, _DA.NO, _FT.STANDARD): (f"{_FT_PROJECT}/model-2uink6pj:v0", LSGBartForConditionalGeneration, False),
    (_S.LARGE, _T.LSG, _DA.NO, _FT.STANDARD): (f"{_FT_PROJECT}/model-125uzjof:v0", LSGBartForConditionalGeneration, False),
    # LED with domain adaptation, finetuned
    (_S.BASE, _T.LED, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-1sqtisc9:v0", LEDForConditionalGeneration, False),
    (_S.LARGE, _T.LED, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-3sj8ow53:v0", LEDForConditionalGeneration, False),
    # LSG with domain adaptation, finetuned
    (_S.BASE, _T.LSG, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-2rsq9s8l:v0", LSGBartForConditionalGeneration, False),
    (_S.LARGE, _T.LSG, _DA.SHORT, _FT.STANDARD): (f"{_FT_PROJECT}/model-2zpjuhkv:v0", LSGBartForConditionalGeneration, False),
    # LSG with domain adaptation, finetuned on extended
    (_S.BASE, _T.LSG, _DA.SHORT, _FT.EXTENDED): (f"{_FT_PROJECT}/model-2y5per63:v0", LSGBartForConditionalGeneration, False),
    (_S.LARGE, _T.LSG, _DA.SHORT, _FT.EXTENDED): (f"{_FT_PROJECT}/model-17jvyuxj:v0", LSGBartForConditionalGeneration, False),
    # LSG with long domain adaptation, finetuned
    (_S.BASE, _T.LSG, _DA.LONG, _FT.STANDARD): (f"{_FT_PROJECT}/model-39i2bqy5:v0", LSGBartForConditionalGeneration, False),
}

config_key = (
    test_model.model_size,
    test_model.model_type,
    test_model.domain_adaptation,
    test_model.finetuning_train_set,
)
# facebook/bart-{base,large} used as is (BART) or converted to LSG, with no
# domain adaptation and no finetuning
uses_pretrained_bart = (
    test_model.model_type in (ModelType.BART, ModelType.LSG)
    and test_model.domain_adaptation == DomainAdaptation.NO
    and test_model.finetuning_train_set == FinetuningTrainSetType.NONE
)

# Fail before a W&B run is created: without this check an unsupported
# configuration would leave `model`/`tokenizer` undefined, or silently reuse the
# ones left in the kernel by a previous execution.
if not uses_pretrained_bart and config_key not in WANDB_CHECKPOINTS:
    raise ValueError(f"No checkpoint is registered for this configuration: {test_model}")

run = wandb.init(
    entity="e-caste",
    project=current_notebook_name,
    name=run_name,
)

if uses_pretrained_bart:
    model_id = f"facebook/bart-{'large' if test_model.model_size == ModelSize.LARGE else 'base'}"
    if test_model.model_type == ModelType.BART:
        tokenizer = BartTokenizerFast.from_pretrained(model_id)
        model = BartForConditionalGeneration.from_pretrained(model_id)
    else:
        model, tokenizer = get_bart_converted_to_lsg(model_id)
else:
    model_id, model_class, convert_to_lsg = WANDB_CHECKPOINTS[config_key]
    model, tokenizer, artifact_dir = get_model_and_tokenizer_from_wandb(run, model_id, model_class)
    if convert_to_lsg:
        model, tokenizer = get_bart_converted_to_lsg(artifact_dir)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33me-caste[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact model-39i2bqy5:v0, 626.88MB. 9 files... 
[34m[1mwandb[0m:   9 of 9 files downloaded.  
Done. 0:0:0.4


Preprocess the test dataset based on the `TestRunConfig` configuration.

In [7]:
# Input column: base transcripts for models finetuned on the standard set (or not
# finetuned at all), "special" transcripts for models finetuned on the extended set.
if test_model.finetuning_train_set in (FinetuningTrainSetType.STANDARD, FinetuningTrainSetType.NONE):
    text_column = "transcript_base"
elif test_model.finetuning_train_set == FinetuningTrainSetType.EXTENDED:
    text_column = "transcript_special"

# Reference summaries always come from the same column.
target_column = "description"

print(text_column, target_column)

transcript_base description


In [8]:
def process_data_to_model_inputs(batch):
    """Tokenize a batch of transcripts and target summaries into model inputs.

    Reads the notebook-level ``tokenizer``, ``text_column``, ``target_column``,
    ``max_input_length``, ``max_output_length`` and ``test_model``.

    Args:
        batch: batch of examples (column name -> list of values) as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The same ``batch`` with ``input_ids``, ``attention_mask`` and ``labels``
        added, plus ``global_attention_mask`` when the model type is LED.
    """
    # tokenize the inputs and labels, padded/truncated to fixed lengths
    inputs = tokenizer(
        batch[text_column],
        padding="max_length",
        truncation=True,
        max_length=max_input_length,
    )
    outputs = tokenizer(
        batch[target_column],
        padding="max_length",
        truncation=True,
        max_length=max_output_length,
    )

    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask

    if test_model.model_type == ModelType.LED:
        # Global attention on the first token only. One independent list is built
        # per sample, instead of relying on every row aliasing the same list, and
        # an empty batch no longer raises on `[0]`.
        batch["global_attention_mask"] = [
            [1 if position == 0 else 0 for position in range(len(input_ids))]
            for input_ids in batch["input_ids"]
        ]

    # We have to make sure that the PAD token is ignored
    batch["labels"] = [
        [-100 if token == tokenizer.pad_token_id else token for token in labels]
        for labels in outputs.input_ids
    ]

    return batch

In [9]:
def tokenize_datasets(raw_datasets, test_model):
    """Tokenize a dataset, or every split of a dataset dict, dropping the raw columns.

    Args:
        raw_datasets: a ``datasets.DatasetDict`` (all splits are tokenized) or a
            single ``datasets.Dataset``.
        test_model: unused; kept so existing calls keep working.

    Returns:
        An object of the same kind as ``raw_datasets`` holding only the columns
        produced by ``process_data_to_model_inputs``.
    """
    if isinstance(raw_datasets, datasets.DatasetDict):
        # Same column list for every split: taken from 'train' when present (as
        # before), otherwise from the first available split.
        if "train" in raw_datasets:
            reference_split = raw_datasets["train"]
        else:
            reference_split = next(iter(raw_datasets.values()))
        columns_to_remove = reference_split.column_names
    else:
        # Single Dataset: avoids the `"train" in dataset` membership test, which
        # is not a key lookup on a Dataset.
        columns_to_remove = raw_datasets.column_names

    return raw_datasets.map(
        process_data_to_model_inputs,
        batched=True,
        remove_columns=columns_to_remove,
    )

In [10]:
# this is a testing-specific dataset containing both standard and extended
# transcripts and all metadata
# Testing-specific dataset: it holds both the standard and the extended
# transcripts together with all the metadata columns.
dataset_id = "e-caste/mitocw_yale-lectures_summarization_testing"
raw_datasets = datasets.load_dataset(dataset_id, use_auth_token=True)

# Seq2seq collator bound to the loaded tokenizer and model.
data_collator = DataCollatorForSeq2Seq(model=model, tokenizer=tokenizer)

raw_datasets

Using custom data configuration e-caste--mitocw_yale-lectures_summarization_testing-40cfe2476154b421
Found cached dataset csv (/home/caste/.cache/huggingface/datasets/e-caste___csv/e-caste--mitocw_yale-lectures_summarization_testing-40cfe2476154b421/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'from_subset', 'course_title', 'course_url', 'course_macrocategory', 'course_category', 'course_fraction_writing', 'lecture_title', 'lecture_fraction_writing', 'transcript_base', 'transcript_base_length_tokens', 'transcript_special', 'transcript_special_length_tokens', 'description', 'description_length_tokens', 'lectures_writing_frequency', 'courses_writing_frequency'],
        num_rows: 1424
    })
    test: Dataset({
        features: ['Unnamed: 0', 'from_subset', 'course_title', 'course_url', 'course_macrocategory', 'course_category', 'course_fraction_writing', 'lecture_title', 'lecture_fraction_writing', 'transcript_base', 'transcript_base_length_tokens', 'transcript_special', 'transcript_special_length_tokens', 'description', 'description_length_tokens', 'lectures_writing_frequency', 'courses_writing_frequency'],
        num_rows: 159
    })
})

In [11]:
# Metadata column used to split the evaluation data for each grouped test set type.
GROUP_BY_COLUMN = {
    TestSetType.COURSES: "course_title",
    TestSetType.CATEGORIES: "course_category",
    TestSetType.MACROCATEGORIES: "course_macrocategory",
    TestSetType.WRITING_PERCENTILES: "courses_writing_frequency",
}


def _build_test_set(test_set_id, raw_dataset):
    """Bundle a raw dataset with its tokenized version under the given id."""
    return {
        'id': test_set_id,
        'raw_dataset': raw_dataset,
        'tokenized_dataset': tokenize_datasets(raw_dataset, test_model),
    }


if test_run_config.test_set == TestSetType.DEFAULT:
    # Only the 'test' split is tokenized; the 'train' split is never evaluated here.
    test_sets = [_build_test_set("default", raw_datasets['test'])]

else:
    if test_run_config.consider_as_test_set == ConsiderAsTestSet.ALL:
        df = pd.concat([raw_datasets['train'].to_pandas(), raw_datasets['test'].to_pandas()])
    else:
        df = raw_datasets['test'].to_pandas()

    # One test set per distinct value of the grouping column; each group is
    # converted to a Dataset once and reused for both the raw and tokenized entry.
    group_column = GROUP_BY_COLUMN[test_run_config.test_set]
    test_sets = [
        _build_test_set(group_id, Dataset.from_pandas(group_df))
        for group_id, group_df in df.groupby(group_column)
    ]

test_sets



  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

[{'id': 'default',
  'raw_dataset': Dataset({
      features: ['Unnamed: 0', 'from_subset', 'course_title', 'course_url', 'course_macrocategory', 'course_category', 'course_fraction_writing', 'lecture_title', 'lecture_fraction_writing', 'transcript_base', 'transcript_base_length_tokens', 'transcript_special', 'transcript_special_length_tokens', 'description', 'description_length_tokens', 'lectures_writing_frequency', 'courses_writing_frequency'],
      num_rows: 159
  }),
  'tokenized_dataset': Dataset({
      features: ['input_ids', 'attention_mask', 'labels'],
      num_rows: 159
  })}]

In [12]:
# Metrics used by compute_metrics: ROUGE plus BERTScore.
rouge = evaluate.load("rouge")
# https://huggingface.co/spaces/evaluate-metric/bertscore
bertscore = evaluate.load("bertscore")

In [13]:
def compute_metrics(eval_pred):
    """Compute ROUGE, BERTScore and the mean generated length for a prediction run.

    Reads the notebook-level ``tokenizer``, ``rouge`` and ``bertscore``.

    Args:
        eval_pred: ``(predictions, labels)`` pair of token-id arrays as passed by
            the trainer; ``-100`` marks ignored positions.

    Returns:
        A dict with the ROUGE scores, ``gen_len`` (mean number of non-pad tokens
        per prediction) and ``bertscore_<name>`` averaged over the samples.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        # some models return extra outputs next to the generated ids
        predictions = predictions[0]

    # -100 cannot be decoded: swap it for the pad token. np.where builds new
    # arrays, so the arrays owned by the caller are left untouched.
    pad_token_id = tokenizer.pad_token_id
    predictions = np.where(predictions != -100, predictions, pad_token_id)
    labels = np.where(labels != -100, labels, pad_token_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    rouge_result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=False,  # no Porter stemmer
    )

    bertscore_result = bertscore.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        lang="en",
        # see here for best models: https://docs.google.com/spreadsheets/d/1RKOVpselB98Nnh_EOC4A2BYn8_201tmPODpNWu4w7xI
        model_type="microsoft/deberta-xlarge-mnli",  # NOTE: this takes ~5GB of memory at the end of the first evaluation
        idf=False,
        rescale_with_baseline=False,
        use_fast_tokenizer=True,
        batch_size=1,
        nthreads=1,
        verbose=False,
        device="cuda",
    )

    result = dict(rouge_result)
    prediction_lens = [np.count_nonzero(pred != pad_token_id) for pred in predictions]
    result['gen_len'] = np.mean(prediction_lens)
    for name, values in bertscore_result.items():
        # "hashcode" is a string identifying the scorer setup, not a score
        if name != "hashcode":
            result[f'bertscore_{name}'] = np.mean(values)

    return result

In [14]:
def compute_test_predictions(
    run_name: str,
    trainer: Seq2SeqTrainer,
    test_set_id: str,
    tokenized_test_set: Dataset,
    raw_test_set: Dataset,
    print_metrics: bool = True,
    save_predictions_locally: bool = True,
    upload_saved_predictions_to_wandb: bool = True,
):
    """Run prediction on one test set, save its metrics and, optionally, its predictions.

    Reads the notebook-level ``tokenizer``, ``target_column`` and
    ``current_notebook_name``.

    Args:
        run_name: name of the test run, used in the predictions file name.
        trainer: trainer wrapping the model under test.
        test_set_id: file-name-safe identifier of the test set.
        tokenized_test_set: model inputs for the test set.
        raw_test_set: untokenized test set, in the same order, providing the
            reference summaries.
        print_metrics: log the metrics through the trainer.
        save_predictions_locally: write predicted/original summary pairs to a
            JSON file under the ``current_notebook_name`` directory.
        upload_saved_predictions_to_wandb: upload that JSON file to W&B; only
            has an effect when ``save_predictions_locally`` is true.

    Returns:
        The metrics dict from ``trainer.predict`` with ``test_samples`` added.
    """
    results = trainer.predict(tokenized_test_set)
    metrics = results.metrics
    metrics["test_samples"] = len(tokenized_test_set)
    if print_metrics:
        trainer.log_metrics("test", metrics)
    # NOTE(review): the split name is always "test", so with several test sets this
    # presumably overwrites the same metrics file each time — confirm.
    trainer.save_metrics("test", metrics)

    if save_predictions_locally:
        generated_ids = results.predictions
        if isinstance(generated_ids, tuple):
            generated_ids = generated_ids[0]
        # -100 cannot be decoded: swap it for the pad token, which is then dropped
        # together with the other special tokens.
        generated_ids = np.where(generated_ids != -100, generated_ids, tokenizer.pad_token_id)
        decoded_predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

        predictions = [
            {'predicted': predicted, 'original': original[target_column]}
            for predicted, original in zip(decoded_predictions, raw_test_set)
        ]

        os.makedirs(current_notebook_name, exist_ok=True)
        predictions_file_name = f"{current_notebook_name}/predictions_{run_name}_{test_set_id}.json"
        with open(predictions_file_name, "w") as f:
            json.dump(predictions, f)

        if upload_saved_predictions_to_wandb:
            wandb.save(predictions_file_name, base_path=current_notebook_name, policy="now")

    return metrics

In [15]:
# model config
model.config.use_cache = False

# set generate hyperparameters
# https://huggingface.co/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig
model.config.max_length = max_output_length  # Maximum length that will be used by default in the generate method of the model.
model.config.min_length = 32  # 32 from Yale+MITOCW stats. Minimum length that will be used by default in the generate method of the model.
model.config.length_penalty = 2.0  # Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log likelihood of the sequence (i.e. negative), length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences.
model.config.no_repeat_ngram_size = 3  # Value that will be used by default in the — generate method of the model for no_repeat_ngram_size. If set to int > 0, all ngrams of that size can only occur once.
# make evaluation faster
model.config.num_beams = 5  # Number of beams for beam search that will be used by default in the generate method of the model. 1 means no beam search.
model.config.early_stopping = True  # Flag that will be used by default in the generate method of the model. Whether to stop the beam search when at least num_beams sentences are finished per batch or not.

model.config

LSGBartConfig {
  "_name_or_path": "./artifacts/model-39i2bqy5:v0",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "adaptive": true,
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "LSGBartForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "auto_map": {
    "AutoModel": "modeling_lsg_bart.LSGBartModel",
    "AutoModelForCausalLM": "modeling_lsg_bart.LSGBartForCausalLM",
    "AutoModelForQuestionAnswering": "modeling_lsg_bart.LSGBartForQuestionAnswering",
    "AutoModelForSeq2SeqLM": "modeling_lsg_bart.LSGBartForConditionalGeneration",
    "AutoModelForSequenceClassification": "modeling_lsg_bart.LSGBartForSequenceClassification"
  },
  "base_model_prefix": "lsg",
  "block_size": 128,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "drop

In [16]:
# Prediction-only setup: outputs are produced with generate(), one sample per device at a time.
training_args = Seq2SeqTrainingArguments(
    output_dir=f"/tmp/{run.name}",  # per-run output directory under /tmp
    predict_with_generate=True,
    generation_max_length=max_output_length,
    per_device_eval_batch_size=1,
)
# No datasets are attached: the trainer is only used to call predict() on each test set.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
)

In [17]:
#  Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
def print_markdown_metrics(metrics, test_set_id):
    """Print the metrics of one test set as a row of the Markdown results table.

    Reads the notebook-level ``test_run_config`` and ``test_model``; for the
    CATEGORIES and COURSES test sets the id is annotated through the
    ``cat2macro`` / ``macrocategories`` / ``categories`` lookups.

    Args:
        metrics: metrics dict holding the ``test_*`` keys of a prediction run.
        test_set_id: identifier of the test set the metrics belong to.
    """
    if test_run_config.test_set == TestSetType.CATEGORIES:
        test_set_id = f"{test_set_id} ({cat2macro[test_set_id]})"
    elif test_run_config.test_set == TestSetType.COURSES:
        test_set_id = f"{test_set_id} ({macrocategories[test_set_id]}->{categories[test_set_id]})"  # TODO: subset?

    # descriptive columns first, in table order
    cells = [
        f"{test_model.model_size.value}",
        f"{test_model.model_type.value}",
        f"{test_model.domain_adaptation.value}",
        f"{test_model.finetuning_train_set.value}",
        f"{test_set_id}",
    ]
    # then the metric columns, all printed with 4 decimals
    metric_columns = (
        'test_loss',
        'test_rouge1',
        'test_rouge2',
        'test_rougeL',
        'test_rougeLsum',
        'test_gen_len',
        'test_bertscore_f1',
        'test_bertscore_precision',
        'test_bertscore_recall',
    )
    for column in metric_columns:
        cells.append(f"{metrics[column]:.4f}")

    print("| " + " | ".join(cells) + " |")

In [18]:
def sanitize_test_set_id(name: str) -> str:
    """Turn a test set id into a string that is safe to use in a file name.

    ASCII letters and digits are kept, "&" becomes "and", any bracket
    character becomes "+", and every other character becomes "-".

    Args:
        name: test set id to sanitize.

    Returns:
        The sanitized id.
    """
    kept_characters = set(string.ascii_letters) | set(string.digits)
    bracket_characters = set("()[]{}")

    def _replacement(character: str) -> str:
        if character in kept_characters:
            return character
        if character == "&":
            return "and"
        return "+" if character in bracket_characters else "-"

    return "".join(_replacement(character) for character in name)

In [19]:
# Shared pieces of both alert messages. Ids are converted to text up front so the
# error handler cannot fail on a non-string id and hide the original exception.
test_set_names = ', '.join(str(s['id']) for s in test_sets)
details_url = f"https://wandb.ai/e-caste/{current_notebook_name}"

try:
    metrics_list = []
    for test_set in test_sets:
        print(f"Testing {run_name} on test set: {test_set['id']}")
        metrics_list.append(
            compute_test_predictions(
                run_name=run_name,
                trainer=trainer,
                test_set_id=sanitize_test_set_id(str(test_set['id'])),
                tokenized_test_set=test_set['tokenized_dataset'],
                raw_test_set=test_set['raw_dataset'],
                save_predictions_locally=True,
                upload_saved_predictions_to_wandb=True,
            )
        )
        print_markdown_metrics(metrics_list[-1], test_set['id'])
        print("=" * 30)

    # print the MarkDown table automatically
    for metrics, test_set in zip(metrics_list, test_sets):
        print_markdown_metrics(metrics, test_set['id'])

    wandb.alert(
        title="Test finished",
        text=f"Test name: {run_name}.\n\nTest sets: {test_set_names}\n\nSee details at {details_url}",
        level=AlertLevel.INFO,
    )

except Exception as e:
    wandb.alert(
        title="Error running test",
        text=f"Test name: {run_name}.\n\nException: {e}\n\nTest sets: {test_set_names}\n\nSee details at {details_url}",
        level=AlertLevel.ERROR,
    )
    # bare raise re-raises the active exception with its original traceback
    raise

wandb.finish()

***** Running Prediction *****
  Num examples = 159
  Batch size = 1


Testing LSG-base_DA=long_trainDS=standard_considerTestDS=test-only_testDS=default_2023-02-19_18:27:24 on test set: default


loading configuration file config.json from cache at /home/caste/.cache/huggingface/hub/models--microsoft--deberta-xlarge-mnli/snapshots/5b07a9086c1dbb79981ff7b05b4d1ad83b3af51c/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-xlarge-mnli",
  "architectures": [
    "DebertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "CONTRADICTION",
    "1": "NEUTRAL",
    "2": "ENTAILMENT"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "CONTRADICTION": 0,
    "ENTAILMENT": 2,
    "NEUTRAL": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 48,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pooling": {
    "dropout": 0,
    

***** test metrics *****
  test_bertscore_f1        =     0.6284
  test_bertscore_precision =     0.6383
  test_bertscore_recall    =     0.6212
  test_gen_len             =     77.044
  test_loss                =     2.7872
  test_rouge1              =      0.338
  test_rouge2              =     0.1082
  test_rougeL              =     0.2203
  test_rougeLsum           =     0.2201
  test_runtime             = 0:24:08.91
  test_samples             =        159
  test_samples_per_second  =       0.11
  test_steps_per_second    =       0.11
| base | LSG | long | standard | default | 2.7872 | 0.3380 | 0.1082 | 0.2203 | 0.2201 | 77.0440 | 0.6284 | 0.6383 | 0.6212 |
| base | LSG | long | standard | default | 2.7872 | 0.3380 | 0.1082 | 0.2203 | 0.2201 | 77.0440 | 0.6284 | 0.6383 | 0.6212 |


VBox(children=(Label(value='0.141 MB of 0.141 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

### Results

For all tests: `num_beams=5` and `length_penalty=2.0`.  
Domain adaptation has been run with `lr=1e-4` for both BART-base and BART-large.  
Base models have been finetuned with `lr=7.5e-5`, large models with `lr=5e-5`.

#### Table 1: effect of domain adaptation
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | BART | no | none | default | 4.2768 | 0.1703 | 0.0326 | 0.1031 | 0.1032 | 384.0 | 0.4967 | 0.4445 | 0.5673 |
| base | BART | yes | none | default | 5.9857 | 0.1734 | 0.0307 | 0.099 | 0.0993 | 384.0 | 0.5094 | 0.4604 | 0.5731 |
| large | BART | no | none | default | 3.9704 | 0.1761 | 0.0344 | 0.104 | 0.1038 | 384.0 | 0.5192 | 0.4691 | 0.5848 |
| large | BART | yes | none | default | 4.3846 | 0.1727 | 0.0311 | 0.1011 | 0.1011 | 384.0 | 0.507 | 0.4581 | 0.5714 |
| base | LSG | no | none | default | 4.5163 | 0.1472 | 0.024 | 0.0943 | 0.0942 | 384.0 | 0.4458 | 0.399 | 0.5107 |
| base | LSG | yes | none | default | 5.072 | 0.1739 | 0.0275 | 0.0979 | 0.098 | 384.0 | 0.5122 | 0.4628 | 0.5771 |
| base | LSG | no | standard | default | 2.8357 | 0.3512 | 0.1113 | 0.2284 | 0.2279 | 85.4465 | 0.6303 | 0.6365 | 0.6266 |
| base | LSG | yes | standard | default | 2.8565 | 0.3351 | 0.1058 | 0.2185 | 0.2184 | 77.3836 | 0.6265 | 0.6376 | 0.6182 |
| base | LSG | long | standard | default | 2.7872 | 0.3380 | 0.1082 | 0.2203 | 0.2201 | 77.0440 | 0.6284 | 0.6383 | 0.6212 |
| large | LSG | no | none | default | 3.9274 | 0.1634 | 0.0282 | 0.0991 | 0.0989 | 384.0 | 0.4997 | 0.4543 | 0.5588 |
| large | LSG | yes | none | default | 4.4562 | 0.1699 | 0.0293 | 0.0996 | 0.0998 | 384.0 | 0.5123 | 0.4654 | 0.573 |
| large | LSG | no | standard | default | 2.6532 | 0.3483 | 0.1148 | 0.2312 | 0.2310 | 80.4151 | 0.6422 | 0.6525 | 0.6350 |
| large | LSG | yes | standard | default | 2.6596 | 0.3455 | 0.1161 | 0.2261 | 0.2258 | 79.4277 | 0.6384 | 0.6496 | 0.6306 |

Finetuned models with domain adaptation perform slightly worse on most metrics than those without it (see Table 1). Hypotheses:
1. pad token insertion during DLM training
2. DLM was run in fp32, while finetuning was run in fp16
3. should the model class used for pretraining be something other than `...ForConditionalGeneration`?

#### Table 2: effect of long attention
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | BART | yes | standard | default | 3.2257 | 0.3161 | 0.0928 | 0.213 | 0.2128 | 77.8868 | 0.6092 | 0.6181 | 0.6028 |
| large | BART | yes | standard | default | 3.0427 | 0.3215 | 0.0993 | 0.2159 | 0.2162 | 96.8365 | 0.6192 | 0.622 | 0.6191 |
| base | LED | yes | standard | default | 2.9161 | 0.3489 | 0.115 | 0.2267 | 0.2267 | 80.7484 | 0.629 | 0.6373 | 0.6231 |
| large | LED | yes | standard | default | 2.8144 | 0.3448 | 0.1144 | 0.2322 | 0.2324 | 63.2390 | 0.6431 | 0.6632 | 0.6265 |
| base | LSG | yes | standard | default | 2.8565 | 0.3351 | 0.1058 | 0.2185 | 0.2184 | 77.3836 | 0.6265 | 0.6376 | 0.6182 |
| large | LSG | yes | standard | default | 2.6596 | 0.3455 | 0.1161 | 0.2261 | 0.2258 | 79.4277 | 0.6384 | 0.6496 | 0.6306 |

#### Table 3: effect of extended dataset
**Table 3a: default test set**

| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | default | 2.8565 | 0.3351 | 0.1058 | 0.2185 | 0.2184 | 77.3836 | 0.6265 | 0.6376 | 0.6182 |
| base | LSG | yes | extended | default | 2.88 | 0.3367 | 0.102 | 0.2155 | 0.2158 | 76.5597 | 0.6297 | 0.6411 | 0.6211 |
| large | LSG | yes | standard | default | 2.6596 | 0.3455 | 0.1161 | 0.2261 | 0.2258 | 79.4277 | 0.6384 | 0.6496 | 0.6306 |
| large | LSG | yes | extended | default | 2.6835 | 0.3481 | 0.1111 | 0.2251 | 0.2248 | 86.7987 | 0.6378 | 0.6431 | 0.6352 |

Note: the rows with the `standard` finetuning training set are the same as in Table 2.

##### **Considering only the test split of the dataset as the test set**

**Table 3b: macrocategories test set**  
humanities: N=82  
scientific: N=77

| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | humanities | 2.8763 | 0.3465 | 0.1065 | 0.2124 | 0.2123 | 98.5122 | 0.6165 | 0.6298 | 0.6062 |
| base | LSG | yes | extended | humanities | 2.8935 | 0.354 | 0.1049 | 0.2139 | 0.2134 | 97.2683 | 0.6207 | 0.6342 | 0.61 |
| large | LSG | yes | standard | humanities | 2.5428 | 0.354 | 0.1126 | 0.221 | 0.2209 | 101.061 | 0.6282 | 0.6423 | 0.6173 |
| large | LSG | yes | extended | humanities | 2.5731 | 0.3533 | 0.1103 | 0.2185 | 0.2185 | 107.439 | 0.6279 | 0.6377 | 0.621 |
| base | LSG | yes | standard | scientific | 2.8355 | 0.3233 | 0.1056 | 0.2249 | 0.2242 | 54.8831 | 0.6372 | 0.6458 | 0.6311 |
| base | LSG | yes | extended | scientific | 2.8656 | 0.3192 | 0.0983 | 0.2177 | 0.2174 | 54.5065 | 0.6392 | 0.6485 | 0.6329 |
| large | LSG | yes | standard | scientific | 2.784 | 0.3346 | 0.1197 | 0.232 | 0.231 | 56.3896 | 0.6492 | 0.6573 | 0.6447 |
| large | LSG | yes | extended | scientific | 2.801 | 0.3435 | 0.1121 | 0.2315 | 0.232 | 64.8181 | 0.6483 | 0.6489 | 0.6503 |

**Table 3c: writing frequency test set**  
frequent writing: N=55  
medium frequent writing: N=46  
infrequent writing: N=58

| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | frequent writing | 2.8713 | 0.3197 | 0.1091 | 0.2187 | 0.2196 | 56.0545 | 0.629 | 0.6421 | 0.6191 |
| base | LSG | yes | extended | frequent writing | 2.8859 | 0.3216 | 0.0961 | 0.2164 | 0.2151 | 56.8182 | 0.6282 | 0.6384 | 0.6213 |
| large | LSG | yes | standard | frequent writing | 2.7452 | 0.3352 | 0.1223 | 0.231 | 0.2312 | 57.7273 | 0.6422 | 0.6546 | 0.634 |
| large | LSG | yes | extended | frequent writing | 2.7716 | 0.3516 | 0.1114 | 0.2344 | 0.2336 | 70.8 | 0.641 | 0.6414 | 0.643 |
| base | LSG | yes | standard | medium frequent writing | 2.8376 | 0.3183 | 0.0939 | 0.2181 | 0.2179 | 68.587 | 0.6346 | 0.6421 | 0.6299 |
| base | LSG | yes | extended | medium frequent writing | 2.8801 | 0.3126 | 0.0936 | 0.2085 | 0.2071 | 64.1739 | 0.6375 | 0.6506 | 0.6278 |
| large | LSG | yes | standard | medium frequent writing | 2.7203 | 0.3268 | 0.0991 | 0.2145 | 0.2141 | 66.087 | 0.6453 | 0.6542 | 0.6396 |
| large | LSG | yes | extended | medium frequent writing | 2.7431 | 0.317 | 0.1024 | 0.2128 | 0.2126 | 71.2174 | 0.6418 | 0.6497 | 0.6377 |
| base | LSG | yes | standard | infrequent writing | 2.8575 | 0.3641 | 0.1131 | 0.2177 | 0.2178 | 104.5862 | 0.6177 | 0.6296 | 0.6082 |
| base | LSG | yes | extended | infrequent writing | 2.8744 | 0.3717 | 0.1138 | 0.2215 | 0.221 | 105.1034 | 0.6248 | 0.636 | 0.6156 |
| large | LSG | yes | standard | infrequent writing | 2.5303 | 0.3688 | 0.1233 | 0.2295 | 0.23 | 110.5862 | 0.6292 | 0.6412 | 0.6201 |
| large | LSG | yes | extended | infrequent writing | 2.5525 | 0.371 | 0.1174 | 0.2258 | 0.2257 | 114.3276 | 0.6315 | 0.6396 | 0.6258 |

**Table 3d: categories test set**  
arts (humanities): N=5  
biology (scientific): N=6  
business (scientific): N=3  
chemistry (scientific): N=0  
computer science (scientific): N=24  
economics (humanities): N=9  
engineering (scientific): N=7  
history (humanities): N=29  
literature (humanities): N=16  
mathematics (scientific): N=18  
philosophy (humanities): N=11  
physics (scientific): N=19  
politics (humanities): N=5  
psychology (humanities): N=2  
social studies (humanities): N=5

| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | arts (humanities) | 2.6795 | 0.3360 | 0.1180 | 0.2658 | 0.2646 | 79.8000 | 0.6396 | 0.6404 | 0.6406 |
| base | LSG | yes | extended | arts (humanities) | 2.7786 | 0.3533 | 0.0946 | 0.2065 | 0.2065 | 67.2000 | 0.6594 | 0.6650 | 0.6556 |
| large | LSG | yes | standard | arts (humanities) | 2.4687 | 0.3335 | 0.0832 | 0.2003 | 0.2026 | 77.2 | 0.6583 | 0.6615 | 0.6584 |
| large | LSG | yes | extended | arts (humanities) | 2.5383 | 0.2913 | 0.1027 | 0.2032 | 0.2043 | 106.0000 | 0.6420 | 0.6305 | 0.6574 |
| base | LSG | yes | standard | biology (scientific) | 2.2452 | 0.3421 | 0.1332 | 0.2665 | 0.2680 | 49.3333 | 0.7088 | 0.6889 | 0.7317 |
| base | LSG | yes | extended | biology (scientific) | 2.1930 | 0.3435 | 0.1377 | 0.2440 | 0.2440 | 43.8333 | 0.7091 | 0.6981 | 0.7220 |
| large | LSG | yes | standard | biology (scientific) | 2.2752 | 0.3399 | 0.1434 | 0.2685 | 0.2695 | 49.1667 | 0.7109 | 0.6869 | 0.7392 |
| large | LSG | yes | extended | biology (scientific) | 2.3410 | 0.3529 | 0.1406 | 0.2616 | 0.2631 | 51.8333 | 0.6954 | 0.6771 | 0.7177 |
| base | LSG | yes | standard | business (scientific) | 3.3838 | 0.3028 | 0.1197 | 0.2154 | 0.2154 | 52.3333 | 0.6320 | 0.6256 | 0.6408 |
| base | LSG | yes | extended | business (scientific) | 3.2187 | 0.3067 | 0.0974 | 0.2360 | 0.2360 | 55.6667 | 0.6331 | 0.6223 | 0.6448 |
| large | LSG | yes | standard | business (scientific) | 3.1152 | 0.3059 | 0.1004 | 0.2281 | 0.2281 | 60.3333 | 0.6538 | 0.6418 | 0.6694 |
| large | LSG | yes | extended | business (scientific) | 3.3128 | 0.3085 | 0.0993 | 0.2373 | 0.2373 | 58.3333 | 0.6564 | 0.6402 | 0.6754 |
| base | LSG | yes | standard | computer science (scientific) | 2.9064 | 0.3087 | 0.0892 | 0.2140 | 0.2157 | 52.0417 | 0.6374 | 0.6415 | 0.6358 |
| base | LSG | yes | extended | computer science (scientific) | 3.0124 | 0.3153 | 0.0918 | 0.2198 | 0.2200 | 50.5417 | 0.6457 | 0.6529 | 0.6410 |
| large | LSG | yes | standard | computer science (scientific) | 3.0571 | 0.3204 | 0.1028 | 0.2234 | 0.2214 | 56.25 | 0.6507 | 0.6549 | 0.6494 |
| large | LSG | yes | extended | computer science (scientific) | 3.0322 | 0.3207 | 0.0973 | 0.2176 | 0.2172 | 60.3750 | 0.6530 | 0.6546 | 0.6534 |
| base | LSG | yes | standard | economics (humanities) | 3.1610 | 0.2855 | 0.0944 | 0.1846 | 0.1843 | 73.7778 | 0.5968 | 0.6201 | 0.5762 |
| base | LSG | yes | extended | economics (humanities) | 3.1676 | 0.2932 | 0.0949 | 0.1872 | 0.1866 | 66.5556 | 0.6102 | 0.6451 | 0.5804 |
| large | LSG | yes | standard | economics (humanities) | 2.8116 | 0.2964 | 0.0887 | 0.1845 | 0.1849 | 97.4444 | 0.6218 | 0.6367 | 0.6103 |
| large | LSG | yes | extended | economics (humanities) | 2.8836 | 0.2950 | 0.0738 | 0.1813 | 0.1818 | 95.4444 | 0.6179 | 0.6370 | 0.6016 |
| base | LSG | yes | standard | engineering (scientific) | 3.0342 | 0.2950 | 0.0547 | 0.1745 | 0.1747 | 74.5714 | 0.6069 | 0.6116 | 0.6029 |
| base | LSG | yes | extended | engineering (scientific) | 3.0571 | 0.2357 | 0.0362 | 0.1529 | 0.1540 | 57.2857 | 0.5953 | 0.6098 | 0.5846 |
| large | LSG | yes | standard | engineering (scientific) | 2.9847 | 0.3148 | 0.0998 | 0.2154 | 0.2146 | 57.0 | 0.6508 | 0.6682 | 0.6354 |
| large | LSG | yes | extended | engineering (scientific) | 3.1122 | 0.3018 | 0.0486 | 0.1816 | 0.1817 | 71.7143 | 0.6246 | 0.6335 | 0.6174 |
| base | LSG | yes | standard | history (humanities) | 2.8927 | 0.3488 | 0.0951 | 0.2038 | 0.2039 | 116.9655 | 0.6151 | 0.6218 | 0.6115 |
| base | LSG | yes | extended | history (humanities) | 2.9331 | 0.3622 | 0.0988 | 0.2102 | 0.2104 | 114.7241 | 0.6192 | 0.6261 | 0.6152 |
| large | LSG | yes | standard | history (humanities) | 2.5072 | 0.3534 | 0.1132 | 0.2232 | 0.2234 | 123.4483 | 0.6201 | 0.6284 | 0.6155 |
| large | LSG | yes | extended | history (humanities) | 2.5268 | 0.3542 | 0.1113 | 0.2227 | 0.2228 | 118.4828 | 0.6286 | 0.6382 | 0.6223 |
| base | LSG | yes | standard | literature (humanities) | 2.7848 | 0.4025 | 0.1420 | 0.2388 | 0.2387 | 115.0625 | 0.6245 | 0.6266 | 0.6237 |
| base | LSG | yes | extended | literature (humanities) | 2.8023 | 0.4085 | 0.1327 | 0.2391 | 0.2388 | 128.1875 | 0.6162 | 0.6149 | 0.6182 |
| large | LSG | yes | standard | literature (humanities) | 2.4948 | 0.4084 | 0.1343 | 0.2482 | 0.2487 | 115.625 | 0.6329 | 0.6394 | 0.6276 |
| large | LSG | yes | extended | literature (humanities) | 2.5064 | 0.4144 | 0.1370 | 0.2495 | 0.2498 | 115.0625 | 0.6379 | 0.6438 | 0.6327 |
| base | LSG | yes | standard | mathematics (scientific) | 2.6308 | 0.3492 | 0.1297 | 0.2386 | 0.2381 | 52.1667 | 0.6365 | 0.6502 | 0.6264 |
| base | LSG | yes | extended | mathematics (scientific) | 2.6426 | 0.3287 | 0.1042 | 0.2077 | 0.2075 | 51.4444 | 0.6293 | 0.6414 | 0.6217 |
| large | LSG | yes | standard | mathematics (scientific) | 2.4863 | 0.3421 | 0.1329 | 0.2261 | 0.2265 | 53.4444 | 0.639 | 0.6517 | 0.6326 |
| large | LSG | yes | extended | mathematics (scientific) | 2.4585 | 0.3712 | 0.1306 | 0.2355 | 0.2350 | 72.8889 | 0.6337 | 0.6252 | 0.6467 |
| base | LSG | yes | standard | philosophy (humanities) | 2.6267 | 0.3598 | 0.1154 | 0.2130 | 0.2131 | 83.3636 | 0.6150 | 0.6452 | 0.5901 |
| base | LSG | yes | extended | philosophy (humanities) | 2.5892 | 0.3680 | 0.1108 | 0.2229 | 0.2238 | 84.0000 | 0.6309 | 0.6542 | 0.6101 |
| large | LSG | yes | standard | philosophy (humanities) | 2.3016 | 0.3647 | 0.1363 | 0.2356 | 0.2342 | 72.9091 | 0.6303 | 0.6661 | 0.6001 |
| large | LSG | yes | extended | philosophy (humanities) | 2.3270 | 0.3775 | 0.1242 | 0.2237 | 0.2234 | 107.6364 | 0.6291 | 0.6432 | 0.6179 |
| base | LSG | yes | standard | physics (scientific) | 2.9665 | 0.3291 | 0.1081 | 0.2293 | 0.2281 | 55.9474 | 0.6268 | 0.6493 | 0.6067 |
| base | LSG | yes | extended | physics (scientific) | 2.9778 | 0.3419 | 0.1126 | 0.2362 | 0.2348 | 64.5789 | 0.6355 | 0.6523 | 0.6209 |
| large | LSG | yes | standard | physics (scientific) | 2.7553 | 0.3611 | 0.1323 | 0.2411 | 0.2394 | 60.7895 | 0.6361 | 0.6548 | 0.6198 |
| large | LSG | yes | extended | physics (scientific) | 2.7834 | 0.3636 | 0.1293 | 0.2551 | 0.2536 | 65.3684 | 0.6486 | 0.6624 | 0.6369 |
| base | LSG | yes | standard | politics (humanities) | 2.9548 | 0.3711 | 0.1126 | 0.2542 | 0.2542 | 64.4000 | 0.6464 | 0.6833 | 0.6148 |
| base | LSG | yes | extended | politics (humanities) | 2.8705 | 0.3366 | 0.1217 | 0.2342 | 0.2342 | 58.2000 | 0.6391 | 0.6780 | 0.6077 |
| large | LSG | yes | standard | politics (humanities) | 2.6137 | 0.3881 | 0.1454 | 0.2346 | 0.2358 | 61.0 | 0.6572 | 0.6838 | 0.6332 |
| large | LSG | yes | extended | politics (humanities) | 2.6244 | 0.3437 | 0.1301 | 0.2194 | 0.2194 | 72.8000 | 0.6337 | 0.6518 | 0.6183 |
| base | LSG | yes | standard | psychology (humanities) | 2.8961 | 0.2288 | 0.0732 | 0.1803 | 0.1803 | 52.5000 | 0.6439 | 0.6520 | 0.6367 |
| base | LSG | yes | extended | psychology (humanities) | 2.9719 | 0.2937 | 0.0937 | 0.1944 | 0.1944 | 47.5000 | 0.6655 | 0.6649 | 0.6663 |
| large | LSG | yes | standard | psychology (humanities) | 2.8318 | 0.2641 | 0.0855 | 0.2107 | 0.2107 | 64.0 | 0.6754 | 0.6683 | 0.6835 |
| large | LSG | yes | extended | psychology (humanities) | 2.8929 | 0.3406 | 0.0758 | 0.2206 | 0.2206 | 78.0000 | 0.6832 | 0.6657 | 0.7041 |
| base | LSG | yes | standard | social studies (humanities) | 3.2202 | 0.2538 | 0.0506 | 0.1404 | 0.1410 | 87.6000 | 0.5734 | 0.5974 | 0.5535 |
| base | LSG | yes | extended | social studies (humanities) | 3.2383 | 0.2690 | 0.0468 | 0.1684 | 0.1684 | 70.6000 | 0.5647 | 0.5919 | 0.5407 |
| large | LSG | yes | standard | social studies (humanities) | 2.8378 | 0.2895 | 0.043 | 0.1668 | 0.1666 | 71.8 | 0.5888 | 0.6196 | 0.5623 |
| large | LSG | yes | extended | social studies (humanities) | 2.8923 | 0.2783 | 0.0487 | 0.1621 | 0.1621 | 88.0000 | 0.5657 | 0.5864 | 0.5501 |

**Table 3e: courses test set**

| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | American History: From Emancipation to the Present (humanities->history) | 2.8325 | 0.3607 | 0.1044 | 0.1882 | 0.1882 | 160.8000 | 0.6074 | 0.6158 | 0.6008 |
| base | LSG | yes | extended | American History: From Emancipation to the Present (humanities->history) | 2.8472 | 0.3921 | 0.1373 | 0.1985 | 0.2009 | 139.2000 | 0.6270 | 0.6388 | 0.6165 |
| large | LSG | yes | standard | American History: From Emancipation to the Present (humanities->history) | 2.4781 | 0.3306 | 0.1122 | 0.2059 | 0.2059 | 120.4000 | 0.6160 | 0.6498 | 0.5884 |
| large | LSG | yes | extended | American History: From Emancipation to the Present (humanities->history) | 2.5197 | 0.3422 | 0.1345 | 0.2179 | 0.2147 | 96.2000 | 0.6173 | 0.6613 | 0.5802 |
| base | LSG | yes | standard | Artificial Intelligence (scientific->computer science) | 2.1246 | 0.3440 | 0.1264 | 0.2452 | 0.2452 | 46.0000 | 0.6452 | 0.6558 | 0.6353 |
| base | LSG | yes | extended | Artificial Intelligence (scientific->computer science) | 2.1471 | 0.3295 | 0.1125 | 0.2282 | 0.2282 | 58.3333 | 0.6463 | 0.6390 | 0.6539 |
| large | LSG | yes | standard | Artificial Intelligence (scientific->computer science) | 2.3238 | 0.2609 | 0.0885 | 0.1771 | 0.1771 | 86.0000 | 0.6156 | 0.6001 | 0.6324 |
| large | LSG | yes | extended | Artificial Intelligence (scientific->computer science) | 2.2326 | 0.3027 | 0.0679 | 0.2006 | 0.2006 | 62.6667 | 0.6439 | 0.6380 | 0.6507 |
| base | LSG | yes | standard | Brains, Minds and Machines Summer Course (scientific->computer science) | 3.6535 | 0.2442 | 0.0621 | 0.1725 | 0.1725 | 64.2500 | 0.6282 | 0.6153 | 0.6420 |
| base | LSG | yes | extended | Brains, Minds and Machines Summer Course (scientific->computer science) | 3.7445 | 0.2670 | 0.0563 | 0.1840 | 0.1840 | 45.7500 | 0.6372 | 0.6343 | 0.6402 |
| large | LSG | yes | standard | Brains, Minds and Machines Summer Course (scientific->computer science) | 3.6647 | 0.2201 | 0.0516 | 0.1545 | 0.1545 | 63.0000 | 0.6215 | 0.6080 | 0.6357 |
| large | LSG | yes | extended | Brains, Minds and Machines Summer Course (scientific->computer science) | 3.7857 | 0.2899 | 0.0797 | 0.2005 | 0.2005 | 55.2500 | 0.6354 | 0.6339 | 0.6387 |
| base | LSG | yes | standard | Calculus Revisited: Multivariable Calculus (scientific->mathematics) | 2.5245 | 0.3210 | 0.1038 | 0.2359 | 0.2359 | 50.5000 | 0.6372 | 0.6005 | 0.6795 |
| base | LSG | yes | extended | Calculus Revisited: Multivariable Calculus (scientific->mathematics) | 2.5957 | 0.2645 | 0.0697 | 0.1800 | 0.1800 | 54.0000 | 0.6127 | 0.5734 | 0.6579 |
| large | LSG | yes | standard | Calculus Revisited: Multivariable Calculus (scientific->mathematics) | 2.2385 | 0.3086 | 0.1422 | 0.2400 | 0.2400 | 54.5000 | 0.6579 | 0.6169 | 0.7052 |
| large | LSG | yes | extended | Calculus Revisited: Multivariable Calculus (scientific->mathematics) | 2.1182 | 0.3229 | 0.1325 | 0.2511 | 0.2511 | 49.0000 | 0.6439 | 0.6074 | 0.6858 |
| base | LSG | yes | standard | Capitalism: Success, Crisis and Reform with Douglas W. Rae (humanities->economics) | 2.9866 | 0.3162 | 0.0781 | 0.1949 | 0.1949 | 88.5000 | 0.5931 | 0.6142 | 0.5734 |
| base | LSG | yes | extended | Capitalism: Success, Crisis and Reform with Douglas W. Rae (humanities->economics) | 2.9973 | 0.2916 | 0.1140 | 0.1945 | 0.1945 | 68.0000 | 0.6164 | 0.6736 | 0.5682 |
| large | LSG | yes | standard | Capitalism: Success, Crisis and Reform with Douglas W. Rae (humanities->economics) | 2.4862 | 0.3176 | 0.0759 | 0.1720 | 0.1720 | 135.0000 | 0.6109 | 0.6154 | 0.6069 |
| large | LSG | yes | extended | Capitalism: Success, Crisis and Reform with Douglas W. Rae (humanities->economics) | 2.4954 | 0.2885 | 0.0539 | 0.1532 | 0.1532 | 132.5000 | 0.6015 | 0.6106 | 0.5939 |
| base | LSG | yes | standard | Cervantes' Don Quixote with Roberto González Echevarría (humanities->literature) | 2.7476 | 0.4015 | 0.1250 | 0.2263 | 0.2263 | 138.0000 | 0.6141 | 0.6375 | 0.5924 |
| base | LSG | yes | extended | Cervantes' Don Quixote with Roberto González Echevarría (humanities->literature) | 2.8888 | 0.4207 | 0.1264 | 0.2214 | 0.2214 | 130.0000 | 0.6064 | 0.6318 | 0.5830 |
| large | LSG | yes | standard | Cervantes' Don Quixote with Roberto González Echevarría (humanities->literature) | 2.2293 | 0.4437 | 0.0922 | 0.2535 | 0.2535 | 149.0000 | 0.6256 | 0.6503 | 0.6028 |
| large | LSG | yes | extended | Cervantes' Don Quixote with Roberto González Echevarría (humanities->literature) | 2.4533 | 0.4756 | 0.1288 | 0.2378 | 0.2378 | 194.0000 | 0.5897 | 0.5960 | 0.5836 |
| base | LSG | yes | standard | Dante in Translation with Giuseppe Mazzotta (humanities->literature) | 3.1900 | 0.3875 | 0.1331 | 0.2491 | 0.2491 | 95.5000 | 0.5881 | 0.6131 | 0.5655 |
| base | LSG | yes | extended | Dante in Translation with Giuseppe Mazzotta (humanities->literature) | 3.2237 | 0.4013 | 0.1369 | 0.2484 | 0.2484 | 123.0000 | 0.5884 | 0.5999 | 0.5776 |
| large | LSG | yes | standard | Dante in Translation with Giuseppe Mazzotta (humanities->literature) | 2.8985 | 0.2718 | 0.0506 | 0.1836 | 0.1836 | 71.5000 | 0.5624 | 0.6022 | 0.5281 |
| large | LSG | yes | extended | Dante in Translation with Giuseppe Mazzotta (humanities->literature) | 2.8863 | 0.3570 | 0.0928 | 0.2277 | 0.2277 | 96.0000 | 0.6167 | 0.6436 | 0.5921 |
| base | LSG | yes | standard | Death with Shelly Kagan (humanities->philosophy) | 2.6973 | 0.3662 | 0.1050 | 0.1923 | 0.1923 | 80.2500 | 0.5993 | 0.6235 | 0.5785 |
| base | LSG | yes | extended | Death with Shelly Kagan (humanities->philosophy) | 2.6339 | 0.3759 | 0.0959 | 0.2194 | 0.2194 | 73.2500 | 0.6415 | 0.6629 | 0.6219 |
| large | LSG | yes | standard | Death with Shelly Kagan (humanities->philosophy) | 2.3513 | 0.3447 | 0.0972 | 0.1974 | 0.1974 | 72.5000 | 0.6099 | 0.6307 | 0.5914 |
| large | LSG | yes | extended | Death with Shelly Kagan (humanities->philosophy) | 2.3367 | 0.4033 | 0.1298 | 0.2397 | 0.2397 | 100.5000 | 0.6349 | 0.6512 | 0.6200 |
| base | LSG | yes | standard | Discrete Stochastic Processes (scientific->mathematics) | 2.8442 | 0.2295 | 0.1017 | 0.1967 | 0.1967 | 43.0000 | 0.6335 | 0.6398 | 0.6273 |
| base | LSG | yes | extended | Discrete Stochastic Processes (scientific->mathematics) | 2.9068 | 0.4000 | 0.1587 | 0.2154 | 0.2154 | 46.0000 | 0.6801 | 0.6832 | 0.6770 |
| large | LSG | yes | standard | Discrete Stochastic Processes (scientific->mathematics) | 2.5808 | 0.2410 | 0.0741 | 0.1205 | 0.1205 | 69.0000 | 0.6497 | 0.6219 | 0.6800 |
| large | LSG | yes | extended | Discrete Stochastic Processes (scientific->mathematics) | 2.6245 | 0.3824 | 0.0909 | 0.2059 | 0.2059 | 58.0000 | 0.6803 | 0.6483 | 0.7155 |
| base | LSG | yes | standard | Early Modern England with Keith E. Wrightson (humanities->history) | 2.8369 | 0.3588 | 0.0716 | 0.2076 | 0.2076 | 143.7500 | 0.6079 | 0.6122 | 0.6056 |
| base | LSG | yes | extended | Early Modern England with Keith E. Wrightson (humanities->history) | 2.8424 | 0.4059 | 0.1211 | 0.2540 | 0.2540 | 125.7500 | 0.6093 | 0.6215 | 0.5985 |
| large | LSG | yes | standard | Early Modern England with Keith E. Wrightson (humanities->history) | 2.4988 | 0.3495 | 0.0967 | 0.2320 | 0.2323 | 132.5000 | 0.6093 | 0.6230 | 0.5983 |
| large | LSG | yes | extended | Early Modern England with Keith E. Wrightson (humanities->history) | 2.4788 | 0.3957 | 0.1190 | 0.2240 | 0.2240 | 205.7500 | 0.6272 | 0.6178 | 0.6385 |
| base | LSG | yes | standard | Environmental Politics and Law with John Wargo (humanities->politics) | 3.1076 | 0.2947 | 0.0448 | 0.1894 | 0.1894 | 61.3333 | 0.6154 | 0.6538 | 0.5817 |
| base | LSG | yes | extended | Environmental Politics and Law with John Wargo (humanities->politics) | 3.0255 | 0.2866 | 0.0620 | 0.1820 | 0.1820 | 54.6667 | 0.6101 | 0.6452 | 0.5801 |
| large | LSG | yes | standard | Environmental Politics and Law with John Wargo (humanities->politics) | 2.7302 | 0.2700 | 0.0490 | 0.1619 | 0.1619 | 53.6667 | 0.6230 | 0.6565 | 0.5933 |
| large | LSG | yes | extended | Environmental Politics and Law with John Wargo (humanities->politics) | 2.8106 | 0.2511 | 0.0717 | 0.1588 | 0.1588 | 60.3333 | 0.6105 | 0.6395 | 0.5846 |
| base | LSG | yes | standard | Epidemics in Western Society Since 1600 with Frank Snowden (humanities->history) | 3.3323 | 0.2817 | 0.0523 | 0.1781 | 0.1781 | 92.5000 | 0.5985 | 0.6004 | 0.5996 |
| base | LSG | yes | extended | Epidemics in Western Society Since 1600 with Frank Snowden (humanities->history) | 3.4015 | 0.3322 | 0.0612 | 0.1912 | 0.1912 | 130.7500 | 0.6016 | 0.5877 | 0.6186 |
| large | LSG | yes | standard | Epidemics in Western Society Since 1600 with Frank Snowden (humanities->history) | 2.8857 | 0.3217 | 0.0839 | 0.1925 | 0.1925 | 146.7500 | 0.6147 | 0.6006 | 0.6311 |
| large | LSG | yes | extended | Epidemics in Western Society Since 1600 with Frank Snowden (humanities->history) | 2.8424 | 0.3018 | 0.0866 | 0.1959 | 0.1959 | 103.5000 | 0.6135 | 0.6047 | 0.6258 |
| base | LSG | yes | standard | European Civilization (1648-1945) with John Merriman (humanities->history) | 3.3245 | 0.2737 | 0.0339 | 0.1728 | 0.1728 | 74.0000 | 0.5695 | 0.6077 | 0.5360 |
| base | LSG | yes | extended | European Civilization (1648-1945) with John Merriman (humanities->history) | 3.3917 | 0.2544 | 0.0190 | 0.1635 | 0.1635 | 103.0000 | 0.5614 | 0.5804 | 0.5435 |
| large | LSG | yes | standard | European Civilization (1648-1945) with John Merriman (humanities->history) | 2.8101 | 0.3250 | 0.0491 | 0.1601 | 0.1601 | 174.5000 | 0.5765 | 0.5729 | 0.5811 |
| large | LSG | yes | extended | European Civilization (1648-1945) with John Merriman (humanities->history) | 2.8730 | 0.3166 | 0.0667 | 0.1767 | 0.1767 | 148.5000 | 0.5846 | 0.5926 | 0.5773 |
| base | LSG | yes | standard | Evolution, Ecology and Behavior with Stephen C. Stearns (humanities->social studies) | 3.1398 | 0.2388 | 0.0211 | 0.1491 | 0.1491 | 59.0000 | 0.5907 | 0.5984 | 0.5833 |
| base | LSG | yes | extended | Evolution, Ecology and Behavior with Stephen C. Stearns (humanities->social studies) | 3.1420 | 0.2756 | 0.0250 | 0.1899 | 0.1899 | 52.0000 | 0.5728 | 0.5794 | 0.5663 |
| large | LSG | yes | standard | Evolution, Ecology and Behavior with Stephen C. Stearns (humanities->social studies) | 2.9081 | 0.3514 | 0.0372 | 0.1896 | 0.1896 | 68.0000 | 0.6164 | 0.6207 | 0.6122 |
| large | LSG | yes | extended | Evolution, Ecology and Behavior with Stephen C. Stearns (humanities->social studies) | 3.0188 | 0.3163 | 0.0584 | 0.1835 | 0.1835 | 90.0000 | 0.5832 | 0.5695 | 0.5979 |
| base | LSG | yes | standard | Financial Markets (2008) with Robert Shiller (humanities->economics) | 3.2186 | 0.3039 | 0.0881 | 0.1750 | 0.1750 | 84.0000 | 0.5906 | 0.6057 | 0.5767 |
| base | LSG | yes | extended | Financial Markets (2008) with Robert Shiller (humanities->economics) | 3.2286 | 0.3037 | 0.0844 | 0.1669 | 0.1669 | 74.3333 | 0.6124 | 0.6369 | 0.5903 |
| large | LSG | yes | standard | Financial Markets (2008) with Robert Shiller (humanities->economics) | 2.9143 | 0.3248 | 0.0756 | 0.1794 | 0.1794 | 114.0000 | 0.6131 | 0.6148 | 0.6128 |
| large | LSG | yes | extended | Financial Markets (2008) with Robert Shiller (humanities->economics) | 2.9475 | 0.2916 | 0.0539 | 0.1568 | 0.1568 | 70.0000 | 0.6212 | 0.6492 | 0.5958 |
| base | LSG | yes | standard | Financial Theory with John Geanakoplos (humanities->economics) | 3.2935 | 0.2854 | 0.1141 | 0.1939 | 0.1939 | 67.0000 | 0.5953 | 0.6394 | 0.5570 |
| base | LSG | yes | extended | Financial Theory with John Geanakoplos (humanities->economics) | 3.3237 | 0.3058 | 0.1005 | 0.1989 | 0.1989 | 67.3333 | 0.5970 | 0.6441 | 0.5568 |
| large | LSG | yes | standard | Financial Theory with John Geanakoplos (humanities->economics) | 2.8943 | 0.2987 | 0.1199 | 0.2079 | 0.2079 | 71.3333 | 0.6294 | 0.6801 | 0.5859 |
| large | LSG | yes | extended | Financial Theory with John Geanakoplos (humanities->economics) | 3.0161 | 0.3411 | 0.1086 | 0.2259 | 0.2259 | 110.0000 | 0.6187 | 0.6517 | 0.5893 |
| base | LSG | yes | standard | Foundations of Computational and Systems Biology (scientific->biology) | 2.3446 | 0.3263 | 0.1147 | 0.2589 | 0.2589 | 51.5000 | 0.7235 | 0.7283 | 0.7193 |
| base | LSG | yes | extended | Foundations of Computational and Systems Biology (scientific->biology) | 2.3332 | 0.3340 | 0.1041 | 0.2329 | 0.2329 | 45.0000 | 0.7198 | 0.7299 | 0.7099 |
| large | LSG | yes | standard | Foundations of Computational and Systems Biology (scientific->biology) | 2.4717 | 0.2324 | 0.0627 | 0.1843 | 0.1843 | 39.5000 | 0.6768 | 0.6893 | 0.6647 |
| large | LSG | yes | extended | Foundations of Computational and Systems Biology (scientific->biology) | 2.5888 | 0.2898 | 0.0814 | 0.1994 | 0.1994 | 52.5000 | 0.6763 | 0.6794 | 0.6739 |
| base | LSG | yes | standard | Foundations of Modern Social Theory with Iván Szelényi (humanities->social studies) | 2.9909 | 0.2575 | 0.0900 | 0.1442 | 0.1442 | 77.5000 | 0.5584 | 0.6104 | 0.5165 |
| base | LSG | yes | extended | Foundations of Modern Social Theory with Iván Szelényi (humanities->social studies) | 3.0384 | 0.3116 | 0.0872 | 0.1821 | 0.1821 | 92.0000 | 0.5671 | 0.6092 | 0.5304 |
| large | LSG | yes | standard | Foundations of Modern Social Theory with Iván Szelényi (humanities->social studies) | 2.5452 | 0.2561 | 0.0572 | 0.1623 | 0.1623 | 67.0000 | 0.5547 | 0.6088 | 0.5095 |
| large | LSG | yes | extended | Foundations of Modern Social Theory with Iván Szelényi (humanities->social studies) | 2.5729 | 0.2460 | 0.0591 | 0.1586 | 0.1586 | 60.5000 | 0.5465 | 0.6094 | 0.4954 |
| base | LSG | yes | standard | Frontiers of Biomedical Engineering with W. Mark Saltzman (scientific->engineering) | 3.5137 | 0.3200 | 0.0405 | 0.1600 | 0.1600 | 63.0000 | 0.6043 | 0.6203 | 0.5890 |
| base | LSG | yes | extended | Frontiers of Biomedical Engineering with W. Mark Saltzman (scientific->engineering) | 3.5194 | 0.3200 | 0.0405 | 0.1600 | 0.1600 | 63.0000 | 0.5996 | 0.6141 | 0.5858 |
| large | LSG | yes | standard | Frontiers of Biomedical Engineering with W. Mark Saltzman (scientific->engineering) | 3.1695 | 0.3067 | 0.0405 | 0.1467 | 0.1467 | 68.0000 | 0.6227 | 0.6485 | 0.5989 |
| large | LSG | yes | extended | Frontiers of Biomedical Engineering with W. Mark Saltzman (scientific->engineering) | 3.1914 | 0.2519 | 0.0301 | 0.1333 | 0.1333 | 49.0000 | 0.6120 | 0.6470 | 0.5806 |
| base | LSG | yes | standard | Frontiers/Controversies in Astrophysics with Charles Bailyn (scientific->physics) | 2.9846 | 0.3135 | 0.1089 | 0.2232 | 0.2232 | 63.3333 | 0.6115 | 0.6549 | 0.5742 |
| base | LSG | yes | extended | Frontiers/Controversies in Astrophysics with Charles Bailyn (scientific->physics) | 2.9820 | 0.3007 | 0.1343 | 0.2296 | 0.2296 | 61.6667 | 0.6309 | 0.6776 | 0.5908 |
| large | LSG | yes | standard | Frontiers/Controversies in Astrophysics with Charles Bailyn (scientific->physics) | 2.3090 | 0.3675 | 0.1156 | 0.2285 | 0.2285 | 85.0000 | 0.6215 | 0.6489 | 0.5967 |
| large | LSG | yes | extended | Frontiers/Controversies in Astrophysics with Charles Bailyn (scientific->physics) | 2.3365 | 0.3803 | 0.1654 | 0.2787 | 0.2787 | 67.0000 | 0.6388 | 0.6769 | 0.6051 |
| base | LSG | yes | standard | Fundamentals of Physics II with Ramamurti Shankar (scientific->physics) | 3.0820 | 0.3218 | 0.0892 | 0.1997 | 0.1997 | 60.3333 | 0.5889 | 0.6086 | 0.5716 |
| base | LSG | yes | extended | Fundamentals of Physics II with Ramamurti Shankar (scientific->physics) | 3.0818 | 0.3372 | 0.0865 | 0.1918 | 0.1917 | 85.1667 | 0.5998 | 0.6035 | 0.5971 |
| large | LSG | yes | standard | Fundamentals of Physics II with Ramamurti Shankar (scientific->physics) | 2.9056 | 0.3280 | 0.0959 | 0.1918 | 0.1931 | 60.6667 | 0.6155 | 0.6302 | 0.6024 |
| large | LSG | yes | extended | Fundamentals of Physics II with Ramamurti Shankar (scientific->physics) | 2.9819 | 0.3127 | 0.0760 | 0.1932 | 0.1932 | 72.8333 | 0.6132 | 0.6221 | 0.6049 |
| base | LSG | yes | standard | Fundamentals of Physics with Ramamurti Shankar (scientific->physics) | 2.3108 | 0.3571 | 0.0364 | 0.1964 | 0.1964 | 50.0000 | 0.6641 | 0.6972 | 0.6340 |
| base | LSG | yes | extended | Fundamentals of Physics with Ramamurti Shankar (scientific->physics) | 2.2537 | 0.3559 | 0.0172 | 0.1864 | 0.1864 | 55.0000 | 0.6361 | 0.6593 | 0.6144 |
| large | LSG | yes | standard | Fundamentals of Physics with Ramamurti Shankar (scientific->physics) | 2.1503 | 0.3750 | 0.0317 | 0.1875 | 0.1875 | 72.0000 | 0.6421 | 0.6483 | 0.6359 |
| large | LSG | yes | extended | Fundamentals of Physics with Ramamurti Shankar (scientific->physics) | 2.0928 | 0.3761 | 0.0348 | 0.2051 | 0.2051 | 56.0000 | 0.6611 | 0.6931 | 0.6319 |
| base | LSG | yes | standard | Game Theory with Ben Polak (scientific->computer science) | 2.9430 | 0.2560 | 0.0668 | 0.1456 | 0.1456 | 59.6667 | 0.5890 | 0.6342 | 0.5503 |
| base | LSG | yes | extended | Game Theory with Ben Polak (scientific->computer science) | 2.9609 | 0.2669 | 0.0602 | 0.1564 | 0.1564 | 52.3333 | 0.5956 | 0.6464 | 0.5526 |
| large | LSG | yes | standard | Game Theory with Ben Polak (scientific->computer science) | 2.5411 | 0.2552 | 0.0656 | 0.1617 | 0.1617 | 50.0000 | 0.6042 | 0.6684 | 0.5514 |
| large | LSG | yes | extended | Game Theory with Ben Polak (scientific->computer science) | 2.6493 | 0.3178 | 0.0676 | 0.1545 | 0.1545 | 82.3333 | 0.5810 | 0.6138 | 0.5534 |
| base | LSG | yes | standard | Global Problems of Population Growth with Robert Wyman (humanities->social studies) | 3.8397 | 0.2763 | 0.0397 | 0.1184 | 0.1184 | 165.0000 | 0.5688 | 0.5696 | 0.5680 |
| base | LSG | yes | extended | Global Problems of Population Growth with Robert Wyman (humanities->social studies) | 3.8305 | 0.1706 | 0.0096 | 0.0948 | 0.0948 | 65.0000 | 0.5438 | 0.5821 | 0.5102 |
| large | LSG | yes | standard | Global Problems of Population Growth with Robert Wyman (humanities->social studies) | 3.2825 | 0.2328 | 0.0261 | 0.1293 | 0.1293 | 89.0000 | 0.6016 | 0.6392 | 0.5681 |
| large | LSG | yes | extended | Global Problems of Population Growth with Robert Wyman (humanities->social studies) | 3.2782 | 0.2667 | 0.0071 | 0.1263 | 0.1263 | 139.0000 | 0.5689 | 0.5740 | 0.5638 |
| base | LSG | yes | standard | Graph Theory and Additive Combinatorics (scientific->mathematics) | 2.2511 | 0.3957 | 0.1712 | 0.2778 | 0.2778 | 56.0000 | 0.6690 | 0.6820 | 0.6576 |
| base | LSG | yes | extended | Graph Theory and Additive Combinatorics (scientific->mathematics) | 2.1986 | 0.3616 | 0.1359 | 0.2226 | 0.2226 | 50.5000 | 0.6623 | 0.6714 | 0.6547 |
| large | LSG | yes | standard | Graph Theory and Additive Combinatorics (scientific->mathematics) | 1.9143 | 0.3779 | 0.1643 | 0.2717 | 0.2717 | 55.5000 | 0.6543 | 0.6655 | 0.6444 |
| large | LSG | yes | extended | Graph Theory and Additive Combinatorics (scientific->mathematics) | 1.9369 | 0.3960 | 0.1595 | 0.2352 | 0.2352 | 95.5000 | 0.6475 | 0.6194 | 0.6793 |
| base | LSG | yes | standard | Green Supply Chain Management (scientific->business) | 3.5533 | 0.3251 | 0.1629 | 0.2263 | 0.2263 | 60.5000 | 0.6486 | 0.6238 | 0.6755 |
| base | LSG | yes | extended | Green Supply Chain Management (scientific->business) | 3.3868 | 0.2933 | 0.1318 | 0.2429 | 0.2429 | 61.0000 | 0.6335 | 0.6146 | 0.6537 |
| large | LSG | yes | standard | Green Supply Chain Management (scientific->business) | 3.1631 | 0.3720 | 0.1507 | 0.2696 | 0.2696 | 73.0000 | 0.6596 | 0.6261 | 0.6971 |
| large | LSG | yes | extended | Green Supply Chain Management (scientific->business) | 3.4462 | 0.2871 | 0.1212 | 0.2073 | 0.2073 | 66.0000 | 0.6318 | 0.6024 | 0.6649 |
| base | LSG | yes | standard | Hemingway, Fitzgerald, Faulkner with Wai Chee Dimock (humanities->literature) | 2.6212 | 0.3888 | 0.1810 | 0.2652 | 0.2652 | 116.5000 | 0.6324 | 0.6359 | 0.6292 |
| base | LSG | yes | extended | Hemingway, Fitzgerald, Faulkner with Wai Chee Dimock (humanities->literature) | 2.6719 | 0.4304 | 0.2156 | 0.2710 | 0.2710 | 137.5000 | 0.6366 | 0.6355 | 0.6391 |
| large | LSG | yes | standard | Hemingway, Fitzgerald, Faulkner with Wai Chee Dimock (humanities->literature) | 2.4267 | 0.4532 | 0.1929 | 0.2748 | 0.2748 | 141.0000 | 0.6615 | 0.6658 | 0.6575 |
| large | LSG | yes | extended | Hemingway, Fitzgerald, Faulkner with Wai Chee Dimock (humanities->literature) | 2.4171 | 0.4662 | 0.2042 | 0.2977 | 0.2977 | 142.0000 | 0.6447 | 0.6459 | 0.6437 |
| base | LSG | yes | standard | Innovation Systems for Science, Technology, Energy, Manufacturing, and Health (scientific->engineering) | 2.6493 | 0.3028 | 0.0739 | 0.1619 | 0.1619 | 132.5000 | 0.5728 | 0.5797 | 0.5664 |
| base | LSG | yes | extended | Innovation Systems for Science, Technology, Energy, Manufacturing, and Health (scientific->engineering) | 2.6541 | 0.1392 | 0.0325 | 0.1018 | 0.1018 | 60.5000 | 0.5502 | 0.6031 | 0.5089 |
| large | LSG | yes | standard | Innovation Systems for Science, Technology, Energy, Manufacturing, and Health (scientific->engineering) | 2.5341 | 0.2021 | 0.0574 | 0.1450 | 0.1450 | 68.5000 | 0.5955 | 0.6335 | 0.5624 |
| large | LSG | yes | extended | Innovation Systems for Science, Technology, Energy, Manufacturing, and Health (scientific->engineering) | 2.6186 | 0.3588 | 0.0811 | 0.2217 | 0.2217 | 87.0000 | 0.6158 | 0.6474 | 0.5873 |
| base | LSG | yes | standard | Introduction to Ancient Greek History with Donald Kagan (humanities->history) | 2.6140 | 0.3864 | 0.1269 | 0.2314 | 0.2314 | 128.3333 | 0.6036 | 0.6077 | 0.6002 |
| base | LSG | yes | extended | Introduction to Ancient Greek History with Donald Kagan (humanities->history) | 2.6539 | 0.4045 | 0.1073 | 0.2395 | 0.2395 | 111.6667 | 0.6424 | 0.6581 | 0.6279 |
| large | LSG | yes | standard | Introduction to Ancient Greek History with Donald Kagan (humanities->history) | 2.3546 | 0.3966 | 0.1117 | 0.2232 | 0.2232 | 153.3333 | 0.6100 | 0.6047 | 0.6156 |
| large | LSG | yes | extended | Introduction to Ancient Greek History with Donald Kagan (humanities->history) | 2.3696 | 0.3710 | 0.1226 | 0.2344 | 0.2344 | 103.6667 | 0.6313 | 0.6488 | 0.6153 |
| base | LSG | yes | standard | Introduction to EECS II: Digital Communication Systems (scientific->computer science) | 3.1605 | 0.2957 | 0.0773 | 0.2074 | 0.2074 | 53.0000 | 0.6487 | 0.6293 | 0.6697 |
| base | LSG | yes | extended | Introduction to EECS II: Digital Communication Systems (scientific->computer science) | 3.2321 | 0.3080 | 0.0943 | 0.2207 | 0.2207 | 50.5000 | 0.6602 | 0.6478 | 0.6733 |
| large | LSG | yes | standard | Introduction to EECS II: Digital Communication Systems (scientific->computer science) | 3.4721 | 0.3301 | 0.0974 | 0.2215 | 0.2215 | 48.7500 | 0.6773 | 0.6670 | 0.6887 |
| large | LSG | yes | extended | Introduction to EECS II: Digital Communication Systems (scientific->computer science) | 3.2315 | 0.3027 | 0.0774 | 0.2114 | 0.2114 | 45.0000 | 0.6895 | 0.6855 | 0.6936 |
| base | LSG | yes | standard | Introduction to Lean Six Sigma Methods (scientific->business) | 3.0447 | 0.2581 | 0.0333 | 0.1935 | 0.1935 | 36.0000 | 0.5988 | 0.6291 | 0.5713 |
| base | LSG | yes | extended | Introduction to Lean Six Sigma Methods (scientific->business) | 2.8825 | 0.3333 | 0.0286 | 0.2222 | 0.2222 | 45.0000 | 0.6323 | 0.6377 | 0.6269 |
| large | LSG | yes | standard | Introduction to Lean Six Sigma Methods (scientific->business) | 3.0193 | 0.1739 | 0.0000 | 0.1449 | 0.1449 | 35.0000 | 0.6423 | 0.6730 | 0.6142 |
| large | LSG | yes | extended | Introduction to Lean Six Sigma Methods (scientific->business) | 3.0459 | 0.3514 | 0.0556 | 0.2973 | 0.2973 | 43.0000 | 0.7058 | 0.7156 | 0.6963 |
| base | LSG | yes | standard | Introduction to Nuclear Engineering and Ionizing Radiation (scientific->physics) | 3.6215 | 0.2494 | 0.0486 | 0.1517 | 0.1517 | 62.3333 | 0.5845 | 0.5970 | 0.5736 |
| base | LSG | yes | extended | Introduction to Nuclear Engineering and Ionizing Radiation (scientific->physics) | 3.6804 | 0.3058 | 0.0565 | 0.1866 | 0.1866 | 59.6667 | 0.6244 | 0.6423 | 0.6079 |
| large | LSG | yes | standard | Introduction to Nuclear Engineering and Ionizing Radiation (scientific->physics) | 3.3884 | 0.2401 | 0.0824 | 0.1383 | 0.1383 | 41.6667 | 0.5762 | 0.6143 | 0.5439 |
| large | LSG | yes | extended | Introduction to Nuclear Engineering and Ionizing Radiation (scientific->physics) | 3.4544 | 0.2681 | 0.0894 | 0.1661 | 0.1661 | 52.0000 | 0.6156 | 0.6461 | 0.5906 |
| base | LSG | yes | standard | Introduction to Political Philosophy with Steven B. Smith (humanities->philosophy) | 1.7925 | 0.4675 | 0.1968 | 0.3094 | 0.3094 | 90.5000 | 0.6767 | 0.6845 | 0.6700 |
| base | LSG | yes | extended | Introduction to Political Philosophy with Steven B. Smith (humanities->philosophy) | 1.7642 | 0.4634 | 0.1992 | 0.2907 | 0.2907 | 94.0000 | 0.6731 | 0.6770 | 0.6701 |
| large | LSG | yes | standard | Introduction to Political Philosophy with Steven B. Smith (humanities->philosophy) | 1.6685 | 0.4925 | 0.2378 | 0.3497 | 0.3497 | 67.5000 | 0.6784 | 0.6940 | 0.6636 |
| large | LSG | yes | extended | Introduction to Political Philosophy with Steven B. Smith (humanities->philosophy) | 1.6452 | 0.3987 | 0.1722 | 0.2418 | 0.2418 | 98.0000 | 0.6343 | 0.6218 | 0.6478 |
| base | LSG | yes | standard | Introduction to Psychology with Paul Bloom (humanities->psychology) | 2.8961 | 0.2288 | 0.0732 | 0.1803 | 0.1803 | 52.5000 | 0.6439 | 0.6520 | 0.6367 |
| base | LSG | yes | extended | Introduction to Psychology with Paul Bloom (humanities->psychology) | 2.9719 | 0.2937 | 0.0937 | 0.1944 | 0.1944 | 47.5000 | 0.6655 | 0.6649 | 0.6663 |
| large | LSG | yes | standard | Introduction to Psychology with Paul Bloom (humanities->psychology) | 2.8318 | 0.2641 | 0.0855 | 0.2107 | 0.2107 | 64.0000 | 0.6754 | 0.6683 | 0.6835 |
| large | LSG | yes | extended | Introduction to Psychology with Paul Bloom (humanities->psychology) | 2.8929 | 0.3406 | 0.0758 | 0.2206 | 0.2206 | 78.0000 | 0.6832 | 0.6657 | 0.7041 |
| base | LSG | yes | standard | Introduction to Theory of Literature with Paul H. Fry (humanities->literature) | 2.6767 | 0.4129 | 0.1669 | 0.2646 | 0.2646 | 110.3333 | 0.6427 | 0.6473 | 0.6391 |
| base | LSG | yes | extended | Introduction to Theory of Literature with Paul H. Fry (humanities->literature) | 2.5460 | 0.4307 | 0.1261 | 0.2680 | 0.2680 | 129.3333 | 0.6148 | 0.6071 | 0.6230 |
| large | LSG | yes | standard | Introduction to Theory of Literature with Paul H. Fry (humanities->literature) | 2.3512 | 0.4523 | 0.1648 | 0.2772 | 0.2772 | 100.3333 | 0.6546 | 0.6642 | 0.6453 |
| large | LSG | yes | extended | Introduction to Theory of Literature with Paul H. Fry (humanities->literature) | 2.2122 | 0.4024 | 0.1528 | 0.2553 | 0.2553 | 78.6667 | 0.6419 | 0.6621 | 0.6231 |
| base | LSG | yes | standard | Introduction to the Old Testament With Christine Hayes (humanities->literature) | 2.6205 | 0.4208 | 0.1579 | 0.2199 | 0.2199 | 116.0000 | 0.6440 | 0.6352 | 0.6534 |
| base | LSG | yes | extended | Introduction to the Old Testament With Christine Hayes (humanities->literature) | 2.7555 | 0.4358 | 0.1237 | 0.2370 | 0.2370 | 121.3333 | 0.6364 | 0.6299 | 0.6433 |
| large | LSG | yes | standard | Introduction to the Old Testament With Christine Hayes (humanities->literature) | 2.2813 | 0.3829 | 0.1215 | 0.2428 | 0.2428 | 103.0000 | 0.6294 | 0.6291 | 0.6300 |
| large | LSG | yes | extended | Introduction to the Old Testament With Christine Hayes (humanities->literature) | 2.4835 | 0.3913 | 0.1349 | 0.2382 | 0.2382 | 126.3333 | 0.6328 | 0.6219 | 0.6443 |
| base | LSG | yes | standard | Introductory Biology (scientific->biology) | 2.1546 | 0.2984 | 0.0905 | 0.2248 | 0.2248 | 50.5000 | 0.6848 | 0.6408 | 0.7353 |
| base | LSG | yes | extended | Introductory Biology (scientific->biology) | 2.2012 | 0.2292 | 0.0678 | 0.1636 | 0.1636 | 39.0000 | 0.7081 | 0.6883 | 0.7292 |
| large | LSG | yes | standard | Introductory Biology (scientific->biology) | 2.3647 | 0.2862 | 0.0922 | 0.1986 | 0.1986 | 55.0000 | 0.7127 | 0.6641 | 0.7689 |
| large | LSG | yes | extended | Introductory Biology (scientific->biology) | 2.3321 | 0.3353 | 0.1213 | 0.2184 | 0.2184 | 43.0000 | 0.6900 | 0.6672 | 0.7157 |
| base | LSG | yes | standard | Learn Differential Equations: Up Close with Gilbert Strang and Cleve Moler (scientific->mathematics) | 2.1683 | 0.3492 | 0.0984 | 0.2222 | 0.2222 | 44.0000 | 0.6397 | 0.6410 | 0.6385 |
| base | LSG | yes | extended | Learn Differential Equations: Up Close with Gilbert Strang and Cleve Moler (scientific->mathematics) | 2.1477 | 0.1739 | 0.0667 | 0.1304 | 0.1304 | 73.0000 | 0.6123 | 0.5767 | 0.6525 |
| large | LSG | yes | standard | Learn Differential Equations: Up Close with Gilbert Strang and Cleve Moler (scientific->mathematics) | 1.8841 | 0.2273 | 0.0465 | 0.1591 | 0.1591 | 75.0000 | 0.6088 | 0.5776 | 0.6434 |
| large | LSG | yes | extended | Learn Differential Equations: Up Close with Gilbert Strang and Cleve Moler (scientific->mathematics) | 2.1420 | 0.3333 | 0.0909 | 0.2000 | 0.2000 | 76.0000 | 0.5888 | 0.5417 | 0.6448 |
| base | LSG | yes | standard | Linear Algebra (scientific->mathematics) | 2.9983 | 0.2972 | 0.1055 | 0.1714 | 0.1714 | 48.5000 | 0.6483 | 0.6797 | 0.6197 |
| base | LSG | yes | extended | Linear Algebra (scientific->mathematics) | 2.8778 | 0.3026 | 0.1073 | 0.1906 | 0.1906 | 49.5000 | 0.6237 | 0.6548 | 0.5954 |
| large | LSG | yes | standard | Linear Algebra (scientific->mathematics) | 2.8587 | 0.3532 | 0.1092 | 0.2247 | 0.2247 | 57.5000 | 0.6402 | 0.6536 | 0.6276 |
| large | LSG | yes | extended | Linear Algebra (scientific->mathematics) | 2.7806 | 0.3879 | 0.1259 | 0.2146 | 0.2146 | 62.5000 | 0.6466 | 0.6704 | 0.6247 |
| base | LSG | yes | standard | Listening to Music with Craig Wright (humanities->arts) | 2.5340 | 0.3838 | 0.1449 | 0.3070 | 0.3070 | 80.3333 | 0.6470 | 0.6655 | 0.6298 |
| base | LSG | yes | extended | Listening to Music with Craig Wright (humanities->arts) | 2.6778 | 0.3533 | 0.0861 | 0.2047 | 0.2047 | 73.6667 | 0.6593 | 0.6806 | 0.6401 |
| large | LSG | yes | standard | Listening to Music with Craig Wright (humanities->arts) | 2.3880 | 0.4040 | 0.1053 | 0.2326 | 0.2326 | 67.6667 | 0.6773 | 0.6992 | 0.6579 |
| large | LSG | yes | extended | Listening to Music with Craig Wright (humanities->arts) | 2.4687 | 0.3701 | 0.1378 | 0.2482 | 0.2482 | 82.6667 | 0.6750 | 0.6830 | 0.6682 |
| base | LSG | yes | standard | Machine Learning for Healthcare (scientific->computer science) | 2.5855 | 0.3898 | 0.1119 | 0.2595 | 0.2595 | 47.3333 | 0.6825 | 0.6843 | 0.6823 |
| base | LSG | yes | extended | Machine Learning for Healthcare (scientific->computer science) | 2.6162 | 0.3816 | 0.1260 | 0.2712 | 0.2712 | 48.3333 | 0.6585 | 0.6771 | 0.6464 |
| large | LSG | yes | standard | Machine Learning for Healthcare (scientific->computer science) | 2.4821 | 0.4086 | 0.1586 | 0.2981 | 0.2981 | 50.3333 | 0.7050 | 0.7121 | 0.6990 |
| large | LSG | yes | extended | Machine Learning for Healthcare (scientific->computer science) | 2.4212 | 0.3515 | 0.0989 | 0.2378 | 0.2378 | 66.3333 | 0.6748 | 0.6646 | 0.6878 |
| base | LSG | yes | standard | Matrix Methods in Data Analysis, Signal Processing, and Machine Learning (scientific->mathematics) | 3.1131 | 0.3504 | 0.1198 | 0.2500 | 0.2500 | 56.7500 | 0.5965 | 0.6548 | 0.5479 |
| base | LSG | yes | extended | Matrix Methods in Data Analysis, Signal Processing, and Machine Learning (scientific->mathematics) | 3.1786 | 0.3326 | 0.1029 | 0.2408 | 0.2408 | 50.7500 | 0.6145 | 0.6812 | 0.5600 |
| large | LSG | yes | standard | Matrix Methods in Data Analysis, Signal Processing, and Machine Learning (scientific->mathematics) | 2.9979 | 0.3191 | 0.1217 | 0.1917 | 0.1917 | 40.0000 | 0.6117 | 0.7019 | 0.5420 |
| large | LSG | yes | extended | Matrix Methods in Data Analysis, Signal Processing, and Machine Learning (scientific->mathematics) | 2.9634 | 0.3712 | 0.1073 | 0.2254 | 0.2254 | 73.7500 | 0.6095 | 0.6530 | 0.5717 |
| base | LSG | yes | standard | Modern Poetry with Langdon Hammer  (ENGL 310) (humanities->literature) | 2.7720 | 0.3966 | 0.1098 | 0.2204 | 0.2204 | 119.0000 | 0.6238 | 0.6127 | 0.6370 |
| base | LSG | yes | extended | Modern Poetry with Langdon Hammer  (ENGL 310) (humanities->literature) | 2.7673 | 0.3684 | 0.1246 | 0.2157 | 0.2157 | 132.5000 | 0.6175 | 0.6094 | 0.6261 |
| large | LSG | yes | standard | Modern Poetry with Langdon Hammer  (ENGL 310) (humanities->literature) | 2.5714 | 0.4405 | 0.1585 | 0.2502 | 0.2502 | 124.7500 | 0.6530 | 0.6428 | 0.6642 |
| large | LSG | yes | extended | Modern Poetry with Langdon Hammer  (ENGL 310) (humanities->literature) | 2.5594 | 0.4262 | 0.1211 | 0.2476 | 0.2476 | 106.0000 | 0.6598 | 0.6581 | 0.6618 |
| base | LSG | yes | standard | Performance Engineering of Software Systems (scientific->computer science) | 2.9121 | 0.3004 | 0.0871 | 0.2315 | 0.2315 | 45.7500 | 0.6581 | 0.6544 | 0.6636 |
| base | LSG | yes | extended | Performance Engineering of Software Systems (scientific->computer science) | 3.3304 | 0.2638 | 0.0643 | 0.1844 | 0.1844 | 45.7500 | 0.6695 | 0.6720 | 0.6681 |
| large | LSG | yes | standard | Performance Engineering of Software Systems (scientific->computer science) | 3.6543 | 0.3217 | 0.1165 | 0.2256 | 0.2256 | 54.5000 | 0.6647 | 0.6518 | 0.6800 |
| large | LSG | yes | extended | Performance Engineering of Software Systems (scientific->computer science) | 3.7981 | 0.2592 | 0.0923 | 0.1941 | 0.1941 | 62.5000 | 0.6665 | 0.6546 | 0.6803 |
| base | LSG | yes | standard | Philosophy and the Science of Human Nature w/ Tamar Gendler (humanities->philosophy) | 2.8638 | 0.3497 | 0.1130 | 0.2175 | 0.2175 | 89.5000 | 0.6201 | 0.6554 | 0.5908 |
| base | LSG | yes | extended | Philosophy and the Science of Human Nature w/ Tamar Gendler (humanities->philosophy) | 2.8187 | 0.3523 | 0.1040 | 0.2191 | 0.2176 | 90.7500 | 0.6211 | 0.6467 | 0.5976 |
| large | LSG | yes | standard | Philosophy and the Science of Human Nature w/ Tamar Gendler (humanities->philosophy) | 2.4762 | 0.3570 | 0.1429 | 0.2377 | 0.2381 | 76.7500 | 0.6510 | 0.7031 | 0.6079 |
| large | LSG | yes | extended | Philosophy and the Science of Human Nature w/ Tamar Gendler (humanities->philosophy) | 2.5257 | 0.3646 | 0.1144 | 0.2171 | 0.2156 | 124.0000 | 0.6423 | 0.6523 | 0.6342 |
| base | LSG | yes | standard | Power and Politics in Today’s World (humanities->politics) | 2.7255 | 0.4858 | 0.2195 | 0.3513 | 0.3513 | 69.0000 | 0.6929 | 0.7275 | 0.6643 |
| base | LSG | yes | extended | Power and Politics in Today’s World (humanities->politics) | 2.6380 | 0.4115 | 0.2185 | 0.3132 | 0.3132 | 63.5000 | 0.6826 | 0.7272 | 0.6491 |
| large | LSG | yes | standard | Power and Politics in Today’s World (humanities->politics) | 2.4389 | 0.5652 | 0.2899 | 0.3465 | 0.3465 | 72.0000 | 0.7085 | 0.7247 | 0.6931 |
| large | LSG | yes | extended | Power and Politics in Today’s World (humanities->politics) | 2.3452 | 0.4914 | 0.2195 | 0.3103 | 0.3103 | 91.5000 | 0.6685 | 0.6702 | 0.6689 |
| base | LSG | yes | standard | Psychology and Economics (humanities->economics) | 2.9397 | 0.1702 | 0.0889 | 0.1702 | 0.1702 | 34.0000 | 0.6273 | 0.6170 | 0.6381 |
| base | LSG | yes | extended | Psychology and Economics (humanities->economics) | 2.8573 | 0.2353 | 0.0816 | 0.1961 | 0.1961 | 38.0000 | 0.6305 | 0.6155 | 0.6463 |
| large | LSG | yes | standard | Psychology and Economics (humanities->economics) | 2.9062 | 0.1538 | 0.0635 | 0.1538 | 0.1538 | 51.0000 | 0.6468 | 0.6145 | 0.6828 |
| large | LSG | yes | extended | Psychology and Economics (humanities->economics) | 3.0710 | 0.1846 | 0.0635 | 0.1846 | 0.1846 | 54.0000 | 0.6388 | 0.6094 | 0.6711 |
| base | LSG | yes | standard | Real Analysis (scientific->mathematics) | 2.4597 | 0.3678 | 0.1474 | 0.2383 | 0.2383 | 50.7500 | 0.6379 | 0.6289 | 0.6472 |
| base | LSG | yes | extended | Real Analysis (scientific->mathematics) | 2.5141 | 0.3521 | 0.0874 | 0.1999 | 0.1999 | 48.7500 | 0.6139 | 0.6045 | 0.6238 |
| large | LSG | yes | standard | Real Analysis (scientific->mathematics) | 2.6114 | 0.3968 | 0.1575 | 0.2526 | 0.2526 | 53.0000 | 0.6460 | 0.6301 | 0.6630 |
| large | LSG | yes | extended | Real Analysis (scientific->mathematics) | 2.5220 | 0.3675 | 0.1430 | 0.2627 | 0.2626 | 69.5000 | 0.6322 | 0.6045 | 0.6639 |
| base | LSG | yes | standard | Roman Architecture with Diana E. E. Kleiner (humanities->history) | 2.6927 | 0.4693 | 0.1309 | 0.1949 | 0.1949 | 165.0000 | 0.6463 | 0.6620 | 0.6312 |
| base | LSG | yes | extended | Roman Architecture with Diana E. E. Kleiner (humanities->history) | 2.7218 | 0.4462 | 0.1318 | 0.2231 | 0.2231 | 152.0000 | 0.6381 | 0.6601 | 0.6175 |
| large | LSG | yes | standard | Roman Architecture with Diana E. E. Kleiner (humanities->history) | 2.2007 | 0.3710 | 0.1370 | 0.2262 | 0.2262 | 90.0000 | 0.6405 | 0.6864 | 0.6003 |
| large | LSG | yes | extended | Roman Architecture with Diana E. E. Kleiner (humanities->history) | 2.2738 | 0.3474 | 0.1422 | 0.2254 | 0.2254 | 80.0000 | 0.6318 | 0.6893 | 0.5832 |
| base | LSG | yes | standard | Sensory Systems (scientific->biology) | 2.2363 | 0.4012 | 0.2003 | 0.3281 | 0.3281 | 46.0000 | 0.7180 | 0.6976 | 0.7405 |
| base | LSG | yes | extended | Sensory Systems (scientific->biology) | 2.0445 | 0.4537 | 0.2414 | 0.3356 | 0.3356 | 47.5000 | 0.6995 | 0.6760 | 0.7271 |
| large | LSG | yes | standard | Sensory Systems (scientific->biology) | 1.9892 | 0.5041 | 0.2754 | 0.4274 | 0.4274 | 53.0000 | 0.7432 | 0.7074 | 0.7839 |
| large | LSG | yes | extended | Sensory Systems (scientific->biology) | 2.1022 | 0.4336 | 0.2235 | 0.3709 | 0.3709 | 60.0000 | 0.7199 | 0.6846 | 0.7634 |
| base | LSG | yes | standard | Signals and Systems (scientific->engineering) | 3.1067 | 0.2858 | 0.0493 | 0.1835 | 0.1835 | 48.5000 | 0.6246 | 0.6253 | 0.6246 |
| base | LSG | yes | extended | Signals and Systems (scientific->engineering) | 3.1429 | 0.2626 | 0.0392 | 0.1755 | 0.1755 | 54.2500 | 0.6168 | 0.6120 | 0.6221 |
| large | LSG | yes | standard | Signals and Systems (scientific->engineering) | 3.1638 | 0.3732 | 0.1347 | 0.2716 | 0.2679 | 48.5000 | 0.6855 | 0.6905 | 0.6810 |
| large | LSG | yes | extended | Signals and Systems (scientific->engineering) | 3.3391 | 0.2843 | 0.0367 | 0.1728 | 0.1728 | 69.7500 | 0.6321 | 0.6232 | 0.6416 |
| base | LSG | yes | standard | String Theory and Holographic Duality (scientific->physics) | 2.6239 | 0.3750 | 0.1670 | 0.3065 | 0.3072 | 45.6667 | 0.6871 | 0.7055 | 0.6702 |
| base | LSG | yes | extended | String Theory and Holographic Duality (scientific->physics) | 2.6410 | 0.3859 | 0.1727 | 0.3166 | 0.3207 | 49.5000 | 0.6790 | 0.6924 | 0.6675 |
| large | LSG | yes | standard | String Theory and Holographic Duality (scientific->physics) | 2.6126 | 0.4466 | 0.2167 | 0.3539 | 0.3542 | 56.5000 | 0.6929 | 0.7036 | 0.6841 |
| large | LSG | yes | extended | String Theory and Holographic Duality (scientific->physics) | 2.5879 | 0.4483 | 0.1987 | 0.3547 | 0.3522 | 65.3333 | 0.7034 | 0.6986 | 0.7087 |
| base | LSG | yes | standard | The American Novel Since 1945 with Amy Hungerford (humanities->literature) | 3.2066 | 0.4057 | 0.1143 | 0.2264 | 0.2264 | 124.0000 | 0.5819 | 0.5916 | 0.5726 |
| base | LSG | yes | extended | The American Novel Since 1945 with Amy Hungerford (humanities->literature) | 3.1838 | 0.3592 | 0.0392 | 0.1845 | 0.1845 | 118.0000 | 0.5787 | 0.5867 | 0.5709 |
| large | LSG | yes | standard | The American Novel Since 1945 with Amy Hungerford (humanities->literature) | 2.8534 | 0.3600 | 0.0806 | 0.2320 | 0.2320 | 167.0000 | 0.5890 | 0.5932 | 0.5849 |
| large | LSG | yes | extended | The American Novel Since 1945 with Amy Hungerford (humanities->literature) | 2.7177 | 0.4186 | 0.1221 | 0.2326 | 0.2326 | 132.0000 | 0.6300 | 0.6411 | 0.6193 |
| base | LSG | yes | standard | The American Revolution with Joanne B. Freeman (humanities->history) | 2.3484 | 0.4386 | 0.1835 | 0.2589 | 0.2589 | 109.6667 | 0.6591 | 0.6495 | 0.6705 |
| base | LSG | yes | extended | The American Revolution with Joanne B. Freeman (humanities->history) | 2.4887 | 0.4048 | 0.1408 | 0.2249 | 0.2249 | 108.0000 | 0.6482 | 0.6412 | 0.6570 |
| large | LSG | yes | standard | The American Revolution with Joanne B. Freeman (humanities->history) | 2.0427 | 0.4788 | 0.2323 | 0.3343 | 0.3343 | 104.6667 | 0.6938 | 0.6963 | 0.6945 |
| large | LSG | yes | extended | The American Revolution with Joanne B. Freeman (humanities->history) | 2.1491 | 0.3463 | 0.1080 | 0.2518 | 0.2518 | 99.6667 | 0.6341 | 0.6350 | 0.6377 |
| base | LSG | yes | standard | The Battlecode Programming Competition (scientific->computer science) | 2.9875 | 0.3103 | 0.0357 | 0.2414 | 0.2414 | 50.0000 | 0.6535 | 0.6350 | 0.6730 |
| base | LSG | yes | extended | The Battlecode Programming Competition (scientific->computer science) | 2.9582 | 0.2500 | 0.0857 | 0.2222 | 0.2222 | 57.0000 | 0.6314 | 0.6172 | 0.6462 |
| large | LSG | yes | standard | The Battlecode Programming Competition (scientific->computer science) | 3.1467 | 0.4068 | 0.0351 | 0.3051 | 0.3051 | 41.0000 | 0.6804 | 0.6770 | 0.6839 |
| large | LSG | yes | extended | The Battlecode Programming Competition (scientific->computer science) | 2.9393 | 0.3934 | 0.1695 | 0.2951 | 0.2951 | 46.0000 | 0.7151 | 0.7220 | 0.7083 |
| base | LSG | yes | standard | The Civil War and Reconstruction with David Blight (humanities->history) | 3.2856 | 0.3871 | 0.1101 | 0.2086 | 0.2086 | 117.5000 | 0.6231 | 0.6382 | 0.6090 |
| base | LSG | yes | extended | The Civil War and Reconstruction with David Blight (humanities->history) | 3.1848 | 0.3806 | 0.1081 | 0.2319 | 0.2319 | 93.5000 | 0.6381 | 0.6676 | 0.6113 |
| large | LSG | yes | standard | The Civil War and Reconstruction with David Blight (humanities->history) | 2.6342 | 0.3971 | 0.1233 | 0.2376 | 0.2376 | 163.5000 | 0.6230 | 0.6172 | 0.6294 |
| large | LSG | yes | extended | The Civil War and Reconstruction with David Blight (humanities->history) | 2.6005 | 0.4411 | 0.1169 | 0.2241 | 0.2241 | 163.5000 | 0.6191 | 0.6154 | 0.6234 |
| base | LSG | yes | standard | The Film Experience (humanities->arts) | 2.8976 | 0.2689 | 0.0797 | 0.2039 | 0.2039 | 79.0000 | 0.6285 | 0.6028 | 0.6567 |
| base | LSG | yes | extended | The Film Experience (humanities->arts) | 2.9298 | 0.3532 | 0.1074 | 0.2092 | 0.2092 | 57.5000 | 0.6597 | 0.6416 | 0.6789 |
| large | LSG | yes | standard | The Film Experience (humanities->arts) | 2.5897 | 0.2278 | 0.0501 | 0.1519 | 0.1519 | 91.5000 | 0.6299 | 0.6051 | 0.6591 |
| large | LSG | yes | extended | The Film Experience (humanities->arts) | 2.6426 | 0.1732 | 0.0502 | 0.1386 | 0.1386 | 141.0000 | 0.5926 | 0.5517 | 0.6411 |
| base | LSG | yes | standard | The Moral Foundations of Politics with Ian Shapiro (humanities->philosophy) | 3.0639 | 0.1846 | 0.0310 | 0.1154 | 0.1154 | 57.0000 | 0.5344 | 0.6128 | 0.4739 |
| base | LSG | yes | extended | The Moral Foundations of Politics with Ian Shapiro (humanities->philosophy) | 3.1419 | 0.2214 | 0.0432 | 0.1357 | 0.1357 | 80.0000 | 0.5427 | 0.6036 | 0.4929 |
| large | LSG | yes | standard | The Moral Foundations of Politics with Ian Shapiro (humanities->philosophy) | 2.6708 | 0.2426 | 0.0593 | 0.1471 | 0.1471 | 70.0000 | 0.5326 | 0.6035 | 0.4766 |
| large | LSG | yes | extended | The Moral Foundations of Politics with Ian Shapiro (humanities->philosophy) | 2.8570 | 0.2847 | 0.0629 | 0.1597 | 0.1597 | 90.0000 | 0.5429 | 0.6169 | 0.4847 |
| base | LSG | yes | standard | Theory of Computation (scientific->computer science) | 2.4514 | 0.3874 | 0.1487 | 0.2583 | 0.2583 | 44.0000 | 0.5774 | 0.6209 | 0.5409 |
| base | LSG | yes | extended | Theory of Computation (scientific->computer science) | 2.4692 | 0.5023 | 0.1727 | 0.3690 | 0.3690 | 55.5000 | 0.6487 | 0.6743 | 0.6264 |
| large | LSG | yes | standard | Theory of Computation (scientific->computer science) | 2.5098 | 0.4860 | 0.2224 | 0.3688 | 0.3688 | 42.5000 | 0.6540 | 0.6962 | 0.6173 |
| large | LSG | yes | extended | Theory of Computation (scientific->computer science) | 2.3319 | 0.5005 | 0.2448 | 0.3658 | 0.3658 | 59.0000 | 0.6466 | 0.6721 | 0.6231 |
| base | LSG | yes | standard | Timothy Snyder: The Making of Modern Ukraine (humanities->history) | 2.5847 | 0.2023 | 0.0312 | 0.1450 | 0.1450 | 61.5000 | 0.6696 | 0.6172 | 0.7318 |
| base | LSG | yes | extended | Timothy Snyder: The Making of Modern Ukraine (humanities->history) | 2.7213 | 0.1878 | 0.0263 | 0.1349 | 0.1349 | 66.0000 | 0.6270 | 0.5700 | 0.6968 |
| large | LSG | yes | standard | Timothy Snyder: The Making of Modern Ukraine (humanities->history) | 2.3088 | 0.2771 | 0.0800 | 0.2216 | 0.2216 | 45.0000 | 0.6440 | 0.6019 | 0.6924 |
| large | LSG | yes | extended | Timothy Snyder: The Making of Modern Ukraine (humanities->history) | 2.3714 | 0.3409 | 0.1043 | 0.2625 | 0.2625 | 38.0000 | 0.7216 | 0.6866 | 0.7603 |
| base | LSG | yes | standard | Yale University's Lectures: The Early Middle Ages, 284-1000 (humanities->history) | 3.0269 | 0.3590 | 0.1058 | 0.2393 | 0.2393 | 86.0000 | 0.6057 | 0.6471 | 0.5704 |
| base | LSG | yes | extended | Yale University's Lectures: The Early Middle Ages, 284-1000 (humanities->history) | 3.0343 | 0.3454 | 0.0894 | 0.2142 | 0.2142 | 89.6667 | 0.6053 | 0.6440 | 0.5719 |
| large | LSG | yes | standard | Yale University's Lectures: The Early Middle Ages, 284-1000 (humanities->history) | 2.6272 | 0.3010 | 0.1074 | 0.2018 | 0.2018 | 77.0000 | 0.5898 | 0.6353 | 0.5513 |
| large | LSG | yes | extended | Yale University's Lectures: The Early Middle Ages, 284-1000 (humanities->history) | 2.6247 | 0.3593 | 0.1130 | 0.2269 | 0.2269 | 109.3333 | 0.6335 | 0.6610 | 0.6084 |

---

##### **Considering the complete dataset (train + dev + test) as the test set (→ larger sample size, at the cost of a large bias)**

**Table 3b: macrocategories test set**
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | humanities | 1.9307 | 0.3809 | 0.1367 | 0.2401 | 0.2401 | 100.7608 | 0.6278 | 0.6379 | 0.6202 |
| base | LSG | yes | extended | humanities | 1.9204 | 0.3794 | 0.1330 | 0.2382 | 0.2383 | 99.9019 | 0.6275 | 0.6378 | 0.6197 |
| large | LSG | yes | standard | humanities | 1.5741 | 0.4005 | 0.1665 | 0.2668 | 0.2669 | 97.8888 | 0.6483 | 0.6646 | 0.6356 |
| large | LSG | yes | extended | humanities | 1.5940 | 0.4020 | 0.1610 | 0.2624 | 0.2622 | 105.9761 | 0.6460 | 0.6559 | 0.6392 |
| base | LSG | yes | standard | scientific | 1.6957 | 0.3668 | 0.1588 | 0.2699 | 0.2695 | 54.8768 | 0.6583 | 0.6651 | 0.6547 |
| base | LSG | yes | extended | scientific | 1.6853 | 0.3608 | 0.1534 | 0.2651 | 0.2645 | 55.0254 | 0.6562 | 0.6629 | 0.6528 |
| large | LSG | yes | standard | scientific | 1.2628 | 0.4441 | 0.2463 | 0.3490 | 0.3491 | 54.1954 | 0.7000 | 0.7093 | 0.6949 |
| large | LSG | yes | extended | scientific | 1.2874 | 0.4398 | 0.2339 | 0.3403 | 0.3398 | 61.1339 | 0.6945 | 0.6964 | 0.6961 |

**Table 3c: writing frequency test set**
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | frequent writing | X | 0.3477 | 0.1449 | 0.2506 | 0.2513 | 55.1727 | 0.6399 | 0.6461 | 0.6377 |
| base | LSG | yes | extended | frequent writing | X | 0.3467 | 0.1421 | 0.2512 | 0.2512 | 56.2188 | 0.6393 | 0.6447 | 0.6382 |
| large | LSG | yes | standard | frequent writing | X | 0.4321 | 0.2334 | 0.3359 | 0.3353 | 56.1977 | 0.6839 | 0.6912 | 0.6816 |
| large | LSG | yes | extended | frequent writing | X | 0.4292 | 0.2266 | 0.3297 | 0.3294 | 60.9155 | 0.6803 | 0.6815 | 0.6836 |
| base | LSG | yes | standard | medium frequent writing | X | 0.3684 | 0.1442 | 0.2545 | 0.2539 | 73.5087 | 0.6511 | 0.6597 | 0.6453 |
| base | LSG | yes | extended | medium frequent writing | X | 0.3638 | 0.1373 | 0.2486 | 0.2487 | 71.3424 | 0.6510 | 0.6615 | 0.6510 |
| large | LSG | yes | standard | medium frequent writing | X | 0.4122 | 0.2004 | 0.3034 | 0.3040 | 66.4836 | 0.6813 | 0.6974 | 0.6687 |
| large | LSG | yes | extended | medium frequent writing | X | 0.4090 | 0.1888 | 0.2958 | 0.2960 | 74.1683 | 0.6759 | 0.6843 | 0.6705 |
| base | LSG | yes | standard | infrequent writing | X | 0.4050 | 0.1518 | 0.2570 | 0.2569 | 104.3706 | 0.6358 | 0.6466 | 0.6271 |
| base | LSG | yes | extended | infrequent writing | X | 0.4001 | 0.1475 | 0.2523 | 0.2523 | 104.3009 | 0.6332 | 0.6430 | 0.6252 |
| large | LSG | yes | standard | infrequent writing | X | 0.4203 | 0.1806 | 0.2792 | 0.2789 | 103.2422 | 0.6538 | 0.6693 | 0.6414 |
| large | LSG | yes | extended | infrequent writing | X | 0.4210 | 0.1712 | 0.2730 | 0.2730 | 112.2862 | 0.6512 | 0.6600 | 0.6449 |

**Table 3d: categories test set**
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| base | LSG | yes | standard | arts | X | 0.3577 | 0.1343 | 0.2320 | 0.2315 | 81.3913 | 0.6441 | 0.6424 | 0.6490 |
| base | LSG | yes | extended | arts | X | 0.3563 | 0.1128 | 0.2222 | 0.2222 | 80.1739 | 0.6397 | 0.6375 | 0.6443 |
| large | LSG | yes | standard | arts | X | 0.4032 | 0.1847 | 0.2881 | 0.2895 | 64.3695 | 0.6799 | 0.6968 | 0.6669 |
| base | LSG | yes | standard | biology | X | 0.4687 | 0.2744 | 0.3804 | 0.3808 | 49.0480 | 0.7309 | 0.7207 | 0.7434 |
| base | LSG | yes | extended | biology | X | 0.4621 | 0.2584 | 0.3754 | 0.3759 | 48.2692 | 0.7279 | 0.7189 | 0.7394 |
| large | LSG | yes | standard | biology | X | 0.5845 | 0.4082 | 0.5085 | 0.5089 | 47.7211 | 0.7891 | 0.7806 | 0.8000 |
| base | LSG | yes | standard | business | X | 0.3168 | 0.1601 | 0.2540 | 0.2537 | 46.9565 | 0.6500 | 0.6317 | 0.6746 |
| base | LSG | yes | extended | business | X | 0.3546 | 0.1853 | 0.2924 | 0.2900 | 47.6521 | 0.6647 | 0.6446 | 0.6920 |
| large | LSG | yes | standard | business | X | 0.4432 | 0.3020 | 0.4000 | 0.3974 | 46.6086 | 0.7189 | 0.6999 | 0.7439 |
| base | LSG | yes | standard | chemistry | X | 0.4460 | 0.2032 | 0.3292 | 0.3279 | 57.9230 | 0.7116 | 0.7090 | 0.7151 |
| base | LSG | yes | extended | chemistry | X | 0.4668 | 0.2322 | 0.3458 | 0.3423 | 51.6153 | 0.7291 | 0.7404 | 0.7189 |
| large | LSG | yes | standard | chemistry | X | 0.5503 | 0.3333 | 0.4401 | 0.4434 | 56.6153 | 0.7676 | 0.7746 | 0.7620 |
| base | LSG | yes | standard | computer science | X | 0.3463 | 0.1421 | 0.2603 | 0.2601 | 51.2885 | 0.6580 | 0.6606 | 0.6576 |
| base | LSG | yes | extended | computer science | X | 0.3389 | 0.1348 | 0.2553 | 0.2546 | 49.7114 | 0.6559 | 0.6596 | 0.6543 |
| large | LSG | yes | standard | computer science | X | 0.4558 | 0.2596 | 0.3683 | 0.3679 | 49.3830 | 0.7123 | 0.7185 | 0.7086 |
| base | LSG | yes | standard | economics | X | 0.3593 | 0.1309 | 0.2403 | 0.2394 | 82.8282 | 0.6340 | 0.6424 | 0.6282 |
| base | LSG | yes | extended | economics | X | 0.3469 | 0.1210 | 0.2268 | 0.2268 | 81.1212 | 0.6329 | 0.6441 | 0.6252 |
| large | LSG | yes | standard | economics | X | 0.4100 | 0.1939 | 0.3007 | 0.2993 | 81.9090 | 0.6693 | 0.6813 | 0.6609 |
| base | LSG | yes | standard | engineering | X | 0.3128 | 0.0983 | 0.2121 | 0.2121 | 61.7129 | 0.6347 | 0.6496 | 0.6237 |
| base | LSG | yes | extended | engineering | X | 0.2922 | 0.0836 | 0.1959 | 0.1966 | 62.1666 | 0.6269 | 0.6401 | 0.6175 |
| large | LSG | yes | standard | engineering | X | 0.3588 | 0.1538 | 0.2531 | 0.2536 | 59.4629 | 0.6653 | 0.6823 | 0.6524 |
| base | LSG | yes | standard | history | X | 0.3834 | 0.1352 | 0.2324 | 0.2327 | 115.5991 | 0.6241 | 0.6325 | 0.6185 |
| base | LSG | yes | extended | history | X | 0.3830 | 0.1342 | 0.2355 | 0.2357 | 112.7024 | 0.6241 | 0.6341 | 0.6173 |
| large | LSG | yes | standard | history | X | 0.3953 | 0.1605 | 0.2591 | 0.2590 | 113.3677 | 0.6429 | 0.6564 | 0.6341 |
| base | LSG | yes | standard | literature | X | 0.4257 | 0.1550 | 0.2597 | 0.2599 | 119.4219 | 0.6279 | 0.6382 | 0.6189 |
| base | LSG | yes | extended | literature | X | 0.4294 | 0.1585 | 0.2601 | 0.2601 | 121.2658 | 0.6291 | 0.6378 | 0.6215 |
| large | LSG | yes | standard | literature | X | 0.4324 | 0.1691 | 0.2681 | 0.2684 | 114.9710 | 0.6414 | 0.6584 | 0.6269 |
| base | LSG | yes | standard | mathematics | X | 0.3628 | 0.1539 | 0.2590 | 0.2591 | 51.6063 | 0.6386 | 0.6479 | 0.6341 |
| base | LSG | yes | extended | mathematics | X | 0.3593 | 0.1518 | 0.2576 | 0.2575 | 51.6223 | 0.6382 | 0.6475 | 0.6341 |
| large | LSG | yes | standard | mathematics | X | 0.4240 | 0.2254 | 0.3288 | 0.3285 | 51.1968 | 0.6745 | 0.6838 | 0.6716 |
| base | LSG | yes | standard | philosophy | X | 0.3914 | 0.1396 | 0.2360 | 0.2357 | 102.3168 | 0.6218 | 0.6344 | 0.6116 |
| base | LSG | yes | extended | philosophy | X | 0.3817 | 0.1281 | 0.2304 | 0.2298 | 100.1782 | 0.6185 | 0.6304 | 0.6087 |
| large | LSG | yes | standard | philosophy | X | 0.3907 | 0.1535 | 0.2474 | 0.2477 | 91.3465 | 0.6359 | 0.6579 | 0.6178 |
| base | LSG | yes | standard | physics | X | 0.3645 | 0.1383 | 0.2510 | 0.2506 | 59.5954 | 0.6451 | 0.6627 | 0.6302 |
| base | LSG | yes | extended | physics | X | 0.3571 | 0.1336 | 0.2413 | 0.2411 | 63.3129 | 0.6409 | 0.6562 | 0.6281 |
| large | LSG | yes | standard | physics | X | 0.3985 | 0.1775 | 0.2771 | 0.2770 | 58.6564 | 0.6662 | 0.6894 | 0.6470 |
| base | LSG | yes | standard | politics | X | 0.4285 | 0.2007 | 0.3162 | 0.3167 | 73.1836 | 0.6875 | 0.7009 | 0.6779 |
| base | LSG | yes | extended | politics | X | 0.4225 | 0.2011 | 0.3178 | 0.3192 | 67.1020 | 0.6832 | 0.6977 | 0.6724 |
| large | LSG | yes | standard | politics | X | 0.4826 | 0.2694 | 0.3690 | 0.3693 | 64.4693 | 0.7143 | 0.6975 | 0.6470 |
| base | LSG | yes | standard | psychology | X | 0.3720 | 0.1398 | 0.2571 | 0.2568 | 62.95 | 0.6682 | 0.6842 | 0.6544 |
| base | LSG | yes | extended | psychology | X | 0.3509 | 0.1203 | 0.2263 | 0.2267 | 68.4 | 0.6540 | 0.6597 | 0.6497 |
| large | LSG | yes | standard | psychology | X | 0.3689 | 0.1518 | 0.2580 | 0.2569 | 59.1 | 0.6703 | 0.6899 | 0.6546 |
| base | LSG | yes | standard | social studies | X | 0.2922 | 0.0747 | 0.1807 | 0.1804 | 75.9529 | 0.5761 | 0.6000 | 0.5554 |
| base | LSG | yes | extended | social studies | X | 0.3109 | 0.0762 | 0.1886 | 0.1881 | 79.7647 | 0.5849 | 0.6084 | 0.5643 |
| large | LSG | yes | standard | social studies | X | 0.3092 | 0.0868 | 0.1905 | 0.1901 | 88.5647 | 0.5925 | 0.6183 | 0.5702 |
| large | LSG | yes | extended | arts | X | 0.3799 | 0.1580 | 0.2620 | 0.2633 | 77.3695 | 0.6621 | 0.6672 | 0.6608 |
| large | LSG | yes | extended | biology | X | 0.5567 | 0.3784 | 0.4812 | 0.4825 | 51.0673 | 0.7742 | 0.7592 | 0.7923 |
| large | LSG | yes | extended | business | X | 0.4577 | 0.3157 | 0.4206 | 0.4189 | 46.2173 | 0.7236 | 0.7058 | 0.7464 |
| large | LSG | yes | extended | chemistry | X | 0.4574 | 0.2223 | 0.3517 | 0.3520 | 69.4615 | 0.7228 | 0.7187 | 0.7275 |
| large | LSG | yes | extended | computer science | X | 0.4503 | 0.2485 | 0.3588 | 0.3585 | 56.5621 | 0.7094 | 0.7073 | 0.7139 |
| large | LSG | yes | extended | economics | X | 0.3830 | 0.1669 | 0.2726 | 0.2716 | 87.9393 | 0.6579 | 0.6663 | 0.6534 |
| large | LSG | yes | extended | engineering | X | 0.3571 | 0.1461 | 0.2526 | 0.2521 | 66.1296 | 0.6634 | 0.6761 | 0.6537 |
| large | LSG | yes | extended | history | X | 0.4017 | 0.1597 | 0.2572 | 0.2571 | 118.5247 | 0.6443 | 0.6544 | 0.6386 |
| large | LSG | yes | extended | literature | X | 0.4297 | 0.1602 | 0.2626 | 0.2629 | 121.7109 | 0.6391 | 0.6511 | 0.6290 |
| large | LSG | yes | extended | mathematics | X | 0.4286 | 0.2226 | 0.3200 | 0.3201 | 57.4042 | 0.6698 | 0.6717 | 0.6737 |
| large | LSG | yes | extended | philosophy | X | 0.4060 | 0.1547 | 0.2514 | 0.2510 | 106.9702 | 0.6310 | 0.6418 | 0.6225 |

**Table 3e: courses test set**  
| Model Size | Model Type | Domain Adaptation | Finetuning Training Set | Test Set | Test Loss | ROUGE1 | ROUGE2 | ROUGEL | ROUGELSUM | Generation Length | BERT-score F1 | BERT-score Precision | BERT-score Recall |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|