In [2]:
# imports
import json
import matplotlib.pyplot as plt
import numpy as np
import os
from transformers import AutoTokenizer
from experiment_lib import *


In [13]:
# Experiment configuration.
#
# Every constant in this cell is mirrored into config.json (written at the end
# of the cell), which the pipeline scripts receive through --config_file.

# --- where things live -------------------------------------------------------
ROOT_DIR = "tmp/"                      # results are saved in this directory
EXPERIMENT_NAME = "nl-100-100-1.3B"    # the name of the experiment
SOURCE_DIR = "./datasets"              # where the split datasets are stored
SOURCE_FILE = "train_dataset.npy"      # (split) tokenized version of the dataset
DATASET_DIR = "europarl"               # location of the raw dataset files
DATASET_FILE = "europarl-v7.nl-en.nl"  # the dataset file itself

# --- what to run -------------------------------------------------------------
NUM_TRIALS = 100                       # number of trials to run in the experiment
LANGUAGE = "nl"                        # language of the setup
SPLIT = "train"                        # split of the dataset to use
MODEL = "EleutherAI/gpt-neo-1.3B"
BATCH_SIZE = 32

# --- sequence parameters -----------------------------------------------------
# With the values below, PREFIX_LEN + SUFFIX_LEN adds up to EXAMPLE_TOKEN_LEN.
PREPREFIX_LEN = 0
PREFIX_LEN = 50
SUFFIX_LEN = 50
EXAMPLE_TOKEN_LEN = 100

# Collect the constants under the lower-case keys used in config.json.
# Keyword order is the order in which the keys appear in the written file.
config = dict(
    root_dir=ROOT_DIR,
    experiment_name=EXPERIMENT_NAME,
    dataset_dir=DATASET_DIR,
    dataset_file=DATASET_FILE,
    num_trials=NUM_TRIALS,
    language=LANGUAGE,
    split=SPLIT,
    suffix_len=SUFFIX_LEN,
    prefix_len=PREFIX_LEN,
    example_token_len=EXAMPLE_TOKEN_LEN,
    preprefix_len=PREPREFIX_LEN,
    source_dir=SOURCE_DIR,
    source_file=SOURCE_FILE,
    batch_size=BATCH_SIZE,
    model=MODEL,
)

# Persist the configuration (4-space indented JSON) in the working directory.
serialized_config = json.dumps(config, indent=4)
with open("config.json", "w") as config_fh:
    config_fh.write(serialized_config)

In [8]:
# 1. Split the examples into two parts: prefix and suffix (and preprefix)

# assumption: data is prepared in jsonlines format with sentences all of desired token length
!python split_dataset.py --config_file exp-configs/EMEA/config-2.7B-en.json

2024-06-08 14:30:07,630 - INFO
===== Starting dataset token split generation for language en with token length 100 =====
2024-06-08 14:30:07,630 - INFO
Opened file: EMEA/EMEA-c-100.en-train.jsonl
2024-06-08 14:30:07,639 - INFO
Processed 32 lines
2024-06-08 14:30:07,647 - INFO
Processed 64 lines
2024-06-08 14:30:07,654 - INFO
Processed 96 lines
2024-06-08 14:30:07,661 - INFO
Processed 128 lines
2024-06-08 14:30:07,668 - INFO
Processed 160 lines
2024-06-08 14:30:07,675 - INFO
Processed 192 lines
2024-06-08 14:30:07,681 - INFO
Processed 224 lines
2024-06-08 14:30:07,687 - INFO
Processed 256 lines
2024-06-08 14:30:07,693 - INFO
Processed 288 lines
2024-06-08 14:30:07,699 - INFO
Processed 320 lines
2024-06-08 14:30:07,706 - INFO
Processed 352 lines
2024-06-08 14:30:07,712 - INFO
Processed 384 lines
2024-06-08 14:30:07,718 - INFO
Processed 416 lines
2024-06-08 14:30:07,724 - INFO
Processed 448 lines
2024-06-08 14:30:07,729 - INFO
Processed 480 lines
2024-06-08 14:30:07,736 - INFO
Processed 5

In [1]:
# 2. Generate model output using prefixes as prompts

# NOTE: run this command directly in a terminal instead of from the notebook.
# Loading the model inside the notebook crashes the kernel, presumably because
# of memory constraints.

# Per the original author's note, existing results are not overwritten.
!python extraction.py --config_file config.json

2024-05-30 23:12:58,729 - INFO
Parsing arguments...
2024-05-30 23:12:58,736 - INFO
Default device: mps
2024-05-30 23:12:58,736 - INFO
Loading tokenizer...
2024-05-30 23:12:59,093 - INFO
Loading model...


In [34]:
# 3. Evaluate the model output against the dataset: BLEU scores between the
#    generated and the original text. Per the script's log output, scores are
#    saved to one file per trial and trials that were scored before are skipped.
!python calculate_bleu.py --config_file config.json

2024-06-02 23:22:52,286 - INFO - ===== Starting BLEU-score calculation between generated and original text in language en for 50 prefix & suffix length =====
===== Starting BLEU-score calculation between generated and original text in language en for 50 prefix & suffix length =====
2024-06-02 23:22:52,286 - INFO - ===== Decoding original prefixes & suffixes =====
===== Decoding original prefixes & suffixes =====
2024-06-02 23:22:52,294 - INFO - Starting BLEU-score calculation for trial 0
Starting BLEU-score calculation for trial 0
2024-06-02 23:22:52,294 - INFO - Saving BLEU scores for trial 0 to tmp/europarl/en/en-100-100-2.7B/scores/bleu_scores_trial_0.jsonl
Saving BLEU scores for trial 0 to tmp/europarl/en/en-100-100-2.7B/scores/bleu_scores_trial_0.jsonl
2024-06-02 23:22:52,294 - INFO - BLEU scores for trial 0 previously calculated, skipping calculation
BLEU scores for trial 0 previously calculated, skipping calculation
2024-06-02 23:22:52,294 - INFO - Starting BLEU-score calculatio

In [14]:
# Run the evaluation script. Per its log output it loads the list of example
# IDs for the dataset and merges the BLEU scores for each example ID.
!python evaluation.py --config_file config.json

2024-06-05 20:49:17,412 - INFO
==== Starting evaluation ====
2024-06-05 20:49:17,412 - INFO
Loading list of example IDs for dataset europarl...
2024-06-05 20:49:17,413 - INFO
Loaded 7398 example IDs
2024-06-05 20:49:17,414 - INFO
Processing example 81...
2024-06-05 20:49:17,508 - INFO
Merged BLEU scores for exid 81
2024-06-05 20:49:17,508 - INFO
Processing example 83...
2024-06-05 20:49:17,557 - INFO
Merged BLEU scores for exid 83
2024-06-05 20:49:17,557 - INFO
Processing example 568...
2024-06-05 20:49:17,596 - INFO
Merged BLEU scores for exid 568
2024-06-05 20:49:17,596 - INFO
Processing example 577...
2024-06-05 20:49:17,635 - INFO
Merged BLEU scores for exid 577
2024-06-05 20:49:17,635 - INFO
Processing example 765...
2024-06-05 20:49:17,675 - INFO
Merged BLEU scores for exid 765
2024-06-05 20:49:17,675 - INFO
Processing example 766...
2024-06-05 20:49:17,713 - INFO
Merged BLEU scores for exid 766
2024-06-05 20:49:17,713 - INFO
Processing example 1061...
2024-06-05 20:49:17,752 - I

In [30]:
# Compute METEOR scores between the generated and the original text. Per the
# script's log output, scores are saved to one file per trial, and the run
# checks that the NLTK wordnet package is available (downloading it if needed).
!python calculate_meteor.py --config_file config.json

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/annavisman/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
2024-06-05 19:04:26,055 - INFO - ===== Starting METEOR-score calculation between generated and original text in language nl for 50 prefix & suffix length =====
===== Starting METEOR-score calculation between generated and original text in language nl for 50 prefix & suffix length =====
2024-06-05 19:04:26,055 - INFO - ===== Decoding original prefixes & suffixes =====
===== Decoding original prefixes & suffixes =====
2024-06-05 19:04:26,062 - INFO - Starting METEOR-score calculation for trial 0
Starting METEOR-score calculation for trial 0
2024-06-05 19:04:26,062 - INFO - Saving METEOR scores for trial 0 to tmp/europarl/nl/nl-100-100-1.3B/meteor_scores/meteor_scores_trial_0.jsonl
Saving METEOR scores for trial 0 to tmp/europarl/nl/nl-100-100-1.3B/meteor_scores/meteor_scores_trial_0.jsonl
2024-06-05 19:04:31,769 - INFO - Finished METEOR-score calc