# Dataset formatting

I'll bring the dataset into a line-by-line format

In [8]:
from bs4 import BeautifulSoup
import os

if os.path.exists("./datasets/wit3_de_en/train.tags.de-en"):
  os.remove("./datasets/wit3_de_en/train.tags.de-en")

with open("./datasets/wit3_de_en/de-en/train.tags.de-en.de", 'r') as reader:
    xml_train_de = BeautifulSoup(reader.read())

transcripts_de = xml_train_de.find_all("transcript")

with open("./datasets/wit3_de_en/de-en/train.tags.de-en.en", 'r') as reader:
    xml_train_en = BeautifulSoup(reader.read())

transcripts_en = xml_train_en.find_all("transcript")

transcripts_de_en = zip(transcripts_de, transcripts_en)

formatted_train_text = []

for (transcript_de, transcript_en) in transcripts_de_en:
    transcript_de_lines = transcript_de.text.splitlines()
    transcript_en_lines = transcript_en.text.splitlines()
    formatted_transcript = map(lambda line_de, line_en: line_de + ' ' + line_en + '\n', transcript_de_lines, transcript_en_lines)
    formatted_train_text.extend(formatted_transcript)

with open("./datasets/wit3_de_en/train.tags.de-en", "x") as writer:
    writer.writelines(formatted_train_text)

In [9]:
if os.path.exists("./datasets/wit3_de_en/valid.tags.de-en"):
  os.remove("./datasets/wit3_de_en/valid.tags.de-en")

with open("./datasets/wit3_de_en/de-en/valid.tags.de-en.de", 'r') as reader:
    xml_train_de = BeautifulSoup(reader.read())

transcripts_de = xml_train_de.find_all("transcript")

with open("./datasets/wit3_de_en/de-en/valid.tags.de-en.en", 'r') as reader:
    xml_train_en = BeautifulSoup(reader.read())

transcripts_en = xml_train_en.find_all("transcript")

transcripts_de_en = zip(transcripts_de, transcripts_en)

formatted_train_text = []

for (transcript_de, transcript_en) in transcripts_de_en:
    transcript_de_lines = transcript_de.text.splitlines()
    transcript_en_lines = transcript_en.text.splitlines()
    formatted_transcript = map(lambda line_de, line_en: line_de + ' ' + line_en + '\n', transcript_de_lines, transcript_en_lines)
    formatted_train_text.extend(formatted_transcript)

with open("./datasets/wit3_de_en/valid.tags.de-en", "x") as writer:
    writer.writelines(formatted_train_text)

## Training a classifier

In [5]:
MODEL_PATH = "./improved-diffusion/diffusion_models/diff_roc_pad_rand128_transformer_lr0.0001_0.0_2000_sqrt_Lsimple_h128_s2_d0.1_sd101_xstart_e2e"
DATASET_TRAIN_PATH = "./datasets/wit3_de_en/train.tags.de-en.txt"
DATASET_VALID_PATH = "./datasets/wit3_de_en/valid.tags.de-en.txt"
EPOCH = 6
BATCH_SIZE = 10
GRADIENT_ACCUMULATION_STEPS = 1
PRETRAINED_MODEL ='bert-base-uncased'
EXPERIMENT = 'roc'
SEED = 101
TASK = 'translation'
NOTES = ''
APP = f"--init_emb {MODEL_PATH} --n_embd {16} --learned_emb yes"
BLOCK_SIZE = 100
MODEL_TYPE = 'gpt2'

folder_name = "classifier_models"


if not os.path.isdir(folder_name):
    os.mkdir(folder_name)


Model_FILE = EXPERIMENT + \
    '_e={}_b={}_m={}_{}_{}_{}'.format(
        EPOCH, BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS,
        PRETRAINED_MODEL, os.path.basename(DATASET_TRAIN_PATH), SEED, TASK)
Model_FILE = Model_FILE + f'_{NOTES}'
logging_dir = os.path.join(folder_name, 'runs', Model_FILE)
Model_FILE = os.path.join(folder_name, Model_FILE)
app = f" --train_file={DATASET_TRAIN_PATH} --validation_file {DATASET_VALID_PATH} " \
        f" --task {TASK}"
app += " " + APP




COMMANDLINE = f"python transformers/examples/pytorch/language-modeling/train_translation.py \
        --output_dir={Model_FILE} \
        --model_name_or_path={PRETRAINED_MODEL} \
        --tokenizer_name={PRETRAINED_MODEL} \
        --per_device_train_batch_size {BATCH_SIZE} \
        --per_device_eval_batch_size {BATCH_SIZE} \
        --save_steps 50000 \
        --num_train_epochs {EPOCH} \
        --do_train --eval_steps 10000 --evaluation_strategy steps \
        --do_eval --dataloader_num_workers 4 \
        --save_total_limit 1 \
        --overwrite_output_dir  \
        --logging_dir {logging_dir} \
        --block_size {BLOCK_SIZE}  \
        --disable_tqdm True --model_type {MODEL_TYPE} \
        --padding_mode none \
        --gradient_accumulation_steps {GRADIENT_ACCUMULATION_STEPS} " \
                f"--experiment {EXPERIMENT} --seed {SEED}"


COMMANDLINE += app

with open(Model_FILE + '.sh', 'w') as f:
    print(COMMANDLINE, file=f)

print(COMMANDLINE)
os.system(COMMANDLINE)

python transformers/examples/pytorch/language-modeling/train_translation.py         --output_dir=classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_         --model_name_or_path=bert-base-uncased         --tokenizer_name=bert-base-uncased         --per_device_train_batch_size 10         --per_device_eval_batch_size 10         --save_steps 50000         --num_train_epochs 6         --do_train --eval_steps 10000 --evaluation_strategy steps         --do_eval --dataloader_num_workers 4         --save_total_limit 1         --overwrite_output_dir          --logging_dir classifier_models/runs/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_         --block_size 100          --disable_tqdm True --model_type gpt2         --padding_mode none         --gradient_accumulation_steps 1 --experiment roc --seed 101 --train_file=./datasets/wit3_de_en/train.tags.de-en.txt --validation_file ./datasets/wit3_de_en/valid.tags.de-en.txt  --task transl

[INFO|configuration_utils.py:652] 2022-11-16 12:57:59,085 >> loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/ydemirag/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
[INFO|configuration_utils.py:688] 2022-11-16 12:57:59,086 >> Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0.dev0"


initializing the tokenizer with small vocab
****************************************************************************************************
loading from dataset-specific vocab
132353

 Initializing the model from scratch 
****************************************************************************************************


Running tokenizer on dataset: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 65/65 [00:01<00:00, 50.36ba/s]
Running tokenizer on dataset:   0%|          | 0/67 [00:00<?, ?ba/s]

11/16/2022 12:58:02 - INFO - datasets.arrow_writer - Done writing 64909 examples in 10989634 bytes .
11/16/2022 12:58:02 - INFO - datasets.fingerprint - Parameter 'function'=<function main.<locals>.tokenize_function at 0x7faf1c85d5f0> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead.


Running tokenizer on dataset: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 47.11ba/s]
padding:   0%|          | 0/65 [00:00<?, ?ba/s]

11/16/2022 12:58:04 - INFO - datasets.arrow_writer - Done writing 66513 examples in 11335561 bytes .
11/16/2022 12:58:04 - INFO - datasets.fingerprint - Parameter 'function'=<function main.<locals>.pad_function at 0x7faf1c37c830> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead.


padding: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 65/65 [00:00<00:00, 73.56ba/s]
padding:   0%|          | 0/67 [00:00<?, ?ba/s]

11/16/2022 12:58:04 - INFO - datasets.arrow_writer - Done writing 64909 examples in 50369904 bytes .
11/16/2022 12:58:04 - INFO - datasets.fingerprint - Parameter 'function'=<function main.<locals>.pad_function at 0x7faf1c37c830> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead.


padding: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:00<00:00, 70.70ba/s]


11/16/2022 12:58:05 - INFO - datasets.arrow_writer - Done writing 66513 examples in 51614624 bytes .
11/16/2022 12:58:06 - INFO - datasets.load - Checking /home/ydemirag/.cache/huggingface/datasets/downloads/b839e05c74a5dac7459de9abd13555710e1df86d301bd838bc0b3c5c009c5fb0.c90b7a47035aca98431814fb3c916f50b10dd20d672f7073136109a2e0b43b46.py for additional imports.
11/16/2022 12:58:06 - INFO - datasets.utils.filelock - Lock 140390123430800 acquired on /home/ydemirag/.cache/huggingface/datasets/downloads/b839e05c74a5dac7459de9abd13555710e1df86d301bd838bc0b3c5c009c5fb0.c90b7a47035aca98431814fb3c916f50b10dd20d672f7073136109a2e0b43b46.py.lock
11/16/2022 12:58:06 - INFO - datasets.load - Creating main folder for metric https://raw.githubusercontent.com/huggingface/datasets/1.8.0/metrics/accuracy/accuracy.py at /home/ydemirag/.cache/huggingface/modules/datasets_modules/metrics/accuracy
11/16/2022 12:58:06 - INFO - datasets.load - Creating specific version folder for metric https://raw.githubuse

[INFO|trainer.py:1277] 2022-11-16 12:58:08,347 >> ***** Running training *****
[INFO|trainer.py:1278] 2022-11-16 12:58:08,347 >>   Num examples = 64909
[INFO|trainer.py:1279] 2022-11-16 12:58:08,347 >>   Num Epochs = 6
[INFO|trainer.py:1280] 2022-11-16 12:58:08,347 >>   Instantaneous batch size per device = 10
[INFO|trainer.py:1281] 2022-11-16 12:58:08,347 >>   Total train batch size (w. parallel, distributed & accumulation) = 10
[INFO|trainer.py:1282] 2022-11-16 12:58:08,347 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:1283] 2022-11-16 12:58:08,347 >>   Total optimization steps = 38946
[INFO|integrations.py:576] 2022-11-16 12:58:08,348 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
wandb: Currently logged in as: yunus-demirag. Use `wandb login --relogin` to force relogin
wandb: Tracking run with wandb version 0.13.5
wandb: Run data is saved locally in /home/ydemirag/studium/Diffusion-LM/wandb/run-20221116_125809-3tk41xru
wa

{'loss': 4.0488, 'learning_rate': 4.935808555435732e-05, 'epoch': 0.08}
{'loss': 2.0614, 'learning_rate': 4.871617110871463e-05, 'epoch': 0.15}
{'loss': 1.1636, 'learning_rate': 4.807425666307195e-05, 'epoch': 0.23}
{'loss': 0.7762, 'learning_rate': 4.743234221742926e-05, 'epoch': 0.31}
{'loss': 0.5427, 'learning_rate': 4.679042777178658e-05, 'epoch': 0.39}
{'loss': 0.406, 'learning_rate': 4.614851332614389e-05, 'epoch': 0.46}
{'loss': 0.3334, 'learning_rate': 4.550659888050121e-05, 'epoch': 0.54}
{'loss': 0.2787, 'learning_rate': 4.486468443485852e-05, 'epoch': 0.62}
{'loss': 0.2319, 'learning_rate': 4.422276998921584e-05, 'epoch': 0.69}
{'loss': 0.2036, 'learning_rate': 4.358085554357316e-05, 'epoch': 0.77}
{'loss': 0.1861, 'learning_rate': 4.293894109793047e-05, 'epoch': 0.85}
{'loss': 0.1682, 'learning_rate': 4.2297026652287783e-05, 'epoch': 0.92}
{'loss': 0.1517, 'learning_rate': 4.1655112206645105e-05, 'epoch': 1.0}
{'loss': 0.0751, 'learning_rate': 4.1013197761002414e-05, 'epoch

[INFO|trainer.py:2385] 2022-11-16 13:22:40,716 >> ***** Running Evaluation *****
[INFO|trainer.py:2387] 2022-11-16 13:22:40,717 >>   Num examples = 66513
[INFO|trainer.py:2390] 2022-11-16 13:22:40,717 >>   Batch size = 10


{'eval_loss': 0.10834651440382004, 'eval_runtime': 193.9224, 'eval_samples_per_second': 342.988, 'eval_steps_per_second': 34.302, 'epoch': 1.54}
{'loss': 0.0353, 'learning_rate': 3.6519796641503626e-05, 'epoch': 1.62}
{'loss': 0.0341, 'learning_rate': 3.587788219586094e-05, 'epoch': 1.69}
{'loss': 0.0311, 'learning_rate': 3.523596775021825e-05, 'epoch': 1.77}
{'loss': 0.0295, 'learning_rate': 3.459405330457557e-05, 'epoch': 1.85}
{'loss': 0.0252, 'learning_rate': 3.395213885893288e-05, 'epoch': 1.93}
{'loss': 0.0253, 'learning_rate': 3.3310224413290195e-05, 'epoch': 2.0}
{'loss': 0.0117, 'learning_rate': 3.2668309967647517e-05, 'epoch': 2.08}
{'loss': 0.01, 'learning_rate': 3.2026395522004825e-05, 'epoch': 2.16}
{'loss': 0.0094, 'learning_rate': 3.138448107636214e-05, 'epoch': 2.23}
{'loss': 0.0082, 'learning_rate': 3.074256663071946e-05, 'epoch': 2.31}
{'loss': 0.0074, 'learning_rate': 3.0100652185076774e-05, 'epoch': 2.39}
{'loss': 0.0077, 'learning_rate': 2.945873773943409e-05, 'epo

[INFO|trainer.py:2385] 2022-11-16 13:50:11,622 >> ***** Running Evaluation *****
[INFO|trainer.py:2387] 2022-11-16 13:50:11,622 >>   Num examples = 66513
[INFO|trainer.py:2390] 2022-11-16 13:50:11,622 >>   Batch size = 10


{'eval_loss': 0.09247037023305893, 'eval_runtime': 194.0155, 'eval_samples_per_second': 342.823, 'eval_steps_per_second': 34.286, 'epoch': 3.08}
{'loss': 0.0021, 'learning_rate': 2.3681507728649928e-05, 'epoch': 3.16}
{'loss': 0.0018, 'learning_rate': 2.303959328300724e-05, 'epoch': 3.24}
{'loss': 0.0017, 'learning_rate': 2.2397678837364558e-05, 'epoch': 3.31}
{'loss': 0.0015, 'learning_rate': 2.1755764391721873e-05, 'epoch': 3.39}
{'loss': 0.0012, 'learning_rate': 2.1113849946079188e-05, 'epoch': 3.47}
{'loss': 0.0016, 'learning_rate': 2.0471935500436503e-05, 'epoch': 3.54}
{'loss': 0.0014, 'learning_rate': 1.983002105479382e-05, 'epoch': 3.62}
{'loss': 0.0014, 'learning_rate': 1.9188106609151133e-05, 'epoch': 3.7}
{'loss': 0.0012, 'learning_rate': 1.854619216350845e-05, 'epoch': 3.77}
{'loss': 0.001, 'learning_rate': 1.7904277717865764e-05, 'epoch': 3.85}
{'loss': 0.0011, 'learning_rate': 1.726236327222308e-05, 'epoch': 3.93}
{'loss': 0.0011, 'learning_rate': 1.6620448826580394e-05, 

[INFO|trainer.py:2385] 2022-11-16 14:17:46,281 >> ***** Running Evaluation *****
[INFO|trainer.py:2387] 2022-11-16 14:17:46,281 >>   Num examples = 66513
[INFO|trainer.py:2390] 2022-11-16 14:17:46,281 >>   Batch size = 10


{'eval_loss': 0.08524111658334732, 'eval_runtime': 193.8138, 'eval_samples_per_second': 343.18, 'eval_steps_per_second': 34.322, 'epoch': 4.62}
{'loss': 0.0004, 'learning_rate': 1.0843218815796231e-05, 'epoch': 4.7}
{'loss': 0.0003, 'learning_rate': 1.0201304370153546e-05, 'epoch': 4.78}
{'loss': 0.0003, 'learning_rate': 9.559389924510861e-06, 'epoch': 4.85}
{'loss': 0.0003, 'learning_rate': 8.917475478868177e-06, 'epoch': 4.93}
{'loss': 0.0003, 'learning_rate': 8.275561033225492e-06, 'epoch': 5.01}
{'loss': 0.0002, 'learning_rate': 7.633646587582807e-06, 'epoch': 5.08}
{'loss': 0.0002, 'learning_rate': 6.991732141940123e-06, 'epoch': 5.16}
{'loss': 0.0002, 'learning_rate': 6.349817696297438e-06, 'epoch': 5.24}
{'loss': 0.0002, 'learning_rate': 5.707903250654753e-06, 'epoch': 5.32}
{'loss': 0.0002, 'learning_rate': 5.065988805012068e-06, 'epoch': 5.39}
{'loss': 0.0002, 'learning_rate': 4.424074359369383e-06, 'epoch': 5.47}
{'loss': 0.0002, 'learning_rate': 3.782159913726699e-06, 'epoch

[INFO|trainer.py:1506] 2022-11-16 14:43:03,109 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)


[INFO|trainer.py:2135] 2022-11-16 14:43:03,110 >> Saving model checkpoint to classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_
[INFO|configuration_utils.py:438] 2022-11-16 14:43:03,111 >> Configuration saved in classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_/config.json


{'train_runtime': 6294.7622, 'train_samples_per_second': 61.87, 'train_steps_per_second': 6.187, 'train_loss': 0.14429008824810946, 'epoch': 6.0}


[INFO|modeling_utils.py:1081] 2022-11-16 14:43:03,775 >> Model weights saved in classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_/pytorch_model.bin
[INFO|trainer.py:2385] 2022-11-16 14:43:03,780 >> ***** Running Evaluation *****
[INFO|trainer.py:2387] 2022-11-16 14:43:03,780 >>   Num examples = 66513
[INFO|trainer.py:2390] 2022-11-16 14:43:03,780 >>   Batch size = 10


***** train metrics *****
  epoch                    =        6.0
  train_loss               =     0.1443
  train_runtime            = 1:44:54.76
  train_samples            =      64909
  train_samples_per_second =      61.87
  train_steps_per_second   =      6.187
11/16/2022 14:43:03 - INFO - __main__ - *** Evaluate ***
{'eval_loss': 0.0784742459654808, 'eval_runtime': 196.7309, 'eval_samples_per_second': 338.091, 'eval_steps_per_second': 33.813, 'epoch': 6.0}
***** eval metrics *****
  epoch                   =        6.0
  eval_loss               =     0.0785
  eval_runtime            = 0:03:16.73
  eval_samples            =      66513
  eval_samples_per_second =    338.091
  eval_steps_per_second   =     33.813
  perplexity              =     1.0816


[INFO|modelcard.py:460] 2022-11-16 14:46:20,982 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
wandb: Waiting for W&B process to finish... (success).
wandb: | 0.485 MB of 0.485 MB uploaded (0.000 MB deduped)
wandb: Run history:
wandb:                      eval/loss ‚ñà‚ñÑ‚ñÉ‚ñÅ
wandb:                   eval/runtime ‚ñÅ‚ñÅ‚ñÅ‚ñà
wandb:        eval/samples_per_second ‚ñà‚ñà‚ñà‚ñÅ
wandb:          eval/steps_per_second ‚ñà‚ñà‚ñà‚ñÅ
wandb:                    train/epoch ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
wandb:              train/global_step ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
wandb:            train/learning_rate ‚ñà‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚

0

In [2]:
import argparse
import os, json, sys
import stanza
import spacy_stanza
import numpy as np
import torch as th
from transformers import set_seed
import torch.distributed as dist
from improved_diffusion.rounding import rounding_func, load_models, load_tokenizer
from improved_diffusion.test_util import get_weights, denoised_fn_round
from functools import partial
from improved_diffusion import dist_util, logger
from improved_diffusion.script_util import (
    NUM_CLASSES,
    model_and_diffusion_defaults,
    create_model_and_diffusion,
    add_dict_to_argparser,
    args_to_dict,
)
from transformers import AutoModelForCausalLM
sys.path.insert(0, './improved-diffusion/scripts')
from infill_util import langevin_fn3, get_score, langevin_fn3_compose, langevin_fn1, langevin_fn4, langevin_fn_tree, langevin_fn_length
from spacy.lang.en import English

MODEL_PATH = "./improved-diffusion/diffusion_models/diff_roc_pad_rand128_transformer_lr0.0001_0.0_2000_sqrt_Lsimple_h128_s2_d0.1_sd101_xstart_e2e"

partial_seq = ['A kid friendly venue named Alimentum is located on the riverside .',
                       'Alimentum , situated by the river , is quite child friendly .']

emb_model, tokenizer = load_models(modality='roc', mode='random', model_name_or_path='', emb_dim=128, file=MODEL_PATH, )
model3 = get_weights(emb_model, {'emb_scale_factor': 1.0})
model3 = model3.cuda()


##model_control = Classifier_GPT2.from_pretrained('./classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_').cuda()

model = AutoModelForCausalLM.from_pretrained('./classifier_models/roc_e=6_b=10_m=bert-base-uncased_train.tags.de-en.txt_101_translation_')



loading from ./improved-diffusion/diffusion_models/diff_roc_pad_rand128_transformer_lr0.0001_0.0_2000_sqrt_Lsimple_h128_s2_d0.1_sd101_xstart_e2e/vocab.json
11043


If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


In [11]:
partial_seq = ['A kid friendly venue named Alimentum is located on the riverside .',
                       'Alimentum , situated by the river , is quite child friendly .']

tokens2id = {v:k for k, v in tokenizer.items()}
todo_pad_token = -1
pad_token = tokens2id['PAD']
encoded_partial_seq = [th.LongTensor([tokens2id.get(x, tokens2id['UNK']) for x in seq.split()]).cuda() for seq in partial_seq]



embedding = model3(encoded_partial_seq[0])
embedding
input_embs = th.nn.Parameter(embedding)
model_out = model(input_embs)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)