In [2]:
from datasets import load_dataset

imdb_dataset = load_dataset("imdb")
imdb_dataset

Found cached dataset imdb (/home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)
100%|██████████| 3/3 [00:00<00:00, 345.27it/s]


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [3]:
sample = imdb_dataset["train"].shuffle(seed=42).select(range(3))

for row in sample:
    print(f"\n'>>> Review: {row['text']}'")
    print(f"'>>> Label: {row['label']}'")

Loading cached shuffled indices for dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-9c48ce5d173413c7.arrow



'>>> Review: There is no relation at all between Fortier and Profiler but the fact that both are police series about violent crimes. Profiler looks crispy, Fortier looks classic. Profiler plots are quite simple. Fortier's plot are far more complicated... Fortier looks more like Prime Suspect, if we have to spot similarities... The main character is weak and weirdo, but have "clairvoyance". People like to compare, to judge, to evaluate. How about just enjoying? Funny thing too, people writing Fortier looks American but, on the other hand, arguing they prefer American series (!!!). Maybe it's the language, or the spirit, but I think this series is more English than American. By the way, the actors are really good and funny. The acting is not superficial at all...'
'>>> Label: 1'

'>>> Review: This movie is a great. The plot is very true to the book which is a classic written by Mark Twain. The movie starts of with a scene where Hank sings a song with a bunch of kids called "when you stu

In [4]:
from transformers import AutoModelForMaskedLM

model_checkpoint = "distilbert-base-uncased"
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


In [5]:
sample = imdb_dataset["train"].shuffle(seed=42).select(range(3))

for row in sample:
    print(f"\n'>>> Review: {row['text']}'")
    print(f"'>>> Label: {row['label']}'")

Loading cached shuffled indices for dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-9c48ce5d173413c7.arrow



'>>> Review: There is no relation at all between Fortier and Profiler but the fact that both are police series about violent crimes. Profiler looks crispy, Fortier looks classic. Profiler plots are quite simple. Fortier's plot are far more complicated... Fortier looks more like Prime Suspect, if we have to spot similarities... The main character is weak and weirdo, but have "clairvoyance". People like to compare, to judge, to evaluate. How about just enjoying? Funny thing too, people writing Fortier looks American but, on the other hand, arguing they prefer American series (!!!). Maybe it's the language, or the spirit, but I think this series is more English than American. By the way, the actors are really good and funny. The acting is not superficial at all...'
'>>> Label: 1'

'>>> Review: This movie is a great. The plot is very true to the book which is a classic written by Mark Twain. The movie starts of with a scene where Hank sings a song with a bunch of kids called "when you stu

In [6]:
def tokenize_function(examples):
    result = tokenizer(examples["text"])
    if tokenizer.is_fast:
        result["word_ids"] = [result.word_ids(i) for i in range(len(result["input_ids"]))]
    return result


# Use batched=True to activate fast multithreading!
tokenized_datasets = imdb_dataset.map(
    tokenize_function, batched=True, remove_columns=["text", "label"]
)
tokenized_datasets

Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-8576ab0b0c7f9ea2.arrow
Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-61de0825c0bf43e4.arrow
Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-217b3955a8e2552a.arrow


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids'],
        num_rows: 50000
    })
})

In [7]:
# Slicing produces a list of lists for each feature
tokenized_samples = tokenized_datasets["train"][:3]

for idx, sample in enumerate(tokenized_samples["input_ids"]):
    print(f"'>>> Review {idx} length: {len(sample)}'")

'>>> Review 0 length: 363'
'>>> Review 1 length: 304'
'>>> Review 2 length: 133'


In [8]:
concatenated_examples = {
    k: sum(tokenized_samples[k], []) for k in tokenized_samples.keys()
}
total_length = len(concatenated_examples["input_ids"])
print(f"'>>> Concatenated reviews length: {total_length}'")

'>>> Concatenated reviews length: 800'


In [9]:
chunk_size = 128
chunks = {
    k: [t[i : i + chunk_size] for i in range(0, total_length, chunk_size)]
    for k, t in concatenated_examples.items()
}

for chunk in chunks["input_ids"]:
    print(f"'>>> Chunk length: {len(chunk)}'")


'>>> Chunk length: 128'
'>>> Chunk length: 128'
'>>> Chunk length: 128'
'>>> Chunk length: 128'
'>>> Chunk length: 128'
'>>> Chunk length: 128'
'>>> Chunk length: 32'


In [10]:
def group_texts(examples):
    # Concatenate all texts
    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
    # Compute length of concatenated texts
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # We drop the last chunk if it's smaller than chunk_size
    total_length = (total_length // chunk_size) * chunk_size
    # Split by chunks of max_len
    result = {
        k: [t[i : i + chunk_size] for i in range(0, total_length, chunk_size)]
        for k, t in concatenated_examples.items()
    }
    # Create a new labels column
    result["labels"] = result["input_ids"].copy()
    return result

In [11]:
lm_datasets = tokenized_datasets.map(group_texts, batched=True)
lm_datasets

Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-4f30a51cfaaacdff.arrow
Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-c8edfb9b3ebec645.arrow
Loading cached processed dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-3ff06484a52a8ead.arrow


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 61291
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 59904
    })
    unsupervised: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 122957
    })
})

In [12]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

In [13]:
samples = [lm_datasets["train"][i] for i in range(2)]
for sample in samples:
    _ = sample.pop("word_ids")

for chunk in data_collator(samples)["input_ids"]:
    print(f"\n'>>> {tokenizer.decode(chunk)}'")

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.



'>>> [CLS] i rented [MASK] am curious - yellow from my [MASK] store because of all [MASK] [MASK] that surrounded it when it was [MASK] released in 1967. i also heard that at first it [MASK] seized by u. s [MASK] customs if it ever tried to enter this country [MASK] therefore [MASK] a fan of films considered " controversial " i [MASK] had to see this for myself. < br / > < br / > the plot is centered around a young swedish [MASK] student [MASK] [MASK] who [MASK] [MASK] learn everything she can about life. in particular she wants [MASK] focus her attentions to making some sort of documentary on [MASK] the [MASK] swede thought about certain political issues such'

'>>> as [MASK] vietnam [MASK] and [MASK] issues in [MASK] united states. in between asking politicians and ordinary den [MASK] imaginary of stockholm [MASK] their opinions on politics, she has sex with her drama teacher, classmates, [MASK] married men. < br / > < br / > what [MASK] [MASK] [MASK] [MASK] am curious - yellow is th

In [14]:
import collections
import numpy as np

from transformers import default_data_collator

wwm_probability = 0.2


def whole_word_masking_data_collator(features):
    for feature in features:
        word_ids = feature.pop("word_ids")

        # Create a map between words and corresponding token indices
        mapping = collections.defaultdict(list)
        current_word_index = -1
        current_word = None
        for idx, word_id in enumerate(word_ids):
            if word_id is not None:
                if word_id != current_word:
                    current_word = word_id
                    current_word_index += 1
                mapping[current_word_index].append(idx)

        # Randomly mask words
        mask = np.random.binomial(1, wwm_probability, (len(mapping),))
        input_ids = feature["input_ids"]
        labels = feature["labels"]
        new_labels = [-100] * len(labels)
        for word_id in np.where(mask)[0]:
            word_id = word_id.item()
            for idx in mapping[word_id]:
                new_labels[idx] = labels[idx]
                input_ids[idx] = tokenizer.mask_token_id
                
        feature["labels"] = new_labels

    return default_data_collator(features)

In [15]:
samples = [lm_datasets["train"][i] for i in range(2)]
batch = whole_word_masking_data_collator(samples)

for chunk in batch["input_ids"]:
    print(f"\n'>>> {tokenizer.decode(chunk)}'")


'>>> [CLS] i [MASK] [MASK] [MASK] curious [MASK] yellow from my [MASK] store [MASK] of all [MASK] controversy that surrounded [MASK] when it was first released [MASK] 1967. i also heard that at [MASK] it was seized [MASK] u [MASK] s. customs [MASK] it ever tried to enter this [MASK], therefore being a fan [MASK] films [MASK] " [MASK] [MASK] i really [MASK] [MASK] see this for myself. < br [MASK] > [MASK] [MASK] / [MASK] the plot is [MASK] around a young swedish drama student [MASK] lena who wants to learn everything she can about life. in particular she wants to [MASK] her [MASK] [MASK] [MASK] [MASK] some sort of [MASK] [MASK] what the [MASK] [MASK] [MASK] thought about [MASK] political [MASK] such'

'>>> as the vietnam war and race issues [MASK] [MASK] united states. in between asking politicians and ordinary [MASK] [MASK] [MASK] of stockholm about [MASK] [MASK] on politics, she has sex [MASK] her drama teacher, classmates, and married men. < br / > [MASK] [MASK] / [MASK] what kills 

In [16]:
train_size = 10_000
test_size = int(0.1 * train_size)

downsampled_dataset = lm_datasets["train"].train_test_split(
    train_size=train_size, test_size=test_size, seed=42
)
downsampled_dataset

Loading cached split indices for dataset at /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-f629002ddec8d262.arrow and /home/dlf/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-285c21409c6749ce.arrow


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 10000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 1000
    })
})

In [17]:
from transformers import TrainingArguments

batch_size = 64
# Show the training loss with every epoch
logging_steps = len(downsampled_dataset["train"]) // batch_size
model_name = model_checkpoint.split("/")[-1]

training_args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-imdb",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    push_to_hub=False,
    fp16=False,
    logging_steps=logging_steps,
)
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=downsampled_dataset["train"],
    eval_dataset=downsampled_dataset["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
)
import math

# eval_results = trainer.evaluate()
# print(f">>> Perplexity: {math.exp(eval_results['eval_loss']):.2f}")

In [18]:
def insert_random_mask(batch):
    features = [dict(zip(batch, t)) for t in zip(*batch.values())]
    masked_inputs = data_collator(features)
    # Create a new "masked" column for each column in the dataset
    return {"masked_" + k: v.numpy() for k, v in masked_inputs.items()}

In [19]:
print(downsampled_dataset)
downsampled_dataset = downsampled_dataset.remove_columns(["word_ids"])
eval_dataset = downsampled_dataset["test"].map(
    insert_random_mask,
    batched=True,
    remove_columns=downsampled_dataset["test"].column_names,
)
print(eval_dataset)
eval_dataset = eval_dataset.rename_columns(
    {
        "masked_input_ids": "input_ids",
        "masked_attention_mask": "attention_mask",
        "masked_labels": "labels",
    }
)
eval_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 10000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'word_ids', 'labels'],
        num_rows: 1000
    })
})


                                                                 

Dataset({
    features: ['masked_input_ids', 'masked_attention_mask', 'masked_labels'],
    num_rows: 1000
})




Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 1000
})

In [20]:
from torch.utils.data import DataLoader
from transformers import default_data_collator

batch_size = 64
train_dataloader = DataLoader(
    downsampled_dataset["train"],
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)
eval_dataloader = DataLoader(
    eval_dataset, batch_size=batch_size, collate_fn=default_data_collator
)

In [21]:
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)

loading configuration file config.json from cache at /home/dlf/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/1c4513b2eedbda136f57676a34eea67aba266e5c/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.24.0",
  "vocab_size": 30522
}

loading weights file pytorch_model.bin from cache at /home/dlf/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/1c4513b2eedbda136f57676a34eea67aba266e5c/pytorch_model.bin
All model checkpoint weights were used when initializing DistilBertForMaskedL

In [22]:
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)

In [23]:
from transformers import get_scheduler

num_train_epochs = 3
num_update_steps_per_epoch = len(train_dataloader)
num_training_steps = num_train_epochs * num_update_steps_per_epoch

lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [24]:
from tqdm.auto import tqdm
import torch
import math

progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_train_epochs):
    # Training
    model.train()
    for batch in train_dataloader:
        for label in batch["labels"]:
            print(label.tolist())
        exit(0)
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

    # Evaluation
    model.eval()
    losses = []
    for step, batch in enumerate(eval_dataloader):
        with torch.no_grad():
            outputs = model(**batch)

        loss = outputs.loss
        losses.append(loss)

    losses = torch.cat(losses)
    losses = losses[: len(eval_dataset)]
    try:
        perplexity = math.exp(torch.mean(losses))
    except OverflowError:
        perplexity = float("inf")

    print(f">>> Epoch {epoch}: Perplexity: {perplexity}")

    # Save and upload


  0%|          | 0/471 [00:00<?, ?it/s]

[-100, -100, -100, -100, 5931, -100, -100, -100, -100, -100, 2453, -100, 2175, 3182, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 7987, -100, 1028, -100, -100, -100, -100, -100, -100, -100, -100, 1011, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2131, -100, -100, -100, 1000, -100, -100, -100, -100, -100, 1996, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2092, 1011, -100, -100, -100, 4198, -100, -100, 2143, -100, -100, 2437, -100, -100, -100, -100, -100, 2003, -100, -100, 1997, -100, 2027, -100, -100, -100, 2000, -100, -100, 1012, -100, -100, -100, -100, -100, -100, -100, -100, 2008, -100, 2003, -100, 13403, 1025, -100, -100, -100, 2009, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
[-100, -100, 2007, -100, -100, -100, 1047, -100, -100, -100, -100, -100, -100, -100, -100, -100, 15429, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 6516, -100, 27594, -100, -100, -100, -100, -100, -100, -100, 1026,

  0%|          | 1/471 [00:22<2:56:20, 22.51s/it]

[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2017, -100, -100, -100, -100, -100, 2009, -100, -100, -100, -100, -100, 4276, -100, -100, -100, -100, -100, 1005, -100, -100, -100, -100, -100, -100, -100, 2008, -100, -100, -100, -100, -100, -100, -100, 1998, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2003, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 19582, -100, -100, -100, 1012, 1012, -100, -100, -100, 2077, 1045, -100, -100, -100, -100, -100, 2074, 3427, 2006, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 7039, -100, 2041, -100, -100, -100, 18866, -100, -100, -100, 2387, -100, 1996, 2694, -100, -100, -100, 1024, -100, -100, -100, 1055, 28691, -100, -100]
[-100, 4600, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2589, -100, 4440, -100, 2058, -100, 3762, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 3147, -100, -100, -100, -100, -100,

  0%|          | 2/471 [00:42<2:45:32, 21.18s/it]

[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2138, 2296, -100, 2003, -100, -100, -100, -100, -100, 1026, -100, 1013, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1012, 1026, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1997, 1008, 1008, -100, -100, -100, -100, -100, -100, -100, 18720, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1011, -100, -100, -100, -100, -100, 2067, 1997, -100, -100, -100, -100, -100, 5732, -100, -100, -100, -100, -100, -100, 2919, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2514, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
[2143, -100, -100, -100, 2016, 2481, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2008, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1013, -100, -100, -100, -100, -100, -100, -100, -100, -

  1%|          | 3/471 [01:02<2:40:40, 20.60s/it]

[-100, -100, -100, 999, 2009, 2246, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1055, -100, -100, -100, -100, -100, -100, -100, -100, -100, 13718, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18885, -100, 4438, -100, -100, 8724, -100, 2298, -100, -100, 8101, -100, -100, -100, -100, -100, -100, -100, -100, 1997, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 10071, -100, -100, 2084, -100, -100, -100, -100, 16606, -100, -100, -100, 5019, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2003, -100, 1998, -100, -100, -100, -100, 1045, -100, -100, -100, -100, -100, -100, 2678, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 7956]
[-100, 2377, -100, 2004, 1037, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18091, -100, 6919, 2135, -100, -100, -100, -100, -100, 12669, -100, -100, -100, -100, 2682, -100, -100, -100, -100, -100, 2504, -10

  1%|          | 4/471 [01:22<2:36:28, 20.10s/it]

[-100, -100, -100, -100, 7180, 2091, -100, -100, -100, -100, -100, -100, 7534, -100, -100, -100, -100, -100, -100, 26106, -100, -100, -100, -100, -100, -100, -100, 2879, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2000, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2008, -100, -100, -100, -100, 1997, -100, -100, -100, -100, -100, -100, -100, -100, 2004, -100, -100, -100, 2043, -100, -100, 1005, -100, -100, -100, -100, -100, -100, 10437, 1000, -100, -100, -100, -100, -100, 1012, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1998, -100, -100, 3013, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2830, -100, -100, -100, -100, -100, -100, -100, -100]
[-100, -100, -100, -100, -100, -100, 8380, -100, -100, -100, -100, 1012, -100, -100, 1013, 1028, -100, 7987, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 14652, -100, -100, -100,

  1%|          | 5/471 [01:39<2:29:36, 19.26s/it]

[-100, -100, -100, -100, -100, 3351, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1999, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2196, -100, 2137, -100, -100, -100, -100, -100, -100, -100, -100, 2327, 1005, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2003, -100, 9467, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1012, 2009, -100, -100, -100, 21146, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2508, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2003, -100, -100, -100, 3984, -100, -100, -100, -100, -100, -100, 1996, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
[-100, -100, -100, -100, -100, -100, -100, -100, 2034, 2387, -100, -100, -100, -100, -100, -100, 4907, -100, -100, -100, 2021, -100, 4669, -100, -100, 2488, -100, -100, -100, -100, 7987, -100, -100, -100, -100, -100, -100, -100, -

  1%|▏         | 6/471 [01:56<2:23:07, 18.47s/it]

[1010, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 2006, -100, 5830, -100, 2515, 2009, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1996, -100, -100, -100, -100, -100, -100, 1055, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 3549, -100, 2111, 1012, -100, -100, -100, -100, -100, 2041, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1999, -100, -100, -100, 1012, -100, -100, 2023, 2143, -100, 7078, -100, 1012, -100, -100, 1996, 5436, -100, -100, -100, -100, -100, 2108, -100, 2357, -100, -100, -100, -100, -100, -100, 1038, -100, -100, -100, 4926, -100, -100, -100, 4641, -100, -100, -100]
[2008, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 9779, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1026, -100, 1013, -100, -100, -100, -100, -100, -100, -100, -1

KeyboardInterrupt: 

  1%|▏         | 6/471 [01:57<2:32:03, 19.62s/it]

: 

In [None]:
import torch
print(torch.cuda.is_available())



False
