In [1]:
print("Abc")

Abc


In [2]:
! pip install -q kaggle

In [3]:
! mkdir ~/.kaggle

In [4]:
! cp kaggle.json ~/.kaggle/

In [5]:
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
! kaggle competitions download -c nitro-lang-processing-1

Downloading nitro-lang-processing-1.zip to /content
  0% 0.00/2.87M [00:00<?, ?B/s]
100% 2.87M/2.87M [00:00<00:00, 229MB/s]


In [7]:
! mkdir nitro-lang-processing-1

In [8]:
! unzip nitro-lang-processing-1.zip -d nitro-lang-processing-1

Archive:  nitro-lang-processing-1.zip
  inflating: nitro-lang-processing-1/random_seed_setter.py  
  inflating: nitro-lang-processing-1/sample_submission.csv  
  inflating: nitro-lang-processing-1/tag_to_id.json  
  inflating: nitro-lang-processing-1/teams_rand_seed.csv  
  inflating: nitro-lang-processing-1/test.json  
  inflating: nitro-lang-processing-1/train.json  


In [9]:
! pip install unidecode
! pip install transformers

Collecting unidecode
  Downloading Unidecode-1.3.4-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 5.0 MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.4
Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 4.4 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 3.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 47.5 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 49.0 MB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.ma

In [1]:
seed = 8 

from transformers import AutoTokenizer, AutoModel, pipeline, AutoModelForTokenClassification
from tqdm import tqdm

import re
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
np.random.seed(seed)
np.random.RandomState(seed)
from sklearn.utils.class_weight import compute_class_weight

import random
random.seed(seed)

import torch
import torch.nn as nn
torch.manual_seed(seed)
torch.use_deterministic_algorithms(False)

# import jax
# jax.random.PRNGKey(seed)

from unidecode import unidecode

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

import json


MAX_LENGTH = 128
EPOCHS = 2
BATCH_SIZE = 16
NUM_LAYERS_FROZEN = 8


In [2]:

with open("./nitro-lang-processing-1/train.json") as fin:
    raw_train_data = json.load(fin)
with open("./nitro-lang-processing-1/test.json") as fin:
    raw_test_data = json.load(fin)
with open("./nitro-lang-processing-1/tag_to_id.json") as fin:
    tag_to_id = json.load(fin)

def read_dataset(dataset, tokenizer, train=True):
    data = []
    labels = []
    max_length = 0
    reshaped_dataset = []
    reshaped_labels = []
    reshaped_length = 110
    for item in dataset:
        prelucrate_item = []
        for token in item['tokens']:
            prelucrate_item.append(re.sub(r"\W+", 'n', token))
        for i in range(0, len(prelucrate_item), reshaped_length):
            reshaped_dataset.append(prelucrate_item[i: min(i + reshaped_length, len(prelucrate_item))])
            print(item.keys())
            reshaped_labels.append( item['ner_ids'][i: min(i + reshaped_length, len(item['ner_ids']))])

    for index in range(len(reshaped_dataset)):
        items, sequence_labels =  reshaped_dataset[index], reshaped_labels[index]
        sequence = tokenizer(
            items,
            is_split_into_words=True,
            padding='max_length',
            truncation=True,
            max_length=MAX_LENGTH,
            return_offsets_mapping=True

        )
        sequence = {key: torch.as_tensor(value) for key, value in sequence.items()}
        data.append(sequence)

        if train:
            encoded_labels = np.ones(len(sequence["offset_mapping"]), dtype=int) * -100
            # set only labels whose first offset position is 0 and the second is not 0
            i = 0
            for idx, offsets in enumerate(sequence["offset_mapping"]):
                if offsets[0] == 0 and offsets[1] != 0:
                    # overwrite label
                    encoded_labels[idx] = sequence_labels[i]
                    i += 1

            # max_length = max(len(sequence), max_length)
            labels.append(torch.as_tensor(encoded_labels))
    # print(max_length)
    if train:
        return data, labels

    return data

def read_dataset_old(dataset, tokenizer, train=True):
    data = []
    labels = []
    max_length = 0

    for item in dataset:
        prelucrate_item = []
        for token in item['tokens']:
            prelucrate_item.append(re.sub(r"\W+", 'n', token))
        sequence = tokenizer(
            prelucrate_item,
            is_split_into_words=True,
            padding='max_length',
            truncation=True,
            max_length=MAX_LENGTH,
            return_offsets_mapping=True

        )
        sequence = {key: torch.as_tensor(value) for key, value in sequence.items()}
        data.append(sequence)

        if train:
            encoded_labels = np.ones(len(sequence["offset_mapping"]), dtype=int) * -100
            sequence_labels = item['ner_ids']
            # set only labels whose first offset position is 0 and the second is not 0
            i = 0
            for idx, offsets in enumerate(sequence["offset_mapping"]):
                if offsets[0] == 0 and offsets[1] != 0:
                    # overwrite label
                    encoded_labels[idx] = sequence_labels[i]
                    i += 1

            # max_length = max(len(sequence), max_length)
            labels.append(torch.as_tensor(encoded_labels))
    # print(max_length)
    if train:
        return data, labels

    return data

tokenizer = AutoTokenizer.from_pretrained("dumitrescustefan/bert-base-romanian-cased-v1")

train_data, train_labels = read_dataset(raw_train_data, tokenizer=tokenizer)


dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'tokens', 'space_after'])
dict_keys(['ner_tags', 'ner_ids', 'token

In [3]:
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.2, random_state=seed)

model = AutoModelForTokenClassification.from_pretrained("dumitrescustefan/bert-base-romanian-cased-v1", num_labels=len(tag_to_id))
# for param in model.bert.parameters():
#     param.requires_grad = False

# print(model.bert.encoder.layer)

for param in model.bert.embeddings.parameters():
    param.requires_grad = False
for layer in model.bert.encoder.layer[:NUM_LAYERS_FROZEN]:
    for param in layer.parameters():
        param.requires_grad = False
# ner_model = pipeline('ner', model=model, tokenizer=tokenizer)

Some weights of the model checkpoint at dumitrescustefan/bert-base-romanian-cased-v1 were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification we

In [4]:
test = tokenizer(
    ["testând", "de", "trei", "ori"],
    is_split_into_words=True,
    padding='max_length',
    return_offsets_mapping=True,
    truncation=False,
    max_length=4
)
print(test)
print(tokenizer.convert_ids_to_tokens([4231, 476]))
print(tokenizer.convert_tokens_to_ids(['test', '##ând']))
print(tokenizer.convert_ids_to_tokens([23570]))



{'input_ids': [2, 4231, 476, 363, 1206, 644, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'offset_mapping': [(0, 0), (0, 4), (4, 7), (0, 2), (0, 4), (0, 3), (0, 0)]}
['test', '##ând']
[4231, 476]
['șarpe']


In [5]:
print(train_labels[0])

tensor([-100,    0,    3,    0,    0,    0,    0,    0,    0,    4,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0, -100,    0,    1,
           0,    0,    0,    0,    1, -100,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0,    1,
           0,    1, -100,    0,    1, -100, -100,    0,    0,    1,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100])


In [None]:
def pad(samples, max_length):

    return torch.tensor([
        sample[:max_length] + [0] * max(0, max_length - len(sample))
        for sample in samples
    ])

In [7]:
# padded_train_data = pad(train_data, 563)
# padded_train_data[0]

In [8]:
# print(padded_train_data.shape)

In [5]:
class MyDataset(Dataset):
    def __init__(self, data, labels):
        super().__init__()
        self.data = data
        self.labels = labels

    def __getitem__(self, index):
        return self.data[index], self.labels[index]

    def __len__(self):
        return len(self.labels)

class TestDataset(Dataset):
    def __init__(self, data, labels):
        super().__init__()
        self.data = data

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


# print(model(torch.tensor(tokenizer.encode("Testing the model", add_special_tokens=True)).unsqueeze(0)))

# print(tokenizer.encode_plus(["Convorbiri", "literare", "."]))
print(tokenizer.convert_tokens_to_ids("[PAD]"))
print(tokenizer.convert_ids_to_tokens(10))


0
&


In [6]:
train_dataset = MyDataset(X_train, y_train)
validation_dataset = MyDataset(X_val, y_val)

In [10]:
print(len(X_train))
print(len(y_train))

9864
9864


In [7]:
# instantiate the DataLoaders
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)
validation_dataloader = DataLoader(
    dataset=validation_dataset,
    batch_size=BATCH_SIZE
)

In [8]:
def train_epoch(model, train_dataloader, loss_crt, optimizer, device):
    model.train()
    epoch_loss = 0.0
    epoch_acc = 0.0
    num_batches = len(train_dataloader)
    predictions = []
    labels = []
    for idx, batch in tqdm(enumerate(train_dataloader)):
        batch_data, batch_labels = batch
        sequence_ids = batch_data['input_ids'].to(device, dtype=torch.long)
        sequence_masks = batch_data['attention_mask'].to(device)
        batch_labels = batch_labels.to(device)

        raw_output = model(input_ids=sequence_ids, attention_mask=sequence_masks, labels=batch_labels)
        loss, output = raw_output['loss'], raw_output['logits']
        logits = output.view(-1, model.num_labels)
        batch_predictions = torch.argmax(logits, dim=1)

        proper_labels = batch_labels.view(-1) != -100
        # loss = loss_crt(logits, batch_labels.view(-1))

        filtered_labels = torch.masked_select(batch_labels.view(-1), proper_labels)
        filtered_predictions = torch.masked_select(batch_predictions, proper_labels)

        labels += filtered_labels.squeeze().tolist()
        predictions += filtered_predictions.tolist()

        batch_acc = accuracy_score(filtered_labels.cpu().numpy(), filtered_predictions.cpu().numpy())
        epoch_acc += batch_acc

        loss_scalar = loss.item()

        if idx % 500 == 0:
            print(epoch_acc/(idx + 1))
            print(batch_predictions)

        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=10
        )

        model.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss_scalar

    epoch_loss = epoch_loss/num_batches
    epoch_acc = epoch_acc/num_batches

    return epoch_loss, epoch_acc, labels

def eval_epoch(model, val_dataloader, loss_crt, device):
    model.eval()
    epoch_loss = 0.0
    epoch_acc = 0.0
    num_batches = len(val_dataloader)
    predictions = []
    labels = []
    with torch.no_grad():
        for idx, batch in tqdm(enumerate(val_dataloader)):
            batch_data, batch_labels = batch
            sequence_ids = batch_data['input_ids'].to(device, dtype=torch.long)
            sequence_masks = batch_data['attention_mask'].to(device)
            batch_labels = batch_labels.to(device)

            raw_output = model(input_ids=sequence_ids, attention_mask=sequence_masks, labels=batch_labels)
            loss, output = raw_output['loss'], raw_output['logits']
            logits = output.view(-1, model.num_labels)
            batch_predictions = torch.argmax(logits, dim=1)

            proper_labels = batch_labels.view(-1) != -100

            filtered_labels = torch.masked_select(batch_labels.view(-1), proper_labels)
            filtered_predictions = torch.masked_select(batch_predictions, proper_labels)

            labels += filtered_labels.squeeze().tolist()
            predictions += filtered_predictions.tolist()

            batch_acc = accuracy_score(filtered_labels.cpu().numpy(), filtered_predictions.cpu().numpy())
            epoch_acc += batch_acc

            loss_scalar = loss.item()

            epoch_loss += loss_scalar

    epoch_loss = epoch_loss/num_batches
    epoch_acc = epoch_acc/num_batches

    return epoch_loss, epoch_acc, labels

In [9]:
weights = np.zeros(16)
proper_labels = []
for sequence in y_train:
    mini_label = []
    for label in sequence:
        if label != -100:
            # mini_label.append(label)
            proper_labels.append(int(label))
            weights[label] += 1
    # proper_labels.append(mini_label)
max_weight = np.max(weights)
for index, weight in enumerate(weights):
    weights[index] = max_weight / weights[index]
print(weights)
weights = compute_class_weight(class_weight="balanced", classes=np.arange(0, 16), y=proper_labels)

print(weights)


[  1.          10.48788374  19.93010524  32.26937851  74.76745778
  81.25768849 113.21043538 651.35387674  58.29733096  19.25315861
 107.87981561  97.82950134 150.63494253  50.13481255 133.72693878
 118.66388989]
[ 0.08397729  0.88074402  1.67367617  2.70989486  6.27876826  6.82380022
  9.50710522 54.69893141  4.89565169  1.61682802  9.05945423  8.21545611
 12.64991379  4.21018554 11.23002551  9.96507153]


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# move the model to GPU (when available)
model.to(device)

# create a SGD optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=1, threshold=1e-2, verbose=True)
# optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
#
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=1, threshold=1e-3, verbose=True)

# set up loss function
loss_criterion = nn.CrossEntropyLoss(weight=torch.as_tensor(weights, dtype=torch.float).to(device))

train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
for epoch in range(1, EPOCHS+1):
    train_loss, train_accuracy, pepega_labels = train_epoch(model, train_dataloader, loss_criterion, optimizer, device)
    val_loss, val_accuracy, pepega_pepega_labels = eval_epoch(model, validation_dataloader, loss_criterion, device)
    scheduler.step(val_loss)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)
    print('\nEpoch %d'%(epoch))
    print('train loss: %10.8f, accuracy: %10.8f'%(train_loss, train_accuracy))
    print('val loss: %10.8f, accuracy: %10.8f'%(val_loss, val_accuracy))


2it [00:00,  7.18it/s]

0.04568527918781726
tensor([ 2, 13, 15,  ...,  1,  7,  1], device='cuda:0')


502it [01:12,  6.85it/s]

0.9234298506566244
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


638it [01:31,  6.96it/s]
160it [00:12, 12.79it/s]



Epoch 1
train loss: 0.24293727, accuracy: 0.92723086
val loss: 0.16688070, accuracy: 0.94887703


0it [00:00, ?it/s]

0.9197183098591549
tensor([0, 0, 0,  ..., 2, 1, 2], device='cuda:0')


502it [01:12,  6.98it/s]

0.9498916216432893
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


638it [01:31,  6.95it/s]
160it [00:12, 12.81it/s]


Epoch 2
train loss: 0.16057137, accuracy: 0.94913014
val loss: 0.16378039, accuracy: 0.94940926





In [62]:
! nvidia-smi

Sat Mar 26 23:33:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    33W /  70W |  10000MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
model.save_pretrained("model_pretrained_Adam_128_16_god_seed")
torch.save(model.state_dict(), "model_Adam_128_16_clip_god_seed.pt")

In [27]:
# model.load_state_dict(torch.load("model_SGD_64_2.pt"))

In [28]:
# model.from_pretrained("model_pretrained_SGD_64_2")

In [None]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#
# # move the model to GPU (when available)
# model.to(device)
#
# # create a SGD optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# set up loss function
# loss_criterion = nn.CrossEntropyLoss()
# val_loss, val_accuracy, pepega_pepega_labels = eval_epoch(model, validation_dataloader, loss_criterion, device)
# print(val_accuracy)


In [13]:
def read_test(dataset, tokenizer):
    data = []
    max_length = 0
    counter = 0
    reshaped_dataset = []
    # reshaped_length = 4
    # for item in dataset:
    #     for i in range(0, len(item['tokens']), reshaped_length):
    #         reshaped_dataset.append(item['tokens'][i: min(i + reshaped_length, len(item['tokens']) ) ] )

    for item in dataset:
        reshaped_dataset.append(item['tokens'])

    for item in reshaped_dataset:
        # counter += len(item)
        prelucrate_item = []
        for token in item:
            prelucrate_item.append(re.sub(r"\W+", 'or', token))
        # print(prelucrate_item)
        sequence = tokenizer(
            prelucrate_item,
            is_split_into_words=True,
            padding="max_length",
            max_length=MAX_LENGTH,
            # truncation=True,
            return_offsets_mapping=True
        )
        # sequence = tokenizer.encode(
        #     prelucrate_item,
        #     is_pretokenized=True,
        # )
        sequence = {key: torch.as_tensor(value) for key, value in sequence.items()}
        data.append(sequence)

    #     if len(sequence['input_ids']) > max_length:
    #         print(tokenizer.convert_ids_to_tokens(sequence['input_ids']))
    #         print((sequence['offset_mapping']))
    #         print(item)
    #     max_length = max(len(sequence['input_ids']), max_length)
    # print(max_length)
    return data

reshaped_test_data = read_test(raw_test_data, tokenizer=tokenizer)

test_dataloader = DataLoader(
    dataset=reshaped_test_data,
    batch_size=BATCH_SIZE
)


In [14]:
def test_epoch(model, test_dataloader, device):
    model.eval()
    epoch_loss = 0.0
    num_batches = len(test_dataloader)
    predictions = []
    with torch.no_grad():
        for idx, batch in tqdm(enumerate(test_dataloader)):
            batch_data = batch
            sequence_ids = batch_data['input_ids'].to(device)
            sequence_masks = batch_data['attention_mask'].to(device)
            offset_mapping = batch_data['offset_mapping']

            raw_output = model(sequence_ids, attention_mask=sequence_masks)
            output =  raw_output['logits']
            logits = output.view(-1, model.num_labels)
            batch_predictions = torch.argmax(logits, dim=1)
            print(batch_predictions)


            filtered_predictions = []

            # raw_batch_predictions = torch.argmax(output, dim=2)
            # # print(offset_mapping.shape)
            # for index_bt, bt in enumerate(offset_mapping):
            #     for index, offset in enumerate(bt):
            #         if offset[0] == 0 and offset[1] != 0:
            #             filtered_predictions.append(raw_batch_predictions[index_bt][index])

            for index, offset in enumerate(offset_mapping.view(-1, 2)):
                if offset[0] == 0 and offset[1] != 0:
                    filtered_predictions.append(batch_predictions[index])

            predictions += filtered_predictions

    return predictions

all_predictions = test_epoch(model, test_dataloader, device)
print(len(all_predictions))

2it [00:00,  9.22it/s]

tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 2, 0,  ..., 0, 0, 0], device='cuda:0')


4it [00:00, 10.01it/s]

tensor([0, 0, 0,  ..., 2, 2, 0], device='cuda:0')
tensor([ 0, 14, 14,  ...,  0,  0,  0], device='cuda:0')
tensor([0, 2, 2,  ..., 0, 0, 0], device='cuda:0')


6it [00:00, 10.22it/s]

tensor([0, 4, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


9it [00:01,  8.07it/s]

tensor([0, 3, 0,  ..., 0, 0, 9], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


11it [00:01,  8.89it/s]

tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 0, 0], device='cuda:0')


15it [00:01,  9.73it/s]

tensor([0, 3, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


17it [00:01,  9.74it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 2, 2,  ..., 0, 0, 0], device='cuda:0')


19it [00:02,  9.92it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 8, 8,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 0, 9], device='cuda:0')


23it [00:02, 10.03it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 5,  ..., 3, 3, 0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 2, 0], device='cuda:0')


25it [00:02,  9.97it/s]

tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 3, 3], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


29it [00:03, 10.24it/s]

tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([ 0,  0,  9,  ...,  0,  0, 13], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


31it [00:03, 10.25it/s]

tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


35it [00:03, 10.34it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')


37it [00:03, 10.30it/s]

tensor([0, 0, 0,  ..., 9, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 1, 0,  ..., 0, 0, 9], device='cuda:0')


41it [00:04, 10.31it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 3,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 9, 0,  ..., 1, 1, 1], device='cuda:0')


43it [00:04, 10.31it/s]

tensor([ 0,  0,  0,  ...,  1, 10,  1], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


47it [00:04, 10.25it/s]

tensor([ 0,  2,  5,  ..., 15,  0,  0], device='cuda:0')
tensor([ 0, 15, 15,  ...,  0,  3,  3], device='cuda:0')
tensor([0, 2, 2,  ..., 0, 0, 0], device='cuda:0')


49it [00:04, 10.23it/s]

tensor([0, 0, 0,  ..., 1, 1, 1], device='cuda:0')
tensor([0, 0, 0,  ..., 1, 1, 0], device='cuda:0')
tensor([0, 0, 1,  ..., 3, 3, 3], device='cuda:0')


53it [00:05, 10.34it/s]

tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


55it [00:05, 10.31it/s]

tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 2, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


59it [00:05, 10.36it/s]

tensor([ 0,  0,  0,  ..., 12, 12,  3], device='cuda:0')
tensor([0, 0, 3,  ..., 0, 9, 9], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


61it [00:06, 10.28it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([ 0,  0,  0,  ...,  0,  0, 15], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 4], device='cuda:0')


65it [00:06, 10.31it/s]

tensor([0, 3, 3,  ..., 0, 0, 3], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 0, 3], device='cuda:0')
tensor([0, 0, 3,  ..., 0, 0, 0], device='cuda:0')


67it [00:06, 10.30it/s]

tensor([ 0,  0,  0,  ...,  0, 13,  0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


71it [00:07, 10.23it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 9, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 1, 1, 8], device='cuda:0')


73it [00:07, 10.23it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 9, 0, 0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')


77it [00:07, 10.32it/s]

tensor([0, 0, 9,  ..., 0, 0, 3], device='cuda:0')
tensor([ 0,  0, 13,  ...,  0,  0,  0], device='cuda:0')
tensor([0, 4, 4,  ..., 0, 0, 0], device='cuda:0')


79it [00:07, 10.36it/s]

tensor([0, 3, 3,  ..., 3, 3, 3], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 6], device='cuda:0')


83it [00:08, 10.41it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 3, 3], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 1], device='cuda:0')


85it [00:08, 10.38it/s]

tensor([ 0, 14, 14,  ...,  0,  0,  0], device='cuda:0')
tensor([0, 0, 1,  ..., 0, 8, 8], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


89it [00:08, 10.16it/s]

tensor([0, 0, 3,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 1, 1, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


91it [00:09, 10.12it/s]

tensor([0, 6, 6,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 1, 1, 1], device='cuda:0')


95it [00:09, 10.15it/s]

tensor([0, 0, 6,  ..., 0, 0, 0], device='cuda:0')
tensor([ 0,  8,  8,  ...,  0,  0, 10], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


97it [00:09, 10.11it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 1], device='cuda:0')
tensor([0, 0, 0,  ..., 1, 0, 0], device='cuda:0')


101it [00:10, 10.12it/s]

tensor([ 0,  0,  0,  ..., 13, 13,  0], device='cuda:0')
tensor([0, 0, 7,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 0, 0], device='cuda:0')


103it [00:10,  9.92it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


105it [00:10, 10.00it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 3], device='cuda:0')


109it [00:10, 10.05it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


111it [00:11, 10.12it/s]

tensor([ 0, 15, 15,  ...,  0,  1,  0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 9], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 2], device='cuda:0')


115it [00:11, 10.20it/s]

tensor([0, 0, 0,  ..., 3, 0, 3], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


117it [00:11, 10.22it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 3,  ..., 0, 4, 4], device='cuda:0')


121it [00:11, 10.30it/s]

tensor([0, 3, 3,  ..., 0, 0, 9], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


123it [00:12, 10.25it/s]

tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 3], device='cuda:0')


125it [00:12, 10.14it/s]

tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([ 0, 13,  0,  ...,  0,  3,  3], device='cuda:0')
tensor([ 0,  0,  9,  ...,  0,  0, 13], device='cuda:0')


129it [00:12, 10.19it/s]

tensor([ 0,  0,  0,  ...,  2,  2, 10], device='cuda:0')
tensor([0, 0, 4,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


131it [00:12, 10.19it/s]

tensor([0, 0, 0,  ..., 3, 3, 0], device='cuda:0')
tensor([0, 8, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 3], device='cuda:0')


135it [00:13,  9.87it/s]

tensor([0, 3, 0,  ..., 0, 0, 9], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


137it [00:13, 10.01it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 1, 0, 0], device='cuda:0')
tensor([ 0, 10, 10,  ...,  0,  0,  8], device='cuda:0')


139it [00:13,  8.12it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


141it [00:14,  8.64it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


144it [00:14,  9.20it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 3, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 4,  ..., 0, 0, 0], device='cuda:0')


147it [00:14,  9.64it/s]

tensor([0, 0, 0,  ..., 0, 0, 3], device='cuda:0')
tensor([0, 1, 1,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 9,  ..., 0, 0, 0], device='cuda:0')


150it [00:15,  9.92it/s]

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


152it [00:15, 10.02it/s]

tensor([ 0, 11, 11, 11, 11, 11,  0,  0,  0, 11, 11, 11,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 11, 11,  0,  0,  0,  0,  0,  0,
        11, 11, 11, 11,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        11, 11,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9,  9,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0, 




In [21]:
print(all_predictions)

[tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(4, device='cuda:0'), tensor(2, device='cuda:0'), tensor(2, device='cuda:0'), tensor(2, device='cuda:0'), tensor(2, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(11, device='cuda:0'), tensor(11, device='cuda:0'), tensor(11, device='cuda:0'), tensor(11, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, devic

In [15]:
g = open("adam_sample_128_16_god_seed.csv", "w")
idx = 0
g.write("Id,ner_label\n")
for pred in all_predictions:
  g.write(str(idx) + "," + str(pred.item()) + "\n")
  idx += 1
g.close()

In [33]:

print(re.sub(r'[\\\s+]', 't', '\\\\\\\nhahah    tester'))




tttthahahtttttester
