<a href="https://colab.research.google.com/github/appletreeleaf/Project/blob/main/Project1_TextClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import requirements

In [None]:
!pip install transformers
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pdb
import argparse
from dataclasses import dataclass, field
from typing import Optional
from collections import defaultdict


import torch
from torch.nn.utils.rnn import pad_sequence

import numpy as np
from tqdm import tqdm, trange

from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    AutoConfig,
    AdamW
)

# 1. Preprocess

In [None]:
def make_id_file(task, tokenizer):
    """Tokenize the four sentiment corpus files into strings of token ids.

    Args:
        task: Not used by this function.
        tokenizer: Object exposing ``encode(str)`` that returns an iterable of
            token ids.

    Returns:
        Tuple ``(train_pos, train_neg, dev_pos, dev_neg)``; each element is a
        list with one space-separated id string per line of the matching file.
    """
    def make_data_strings(file_name):
        """Encode each lower-cased line of ``file_name`` as 'id id id ...'."""
        # Open the text file in read mode and tokenize the lower-cased lines.
        with open(os.path.join(file_name), 'r', encoding='utf-8') as corpus:
            encoded_lines = [tokenizer.encode(raw_line.lower()) for raw_line in corpus.readlines()]
        # Render each integer-encoded sentence as a single string.
        return [' '.join(map(str, token_ids)) for token_ids in encoded_lines]

    print('it will take some times...')
    corpus_files = ('sentiment.train.1', 'sentiment.train.0',
                    'sentiment.dev.1', 'sentiment.dev.0')
    train_pos, train_neg, dev_pos, dev_neg = map(make_data_strings, corpus_files)

    print('make id file finished!')
    return train_pos, train_neg, dev_pos, dev_neg

In [None]:
# Tokenizer for the 'bert-base-uncased' checkpoint (same name the model is loaded from later).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Load the four pre-tokenized splits from pickles stored on the mounted Drive.
# NOTE(review): presumably these pickles hold the lists returned by make_id_file — confirm.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
import pickle
datum = []  # loaded objects, in the same order as file_list
file_list = ['train_pos', 'train_neg', 'dev_pos', 'dev_neg']
for name in file_list:
    with open(f'/content/drive/MyDrive/Groom_8th_project1/{name}.pkl', 'rb') as f:
        datum.append(pickle.load(f))
train_pos, train_neg, dev_pos, dev_neg = datum[0], datum[1], datum[2], datum[3]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Keep 10% of each training split (test_size=0.9 sends the other 90% to train_remain).
# NOTE(review): train_remain is overwritten by the second call, and new_train_pos /
# new_train_neg are not referenced by any later cell visible in this notebook (the
# datasets are built from the full train_pos / train_neg) — confirm whether the
# subsample is still intended. No random_state is passed, so the split is not reproducible.
new_train_pos, train_remain = train_test_split(train_pos, test_size=0.9)
new_train_neg, train_remain = train_test_split(train_neg, test_size=0.9)

In [None]:
class SentimentDataset(object):
    """Labelled dataset built from space-separated token-id strings.

    Positive sentences are stored first with label ``[1]``, followed by the
    negative sentences with label ``[0]``.
    """

    def __init__(self, tokenizer, pos, neg):
        """Parse ``pos`` and ``neg`` (iterables of id strings) into id lists.

        Args:
            tokenizer: Stored on the instance; not otherwise used here.
            pos: Iterable of positive sentences as 'id id id ...' strings.
            neg: Iterable of negative sentences in the same format.
        """
        self.tokenizer = tokenizer
        self.data = []
        self.label = []

        # Positive sentences get label 1, negative sentences label 0.
        for sentences, polarity in ((pos, 1), (neg, 0)):
            for sentence in sentences:
                # Split on whitespace, then convert every piece to int.
                self.data.append(self._cast_to_int(sentence.strip().split()))
                self.label.append([polarity])

    def _cast_to_int(self, sample):
        """Convert a sequence of numeric strings to a list of ints."""
        return list(map(int, sample))

    def __len__(self):
        """Number of stored sentences."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(token_ids, label)`` for ``index`` as two numpy arrays."""
        return np.array(self.data[index]), np.array(self.label[index])

In [None]:
# Build the labelled datasets from the full positive/negative splits loaded above.
train_dataset = SentimentDataset(tokenizer, train_pos, train_neg)
dev_dataset = SentimentDataset(tokenizer, dev_pos, dev_neg)

In [None]:
# Sanity check: print the first 11 (token_ids, label) pairs of the training set.
for i, item in enumerate(train_dataset):
    print(item)
    if i == 10:
        break

(array([ 101, 6581, 2833, 1012,  102]), array([1]))
(array([  101, 21688,  8013,  2326,  1012,   102]), array([1]))
(array([  101,  2027,  2036,  2031,  3679, 19247,  1998,  3256,  6949,
        2029,  2003,  2428,  2204,  1012,   102]), array([1]))
(array([  101,  2009,  1005,  1055,  1037,  2204, 15174,  2098,  7570,
       22974,  2063,  1012,   102]), array([1]))
(array([ 101, 1996, 3095, 2003, 5379, 1012,  102]), array([1]))
(array([ 101, 2204, 3347, 2833, 1012,  102]), array([1]))
(array([ 101, 2204, 2326, 1012,  102]), array([1]))
(array([  101, 11350,  1997,  2154,  2003, 25628,  1998,  7167,  1997,
       19247,  1012,   102]), array([1]))
(array([  101,  2307,  2173,  2005,  6265,  2030,  3347, 27962,  1998,
        5404,  1012,   102]), array([1]))
(array([ 101, 1996, 2047, 2846, 3504, 6429, 1012,  102]), array([1]))
(array([ 101, 2023, 2173, 2001, 2200, 2204, 1012,  102]), array([1]))


In [None]:
def collate_fn_style(samples):
    """Collate ``(token_ids, label)`` pairs into padded, length-sorted tensors.

    Args:
        samples: Sequence of ``(token_ids, label)`` pairs as produced by
            ``SentimentDataset.__getitem__``.

    Returns:
        Tuple ``(input_ids, attention_mask, token_type_ids, position_ids,
        labels)``. Rows are ordered from longest to shortest sequence. The
        first four tensors have shape ``(batch, max_len)``; ``labels`` holds
        the stacked labels in the same row order.
    """
    sequences, targets = zip(*samples)
    lengths = [len(sequence) for sequence in sequences]
    longest = max(lengths)
    # Batch order: indices of the sequences from longest to shortest.
    sorted_indices = np.argsort(lengths)[::-1]

    # Zero-pad every sequence on the right up to the longest one.
    padded_ids = pad_sequence([torch.tensor(sequences[index]) for index in sorted_indices],
                              batch_first=True)
    # 1 marks a real token, 0 marks padding.
    attention_mask = torch.tensor(
        [[1] * lengths[index] + [0] * (longest - lengths[index]) for index in sorted_indices])

    batch_size, padded_width = padded_ids.shape
    # Every row is a single segment, so all segment ids are 0 across the padded width.
    token_type_ids = torch.zeros((batch_size, padded_width), dtype=torch.long)
    # Positions 0..padded_width-1, repeated for every row.
    position_ids = torch.arange(padded_width, dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
    labels = torch.tensor(np.stack(targets, axis=0)[sorted_indices])

    return padded_ids, attention_mask, token_type_ids, position_ids, labels

In [None]:
# Batch sizes for training and validation.
train_batch_size = 48
eval_batch_size= 48

# Training batches are reshuffled every epoch; collate_fn_style pads and sorts each batch.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=train_batch_size,
                                           shuffle=True, collate_fn=collate_fn_style,
                                           pin_memory=True, num_workers=2)
# Validation batches keep dataset order (shuffle=False).
dev_loader = torch.utils.data.DataLoader(dev_dataset, batch_size=eval_batch_size,
                                         shuffle=False, collate_fn=collate_fn_style,
                                         num_workers=2)

In [None]:
# set random seed
# Seed numpy and torch.
# NOTE(review): this runs after the train/dev split and DataLoader cells above, so it
# cannot make the earlier train_test_split call reproducible.
random_seed=42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained checkpoint with a sequence-classification head and move it to `device`.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

# 2. Train

In [None]:
# Put the model in training mode and set up the optimizer hyper-parameters.
model.train()
learning_rate = 5e-5      # candidate values: [5e-3, 5e-4, 5e-5, 5e-6, 5e-7]
optimizer = AdamW(model.parameters(), lr=learning_rate, eps = 1e-08)
train_epoch = 2  # number of passes over train_loader
# NOTE(review): `dropout` is not referenced by any other cell visible in this notebook.
dropout = torch.nn.Dropout(p=0.2)



In [None]:
def compute_acc(predictions, target_labels):
    """Return the fraction of positions where prediction and target agree.

    Args:
        predictions: Sequence of predicted labels.
        target_labels: Sequence of gold labels, aligned with ``predictions``.

    Returns:
        Mean of the element-wise equality between the two sequences.
    """
    matches = np.asarray(predictions) == np.asarray(target_labels)
    return matches.mean()

In [None]:
# Start a Weights & Biases run; the training loop logs its metrics with wandb.log.
wandb.init(
    project='goorm_team_1',
    name='BERT_tuning')

[34m[1mwandb[0m: Currently logged in as: [33m02younge[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Best validation loss seen so far; a checkpoint is written only when it improves.
lowest_valid_loss = 9999.
# Step-decay LR scheduler: multiply the learning rate by `gamma` every `step_size` epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer,
                                            step_size=3,
                                            gamma=0.3)

# Validate roughly five times per epoch. max(1, ...) keeps the modulo below
# well-defined when train_loader has fewer than five batches.
eval_interval = max(1, len(train_loader) // 5)

# Most recent validation metrics; None until the first validation pass has run.
valid_loss = None
acc = None

for epoch in range(train_epoch):
    with tqdm(train_loader, unit="batch") as tepoch:
        for iteration, (input_ids, attention_mask, token_type_ids, position_ids, labels) in enumerate(tepoch):
            tepoch.set_description(f"Epoch {epoch}")
            # Move the batch tensors to the training device.
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            token_type_ids = token_type_ids.to(device)
            position_ids = position_ids.to(device)
            labels = labels.to(device, dtype=torch.long)

            optimizer.zero_grad()

            output = model(input_ids=input_ids,
                           attention_mask=attention_mask,
                           token_type_ids=token_type_ids,
                           position_ids=position_ids,
                           labels=labels)

            loss = output.loss
            loss.backward()

            optimizer.step()
            # Keep the training loss as a plain float under its own name so the
            # validation pass below cannot overwrite it before it is logged.
            train_loss = loss.item()
            tepoch.set_postfix(loss=train_loss)

            if iteration != 0 and iteration % eval_interval == 0:
                model.eval()
                valid_losses = []
                predictions = []
                target_labels = []
                with torch.no_grad():
                    for (dev_input_ids, dev_attention_mask, dev_token_type_ids,
                         dev_position_ids, dev_labels) in tqdm(dev_loader,
                                                               desc='Eval',
                                                               position=1,
                                                               leave=None):
                        dev_input_ids = dev_input_ids.to(device)
                        dev_attention_mask = dev_attention_mask.to(device)
                        dev_token_type_ids = dev_token_type_ids.to(device)
                        dev_position_ids = dev_position_ids.to(device)
                        dev_labels = dev_labels.to(device, dtype=torch.long)

                        dev_output = model(input_ids=dev_input_ids,
                                           attention_mask=dev_attention_mask,
                                           token_type_ids=dev_token_type_ids,
                                           position_ids=dev_position_ids,
                                           labels=dev_labels)

                        valid_losses.append(dev_output.loss.item())

                        dev_logits = dev_output.logits
                        # Predict 0 only when logit 0 is strictly larger than logit 1
                        # (same rule as before), computed in one vectorised step.
                        predictions += (~(dev_logits[:, 0] > dev_logits[:, 1])).long().tolist()
                        target_labels += dev_labels.reshape(-1).tolist()

                acc = compute_acc(predictions, target_labels)
                valid_loss = sum(valid_losses) / len(valid_losses)
                wandb.log({'train_loss': train_loss,
                           'valid_loss': valid_loss,
                           'valid_acc': acc})

                # Back to training mode for the remaining batches.
                model.train()

        # Checkpoint once per epoch, and only if the latest validation loss improved.
        if valid_loss is not None and lowest_valid_loss > valid_loss:
            lowest_valid_loss = valid_loss
            print('Acc for model which have lower valid loss: ', acc)
            torch.save(model.state_dict(), "./pytorch_model.bin")
    scheduler.step()

Epoch 0:  20%|██        | 1847/9235 [06:19<24:45,  4.97batch/s, loss=0.0079]
Eval:   0%|          | 0/84 [00:00<?, ?it/s][A
Eval:   1%|          | 1/84 [00:00<00:14,  5.89it/s][A
Eval:   4%|▎         | 3/84 [00:00<00:07, 11.48it/s][A
Eval:   6%|▌         | 5/84 [00:00<00:05, 13.95it/s][A
Eval:   8%|▊         | 7/84 [00:00<00:05, 14.98it/s][A
Eval:  11%|█         | 9/84 [00:00<00:04, 15.60it/s][A
Eval:  13%|█▎        | 11/84 [00:00<00:04, 16.31it/s][A
Eval:  15%|█▌        | 13/84 [00:00<00:04, 16.28it/s][A
Eval:  18%|█▊        | 15/84 [00:01<00:04, 16.24it/s][A
Eval:  20%|██        | 17/84 [00:01<00:04, 16.34it/s][A
Eval:  23%|██▎       | 19/84 [00:01<00:03, 16.52it/s][A
Eval:  25%|██▌       | 21/84 [00:01<00:03, 16.37it/s][A
Eval:  27%|██▋       | 23/84 [00:01<00:03, 16.69it/s][A
Eval:  30%|██▉       | 25/84 [00:01<00:03, 16.77it/s][A
Eval:  32%|███▏      | 27/84 [00:01<00:03, 16.64it/s][A
Eval:  35%|███▍      | 29/84 [00:01<00:03, 16.81it/s][A
Eval:  37%|███▋      | 31

Acc for model which have lower valid loss:  0.97425


Epoch 1:  20%|██        | 1847/9235 [06:18<24:29,  5.03batch/s, loss=0.00789]
Eval:   0%|          | 0/84 [00:00<?, ?it/s][A
Eval:   1%|          | 1/84 [00:00<00:16,  4.93it/s][A
Eval:   4%|▎         | 3/84 [00:00<00:07, 10.29it/s][A
Eval:   6%|▌         | 5/84 [00:00<00:06, 13.06it/s][A
Eval:   8%|▊         | 7/84 [00:00<00:05, 14.23it/s][A
Eval:  11%|█         | 9/84 [00:00<00:05, 14.98it/s][A
Eval:  13%|█▎        | 11/84 [00:00<00:04, 15.69it/s][A
Eval:  15%|█▌        | 13/84 [00:00<00:04, 15.79it/s][A
Eval:  18%|█▊        | 15/84 [00:01<00:04, 15.79it/s][A
Eval:  20%|██        | 17/84 [00:01<00:04, 15.68it/s][A
Eval:  23%|██▎       | 19/84 [00:01<00:04, 16.14it/s][A
Eval:  25%|██▌       | 21/84 [00:01<00:03, 16.05it/s][A
Eval:  27%|██▋       | 23/84 [00:01<00:03, 16.41it/s][A
Eval:  30%|██▉       | 25/84 [00:01<00:03, 16.46it/s][A
Eval:  32%|███▏      | 27/84 [00:01<00:03, 16.35it/s][A
Eval:  35%|███▍      | 29/84 [00:01<00:03, 16.39it/s][A
Eval:  37%|███▋      | 3

Acc for model which have lower valid loss:  0.9725


Epoch 2:   1%|          | 92/9235 [00:19<32:09,  4.74batch/s, loss=0.725]


KeyboardInterrupt: ignored

# 3. Inference

In [None]:
# Read the unlabeled test set from the mounted Drive.
import pandas as pd
test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/test_no_label.csv')

In [None]:
# Column handed to make_id_file_test for tokenization.
# NOTE(review): the column is named 'Id' but is treated as sentence text below — confirm the CSV schema.
# NOTE(review): the name test_dataset is rebound to a SentimentTestDataset in a later cell,
# so re-running the tokenization cell after that point would not see this Series.
test_dataset = test_df['Id']

In [None]:
def make_id_file_test(tokenizer, test_dataset):
    """Tokenize test sentences into space-separated token-id strings.

    Args:
        tokenizer: Object exposing ``encode(str)`` that returns an iterable of
            token ids.
        test_dataset: Iterable of sentences (strings).

    Returns:
        List with one 'id id id ...' string per input sentence, in input order.
    """
    # Lower-case each sentence before encoding, then join its ids with spaces.
    return [
        ' '.join(str(token_id) for token_id in tokenizer.encode(sentence.lower()))
        for sentence in test_dataset
    ]

In [None]:
# Tokenize every test sentence into a space-separated id string.
test = make_id_file_test(tokenizer, test_dataset)

In [None]:
# Preview the first ten encoded test sentences.
test[:10]

['101 2009 1005 1055 1037 2878 2047 3325 1998 2047 26389 2169 2051 2017 2175 1012 102',
 '101 2061 15640 2013 2019 2214 5440 1012 102',
 '101 2009 2003 1996 2087 14469 7273 1999 1996 3028 1012 102',
 '101 2079 2025 3696 1037 10084 2007 2122 2111 1012 102',
 '101 1045 2001 6091 1998 2016 2081 2033 2514 2061 6625 1998 6160 1012 102',
 '101 1996 2069 2518 2057 2363 2008 2001 2980 2001 1996 4157 1012 102',
 '101 2053 1010 2025 1996 3924 2012 2004 2226 1010 1996 3924 1999 3502 2152 1012 102',
 '101 2027 3288 2009 2041 2392 2005 2017 1998 2024 2200 14044 1012 102',
 '101 4606 1996 12043 2106 1050 1005 1056 2130 2113 2129 2000 2147 1996 3274 1012 102',
 '101 2027 2031 2019 6581 4989 1997 25025 2015 2000 5454 2013 1012 102']

In [None]:
class SentimentTestDataset(object):
    """Unlabelled dataset built from space-separated token-id strings."""

    def __init__(self, tokenizer, test):
        """Parse ``test`` (an iterable of 'id id id ...' strings) into id lists.

        Args:
            tokenizer: Stored on the instance; not otherwise used here.
            test: Iterable of encoded sentences.
        """
        self.tokenizer = tokenizer
        self.data = []

        for encoded_sentence in test:
            # Split on whitespace, then convert every piece to int.
            self.data.append(self._cast_to_int(encoded_sentence.strip().split()))

    def _cast_to_int(self, sample):
        """Convert a sequence of numeric strings to a list of ints."""
        return list(map(int, sample))

    def __len__(self):
        """Number of stored sentences."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the token ids at ``index`` as a numpy array."""
        return np.array(self.data[index])

In [None]:
# Wrap the encoded test sentences in a dataset (this rebinds test_dataset, previously the raw column).
test_dataset = SentimentTestDataset(tokenizer, test)

In [None]:
def collate_fn_style_test(samples):
    """Collate unlabelled token-id sequences into padded tensors.

    Unlike the training collate function, the input order is preserved.

    Args:
        samples: Sequence of token-id arrays as produced by
            ``SentimentTestDataset.__getitem__``.

    Returns:
        Tuple ``(input_ids, attention_mask, token_type_ids, position_ids)``,
        each of shape ``(batch, max_len)``.
    """
    lengths = [len(sequence) for sequence in samples]
    longest = max(lengths)

    # 1 marks a real token, 0 marks padding.
    attention_mask = torch.tensor(
        [[1] * length + [0] * (longest - length) for length in lengths])
    # Zero-pad every sequence on the right up to the longest one.
    padded_ids = pad_sequence([torch.tensor(sequence) for sequence in samples], batch_first=True)

    batch_size, padded_width = padded_ids.shape
    # Every row is a single segment, so all segment ids are 0 across the padded width.
    token_type_ids = torch.zeros((batch_size, padded_width), dtype=torch.long)
    # Positions 0..padded_width-1, repeated for every row.
    position_ids = torch.arange(padded_width, dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
    return padded_ids, attention_mask, token_type_ids, position_ids

In [None]:
# shuffle=False keeps predictions in the same order as the rows of the test set.
test_batch_size = 32
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size,
                                          shuffle=False, collate_fn=collate_fn_style_test,
                                          num_workers=2)

In [None]:
# Layers such as dropout behave differently at training and inference time,
# so switch the model to eval mode before predicting.
model.eval()
# Gradients are not needed for inference, so disable autograd for the whole pass.
with torch.no_grad():
    predictions = []  # one predicted label (0 or 1) per test sentence, in loader order
    for input_ids, attention_mask, token_type_ids, position_ids in tqdm(test_loader,
                                                                        desc='Test',
                                                                        position=1,
                                                                        leave=None):

        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        token_type_ids = token_type_ids.to(device)
        position_ids = position_ids.to(device)

        output = model(input_ids=input_ids,
                       attention_mask=attention_mask,
                       token_type_ids=token_type_ids,
                       position_ids=position_ids)

        logits = output.logits
        # Predict 0 only when logit 0 is strictly larger than logit 1 (same rule as
        # the validation pass), computed for the whole batch in one vectorised step.
        predictions += (~(logits[:, 0] > logits[:, 1])).long().tolist()


Test:   0%|          | 0/32 [00:00<?, ?it/s][A
Test:   3%|▎         | 1/32 [00:00<00:05,  5.65it/s][A
Test:   9%|▉         | 3/32 [00:00<00:02, 10.65it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  16%|█▌        | 5/32 [00:00<00:02, 12.99it/s][A
Test:  25%|██▌       | 8/32 [00:00<00:01, 16.38it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  34%|███▍      | 11/32 [00:00<00:01, 19.07it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  44%|████▍     | 14/32 [00:00<00:00, 20.54it/s][A
Test:  53%|█████▎    | 17/32 [00:00<00:00, 21.46it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  62%|██████▎   | 20/32 [00:01<00:00, 22.60it/s][A
Test:  72%|███████▏  | 23/32 [00:01<00:00, 23.32it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  81%|████████▏ | 26/32 [00:01<00:00, 23.55it/s][A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  


Test:  91%|█████████ | 29/32 [00:01<00:00, 23.09it/s][A
Test: 100%|██████████| 32/32 [00:01<00:00, 24.70it/s][A
                                                     [A

tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769]], device='cuda:0')
tensor([[-0.0946,  0.4769],
        [-0.0946,  0.4769],
        [-0.0946,  0.4769],
  

# 4. Submission

In [None]:
# Attach the predicted labels; this relies on test_loader using shuffle=False so that
# predictions line up with the rows of test_df.
test_df['Category'] = predictions

In [None]:
# Write the submission file to the current working directory, without the index column.
test_df.to_csv('submission_Try_7.csv', index=False)