In [None]:
import json
import pandas as pd

def load_dataset(path, test=True):
    '''Convert samples in JSON to dataframe
    0 if the text is AI-generated
    1 if the text is human-generated
    '''
    data = []
    columns = ['id', 'text', 'label']
    with open(path) as f:
        lines = f.readlines()        
        if test:
            for line in lines:
                line_dict = json.loads(line)
                data.append([line_dict['id'], line_dict['text'], line_dict['label']])
        else:
            columns = columns[:-1]
            for line in lines:
                line_dict = json.loads(line)
                data.append([line_dict['id'], line_dict['text']])

    return pd.DataFrame(data, columns=columns).set_index('id')

In [None]:
import torch

import lightning.pytorch as pl

from torch import optim, nn, utils
from torch.utils.data import Dataset, DataLoader

from transformers import XLNetTokenizer, XLNetModel, AutoTokenizer, AlbertModel, AutoModel, DebertaV2Model, DebertaV2Tokenizer, ElectraModel, RobertaModel, AlbertTokenizer

import numpy as np

import math

from tqdm import tqdm

from argparse import ArgumentParser

# import pytorch_lightning as pl
import torch
from torchmetrics import Accuracy
from torch import nn
from torch.nn import functional as F
# from torch.optim import Adam
from torch.optim.optimizer import Optimizer

# from sklearn.model_selection import train_test_split

class LogisticRegression(pl.LightningModule):
    """
    Logistic regression model
    """

    def __init__(
        self,
        input_dim: int,
        num_classes: int,
        bias: bool = True,
        learning_rate: float = 1e-4,
        optimizer: Optimizer = optim.AdamW,
        l1_strength: float = 0.0,
        l2_strength: float = 0.0,
        **kwargs
    ):
        """
        Args:
            input_dim: number of dimensions of the input (at least 1)
            num_classes: number of class labels (binary: 2, multi-class: >2)
            bias: specifies if a constant or intercept should be fitted (equivalent to fit_intercept in sklearn)
            learning_rate: learning_rate for the optimizer
            optimizer: the optimizer to use (default='Adam')
            l1_strength: L1 regularization strength (default=None)
            l2_strength: L2 regularization strength (default=None)
        """
        super().__init__()
        self.save_hyperparameters()
        self.optimizer = optimizer
        # self.accuracy = Accuracy(task='multiclass', num_classes=2, top_k = 1)

        self.validation_step_outputs = []
        self.test_step_outputs = []

        self.linear = nn.Linear(in_features=self.hparams.input_dim, out_features=self.hparams.num_classes, bias=bias)

    def forward(self, x):
        y_hat = self.linear(x)
        return y_hat

    def training_step(self, batch, batch_idx):
        x, y = batch

        # flatten any input
        x = x.view(x.size(0), -1)

        y_hat = self(x)

        # PyTorch cross_entropy function combines log_softmax and nll_loss in single function
        loss = F.cross_entropy(y_hat, y, reduction='sum')

        # L1 regularizer
        if self.hparams.l1_strength > 0:
            l1_reg = sum(param.abs().sum() for param in self.parameters())
            loss += self.hparams.l1_strength * l1_reg

        # L2 regularizer
        if self.hparams.l2_strength > 0:
            l2_reg = sum(param.pow(2).sum() for param in self.parameters())
            loss += self.hparams.l2_strength * l2_reg

        loss /= x.size(0)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self(x)
        # acc = self.accuracy(y_hat, y)
        loss = F.cross_entropy(y_hat, y)
        self.log("val_loss", loss)

    # def on_validation_epoch_end(self):
    #     acc = torch.stack([x['acc'] for x in self.validation_step_outputs]).mean()
    #     val_loss = torch.stack([x['val_loss'] for x in self.validation_step_outputs]).mean()
    #     tensorboard_logs = {'val_ce_loss': val_loss, 'val_acc': acc}
    #     progress_bar_metrics = tensorboard_logs
    #     return {'val_loss': val_loss, 'log': tensorboard_logs, 'progress_bar': progress_bar_metrics}

    def test_step(self, batch, batch_idx):
        x, y = batch
        y = torch.argmax(y, dim=1)
        y_hat = torch.argmax(self(x), dim=1)
        accuracy = torch.sum(y == y_hat).item() / (len(y) * 1.0)
        self.log('test_acc', accuracy)

    # def on_test_epoch_end(self):
    #     acc = torch.stack([x['acc'] for x in self.test_step_outputs]).mean()
    #     test_loss = torch.stack([x['test_loss'] for x in self.test_step_outputs]).mean()
    #     tensorboard_logs = {'test_ce_loss': test_loss, 'test_acc': acc}
    #     progress_bar_metrics = tensorboard_logs
    #     return {'test_loss': test_loss, 'log': tensorboard_logs, 'progress_bar': progress_bar_metrics}

    def configure_optimizers(self):
        return self.optimizer(self.parameters(), lr = 0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)

    @staticmethod
    def add_model_specific_args(parent_parser):
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument('--learning_rate', type=float, default=0.0001)
        parser.add_argument('--input_dim', type=int, default=None)
        parser.add_argument('--num_classes', type=int, default=None)
        parser.add_argument('--bias', default='store_true')
        parser.add_argument('--batch_size', type=int, default=16)
        return parser

class SoftMaxLit(pl.LightningModule):
    """
    Reference
    https://machinelearningmastery.com/introduction-to-softmax-classifier-in-pytorch/
    """
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        self.linear = torch.nn.Linear(n_inputs, n_outputs)
        self.softmax = nn.Softmax(dim=1)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        return self.softmax(self.linear(x))
        
    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        x, y = batch
        
        y_hat = self(x)

        loss = self.criterion(y_hat, y)
        
        self.log('train_loss', loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr = 0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
        return optimizer

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("val_loss", loss)
        
    def test_step(self, batch, batch_idx):
        x, y = batch
        y = torch.argmax(y, dim=1)
        y_hat = torch.argmax(self(x), dim=1)
        accuracy = torch.sum(y == y_hat).item() / (len(y) * 1.0)
        self.log('test_acc', accuracy)

class Data(Dataset):
    "The data for multi-class classification"
    def __init__(self, df, *, x=None, load_batch_size=None, tokenizer=None, pretrained=None):
        self.y, self.len = self._get_y_and_len_from_df(df)
        
        if x is not None:
            self.x = x
        else:
            self.x = self._get_x_from_df(df, load_batch_size, tokenizer, pretrained)
        
    def _get_x_from_df(self, df, load_batch_size, tokenizer, pretrained):
        docs = df['text'].tolist()
        inputs = tokenizer(docs, return_tensors="pt", padding=True)

        cls_arr = []
        for i, (x, y) in zip(tqdm(range(math.ceil(len(df) / load_batch_size))), self._get_x_y_from_df_with_batch(df, load_batch_size)):
            cls = pretrained(**{k: inputs[k][x:y] for k in list(inputs.keys())}).last_hidden_state[:, 0, :].detach()
#             cls = pretrained(**{'input_ids':inputs['input_ids'][x:y],'token_type_ids':inputs['token_type_ids'][x:y],'attention_mask':inputs['attention_mask'][x:y]}).last_hidden_state[:, 0, :].detach()
            cls_arr.append(cls)
        return torch.concat(cls_arr).type(torch.float32)
    
    def _get_y_and_len_from_df(self, df):
        dim_0 = df['text'].shape[0]
        matrix = np.zeros((dim_0,2))
        for i, y in enumerate(df['label'].tolist()):
            matrix[i][y] = 1
        return torch.from_numpy(matrix).type(torch.float32), dim_0

    def _get_x_y_from_df_with_batch(self, df, step_size):
        l = list(range(0, len(df), step_size))
        for ind, _ in enumerate(l):
            if l[ind] + step_size >= len(df):
                yield (l[ind], len(df))
            else:    
                yield (l[ind], l[ind + 1])

    def __getitem__(self, idx):
        "accessing one element in the dataset by index"
        return self.x[idx], self.y[idx] 
 
    def __len__(self):
        "size of the entire dataset"
        return self.len

    @staticmethod
    def concat(df, datasets):
        "concatenate dataset embeddings from x provided they are applied on the same df"
        x = torch.cat([dataset.x for dataset in datasets], 1)
        return Data(df, x=x)

# MODELS
class TransformerModel():
    # # XLNet: https://huggingface.co/docs/transformers/model_doc/xlnet # size = 768
    # # ALBERT: https://huggingface.co/docs/transformers/model_doc/albert # size = 768
    # # ELECTRA: 256
    # # Roberta: 768

    MODELS = {
        'albert': {'name': 'albert-base-v2', 'dim': 768,'tokenizer': AlbertTokenizer, 'pretrained': AlbertModel},
        'electra': {'name': 'google/electra-small-discriminator', 'dim': 256,'tokenizer': AutoTokenizer, 'pretrained': ElectraModel},
        'roberta': {'name': 'roberta-base', 'dim': 768,'tokenizer': AutoTokenizer, 'pretrained': RobertaModel},
        'xlnet': {'name': 'xlnet-base-cased', 'dim': 768, 'tokenizer': XLNetTokenizer, 'pretrained': XLNetModel}, 
    }

    def __init__(self, model_tag):
        if model_tag not in list(self.MODELS.keys()):
            raise ValueError(f'Invalid model: {model_tag}. Valide models are: {self.MODELS.join(" ")}')
        
        self.model_tag = model_tag
        self.dim = self.MODELS[model_tag]['dim']
        self.tokenizer = self.MODELS[model_tag]['tokenizer'].from_pretrained(self.MODELS[model_tag]['name'])
        self.pretrained = self.MODELS[model_tag]['pretrained'].from_pretrained(self.MODELS[model_tag]['name'])
        
    def dataset(self, df, dev, save=False, delete=False):
        # cur_df = df[:100] if dev else df
        dataset = Data(df, load_batch_size = 30, tokenizer=self.tokenizer, pretrained=self.pretrained)  # 10 > 30 > 40 yes # 4 is the best

        if save:
            torch.save(dataset.x, f"pretrained--dev={dev}--model={self.model_tag}.pt")
        
        if delete:
            del dataset.x
            torch.cuda.empty_cache()

        return dataset

def get_dataloaders(dataset, batch_size):
    train_dataset, val_dataset, test_dataset = utils.data.random_split(dataset,(0.8, 0.1, 0.1))
    train_dataloader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle=True)
    val_dataloader = DataLoader(dataset = val_dataset, batch_size = batch_size, shuffle=True)
    test_dataloader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle=True)
    return {'train': train_dataloader, 'val': val_dataloader, 'test': test_dataloader}

In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import ModelCheckpoint

import pytorch_lightning as plpl

import torch

# from helper import load_dataset
# from model import Data, SoftMaxLit, TransformerModel, get_dataloaders, LogisticRegression

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.cuda.device_count()

1

In [4]:
device = torch.cuda.current_device()

In [5]:
device

0

In [6]:
BATCH_SIZE = 4
NUM_EPOCH = 4
DEV = True

# Because DEV
df = load_dataset('../dataset/training.json', test=True)[:100]

model_names = ['albert', 'electra', 'roberta', 'xlnet']

In [7]:
for cur_model_name in model_names:
    TransformerModel(cur_model_name).dataset(df, DEV, save=True, delete=True)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.bias', 'predictions.dense.weight', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.decoder.bias', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|█████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:14<00:00,  3.71s/it]
Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_pr

In [8]:
checkpoints = []
for cur_model_name in model_names:
    cur_model_name
    cur_dataset_x = torch.load(f'pretrained--dev=True--model={cur_model_name}.pt')
    cur_data = Data(df, x=cur_dataset_x)
    cur_dataloaders = get_dataloaders(cur_data, BATCH_SIZE)
    cur_model = SoftMaxLit(TransformerModel.MODELS[cur_model_name]['dim'], 2)
    checkpoint_callback = ModelCheckpoint(
        save_top_k=1,
        monitor='val_loss',
        mode='min',
        filename=f'model={cur_model_name}--dev={DEV}' + '--{epoch}-{step}--{val_loss:.2f}'
    )

    trainer = pl.Trainer(callbacks=[checkpoint_callback], max_epochs=NUM_EPOCH)
    trainer.fit(model=cur_model, train_dataloaders=cur_dataloaders['train'], val_dataloaders=cur_dataloaders['val'])
    trainer.test(cur_model, dataloaders=cur_dataloaders['test'])
    checkpoints.append(checkpoint_callback.best_model_path)

    del cur_dataset_x
    del cur_data.x
    torch.cuda.empty_cache()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2023-09-16 20:14:57.203127: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-16 20:14:57.234589: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To 

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | linear    | Linear           | 514   
1 | softmax   | Softmax          | 0     
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
514       Trainable params
0         Non-trainable params
514       Total params
0.002     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | linear    | Linear           | 1.5 K 
1 | softmax   | Softmax          | 0     
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | linear    | Linear           | 1.5 K 
1 | softmax   | Softmax          | 0     
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

In [9]:
pretrained_datasets_x = [
    "pretrained--dev=True--model=albert.pt",
    "pretrained--dev=True--model=electra.pt",
    "pretrained--dev=True--model=roberta.pt",
    "pretrained--dev=True--model=xlnet.pt"
]

checkpoints = [
    'lightning_logs_02_10_2023_11_31/albert/checkpoints/model=albert--dev=False--epoch=272-step=30849--val_loss=0.36.ckpt',
    'lightning_logs_02_10_2023_11_31/electra/checkpoints/model=electra--dev=False--epoch=298-step=33787--val_loss=0.39.ckpt',
    'lightning_logs_02_10_2023_11_31/roberta/checkpoints/model=roberta--dev=False--epoch=299-step=33900--val_loss=0.35.ckpt',
    'lightning_logs_02_10_2023_11_31/xlnet/checkpoints/model=xlnet--dev=False--epoch=49-step=5650--val_loss=0.38.ckpt'
]

model_y_arr = []
for model_name, pretrained_dataset_x, ckpt in zip(model_names, pretrained_datasets_x, checkpoints):
    n_inputs = TransformerModel.MODELS[model_name]['dim']
    model = SoftMaxLit(n_inputs, 2).load_from_checkpoint(n_inputs=n_inputs, n_outputs=2, checkpoint_path=ckpt)
    x = torch.load(pretrained_dataset_x).to(device)
    y_hat = model(x)
    
    # Free up memory
    del x
    torch.cuda.empty_cache()
#     y_first = y_hat[:,0]
    y_first = y_hat
#     y_first = y_first.reshape((y_first.shape[0], 1))

    model_y_arr.append(y_first)

lr_dataset_x = torch.cat(model_y_arr, dim=1).detach()

In [10]:
NUM_EPOCH = 100

lr_dataset = Data(df, x=lr_dataset_x)
lr_dataloaders = get_dataloaders(lr_dataset, BATCH_SIZE)

# lr_model = LogisticRegression(input_dim=lr_dataset_x.shape[1], num_classes=2)
lr_model = SoftMaxLit(lr_dataset_x.shape[1], 2)

checkpoint_callback = ModelCheckpoint(
    save_top_k=1,
    monitor='val_loss',
    mode='min',
    filename=f'model=lr--dev={DEV}'
)

trainer = pl.Trainer(callbacks = [checkpoint_callback], max_epochs=NUM_EPOCH) # callbacks=[checkpoint_callback]
trainer.fit(model=lr_model, train_dataloaders=lr_dataloaders['train'], val_dataloaders=lr_dataloaders['val'])
trainer.test(lr_model, dataloaders=lr_dataloaders['test'])

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | linear    | Linear           | 18    
1 | softmax   | Softmax          | 0     
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
18        Trainable params
0         Non-trainable params
18        Total params
0.000     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test_acc': 1.0}]

In [11]:
x = lr_dataset.x.to(device)
lr_model.to(device)
lr_model(x)

tensor([[0.2016, 0.7984],
        [0.0704, 0.9296],
        [0.0551, 0.9449],
        [0.0864, 0.9136],
        [0.9796, 0.0204],
        [0.9885, 0.0115],
        [0.0472, 0.9528],
        [0.1729, 0.8271],
        [0.0630, 0.9370],
        [0.0547, 0.9453],
        [0.0522, 0.9478],
        [0.3348, 0.6652],
        [0.0708, 0.9292],
        [0.9475, 0.0525],
        [0.0675, 0.9325],
        [0.0664, 0.9336],
        [0.9421, 0.0579],
        [0.9724, 0.0276],
        [0.0552, 0.9448],
        [0.0654, 0.9346],
        [0.7427, 0.2573],
        [0.0918, 0.9082],
        [0.3929, 0.6071],
        [0.9803, 0.0197],
        [0.9251, 0.0749],
        [0.9760, 0.0240],
        [0.0715, 0.9285],
        [0.0447, 0.9553],
        [0.9605, 0.0395],
        [0.1610, 0.8390],
        [0.6432, 0.3568],
        [0.8098, 0.1902],
        [0.9887, 0.0113],
        [0.9903, 0.0097],
        [0.1242, 0.8758],
        [0.0752, 0.9248],
        [0.0696, 0.9304],
        [0.0526, 0.9474],
        [0.0

In [12]:
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
