## Imports

In [77]:
import pandas as pd
import numpy as np

import torch
from tqdm import tqdm
from torch import nn
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from timeit import default_timer as timer
from os import walk
from pathlib import Path
from sklearn.model_selection import train_test_split

## Data

In [78]:
ROOT_DATA_RAW = './'
HUMAN_JSON_FILE_NAME = 'human.jsonl'
HUMAN_JSON_PATH = f'{ROOT_DATA_RAW}/{HUMAN_JSON_FILE_NAME}'
MODELS_JSON_FOLDER_PATH = f'{ROOT_DATA_RAW}/machines'

In [79]:
BATCH_SIZE = 32
LSTM_UNITS = 256
LSTM_LAYERS = 5
EMBEDDING_SIZE = 512

In [80]:
TEST_SET_FRACTION = 0.3

In [81]:
df = pd.read_json(path_or_buf=HUMAN_JSON_PATH, lines=True)
df['is_llm'] = 0
df['dataset_name'] = Path(HUMAN_JSON_FILE_NAME).stem

In [82]:
dir_path, dir_names, file_names = next(walk(MODELS_JSON_FOLDER_PATH))

print(dir_names)
for file_name in file_names:
    temp_df = pd.read_json(path_or_buf=f'{MODELS_JSON_FOLDER_PATH}/{file_name}', lines=True)
    temp_df['is_llm'] = 1
    temp_df['dataset_name'] = Path(file_name).stem

    df = pd.concat([df, temp_df], ignore_index=True)

df.drop(labels=['id'], inplace=True, axis='columns')

[]


In [83]:
df.tail()

Unnamed: 0,text,is_llm,dataset_name
15213,Gabby Petito Case: Social Media Detectives and...,1,text-bison-002
15214,Search Intensifies for Missing Woman Gabby Pet...,1,text-bison-002
15215,University of Wisconsin Oshkosh Student Claims...,1,text-bison-002
15216,Gabby Petito Case: The Internet's Role in a Tr...,1,text-bison-002
15217,Gabby Petito Remembered as a 'Super Kind-Heart...,1,text-bison-002


### Tokenize

In [84]:
checkpoint = 'distilbert-base-cased'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [85]:
df['tokenized_text'] = tokenizer(list(df['text'].to_list()))['input_ids']

Token indices sequence length is longer than the specified maximum sequence length for this model (843 > 512). Running this sequence through the model will result in indexing errors


## Model

In [86]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [87]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, layers_num, device, output_size=1, dropout=0):
        super().__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.layers_num = layers_num
        self.output_size= output_size
        self.dropout = dropout
        self.device = device

        self.embed = nn.Embedding(self.vocab_size, self.embedding_size, device=self.device)

        self.lstm = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.hidden_size,
            num_layers=self.layers_num,
            batch_first=True,
            dropout=self.dropout,
            device=self.device
        )

        self.fc = nn.Linear(
            self.hidden_size,
            self.output_size
        )

    def forward(self, X, lengths):
        embeddings = self.embed(X)

        seq_output, (h_n, c_n) = self.lstm(embeddings)

        out = seq_output.sum(dim=1).div(lengths.float().unsqueeze(dim=1))
        logits = self.fc(out)
        return logits

## Dataset

In [88]:
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.size

    def __getitem__(self, index):
        return (
            self.X[index],
            self.y[index]
        )

In [89]:
def collate_fn(batch):
  # We want to sort the batch by seq length,
  # in order to make the computation more efficient
  batch = sorted(batch, key=lambda x: len(x[0]), reverse=True)

  inputs = [torch.LongTensor(x[0]).to(device) for x in batch]
  padded_input = nn.utils.rnn.pad_sequence(inputs, batch_first=True)

  lengths = torch.LongTensor([len(x[0]) for x in batch]).to(device)

  y = torch.FloatTensor(np.array([x[1] for x in batch])).reshape(-1, 1).to(device)

  return padded_input, lengths, y

## Train and test functions

In [90]:
def calculate_accuracy(y_true, y_hat):
    correct_pred = torch.eq(torch.sigmoid(y_hat).round(), y_true).sum().item()
    return (correct_pred / len(y_hat)) * 100

def train_step(model, dataloader, loss_fn, optimizer, device):

    model.train()

    train_loss, train_acc = 0, 0
    steps = 0

    for X, lengths, y in dataloader:
        X, y = X.to(device), y.to(device)

        y_hat = model(X, lengths)

        loss = loss_fn(y_hat, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc += calculate_accuracy(y_true=y, y_hat=y_hat)
        steps += 1

    return train_loss / steps, train_acc / steps


def test_step(model, dataloader, loss_fn, device):

    model.eval()

    test_loss, test_acc = 0, 0
    steps = 0

    with torch.inference_mode():
        for X, lengths, y in dataloader:

            X, y = X.to(device), y.to(device)

            y_hat = model(X, lengths)

            loss = loss_fn(y_hat, y)
            test_loss += loss.item()

            test_acc += calculate_accuracy(y_true=y, y_hat=y_hat)
            steps += 1

    return test_loss / steps, test_acc / steps

def train(model,
          train_dataloader,
          test_dataloader,
          optimizer,
          loss_fn,
          epochs,
          device):

    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
        }

    model.to(device)

    for epoch in tqdm(range(epochs)):

        start_time = timer()
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
        )
        end_time = timer()

        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device,
        )

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f} | "
            f"time: {(end_time-start_time):.4f}"
        )

    return results

## Train on each LLM dataset separately and test against all others

In [91]:
human_df = df.loc[df['is_llm'] == 0]
human_train_df, human_test_df = train_test_split(human_df, test_size=TEST_SET_FRACTION)

In [92]:
for llm_name in df['dataset_name'].unique():
    if llm_name == Path(HUMAN_JSON_FILE_NAME).stem:
        continue

    llm_df = df.loc[df['dataset_name'] == llm_name]
    llm_train_df, llm_test_df = train_test_split(llm_df, test_size=TEST_SET_FRACTION)

    train_df = pd.concat([human_train_df, llm_train_df], ignore_index=True)
    test_df = pd.concat([human_test_df, llm_test_df], ignore_index=True)

    train_dataset = TextDataset(train_df['tokenized_text'], train_df['is_llm'])
    test_dataset = TextDataset(test_df['tokenized_text'], test_df['is_llm'])

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=False,
        collate_fn=collate_fn
    )

    test_dataloader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_last=False,
        collate_fn=collate_fn
    )

    model = RNN(tokenizer.vocab_size, EMBEDDING_SIZE, LSTM_UNITS, LSTM_LAYERS, device, dropout=0.6).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.00008)
    loss_fn = nn.BCEWithLogitsLoss()

    print(f'Training model for {llm_name}...')

    current_results = train(
        model,
        train_dataloader,
        test_dataloader,
        optimizer,
        loss_fn,
        epochs=15,
        device=device
    )

    print(f'Finished training model for {llm_name}')

Training model for gemini-pro...


  7%|▋         | 1/15 [00:16<03:55, 16.83s/it]

Epoch: 1 | train_loss: 0.6938 | train_acc: 48.4375 | test_loss: 0.6914 | test_acc: 57.7806 | time: 14.5844


 13%|█▎        | 2/15 [00:34<03:41, 17.07s/it]

Epoch: 2 | train_loss: 0.6932 | train_acc: 47.3307 | test_loss: 0.6930 | test_acc: 51.3393 | time: 14.9668


 20%|██        | 3/15 [00:51<03:26, 17.21s/it]

Epoch: 3 | train_loss: 0.6933 | train_acc: 49.6094 | test_loss: 0.6927 | test_acc: 51.3393 | time: 15.1558


 27%|██▋       | 4/15 [01:08<03:09, 17.18s/it]

Epoch: 4 | train_loss: 0.6922 | train_acc: 54.3620 | test_loss: 0.6830 | test_acc: 62.5425 | time: 14.9263


 33%|███▎      | 5/15 [01:25<02:51, 17.16s/it]

Epoch: 5 | train_loss: 0.5223 | train_acc: 79.8828 | test_loss: 0.5038 | test_acc: 85.5655 | time: 14.9074


 40%|████      | 6/15 [01:42<02:33, 17.06s/it]

Epoch: 6 | train_loss: 0.2607 | train_acc: 92.7083 | test_loss: 0.3876 | test_acc: 88.4566 | time: 14.6521


 47%|████▋     | 7/15 [01:59<02:15, 16.99s/it]

Epoch: 7 | train_loss: 0.1371 | train_acc: 95.4427 | test_loss: 0.2068 | test_acc: 94.3452 | time: 14.6316


 53%|█████▎    | 8/15 [02:16<01:59, 17.07s/it]

Epoch: 8 | train_loss: 0.0767 | train_acc: 97.9167 | test_loss: 0.1795 | test_acc: 95.0468 | time: 14.9753


 60%|██████    | 9/15 [02:33<01:42, 17.06s/it]

Epoch: 9 | train_loss: 0.0485 | train_acc: 98.7630 | test_loss: 0.1842 | test_acc: 94.3027 | time: 14.8499


 67%|██████▋   | 10/15 [02:50<01:25, 17.00s/it]

Epoch: 10 | train_loss: 0.0343 | train_acc: 99.2188 | test_loss: 0.1877 | test_acc: 95.0893 | time: 14.6424


 73%|███████▎  | 11/15 [03:07<01:07, 16.98s/it]

Epoch: 11 | train_loss: 0.0225 | train_acc: 99.4141 | test_loss: 0.2278 | test_acc: 94.6003 | time: 14.6745


 80%|████████  | 12/15 [03:24<00:50, 16.99s/it]

Epoch: 12 | train_loss: 0.0201 | train_acc: 99.4792 | test_loss: 0.2337 | test_acc: 94.8980 | time: 14.8015


 87%|████████▋ | 13/15 [03:41<00:33, 16.94s/it]

Epoch: 13 | train_loss: 0.0152 | train_acc: 99.8698 | test_loss: 0.2731 | test_acc: 93.7075 | time: 14.6221


 93%|█████████▎| 14/15 [03:58<00:16, 16.97s/it]

Epoch: 14 | train_loss: 0.0092 | train_acc: 99.9349 | test_loss: 0.2307 | test_acc: 95.0468 | time: 14.7981


100%|██████████| 15/15 [04:15<00:00, 17.03s/it]


Epoch: 15 | train_loss: 0.0072 | train_acc: 100.0000 | test_loss: 0.2483 | test_acc: 95.3444 | time: 14.8279
Finished training model for gemini-pro
Training model for qwen-qwen1.5-72b-chat-8bit...


  7%|▋         | 1/15 [00:16<03:47, 16.22s/it]

Epoch: 1 | train_loss: 0.6944 | train_acc: 44.5964 | test_loss: 0.6932 | test_acc: 51.3393 | time: 14.2874


 13%|█▎        | 2/15 [00:32<03:30, 16.17s/it]

Epoch: 2 | train_loss: 0.6934 | train_acc: 50.1953 | test_loss: 0.6964 | test_acc: 51.3393 | time: 14.2219


 20%|██        | 3/15 [00:48<03:14, 16.18s/it]

Epoch: 3 | train_loss: 0.6931 | train_acc: 50.1302 | test_loss: 0.6965 | test_acc: 51.3393 | time: 14.2344


 27%|██▋       | 4/15 [01:04<02:58, 16.22s/it]

Epoch: 4 | train_loss: 0.6887 | train_acc: 53.9714 | test_loss: 0.7166 | test_acc: 64.4983 | time: 14.3569


 33%|███▎      | 5/15 [01:21<02:42, 16.28s/it]

Epoch: 5 | train_loss: 0.4376 | train_acc: 86.0677 | test_loss: 0.7804 | test_acc: 74.8724 | time: 14.4619


 40%|████      | 6/15 [01:37<02:25, 16.20s/it]

Epoch: 6 | train_loss: 0.3376 | train_acc: 92.5130 | test_loss: 0.4944 | test_acc: 86.6284 | time: 14.1011


 47%|████▋     | 7/15 [01:53<02:09, 16.22s/it]

Epoch: 7 | train_loss: 0.2095 | train_acc: 95.1172 | test_loss: 0.4620 | test_acc: 89.6046 | time: 14.3150


 53%|█████▎    | 8/15 [02:09<01:53, 16.22s/it]

Epoch: 8 | train_loss: 0.2368 | train_acc: 95.2474 | test_loss: 0.4575 | test_acc: 88.8180 | time: 14.2679


 60%|██████    | 9/15 [02:26<01:38, 16.34s/it]

Epoch: 9 | train_loss: 0.2685 | train_acc: 93.8802 | test_loss: 0.4591 | test_acc: 89.4133 | time: 14.6829


 67%|██████▋   | 10/15 [02:42<01:21, 16.28s/it]

Epoch: 10 | train_loss: 0.2109 | train_acc: 96.0938 | test_loss: 0.4849 | test_acc: 91.0927 | time: 14.2255


 73%|███████▎  | 11/15 [02:58<01:04, 16.18s/it]

Epoch: 11 | train_loss: 0.5172 | train_acc: 87.9557 | test_loss: 0.9275 | test_acc: 76.8920 | time: 14.0152


 80%|████████  | 12/15 [03:14<00:48, 16.25s/it]

Epoch: 12 | train_loss: 0.5852 | train_acc: 81.8359 | test_loss: 0.6266 | test_acc: 81.0587 | time: 14.4675


 87%|████████▋ | 13/15 [03:31<00:32, 16.35s/it]

Epoch: 13 | train_loss: 0.4676 | train_acc: 85.6771 | test_loss: 0.5540 | test_acc: 83.6947 | time: 14.6484


 93%|█████████▎| 14/15 [03:47<00:16, 16.35s/it]

Epoch: 14 | train_loss: 0.4477 | train_acc: 86.9141 | test_loss: 0.5151 | test_acc: 84.3963 | time: 14.4304


100%|██████████| 15/15 [04:03<00:00, 16.26s/it]


Epoch: 15 | train_loss: 0.4077 | train_acc: 88.2812 | test_loss: 0.5078 | test_acc: 84.9915 | time: 14.1918
Finished training model for qwen-qwen1.5-72b-chat-8bit
Training model for vicgalle-gpt2-open-instruct-v1...


  7%|▋         | 1/15 [00:16<03:48, 16.30s/it]

Epoch: 1 | train_loss: 0.6913 | train_acc: 49.0885 | test_loss: 0.7423 | test_acc: 51.3393 | time: 14.3991


 13%|█▎        | 2/15 [00:32<03:29, 16.12s/it]

Epoch: 2 | train_loss: 0.6911 | train_acc: 52.7995 | test_loss: 0.7130 | test_acc: 26.4668 | time: 14.1003


 20%|██        | 3/15 [00:48<03:13, 16.16s/it]

Epoch: 3 | train_loss: 0.6809 | train_acc: 61.1979 | test_loss: 0.6793 | test_acc: 52.1259 | time: 14.3308


 27%|██▋       | 4/15 [01:04<02:57, 16.16s/it]

Epoch: 4 | train_loss: 0.4790 | train_acc: 81.7708 | test_loss: 0.6594 | test_acc: 80.4847 | time: 14.2515


 33%|███▎      | 5/15 [01:20<02:41, 16.15s/it]

Epoch: 5 | train_loss: 0.3182 | train_acc: 89.9089 | test_loss: 0.7648 | test_acc: 77.5723 | time: 14.2381


 40%|████      | 6/15 [01:36<02:25, 16.13s/it]

Epoch: 6 | train_loss: 0.2698 | train_acc: 93.4896 | test_loss: 0.6430 | test_acc: 83.9498 | time: 14.1886


 47%|████▋     | 7/15 [01:53<02:09, 16.23s/it]

Epoch: 7 | train_loss: 0.2198 | train_acc: 94.5312 | test_loss: 0.7804 | test_acc: 81.6752 | time: 14.5645


 53%|█████▎    | 8/15 [02:09<01:53, 16.28s/it]

Epoch: 8 | train_loss: 0.2411 | train_acc: 94.7266 | test_loss: 0.5956 | test_acc: 85.2891 | time: 14.4852


 60%|██████    | 9/15 [02:26<01:37, 16.31s/it]

Epoch: 9 | train_loss: 0.1791 | train_acc: 96.9401 | test_loss: 0.6318 | test_acc: 85.7355 | time: 14.4669


 67%|██████▋   | 10/15 [02:42<01:21, 16.39s/it]

Epoch: 10 | train_loss: 0.1716 | train_acc: 97.0703 | test_loss: 0.6399 | test_acc: 86.3733 | time: 14.6837


 73%|███████▎  | 11/15 [02:58<01:05, 16.29s/it]

Epoch: 11 | train_loss: 0.1747 | train_acc: 96.6146 | test_loss: 0.6484 | test_acc: 87.1173 | time: 14.1742


 80%|████████  | 12/15 [03:14<00:48, 16.26s/it]

Epoch: 12 | train_loss: 0.1280 | train_acc: 97.8516 | test_loss: 0.6595 | test_acc: 86.5221 | time: 14.2702


 87%|████████▋ | 13/15 [03:31<00:32, 16.29s/it]

Epoch: 13 | train_loss: 0.1308 | train_acc: 97.7214 | test_loss: 0.6863 | test_acc: 85.9269 | time: 14.4557


 93%|█████████▎| 14/15 [03:47<00:16, 16.35s/it]

Epoch: 14 | train_loss: 0.1094 | train_acc: 98.5026 | test_loss: 0.7421 | test_acc: 87.2662 | time: 14.5900


100%|██████████| 15/15 [04:03<00:00, 16.27s/it]


Epoch: 15 | train_loss: 0.1264 | train_acc: 98.1771 | test_loss: 0.6729 | test_acc: 86.5221 | time: 14.3454
Finished training model for vicgalle-gpt2-open-instruct-v1
Training model for meta-llama-llama-2-70b-chat-hf...


  7%|▋         | 1/15 [00:17<03:58, 17.02s/it]

Epoch: 1 | train_loss: 0.6934 | train_acc: 50.1302 | test_loss: 0.6907 | test_acc: 48.6607 | time: 14.8429


 13%|█▎        | 2/15 [00:34<03:41, 17.05s/it]

Epoch: 2 | train_loss: 0.6929 | train_acc: 49.9349 | test_loss: 0.6881 | test_acc: 48.6607 | time: 14.9092


 20%|██        | 3/15 [00:51<03:24, 17.02s/it]

Epoch: 3 | train_loss: 0.6739 | train_acc: 54.2969 | test_loss: 0.5257 | test_acc: 69.7917 | time: 14.8181


 27%|██▋       | 4/15 [01:07<03:06, 16.96s/it]

Epoch: 4 | train_loss: 0.3676 | train_acc: 83.5938 | test_loss: 0.1625 | test_acc: 94.6429 | time: 14.6990


 33%|███▎      | 5/15 [01:24<02:49, 16.95s/it]

Epoch: 5 | train_loss: 0.1753 | train_acc: 94.4661 | test_loss: 0.1141 | test_acc: 96.1310 | time: 14.7509


 40%|████      | 6/15 [01:41<02:33, 17.01s/it]

Epoch: 6 | train_loss: 0.1331 | train_acc: 96.2240 | test_loss: 0.1626 | test_acc: 93.6012 | time: 14.9598


 47%|████▋     | 7/15 [01:58<02:15, 16.95s/it]

Epoch: 7 | train_loss: 0.1162 | train_acc: 96.6797 | test_loss: 0.1500 | test_acc: 93.8988 | time: 14.6819


 53%|█████▎    | 8/15 [02:16<01:59, 17.02s/it]

Epoch: 8 | train_loss: 0.0736 | train_acc: 98.5026 | test_loss: 0.1081 | test_acc: 95.8333 | time: 14.9845


 60%|██████    | 9/15 [02:33<01:42, 17.02s/it]

Epoch: 9 | train_loss: 0.0529 | train_acc: 98.6328 | test_loss: 0.0780 | test_acc: 97.1726 | time: 14.8765


 67%|██████▋   | 10/15 [02:50<01:25, 17.01s/it]

Epoch: 10 | train_loss: 0.0495 | train_acc: 98.8932 | test_loss: 0.2014 | test_acc: 93.7500 | time: 14.8431


 73%|███████▎  | 11/15 [03:07<01:08, 17.03s/it]

Epoch: 11 | train_loss: 0.0976 | train_acc: 98.1771 | test_loss: 0.2799 | test_acc: 96.7262 | time: 14.8686


 80%|████████  | 12/15 [03:24<00:51, 17.04s/it]

Epoch: 12 | train_loss: 0.1531 | train_acc: 98.5026 | test_loss: 0.4883 | test_acc: 94.4940 | time: 14.9144


 87%|████████▋ | 13/15 [03:41<00:34, 17.05s/it]

Epoch: 13 | train_loss: 0.1537 | train_acc: 96.4193 | test_loss: 0.4664 | test_acc: 88.4991 | time: 14.9159


 93%|█████████▎| 14/15 [03:58<00:17, 17.08s/it]

Epoch: 14 | train_loss: 0.1183 | train_acc: 94.7917 | test_loss: 0.3684 | test_acc: 92.4107 | time: 14.9745


100%|██████████| 15/15 [04:15<00:00, 17.01s/it]


Epoch: 15 | train_loss: 0.9800 | train_acc: 66.2760 | test_loss: 0.6217 | test_acc: 68.3036 | time: 14.5700
Finished training model for meta-llama-llama-2-70b-chat-hf
Training model for bigscience-bloomz-7b1...


  7%|▋         | 1/15 [00:16<03:47, 16.23s/it]

Epoch: 1 | train_loss: 0.6692 | train_acc: 50.1302 | test_loss: 0.7977 | test_acc: 51.3393 | time: 14.3564


 13%|█▎        | 2/15 [00:32<03:29, 16.12s/it]

Epoch: 2 | train_loss: 0.6568 | train_acc: 52.7344 | test_loss: 0.7779 | test_acc: 44.6641 | time: 14.1859


 20%|██        | 3/15 [00:48<03:13, 16.09s/it]

Epoch: 3 | train_loss: 0.6015 | train_acc: 67.3177 | test_loss: 1.0308 | test_acc: 50.8929 | time: 14.1630


 27%|██▋       | 4/15 [01:04<02:58, 16.18s/it]

Epoch: 4 | train_loss: 0.4547 | train_acc: 81.0547 | test_loss: 0.6791 | test_acc: 75.5102 | time: 14.4691


 33%|███▎      | 5/15 [01:20<02:42, 16.22s/it]

Epoch: 5 | train_loss: 0.2937 | train_acc: 90.1693 | test_loss: 0.6916 | test_acc: 79.6344 | time: 14.4287


 40%|████      | 6/15 [01:36<02:25, 16.17s/it]

Epoch: 6 | train_loss: 0.2364 | train_acc: 92.7734 | test_loss: 0.6532 | test_acc: 81.5689 | time: 14.1965


 47%|████▋     | 7/15 [01:53<02:09, 16.18s/it]

Epoch: 7 | train_loss: 0.2201 | train_acc: 93.2292 | test_loss: 0.6345 | test_acc: 82.5468 | time: 14.3113


 53%|█████▎    | 8/15 [02:09<01:53, 16.22s/it]

Epoch: 8 | train_loss: 0.1989 | train_acc: 93.8802 | test_loss: 0.6145 | test_acc: 84.2262 | time: 14.4481


 60%|██████    | 9/15 [02:25<01:37, 16.21s/it]

Epoch: 9 | train_loss: 0.1868 | train_acc: 95.2474 | test_loss: 0.6063 | test_acc: 84.6301 | time: 14.3292


 67%|██████▋   | 10/15 [02:41<01:21, 16.23s/it]

Epoch: 10 | train_loss: 0.1847 | train_acc: 96.1589 | test_loss: 0.6856 | test_acc: 82.3129 | time: 14.4030


 73%|███████▎  | 11/15 [02:58<01:04, 16.20s/it]

Epoch: 11 | train_loss: 0.1978 | train_acc: 95.7031 | test_loss: 0.5956 | test_acc: 84.7364 | time: 14.2635


 80%|████████  | 12/15 [03:14<00:48, 16.18s/it]

Epoch: 12 | train_loss: 0.1466 | train_acc: 96.6146 | test_loss: 0.5803 | test_acc: 85.2253 | time: 14.2810


 87%|████████▋ | 13/15 [03:30<00:32, 16.16s/it]

Epoch: 13 | train_loss: 0.1228 | train_acc: 97.4609 | test_loss: 0.5608 | test_acc: 87.0111 | time: 14.2365


 93%|█████████▎| 14/15 [03:46<00:16, 16.25s/it]

Epoch: 14 | train_loss: 0.1178 | train_acc: 98.4375 | test_loss: 0.6907 | test_acc: 84.2900 | time: 14.5855


100%|██████████| 15/15 [04:02<00:00, 16.20s/it]


Epoch: 15 | train_loss: 0.1075 | train_acc: 97.9818 | test_loss: 0.5824 | test_acc: 86.8622 | time: 14.2631
Finished training model for bigscience-bloomz-7b1
Training model for gpt-3.5-turbo-0125...


  7%|▋         | 1/15 [00:16<03:52, 16.60s/it]

Epoch: 1 | train_loss: 0.6919 | train_acc: 50.2604 | test_loss: 0.7161 | test_acc: 51.3393 | time: 14.6022


 13%|█▎        | 2/15 [00:33<03:34, 16.53s/it]

Epoch: 2 | train_loss: 0.6893 | train_acc: 53.3854 | test_loss: 0.7242 | test_acc: 48.7883 | time: 14.4855


 20%|██        | 3/15 [00:49<03:19, 16.59s/it]

Epoch: 3 | train_loss: 0.6893 | train_acc: 66.6667 | test_loss: 0.7234 | test_acc: 42.8784 | time: 14.6822


 27%|██▋       | 4/15 [01:06<03:01, 16.52s/it]

Epoch: 4 | train_loss: 0.5191 | train_acc: 79.3620 | test_loss: 0.7371 | test_acc: 79.9957 | time: 14.3946


 33%|███▎      | 5/15 [01:22<02:44, 16.46s/it]

Epoch: 5 | train_loss: 0.3934 | train_acc: 86.7839 | test_loss: 0.5457 | test_acc: 82.5680 | time: 14.3859


 40%|████      | 6/15 [01:38<02:28, 16.45s/it]

Epoch: 6 | train_loss: 0.2573 | train_acc: 94.4010 | test_loss: 0.5525 | test_acc: 88.6480 | time: 14.4267


 47%|████▋     | 7/15 [01:55<02:11, 16.49s/it]

Epoch: 7 | train_loss: 0.1929 | train_acc: 96.0286 | test_loss: 0.5503 | test_acc: 85.4379 | time: 14.5755


 53%|█████▎    | 8/15 [02:11<01:55, 16.46s/it]

Epoch: 8 | train_loss: 0.1717 | train_acc: 96.5495 | test_loss: 0.5186 | test_acc: 87.5638 | time: 14.3950


 60%|██████    | 9/15 [02:28<01:38, 16.42s/it]

Epoch: 9 | train_loss: 0.1702 | train_acc: 96.7448 | test_loss: 0.4634 | test_acc: 90.9226 | time: 14.3707


 67%|██████▋   | 10/15 [02:44<01:22, 16.48s/it]

Epoch: 10 | train_loss: 0.1301 | train_acc: 98.1120 | test_loss: 0.4644 | test_acc: 92.1131 | time: 14.6093


 73%|███████▎  | 11/15 [03:01<01:06, 16.56s/it]

Epoch: 11 | train_loss: 0.1154 | train_acc: 98.5026 | test_loss: 0.5408 | test_acc: 89.8810 | time: 14.7704


 80%|████████  | 12/15 [03:18<00:49, 16.55s/it]

Epoch: 12 | train_loss: 0.1657 | train_acc: 97.2005 | test_loss: 0.4761 | test_acc: 91.3265 | time: 14.5026


 87%|████████▋ | 13/15 [03:34<00:32, 16.49s/it]

Epoch: 13 | train_loss: 0.1406 | train_acc: 98.2422 | test_loss: 0.4840 | test_acc: 90.3274 | time: 14.3863


 93%|█████████▎| 14/15 [03:51<00:16, 16.52s/it]

Epoch: 14 | train_loss: 0.0961 | train_acc: 97.9167 | test_loss: 0.4994 | test_acc: 92.2619 | time: 14.5456


100%|██████████| 15/15 [04:07<00:00, 16.51s/it]


Epoch: 15 | train_loss: 0.1284 | train_acc: 98.4375 | test_loss: 0.4596 | test_acc: 92.4107 | time: 14.5498
Finished training model for gpt-3.5-turbo-0125
Training model for gpt-4-turbo-preview...


  7%|▋         | 1/15 [00:17<04:04, 17.44s/it]

Epoch: 1 | train_loss: 0.6932 | train_acc: 49.9349 | test_loss: 0.6851 | test_acc: 48.6607 | time: 15.2351


 13%|█▎        | 2/15 [00:34<03:46, 17.40s/it]

Epoch: 2 | train_loss: 0.6868 | train_acc: 52.1484 | test_loss: 0.6159 | test_acc: 71.2372 | time: 15.1618


 20%|██        | 3/15 [00:52<03:27, 17.32s/it]

Epoch: 3 | train_loss: 0.4620 | train_acc: 78.9062 | test_loss: 0.3218 | test_acc: 85.7355 | time: 14.9904


 27%|██▋       | 4/15 [01:09<03:09, 17.27s/it]

Epoch: 4 | train_loss: 0.1807 | train_acc: 93.4896 | test_loss: 0.1425 | test_acc: 94.6003 | time: 14.9578


 33%|███▎      | 5/15 [01:26<02:52, 17.26s/it]

Epoch: 5 | train_loss: 0.1101 | train_acc: 97.0052 | test_loss: 0.1211 | test_acc: 95.0468 | time: 15.0400


 40%|████      | 6/15 [01:43<02:35, 17.33s/it]

Epoch: 6 | train_loss: 0.1078 | train_acc: 96.8099 | test_loss: 0.0818 | test_acc: 97.1301 | time: 15.2684


 47%|████▋     | 7/15 [02:01<02:18, 17.35s/it]

Epoch: 7 | train_loss: 0.0598 | train_acc: 98.7630 | test_loss: 0.0542 | test_acc: 99.1071 | time: 15.1676


 53%|█████▎    | 8/15 [02:18<02:01, 17.30s/it]

Epoch: 8 | train_loss: 0.0484 | train_acc: 99.2188 | test_loss: 0.0573 | test_acc: 98.3631 | time: 14.9809


 60%|██████    | 9/15 [02:35<01:43, 17.33s/it]

Epoch: 9 | train_loss: 0.0419 | train_acc: 98.6979 | test_loss: 0.0642 | test_acc: 97.9167 | time: 15.1814


 67%|██████▋   | 10/15 [02:53<01:26, 17.37s/it]

Epoch: 10 | train_loss: 0.0557 | train_acc: 98.5677 | test_loss: 0.0316 | test_acc: 99.7024 | time: 15.2420


 73%|███████▎  | 11/15 [03:10<01:09, 17.27s/it]

Epoch: 11 | train_loss: 0.0336 | train_acc: 99.2188 | test_loss: 0.0372 | test_acc: 98.9583 | time: 14.8589


 80%|████████  | 12/15 [03:27<00:51, 17.27s/it]

Epoch: 12 | train_loss: 0.0209 | train_acc: 99.8047 | test_loss: 0.0472 | test_acc: 98.5119 | time: 15.0701


 87%|████████▋ | 13/15 [03:45<00:34, 17.31s/it]

Epoch: 13 | train_loss: 0.0202 | train_acc: 99.6745 | test_loss: 0.0369 | test_acc: 98.9583 | time: 15.1862


 93%|█████████▎| 14/15 [04:02<00:17, 17.34s/it]

Epoch: 14 | train_loss: 0.0177 | train_acc: 99.7396 | test_loss: 0.0386 | test_acc: 99.1071 | time: 15.1810


100%|██████████| 15/15 [04:19<00:00, 17.33s/it]


Epoch: 15 | train_loss: 0.0159 | train_acc: 99.6094 | test_loss: 0.0459 | test_acc: 98.6607 | time: 15.2021
Finished training model for gpt-4-turbo-preview
Training model for meta-llama-llama-2-7b-chat-hf...


  7%|▋         | 1/15 [00:16<03:52, 16.60s/it]

Epoch: 1 | train_loss: 0.6936 | train_acc: 50.2604 | test_loss: 0.7017 | test_acc: 51.2968 | time: 14.4131


 13%|█▎        | 2/15 [00:33<03:36, 16.67s/it]

Epoch: 2 | train_loss: 0.6930 | train_acc: 54.2969 | test_loss: 0.7003 | test_acc: 51.8920 | time: 14.5290


 20%|██        | 3/15 [00:50<03:20, 16.71s/it]

Epoch: 3 | train_loss: 0.6932 | train_acc: 52.1484 | test_loss: 0.7034 | test_acc: 51.3393 | time: 14.5847


 27%|██▋       | 4/15 [01:06<03:04, 16.75s/it]

Epoch: 4 | train_loss: 0.6927 | train_acc: 51.1068 | test_loss: 0.7010 | test_acc: 52.3384 | time: 14.6085


 33%|███▎      | 5/15 [01:23<02:47, 16.75s/it]

Epoch: 5 | train_loss: 0.6717 | train_acc: 58.6589 | test_loss: 0.5879 | test_acc: 66.7730 | time: 14.5859


 40%|████      | 6/15 [01:40<02:30, 16.74s/it]

Epoch: 6 | train_loss: 0.4189 | train_acc: 88.4115 | test_loss: 0.4903 | test_acc: 87.6488 | time: 14.5462


 47%|████▋     | 7/15 [01:57<02:14, 16.78s/it]

Epoch: 7 | train_loss: 0.2620 | train_acc: 93.0990 | test_loss: 0.5585 | test_acc: 81.5689 | time: 14.7058


 53%|█████▎    | 8/15 [02:14<01:58, 16.92s/it]

Epoch: 8 | train_loss: 0.2259 | train_acc: 94.0104 | test_loss: 0.7306 | test_acc: 82.8444 | time: 15.0157


 60%|██████    | 9/15 [02:31<01:41, 16.87s/it]

Epoch: 9 | train_loss: 0.2120 | train_acc: 94.2708 | test_loss: 0.5434 | test_acc: 87.2662 | time: 14.6113


 67%|██████▋   | 10/15 [02:48<01:24, 16.89s/it]

Epoch: 10 | train_loss: 0.2161 | train_acc: 94.1406 | test_loss: 0.4946 | test_acc: 87.3724 | time: 14.7731


 73%|███████▎  | 11/15 [03:04<01:07, 16.86s/it]

Epoch: 11 | train_loss: 0.1590 | train_acc: 96.3542 | test_loss: 0.7014 | test_acc: 88.9031 | time: 14.5843


 80%|████████  | 12/15 [03:21<00:50, 16.85s/it]

Epoch: 12 | train_loss: 0.1578 | train_acc: 96.9401 | test_loss: 0.8106 | test_acc: 87.4150 | time: 14.6748


 87%|████████▋ | 13/15 [03:38<00:33, 16.84s/it]

Epoch: 13 | train_loss: 0.1644 | train_acc: 96.5495 | test_loss: 0.6570 | test_acc: 89.1369 | time: 14.6532


 93%|█████████▎| 14/15 [03:55<00:16, 16.75s/it]

Epoch: 14 | train_loss: 0.2020 | train_acc: 96.7448 | test_loss: 0.5496 | test_acc: 92.0281 | time: 14.3429


100%|██████████| 15/15 [04:11<00:00, 16.77s/it]


Epoch: 15 | train_loss: 0.1428 | train_acc: 97.3958 | test_loss: 0.6013 | test_acc: 92.2194 | time: 14.3229
Finished training model for meta-llama-llama-2-7b-chat-hf
Training model for mistralai-mixtral-8x7b-instruct-v0.1...


  7%|▋         | 1/15 [00:17<04:04, 17.43s/it]

Epoch: 1 | train_loss: 0.6934 | train_acc: 49.6094 | test_loss: 0.6846 | test_acc: 48.6607 | time: 15.1411


 13%|█▎        | 2/15 [00:35<03:48, 17.56s/it]

Epoch: 2 | train_loss: 0.6883 | train_acc: 51.4323 | test_loss: 0.6546 | test_acc: 69.7917 | time: 15.3490


 20%|██        | 3/15 [00:52<03:31, 17.61s/it]

Epoch: 3 | train_loss: 0.6136 | train_acc: 66.1458 | test_loss: 0.3914 | test_acc: 86.3095 | time: 15.3852


 27%|██▋       | 4/15 [01:10<03:14, 17.65s/it]

Epoch: 4 | train_loss: 0.3613 | train_acc: 86.2630 | test_loss: 0.2611 | test_acc: 90.9226 | time: 15.4335


 33%|███▎      | 5/15 [01:27<02:55, 17.59s/it]

Epoch: 5 | train_loss: 0.2738 | train_acc: 89.5182 | test_loss: 0.2112 | test_acc: 92.8571 | time: 15.1780


 40%|████      | 6/15 [01:45<02:38, 17.58s/it]

Epoch: 6 | train_loss: 0.2028 | train_acc: 94.8568 | test_loss: 0.2398 | test_acc: 90.8376 | time: 15.2647


 47%|████▋     | 7/15 [02:03<02:20, 17.58s/it]

Epoch: 7 | train_loss: 0.1807 | train_acc: 95.1172 | test_loss: 0.2023 | test_acc: 91.5179 | time: 15.3044


 53%|█████▎    | 8/15 [02:20<02:03, 17.58s/it]

Epoch: 8 | train_loss: 0.1586 | train_acc: 94.9870 | test_loss: 0.2048 | test_acc: 92.9634 | time: 15.2796


 60%|██████    | 9/15 [02:38<01:45, 17.63s/it]

Epoch: 9 | train_loss: 0.1209 | train_acc: 96.8099 | test_loss: 0.1852 | test_acc: 93.7075 | time: 15.4324


 67%|██████▋   | 10/15 [02:55<01:27, 17.58s/it]

Epoch: 10 | train_loss: 0.1332 | train_acc: 96.4844 | test_loss: 0.2101 | test_acc: 90.9226 | time: 15.2016


 73%|███████▎  | 11/15 [03:13<01:10, 17.61s/it]

Epoch: 11 | train_loss: 0.0870 | train_acc: 97.7214 | test_loss: 0.1593 | test_acc: 94.3452 | time: 15.3335


 80%|████████  | 12/15 [03:31<00:52, 17.59s/it]

Epoch: 12 | train_loss: 0.0697 | train_acc: 98.2422 | test_loss: 0.1761 | test_acc: 94.4515 | time: 15.2718


 87%|████████▋ | 13/15 [03:48<00:35, 17.61s/it]

Epoch: 13 | train_loss: 0.0643 | train_acc: 98.1120 | test_loss: 0.1825 | test_acc: 94.8980 | time: 15.3869


 93%|█████████▎| 14/15 [04:06<00:17, 17.64s/it]

Epoch: 14 | train_loss: 0.0496 | train_acc: 98.5677 | test_loss: 0.1837 | test_acc: 94.3027 | time: 15.3788


100%|██████████| 15/15 [04:23<00:00, 17.60s/it]


Epoch: 15 | train_loss: 0.0475 | train_acc: 98.6328 | test_loss: 0.2114 | test_acc: 93.0697 | time: 15.2034
Finished training model for mistralai-mixtral-8x7b-instruct-v0.1
Training model for mistralai-mistral-7b-instruct-v0.2...


  7%|▋         | 1/15 [00:17<04:09, 17.82s/it]

Epoch: 1 | train_loss: 0.6898 | train_acc: 49.0234 | test_loss: 0.6785 | test_acc: 50.3827 | time: 15.4659


 13%|█▎        | 2/15 [00:35<03:53, 17.99s/it]

Epoch: 2 | train_loss: 0.6826 | train_acc: 54.4271 | test_loss: 0.6606 | test_acc: 69.2602 | time: 15.6490


 20%|██        | 3/15 [00:53<03:35, 17.99s/it]

Epoch: 3 | train_loss: 0.6062 | train_acc: 66.9271 | test_loss: 0.5329 | test_acc: 69.6003 | time: 15.6416


 27%|██▋       | 4/15 [01:11<03:17, 17.93s/it]

Epoch: 4 | train_loss: 0.3795 | train_acc: 82.7474 | test_loss: 0.2565 | test_acc: 90.7738 | time: 15.4845


 33%|███▎      | 5/15 [01:29<02:59, 17.95s/it]

Epoch: 5 | train_loss: 0.2414 | train_acc: 90.6250 | test_loss: 0.2173 | test_acc: 92.5595 | time: 15.6045


 40%|████      | 6/15 [01:47<02:40, 17.87s/it]

Epoch: 6 | train_loss: 0.2705 | train_acc: 90.1693 | test_loss: 0.1970 | test_acc: 92.9634 | time: 15.3676


 47%|████▋     | 7/15 [02:05<02:22, 17.85s/it]

Epoch: 7 | train_loss: 0.1818 | train_acc: 93.6849 | test_loss: 0.1886 | test_acc: 93.3036 | time: 15.4289


 53%|█████▎    | 8/15 [02:23<02:05, 17.90s/it]

Epoch: 8 | train_loss: 0.1534 | train_acc: 94.8568 | test_loss: 0.2897 | test_acc: 89.2857 | time: 15.6507


 60%|██████    | 9/15 [02:41<01:47, 17.93s/it]

Epoch: 9 | train_loss: 0.1454 | train_acc: 94.8568 | test_loss: 0.1766 | test_acc: 93.6012 | time: 15.6428


 67%|██████▋   | 10/15 [02:59<01:29, 18.00s/it]

Epoch: 10 | train_loss: 0.1144 | train_acc: 96.6146 | test_loss: 0.2231 | test_acc: 92.1769 | time: 15.7791


 73%|███████▎  | 11/15 [03:17<01:12, 18.04s/it]

Epoch: 11 | train_loss: 0.1140 | train_acc: 96.0938 | test_loss: 0.1760 | test_acc: 93.1548 | time: 15.7914


 80%|████████  | 12/15 [03:35<00:53, 17.97s/it]

Epoch: 12 | train_loss: 0.1110 | train_acc: 96.6797 | test_loss: 0.4880 | test_acc: 86.9685 | time: 15.4425


 87%|████████▋ | 13/15 [03:53<00:35, 17.99s/it]

Epoch: 13 | train_loss: 0.1724 | train_acc: 94.2708 | test_loss: 0.1755 | test_acc: 94.0476 | time: 15.7016


 93%|█████████▎| 14/15 [04:11<00:17, 17.89s/it]

Epoch: 14 | train_loss: 0.1091 | train_acc: 97.1354 | test_loss: 0.2000 | test_acc: 93.7500 | time: 15.3105


100%|██████████| 15/15 [04:29<00:00, 17.95s/it]


Epoch: 15 | train_loss: 0.0857 | train_acc: 97.0703 | test_loss: 0.1951 | test_acc: 92.7721 | time: 15.7776
Finished training model for mistralai-mistral-7b-instruct-v0.2
Training model for alpaca-7b...


  7%|▋         | 1/15 [00:15<03:43, 15.99s/it]

Epoch: 1 | train_loss: 0.5743 | train_acc: 48.2422 | test_loss: 0.7250 | test_acc: 51.3393 | time: 14.2319


 13%|█▎        | 2/15 [00:32<03:28, 16.04s/it]

Epoch: 2 | train_loss: 0.4767 | train_acc: 66.4062 | test_loss: 0.7669 | test_acc: 56.7815 | time: 14.3082


 20%|██        | 3/15 [00:48<03:12, 16.06s/it]

Epoch: 3 | train_loss: 0.2678 | train_acc: 92.3177 | test_loss: 0.7847 | test_acc: 77.5510 | time: 14.3170


 27%|██▋       | 4/15 [01:04<02:57, 16.14s/it]

Epoch: 4 | train_loss: 0.2570 | train_acc: 93.3594 | test_loss: 0.7679 | test_acc: 71.3223 | time: 14.4905


 33%|███▎      | 5/15 [01:20<02:41, 16.12s/it]

Epoch: 5 | train_loss: 0.2268 | train_acc: 94.2057 | test_loss: 0.5741 | test_acc: 85.8206 | time: 14.3238


 40%|████      | 6/15 [01:36<02:24, 16.05s/it]

Epoch: 6 | train_loss: 0.1764 | train_acc: 96.0286 | test_loss: 0.7112 | test_acc: 76.6156 | time: 14.1388


 47%|████▋     | 7/15 [01:52<02:08, 16.12s/it]

Epoch: 7 | train_loss: 0.1341 | train_acc: 97.0052 | test_loss: 0.5101 | test_acc: 87.7126 | time: 14.5232


 53%|█████▎    | 8/15 [02:08<01:52, 16.09s/it]

Epoch: 8 | train_loss: 0.1303 | train_acc: 97.9818 | test_loss: 0.4759 | test_acc: 88.4566 | time: 14.2486


 60%|██████    | 9/15 [02:24<01:36, 16.04s/it]

Epoch: 9 | train_loss: 0.0958 | train_acc: 98.5677 | test_loss: 0.4343 | test_acc: 90.5825 | time: 14.1307


 67%|██████▋   | 10/15 [02:40<01:19, 15.97s/it]

Epoch: 10 | train_loss: 0.0945 | train_acc: 98.6979 | test_loss: 0.4282 | test_acc: 91.3265 | time: 14.0659


 73%|███████▎  | 11/15 [02:56<01:03, 16.00s/it]

Epoch: 11 | train_loss: 0.0794 | train_acc: 99.4141 | test_loss: 0.4412 | test_acc: 90.1361 | time: 14.2994


 80%|████████  | 12/15 [03:12<00:47, 15.95s/it]

Epoch: 12 | train_loss: 0.0724 | train_acc: 99.4141 | test_loss: 0.4800 | test_acc: 88.3503 | time: 14.0780


 87%|████████▋ | 13/15 [03:28<00:32, 16.03s/it]

Epoch: 13 | train_loss: 0.0559 | train_acc: 99.6094 | test_loss: 0.4300 | test_acc: 90.5825 | time: 14.4175


 93%|█████████▎| 14/15 [03:44<00:15, 15.98s/it]

Epoch: 14 | train_loss: 0.0450 | train_acc: 99.7396 | test_loss: 0.4509 | test_acc: 90.1361 | time: 14.1195


100%|██████████| 15/15 [04:00<00:00, 16.05s/it]


Epoch: 15 | train_loss: 0.0671 | train_acc: 99.6745 | test_loss: 0.3940 | test_acc: 92.7083 | time: 14.6012
Finished training model for alpaca-7b
Training model for chavinlo-alpaca-13b...


  7%|▋         | 1/15 [00:16<03:48, 16.34s/it]

Epoch: 1 | train_loss: 0.5925 | train_acc: 51.4323 | test_loss: 0.6670 | test_acc: 51.7432 | time: 14.3605


 13%|█▎        | 2/15 [00:33<03:35, 16.58s/it]

Epoch: 2 | train_loss: 0.5217 | train_acc: 61.4583 | test_loss: 0.6160 | test_acc: 68.4736 | time: 14.7516


 20%|██        | 3/15 [00:49<03:17, 16.43s/it]

Epoch: 3 | train_loss: 0.3578 | train_acc: 88.8021 | test_loss: 0.6508 | test_acc: 79.8469 | time: 14.2665


 27%|██▋       | 4/15 [01:05<03:01, 16.48s/it]

Epoch: 4 | train_loss: 0.2820 | train_acc: 90.2344 | test_loss: 0.5680 | test_acc: 82.9719 | time: 14.5886


 33%|███▎      | 5/15 [01:22<02:46, 16.65s/it]

Epoch: 5 | train_loss: 0.2062 | train_acc: 94.8568 | test_loss: 0.4977 | test_acc: 85.2041 | time: 14.9516


 40%|████      | 6/15 [01:39<02:29, 16.57s/it]

Epoch: 6 | train_loss: 0.1507 | train_acc: 97.0703 | test_loss: 0.4209 | test_acc: 89.7534 | time: 14.4386


 47%|████▋     | 7/15 [01:55<02:12, 16.50s/it]

Epoch: 7 | train_loss: 0.1017 | train_acc: 97.9167 | test_loss: 0.4119 | test_acc: 91.8367 | time: 14.3594


 53%|█████▎    | 8/15 [02:11<01:55, 16.45s/it]

Epoch: 8 | train_loss: 0.0972 | train_acc: 98.8281 | test_loss: 0.4286 | test_acc: 91.7304 | time: 14.3549


 60%|██████    | 9/15 [02:28<01:38, 16.44s/it]

Epoch: 9 | train_loss: 0.0818 | train_acc: 98.6979 | test_loss: 0.3424 | test_acc: 94.1539 | time: 14.4404


 67%|██████▋   | 10/15 [02:44<01:22, 16.47s/it]

Epoch: 10 | train_loss: 0.0780 | train_acc: 99.4792 | test_loss: 0.3609 | test_acc: 93.2611 | time: 14.5661


 73%|███████▎  | 11/15 [03:01<01:05, 16.43s/it]

Epoch: 11 | train_loss: 0.0709 | train_acc: 99.2839 | test_loss: 0.3370 | test_acc: 95.0468 | time: 14.3526


 80%|████████  | 12/15 [03:17<00:49, 16.43s/it]

Epoch: 12 | train_loss: 0.0694 | train_acc: 99.4141 | test_loss: 0.3377 | test_acc: 94.7491 | time: 14.4468


 87%|████████▋ | 13/15 [03:33<00:32, 16.34s/it]

Epoch: 13 | train_loss: 0.0688 | train_acc: 99.5443 | test_loss: 0.3563 | test_acc: 93.8563 | time: 14.1423


 93%|█████████▎| 14/15 [03:50<00:16, 16.33s/it]

Epoch: 14 | train_loss: 0.0653 | train_acc: 99.5443 | test_loss: 0.4151 | test_acc: 91.9218 | time: 14.3075


100%|██████████| 15/15 [04:06<00:00, 16.44s/it]


Epoch: 15 | train_loss: 0.0545 | train_acc: 99.7396 | test_loss: 0.3338 | test_acc: 94.7491 | time: 14.4943
Finished training model for chavinlo-alpaca-13b
Training model for text-bison-002...


  7%|▋         | 1/15 [00:17<04:01, 17.24s/it]

Epoch: 1 | train_loss: 0.7282 | train_acc: 49.9349 | test_loss: 0.6958 | test_acc: 46.8750 | time: 15.0010


 13%|█▎        | 2/15 [00:34<03:42, 17.13s/it]

Epoch: 2 | train_loss: 0.6929 | train_acc: 49.9349 | test_loss: 0.6964 | test_acc: 43.0060 | time: 14.8135


 20%|██        | 3/15 [00:51<03:25, 17.16s/it]

Epoch: 3 | train_loss: 0.6931 | train_acc: 47.7214 | test_loss: 0.6951 | test_acc: 47.7679 | time: 14.9594


 27%|██▋       | 4/15 [01:08<03:09, 17.23s/it]

Epoch: 4 | train_loss: 0.6928 | train_acc: 50.1302 | test_loss: 0.6949 | test_acc: 48.3631 | time: 15.0969


 33%|███▎      | 5/15 [01:26<02:52, 17.25s/it]

Epoch: 5 | train_loss: 0.6932 | train_acc: 46.9401 | test_loss: 0.6976 | test_acc: 32.2917 | time: 15.0620


 40%|████      | 6/15 [01:43<02:35, 17.27s/it]

Epoch: 6 | train_loss: 0.6929 | train_acc: 49.0885 | test_loss: 0.6949 | test_acc: 48.3631 | time: 15.0031


 47%|████▋     | 7/15 [02:00<02:18, 17.25s/it]

Epoch: 7 | train_loss: 0.6933 | train_acc: 47.9167 | test_loss: 0.6957 | test_acc: 45.5357 | time: 14.9649


 53%|█████▎    | 8/15 [02:17<02:00, 17.28s/it]

Epoch: 8 | train_loss: 0.6937 | train_acc: 50.3906 | test_loss: 0.7030 | test_acc: 51.3393 | time: 15.1060


 60%|██████    | 9/15 [02:35<01:43, 17.32s/it]

Epoch: 9 | train_loss: 0.6955 | train_acc: 49.7396 | test_loss: 0.6999 | test_acc: 45.3231 | time: 15.1240


 67%|██████▋   | 10/15 [02:52<01:26, 17.35s/it]

Epoch: 10 | train_loss: 0.6939 | train_acc: 50.0000 | test_loss: 0.7011 | test_acc: 49.6173 | time: 15.1693


 73%|███████▎  | 11/15 [03:09<01:09, 17.26s/it]

Epoch: 11 | train_loss: 0.6939 | train_acc: 49.2839 | test_loss: 0.6975 | test_acc: 33.5672 | time: 14.8383


 80%|████████  | 12/15 [03:27<00:51, 17.29s/it]

Epoch: 12 | train_loss: 0.6927 | train_acc: 49.1536 | test_loss: 0.6934 | test_acc: 48.6607 | time: 15.0782


 87%|████████▋ | 13/15 [03:44<00:34, 17.27s/it]

Epoch: 13 | train_loss: 0.6971 | train_acc: 49.3490 | test_loss: 0.7010 | test_acc: 51.3393 | time: 14.9905


 93%|█████████▎| 14/15 [04:01<00:17, 17.22s/it]

Epoch: 14 | train_loss: 0.6935 | train_acc: 48.8281 | test_loss: 0.6945 | test_acc: 42.4107 | time: 14.8754


100%|██████████| 15/15 [04:18<00:00, 17.25s/it]

Epoch: 15 | train_loss: 0.6955 | train_acc: 50.1953 | test_loss: 0.7043 | test_acc: 51.3393 | time: 14.9792
Finished training model for text-bison-002



