In [3]:
#!pip3 install datasets

In [2]:
#!pip3 install transformers -U;

In [3]:
#!pip3 install accelerate -U

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import classification_report
from sklearn.model_selection import cross_validate
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

import matplotlib.pyplot as plt

import torch
import tensorflow as tf

from sklearn.model_selection import train_test_split

import datasets
from datasets import Dataset, DatasetDict

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer




  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# !git clone https://github.com/cse151a-DrugReviewAnalysis/DrugReviewAnalysis.git

In [4]:
# Get dataset for working locally
df_train = pd.read_csv("drugsTrain_processed_for_Model_3.csv", sep="\t")
df_test = pd.read_csv("drugsTest_processed_for_Model_3.csv", sep="\t")

In [7]:
# Get dataset when working on colab
# df_train = pd.read_csv("DrugReviewAnalysis/Model 3/drugsTrain_processed_for_Model_3.csv", sep="\t")
# df_test = pd.read_csv("DrugReviewAnalysis/Model 3/drugsTest_processed_for_Model_3.csv", sep="\t")

In [5]:
sample_size = 0.2

In [6]:
df_train = df_train.sample(frac=sample_size, random_state=42).reset_index()
df_test = df_test.sample(frac=sample_size, random_state=42).reset_index()

In [7]:
df_train.shape, df_test.shape

((31900, 8), (10640, 8))

In [8]:
df_val, df_test_new = train_test_split(df_test, test_size=0.6, random_state=42)

In [9]:
ds_train = Dataset.from_pandas(df_train[["processed_review", "rating"]])
ds_val = Dataset.from_pandas(df_val.reset_index()[["processed_review", "rating"]])
ds_test = Dataset.from_pandas(df_test_new.reset_index()[["processed_review", "rating"]])

In [10]:
ds = DatasetDict()

ds["train"] = ds_train
ds["validation"] = ds_val
ds["test"] = ds_test

In [14]:
ds

DatasetDict({
    train: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 31900
    })
    validation: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 4256
    })
    test: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 6384
    })
})

In [11]:
# dataset including usefulCount a part of the review to compare against purely the reviews

# local
df_train_useful = pd.read_csv("drugsTrain_processed_with_usefulCount.csv", sep="\t")
df_test_useful = pd.read_csv("drugsTest_processed_with_usefulCount.csv", sep="\t")

#colab
# df_train = pd.read_csv("DrugReviewAnalysis/drugsTrain_processed_for_Model_3.csv", sep="\t")
# df_test = pd.read_csv("DrugReviewAnalysis/drugsTest_processed_for_Model_3.csv", sep="\t")

df_train_useful = df_train_useful.sample(frac=sample_size, random_state=42).reset_index()
df_test_useful = df_test_useful.sample(frac=sample_size, random_state=42).reset_index()

df_val_useful, df_test_new_useful = train_test_split(df_test_useful, test_size=0.6, random_state=42)

ds_train_useful = Dataset.from_pandas(df_train_useful[["processed_review", "rating"]])
ds_val_useful = Dataset.from_pandas(df_val_useful.reset_index()[["processed_review", "rating"]])
ds_test_useful = Dataset.from_pandas(df_test_new_useful.reset_index()[["processed_review", "rating"]])

ds_useful = DatasetDict()

ds_useful["train"] = ds_train_useful
ds_useful["validation"] = ds_val_useful
ds_useful["test"] = ds_test_useful

ds_useful

DatasetDict({
    train: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 31900
    })
    validation: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 4256
    })
    test: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 6384
    })
})

In [12]:
# dataset including preprocessing from original set of models

# local
df_train_prev = pd.read_csv("../drugsTrain_processed.csv", sep="\t")
df_test_prev = pd.read_csv("../drugsTest_processed.csv", sep="\t")

# colab
# df_train = pd.read_csv("DrugReviewAnalysis/drugsTrain_processed.csv", sep="\t")
# df_test = pd.read_csv("DrugReviewAnalysis/drugsTest_processed.csv", sep="\t")

df_train_prev = df_train_prev.sample(frac=sample_size, random_state=42).reset_index()
df_test_prev = df_test_prev.sample(frac=sample_size, random_state=42).reset_index()

df_val_prev, df_test_new_prev = train_test_split(df_test_prev, test_size=0.6, random_state=42)

ds_train_prev = Dataset.from_pandas(df_train_prev[["processed_review", "rating"]])
df_val_prev = Dataset.from_pandas(df_val_prev.reset_index()[["processed_review", "rating"]])
df_test_new_prev = Dataset.from_pandas(df_test_new_prev.reset_index()[["processed_review", "rating"]])

ds_prev = DatasetDict()

ds_prev["train"] = ds_train_prev
ds_prev["validation"] = df_val_prev
ds_prev["test"] = df_test_new_prev

ds_prev

DatasetDict({
    train: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 31900
    })
    validation: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 4256
    })
    test: Dataset({
        features: ['processed_review', 'rating'],
        num_rows: 6384
    })
})

In [13]:
ds["test"][0], ds_prev["test"][0], ds_useful["test"][0]

({'processed_review': 'Brought daily migraine headaches down to 1-4 a month',
  'rating': 9.0},
 {'processed_review': ' brought daily migraine headache 14 month ',
  'rating': 9.0},
 {'processed_review': 'Brought daily migraine headaches down to 1-4 a month 18 users found this comment helpful.',
  'rating': 9.0})

In [14]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [15]:
def preprocess(samples):
  text = samples["processed_review"]
  encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_attention_mask=True)
  encoding["labels"] = samples["rating"]
  return encoding

### Predicting with usefulCount wrapped into the review

In [16]:
encoded_dataset = ds_useful.map(preprocess, batched=True, remove_columns=ds_useful["train"].column_names)

Map: 100%|██████████| 31900/31900 [00:03<00:00, 9433.47 examples/s]
Map: 100%|██████████| 4256/4256 [00:00<00:00, 6527.63 examples/s]
Map: 100%|██████████| 6384/6384 [00:00<00:00, 9327.82 examples/s] 


In [17]:
encoded_dataset.set_format("torch")

In [18]:
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=1)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:

args = TrainingArguments(
    output_dir="comparison/bert-compare-with-usefulCount",
    # output_dir = "DrugReviewAnalysis/Model 3/comparison/bert-compare-with-usefulCount", # for colab
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
)

In [19]:
def compute_metrics(eval_pred):
  predictions, labels = eval_pred
  mse = mean_squared_error(labels, predictions)
  pred_modified = np.where(predictions < 10.0, np.round(predictions), 10.0)
  print(labels)
  acc = accuracy_score(labels, pred_modified)
  f1 = f1_score(labels, pred_modified, average="macro")
  return {"mse": mse, "accuracy-approximation": acc, "f1-approximation": f1}

In [23]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [24]:
trainer.train()

 20%|██        | 499/2495 [02:59<09:33,  3.48it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                  
 20%|██        | 499/2495 [03:08<09:33,  3.48it/s]Checkpoint destination directory comparison/bert-compare-with-usefulCount\checkpoint-499 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.6881163120269775, 'eval_mse': 3.6881163120269775, 'eval_accuracy-approximation': 0.3317669172932331, 'eval_f1-approximation': 0.22899504508656926, 'eval_runtime': 8.049, 'eval_samples_per_second': 528.763, 'eval_steps_per_second': 8.324, 'epoch': 1.0}


 20%|██        | 500/2495 [03:11<1:58:24,  3.56s/it]

{'loss': 7.3863, 'grad_norm': 77.19256591796875, 'learning_rate': 1.5991983967935874e-05, 'epoch': 1.0}


 40%|████      | 998/2495 [06:12<08:01,  3.11it/s]  

[ 9.  1.  7. ... 10. 10.  1.]


                                                  
 40%|████      | 998/2495 [06:21<08:01,  3.11it/s]Checkpoint destination directory comparison/bert-compare-with-usefulCount\checkpoint-998 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.46212100982666, 'eval_mse': 3.4621212482452393, 'eval_accuracy-approximation': 0.3717105263157895, 'eval_f1-approximation': 0.24210922837658386, 'eval_runtime': 9.1672, 'eval_samples_per_second': 464.265, 'eval_steps_per_second': 7.309, 'epoch': 2.0}


 40%|████      | 1000/2495 [06:25<1:12:21,  2.90s/it]

{'loss': 3.0433, 'grad_norm': 24.15764045715332, 'learning_rate': 1.1983967935871745e-05, 'epoch': 2.0}


                                                     
 60%|██████    | 1497/2495 [09:43<05:18,  3.13it/s]Checkpoint destination directory comparison/bert-compare-with-usefulCount\checkpoint-1497 already exists and is non-empty. Saving will proceed but saved results may be invalid.


[ 9.  1.  7. ... 10. 10.  1.]
{'eval_loss': 3.35109543800354, 'eval_mse': 3.351095676422119, 'eval_accuracy-approximation': 0.3881578947368421, 'eval_f1-approximation': 0.27139033289153447, 'eval_runtime': 9.1061, 'eval_samples_per_second': 467.378, 'eval_steps_per_second': 7.358, 'epoch': 3.0}


 60%|██████    | 1500/2495 [09:47<34:27,  2.08s/it]  

{'loss': 2.1751, 'grad_norm': 33.4163818359375, 'learning_rate': 7.975951903807616e-06, 'epoch': 3.01}


 80%|████████  | 1996/2495 [12:55<02:39,  3.13it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                   
 80%|████████  | 1996/2495 [13:05<02:39,  3.13it/s]Checkpoint destination directory comparison/bert-compare-with-usefulCount\checkpoint-1996 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.218914270401001, 'eval_mse': 3.218914270401001, 'eval_accuracy-approximation': 0.3881578947368421, 'eval_f1-approximation': 0.27804047500657275, 'eval_runtime': 9.1158, 'eval_samples_per_second': 466.884, 'eval_steps_per_second': 7.35, 'epoch': 4.0}


 80%|████████  | 2000/2495 [13:09<13:46,  1.67s/it]

{'loss': 1.6707, 'grad_norm': 28.911714553833008, 'learning_rate': 3.967935871743487e-06, 'epoch': 4.01}


100%|██████████| 2495/2495 [16:19<00:00,  3.13it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                   
100%|██████████| 2495/2495 [16:29<00:00,  3.13it/s]Checkpoint destination directory comparison/bert-compare-with-usefulCount\checkpoint-2495 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.2742223739624023, 'eval_mse': 3.2742223739624023, 'eval_accuracy-approximation': 0.4043703007518797, 'eval_f1-approximation': 0.2772368650866167, 'eval_runtime': 9.1659, 'eval_samples_per_second': 464.331, 'eval_steps_per_second': 7.31, 'epoch': 5.0}


100%|██████████| 2495/2495 [16:32<00:00,  2.51it/s]

{'train_runtime': 992.5743, 'train_samples_per_second': 160.693, 'train_steps_per_second': 2.514, 'train_loss': 3.135062532291145, 'epoch': 5.0}





TrainOutput(global_step=2495, training_loss=3.135062532291145, metrics={'train_runtime': 992.5743, 'train_samples_per_second': 160.693, 'train_steps_per_second': 2.514, 'train_loss': 3.135062532291145, 'epoch': 5.0})

In [25]:
trainer.evaluate()

100%|██████████| 67/67 [00:09<00:00,  6.83it/s]

[ 9.  1.  7. ... 10. 10.  1.]





{'eval_loss': 3.218914270401001,
 'eval_mse': 3.218914270401001,
 'eval_accuracy-approximation': 0.3881578947368421,
 'eval_f1-approximation': 0.27804047500657275,
 'eval_runtime': 10.0718,
 'eval_samples_per_second': 422.564,
 'eval_steps_per_second': 6.652,
 'epoch': 5.0}

In [26]:
trainer.save_model("./bert-regression")

In [None]:
# model = AutoModelForSequenceClassification.from_pretrained("./bert-regression/")

# trainer = Trainer(
#     model=model,
#     args=args,
#     train_dataset=encoded_dataset["train"],
#     eval_dataset=encoded_dataset["validation"],
#     tokenizer=tokenizer,
#     compute_metrics=compute_metrics
# )

In [27]:
trainer.evaluate(eval_dataset=encoded_dataset["test"])

100%|██████████| 100/100 [00:14<00:00,  6.71it/s]

[ 9. 10. 10. ...  9.  5.  2.]





{'eval_loss': 3.3146562576293945,
 'eval_mse': 3.3146562576293945,
 'eval_accuracy-approximation': 0.37875939849624063,
 'eval_f1-approximation': 0.27207007061246635,
 'eval_runtime': 15.0594,
 'eval_samples_per_second': 423.921,
 'eval_steps_per_second': 6.64,
 'epoch': 5.0}

### Predicting with just the reviews

In [28]:
encoded_dataset2 = ds.map(preprocess, batched=True, remove_columns=ds["train"].column_names)

Map: 100%|██████████| 31900/31900 [00:03<00:00, 9368.96 examples/s] 
Map: 100%|██████████| 4256/4256 [00:00<00:00, 9374.40 examples/s] 
Map: 100%|██████████| 6384/6384 [00:00<00:00, 9990.57 examples/s] 


In [29]:
encoded_dataset2.set_format("torch")

In [30]:
model2 = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=1)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [31]:
args2 = TrainingArguments(
    output_dir="comparison/bert-compare-no-usefulCount", #for local
    # output_dir = "DrugReviewAnalysis/Model 3/comparison/bert-no-usefulCount", # for colab
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
)

In [32]:
trainer2 = Trainer(
    model=model2,
    args=args2,
    train_dataset=encoded_dataset2["train"],
    eval_dataset=encoded_dataset2["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [33]:
trainer2.train()

 20%|██        | 499/2495 [03:08<10:23,  3.20it/s]

[ 9.  1.  7. ... 10. 10.  1.]



 20%|██        | 499/2495 [03:17<10:23,  3.20it/s]Checkpoint destination directory comparison/bert-compare-no-usefulCount\checkpoint-499 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.7776994705200195, 'eval_mse': 3.7776992321014404, 'eval_accuracy-approximation': 0.29652255639097747, 'eval_f1-approximation': 0.22485919911791336, 'eval_runtime': 8.9183, 'eval_samples_per_second': 477.22, 'eval_steps_per_second': 7.513, 'epoch': 1.0}


 20%|██        | 500/2495 [03:20<2:13:15,  4.01s/it]

{'loss': 7.8737, 'grad_norm': 59.767459869384766, 'learning_rate': 1.5991983967935874e-05, 'epoch': 1.0}


 40%|████      | 998/2495 [06:25<07:48,  3.20it/s]  

[ 9.  1.  7. ... 10. 10.  1.]



 40%|████      | 998/2495 [06:35<07:48,  3.20it/s]Checkpoint destination directory comparison/bert-compare-no-usefulCount\checkpoint-998 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.550009250640869, 'eval_mse': 3.550009250640869, 'eval_accuracy-approximation': 0.3693609022556391, 'eval_f1-approximation': 0.24822161151121427, 'eval_runtime': 8.9696, 'eval_samples_per_second': 474.493, 'eval_steps_per_second': 7.47, 'epoch': 2.0}


 40%|████      | 1000/2495 [06:44<1:38:41,  3.96s/it]

{'loss': 3.1541, 'grad_norm': 22.32584571838379, 'learning_rate': 1.1983967935871745e-05, 'epoch': 2.0}


 60%|██████    | 1497/2495 [09:49<05:09,  3.22it/s]  

[ 9.  1.  7. ... 10. 10.  1.]



 60%|██████    | 1497/2495 [09:58<05:09,  3.22it/s]Checkpoint destination directory comparison/bert-compare-no-usefulCount\checkpoint-1497 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.3958699703216553, 'eval_mse': 3.3958699703216553, 'eval_accuracy-approximation': 0.39027255639097747, 'eval_f1-approximation': 0.2646864659011867, 'eval_runtime': 8.9097, 'eval_samples_per_second': 477.682, 'eval_steps_per_second': 7.52, 'epoch': 3.0}


 60%|██████    | 1500/2495 [10:02<36:13,  2.18s/it]  

{'loss': 2.2421, 'grad_norm': 25.235130310058594, 'learning_rate': 7.975951903807616e-06, 'epoch': 3.01}


 80%|████████  | 1996/2495 [13:07<02:35,  3.21it/s]

[ 9.  1.  7. ... 10. 10.  1.]



 80%|████████  | 1996/2495 [13:16<02:35,  3.21it/s]Checkpoint destination directory comparison/bert-compare-no-usefulCount\checkpoint-1996 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.255185842514038, 'eval_mse': 3.255185842514038, 'eval_accuracy-approximation': 0.3832236842105263, 'eval_f1-approximation': 0.26817406328422794, 'eval_runtime': 8.9395, 'eval_samples_per_second': 476.089, 'eval_steps_per_second': 7.495, 'epoch': 4.0}


 80%|████████  | 2000/2495 [13:20<13:18,  1.61s/it]

{'loss': 1.726, 'grad_norm': 40.28692626953125, 'learning_rate': 3.967935871743487e-06, 'epoch': 4.01}


100%|██████████| 2495/2495 [16:24<00:00,  3.22it/s]

[ 9.  1.  7. ... 10. 10.  1.]



100%|██████████| 2495/2495 [16:33<00:00,  3.22it/s]Checkpoint destination directory comparison/bert-compare-no-usefulCount\checkpoint-2495 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 3.307123899459839, 'eval_mse': 3.307123899459839, 'eval_accuracy-approximation': 0.4041353383458647, 'eval_f1-approximation': 0.27595722680479773, 'eval_runtime': 8.8977, 'eval_samples_per_second': 478.325, 'eval_steps_per_second': 7.53, 'epoch': 5.0}


100%|██████████| 2495/2495 [16:36<00:00,  2.50it/s]

{'train_runtime': 996.7492, 'train_samples_per_second': 160.02, 'train_steps_per_second': 2.503, 'train_loss': 3.291582848313815, 'epoch': 5.0}





TrainOutput(global_step=2495, training_loss=3.291582848313815, metrics={'train_runtime': 996.7492, 'train_samples_per_second': 160.02, 'train_steps_per_second': 2.503, 'train_loss': 3.291582848313815, 'epoch': 5.0})

In [34]:
trainer2.evaluate()

100%|██████████| 67/67 [00:09<00:00,  6.79it/s]

[ 9.  1.  7. ... 10. 10.  1.]





{'eval_loss': 3.255185842514038,
 'eval_mse': 3.255185842514038,
 'eval_accuracy-approximation': 0.3832236842105263,
 'eval_f1-approximation': 0.26817406328422794,
 'eval_runtime': 10.0088,
 'eval_samples_per_second': 425.226,
 'eval_steps_per_second': 6.694,
 'epoch': 5.0}

In [35]:
trainer2.evaluate(eval_dataset=encoded_dataset2["test"])

100%|██████████| 100/100 [00:14<00:00,  7.09it/s]

[ 9. 10. 10. ...  9.  5.  2.]





{'eval_loss': 3.425740957260132,
 'eval_mse': 3.425740957260132,
 'eval_accuracy-approximation': 0.37202380952380953,
 'eval_f1-approximation': 0.2603080149088942,
 'eval_runtime': 14.2575,
 'eval_samples_per_second': 447.766,
 'eval_steps_per_second': 7.014,
 'epoch': 5.0}

### Predicting with old preprocessing method

In [20]:
encoded_dataset3 = ds_prev.map(preprocess, batched=True, remove_columns=ds_prev["train"].column_names)

Map: 100%|██████████| 31900/31900 [00:02<00:00, 11063.81 examples/s]
Map: 100%|██████████| 4256/4256 [00:00<00:00, 12229.82 examples/s]
Map: 100%|██████████| 6384/6384 [00:00<00:00, 12206.43 examples/s]


In [21]:
encoded_dataset3.set_format("torch")

In [22]:
model3 = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=1)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
args3 = TrainingArguments(
    output_dir="comparison/bert-regression-old-preprocessing", # for local
    # output_dir = "DrugReviewAnalysis/Model 3/comparison/bert-regression-old-preprocessing", # for colab
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
)

In [25]:
trainer3 = Trainer(
    model=model3,
    args=args3,
    train_dataset=encoded_dataset3["train"],
    eval_dataset=encoded_dataset3["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [26]:
trainer3.train()

 20%|██        | 499/2495 [02:47<09:21,  3.55it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                  
 20%|██        | 499/2495 [02:56<09:21,  3.55it/s]

{'eval_loss': 5.171387672424316, 'eval_mse': 5.171388149261475, 'eval_accuracy-approximation': 0.18186090225563908, 'eval_f1-approximation': 0.14509035140217943, 'eval_runtime': 8.0538, 'eval_samples_per_second': 528.445, 'eval_steps_per_second': 8.319, 'epoch': 1.0}


 20%|██        | 500/2495 [02:59<1:59:19,  3.59s/it]

{'loss': 9.2498, 'grad_norm': 66.50765991210938, 'learning_rate': 1.5991983967935874e-05, 'epoch': 1.0}


 40%|████      | 998/2495 [05:44<07:01,  3.55it/s]  

[ 9.  1.  7. ... 10. 10.  1.]


                                                  
 40%|████      | 998/2495 [05:53<07:01,  3.55it/s]

{'eval_loss': 4.687226295471191, 'eval_mse': 4.687226295471191, 'eval_accuracy-approximation': 0.2793703007518797, 'eval_f1-approximation': 0.1988889931706918, 'eval_runtime': 8.0563, 'eval_samples_per_second': 528.28, 'eval_steps_per_second': 8.316, 'epoch': 2.0}


 40%|████      | 1000/2495 [05:56<1:05:28,  2.63s/it]

{'loss': 4.5881, 'grad_norm': 62.662906646728516, 'learning_rate': 1.1983967935871745e-05, 'epoch': 2.0}


 60%|██████    | 1497/2495 [08:42<04:41,  3.55it/s]  

[ 9.  1.  7. ... 10. 10.  1.]


                                                   
 60%|██████    | 1497/2495 [08:50<04:41,  3.55it/s]

{'eval_loss': 4.7231340408325195, 'eval_mse': 4.723134517669678, 'eval_accuracy-approximation': 0.32377819548872183, 'eval_f1-approximation': 0.22223727353343134, 'eval_runtime': 8.0507, 'eval_samples_per_second': 528.647, 'eval_steps_per_second': 8.322, 'epoch': 3.0}


 60%|██████    | 1500/2495 [08:53<31:38,  1.91s/it]

{'loss': 3.3897, 'grad_norm': 66.24119567871094, 'learning_rate': 7.975951903807616e-06, 'epoch': 3.01}


 80%|████████  | 1996/2495 [11:39<02:20,  3.56it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                   
 80%|████████  | 1996/2495 [11:47<02:20,  3.56it/s]

{'eval_loss': 4.640283584594727, 'eval_mse': 4.640283584594727, 'eval_accuracy-approximation': 0.3230733082706767, 'eval_f1-approximation': 0.2297983605231786, 'eval_runtime': 8.052, 'eval_samples_per_second': 528.566, 'eval_steps_per_second': 8.321, 'epoch': 4.0}


 80%|████████  | 2000/2495 [11:51<11:47,  1.43s/it]

{'loss': 2.6128, 'grad_norm': 66.18108367919922, 'learning_rate': 3.967935871743487e-06, 'epoch': 4.01}


100%|██████████| 2495/2495 [14:35<00:00,  3.55it/s]

[ 9.  1.  7. ... 10. 10.  1.]


                                                   
100%|██████████| 2495/2495 [14:44<00:00,  3.55it/s]

{'eval_loss': 4.722833156585693, 'eval_mse': 4.722833633422852, 'eval_accuracy-approximation': 0.3369360902255639, 'eval_f1-approximation': 0.23200962959331348, 'eval_runtime': 8.043, 'eval_samples_per_second': 529.159, 'eval_steps_per_second': 8.33, 'epoch': 5.0}


100%|██████████| 2495/2495 [14:47<00:00,  2.81it/s]

{'train_runtime': 887.4876, 'train_samples_per_second': 179.721, 'train_steps_per_second': 2.811, 'train_loss': 4.410465021840556, 'epoch': 5.0}





TrainOutput(global_step=2495, training_loss=4.410465021840556, metrics={'train_runtime': 887.4876, 'train_samples_per_second': 179.721, 'train_steps_per_second': 2.811, 'train_loss': 4.410465021840556, 'epoch': 5.0})

In [27]:
trainer3.evaluate()

100%|██████████| 67/67 [00:07<00:00,  8.53it/s]

[ 9.  1.  7. ... 10. 10.  1.]





{'eval_loss': 4.640283584594727,
 'eval_mse': 4.640283584594727,
 'eval_accuracy-approximation': 0.3230733082706767,
 'eval_f1-approximation': 0.2297983605231786,
 'eval_runtime': 8.112,
 'eval_samples_per_second': 524.652,
 'eval_steps_per_second': 8.259,
 'epoch': 5.0}

In [28]:
trainer3.evaluate(eval_dataset=encoded_dataset3["test"])

100%|██████████| 100/100 [00:11<00:00,  8.42it/s]

[ 9. 10. 10. ...  9.  5.  2.]





{'eval_loss': 4.751387596130371,
 'eval_mse': 4.751387596130371,
 'eval_accuracy-approximation': 0.32879072681704263,
 'eval_f1-approximation': 0.2348171766226938,
 'eval_runtime': 11.9718,
 'eval_samples_per_second': 533.253,
 'eval_steps_per_second': 8.353,
 'epoch': 5.0}