In [1]:
! pip install -U -q transformers
! pip install -q datasets sentence-transformers evaluate
! pip install -q accelerate -U

In [2]:
import numpy as np
import pandas as pd
import time
import re
from tqdm import notebook
import json

import seaborn as sns
import matplotlib.pyplot as plt

import os
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers import AutoModel
from transformers import AutoConfig
from transformers import get_cosine_schedule_with_warmup

from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.metrics import r2_score

from datasets import Dataset
import evaluate

from sentence_transformers import SentenceTransformer, models, util

import gc
import json

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback, TextClassificationPipeline



In [3]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

In [5]:
SEED = 97
seed_everything(SEED)

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: ', device.type)

Device:  cuda


In [7]:
def get_rt_dfs():
    dataset = load_dataset('rotten_tomatoes')
    train_df = dataset['train'].to_pandas().dropna()[['text', 'label']]
    val_df = dataset['validation'].to_pandas().dropna()[['text', 'label']]
    test_df = dataset['test'].to_pandas().dropna()[['text', 'label']]
    
    train_df.columns = ['sentence', 'label']
    val_df.columns = ['sentence', 'label']
    test_df.columns = ['sentence', 'label']

    return train_df, val_df, test_df

In [8]:
def get_fin_sent_analysis_dfs():
    df = pd.read_csv('/kaggle/input/financial-sentiment-analysis/data.csv')
    df = df[df.Sentiment != 'neutral'].reset_index(drop=True)
    df.columns = ['sentence', 'label']
    df['label'] = df['label'].apply(lambda x: 0 if x=='negative' else 1)
    
    return df

In [9]:
def get_sem_eval_2017_task4_dfs():

    dataset = load_dataset("maxmoynan/SemEval2017-Task4aEnglish", split='development')
    print(dataset)
#     train_df = dataset['train'].to_pandas().dropna()[['tweet', 'sentiment']]
#     val_df = dataset['development'].to_pandas().dropna()[['tweet', 'sentiment']]
    test_df = dataset.to_pandas().dropna()[['tweet', 'sentiment']]
    
#     train_df.columns = ['sentence', 'label']
#     val_df.columns = ['sentence', 'label']
    test_df.columns = ['sentence', 'label']
    
#     train_df = train_df[train_df.label != 1].reset_index(drop=True)
#     val_df = val_df[val_df.label != 1].reset_index(drop=True)
    test_df = test_df[test_df.label != 1].reset_index(drop=True)
    
#     train_df['label'] = train_df[train_df.label == 2].apply(lambda x: 1)
#     val_df['label'] = val_df[val_df.label == 2].apply(lambda x: 1)
    test_df['label'] = test_df[test_df.label == 2].apply(lambda x: 1)

    return test_df

In [10]:
def get_sst2_dataset_dfs():
    dataset = load_dataset('glue', 'sst2')
    train_df = dataset['train'].to_pandas().dropna()[['sentence', 'label']]
    test_df = dataset['validation'].to_pandas().dropna()[['sentence', 'label']]

    return train_df, test_df

In [11]:
def get_imdb_dataset_dfs():
    dataset = load_dataset('imdb')
    train_df = dataset['train'].to_pandas().dropna()
    train_df.columns = ['sentence', 'label']

    test_df = dataset['test'].to_pandas().dropna()
    test_df.columns = ['sentence', 'label']

    return train_df, test_df

In [12]:
def get_reddit_dfs():
    df = pd.read_csv('/kaggle/input/twitter-and-reddit-sentimental-analysis-dataset/Reddit_Data.csv')
    df = df[df.category != 0].reset_index(drop=True)
    df.columns = ['sentence', 'label']
    df['label'] = df['label'].apply(lambda x: 0 if x==-1 else 1)
    
    return df

In [13]:
def get_twitter_rd_dfs():
    df = pd.read_csv('/kaggle/input/twitter-and-reddit-sentimental-analysis-dataset/Twitter_Data.csv').dropna()
    df = df[df.category != 0].sample(frac=0.5).reset_index(drop=True)
    df.columns = ['sentence', 'label']
    df['label'] = df['label'].apply(lambda x: 0 if x==-1 else 1)
    
    return df

In [14]:
def get_kindle_reviews_dfs():
    df = pd.read_csv('/kaggle/input/amazon-kindle-book-review-for-sentiment-analysis/preprocessed_kindle_review .csv')
    df = df[['reviewText', 'rating']].reset_index(drop=True)
    df.columns = ['sentence', 'label']
    df['label'] = df['label'].apply(lambda x: 0 if x<3 else 1)
    return df

In [15]:
def get_tweet_eval_dfs():
    dataset = load_dataset('tweet_eval', 'sentiment')
    train_df = dataset['train'].to_pandas().dropna()
    val_df = dataset['validation'].to_pandas().dropna()
    test_df = dataset['test'].to_pandas().dropna()

    train_df.columns = ['sentence', 'label']
    val_df.columns = ['sentence', 'label']
    test_df.columns = ['sentence', 'label']

    train_df = train_df[train_df.label != 2].reset_index(drop=True)
    val_df = val_df[val_df.label != 2].reset_index(drop=True)
    test_df = test_df[test_df.label != 2].reset_index(drop=True)

    return train_df, val_df, test_df

In [16]:
def get_yelp_polarity_dfs():
    dataset = load_dataset('yelp_polarity')
    test_df = dataset['test'].to_pandas().dropna()
    test_df.columns = ['sentence', 'label']

    return test_df

In [17]:
def get_amazon_polarity_dfs():
    dataset = load_dataset('amazon_polarity')
    test_df = dataset['test'].to_pandas().dropna()

    test_df = test_df[['content', 'label']].reset_index(drop=True)

    test_df.columns = ['sentence', 'label']

    return test_df

In [18]:
test_dfs = {}

In [19]:
train_df, val_df, test_df = get_rt_dfs()
test_dfs['rottent_tomatoes'] = test_df

Downloading builder script:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/921 [00:00<?, ?B/s]

Downloading and preparing dataset rotten_tomatoes_movie_review/default (download: 476.34 KiB, generated: 1.28 MiB, post-processed: Unknown size, total: 1.75 MiB) to /root/.cache/huggingface/datasets/rotten_tomatoes_movie_review/default/1.0.0/40d411e45a6ce3484deed7cc15b82a53dad9a72aafd9f86f8f227134bec5ca46...


Downloading data:   0%|          | 0.00/488k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Dataset rotten_tomatoes_movie_review downloaded and prepared to /root/.cache/huggingface/datasets/rotten_tomatoes_movie_review/default/1.0.0/40d411e45a6ce3484deed7cc15b82a53dad9a72aafd9f86f8f227134bec5ca46. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [20]:
# test_df = get_amazon_polarity_dfs()
# test_dfs['amazon'] = test_df

test_df = get_yelp_polarity_dfs()
test_dfs['yelp'] = test_df

Downloading builder script:   0%|          | 0.00/2.39k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading and preparing dataset yelp_polarity/plain_text (download: 158.67 MiB, generated: 421.07 MiB, post-processed: Unknown size, total: 579.73 MiB) to /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/14f90415c754f47cf9087eadac25823a395fef4400c7903c5897f55cfaaa6f61...


Downloading data:   0%|          | 0.00/166M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/560000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/38000 [00:00<?, ? examples/s]

Dataset yelp_polarity downloaded and prepared to /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/14f90415c754f47cf9087eadac25823a395fef4400c7903c5897f55cfaaa6f61. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
train_df, val_df, test_df = get_tweet_eval_dfs()
test_dfs['tweet_eval'] = test_df

test_df = get_kindle_reviews_dfs()
test_dfs['kindle'] = test_df

test_df = get_twitter_rd_dfs()
test_dfs['twitter_rd'] = test_df

Downloading builder script:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Downloading and preparing dataset tweet_eval/sentiment (download: 6.17 MiB, generated: 6.62 MiB, post-processed: Unknown size, total: 12.79 MiB) to /root/.cache/huggingface/datasets/tweet_eval/sentiment/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343...


Downloading data files:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.24M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/12.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/527k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.53k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/99.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/629 [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/6 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/45615 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/12284 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset tweet_eval downloaded and prepared to /root/.cache/huggingface/datasets/tweet_eval/sentiment/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [22]:
test_df = get_reddit_dfs()
test_dfs['reddit'] = test_df

train_df, test_df = get_imdb_dataset_dfs()
test_dfs['imdb'] = test_df

train_df, test_df = get_sst2_dataset_dfs()
test_dfs['sst2'] = test_df

Downloading builder script:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading and preparing dataset imdb/plain_text (download: 80.23 MiB, generated: 127.02 MiB, post-processed: Unknown size, total: 207.25 MiB) to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...


Downloading data:   0%|          | 0.00/84.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Dataset imdb downloaded and prepared to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading builder script:   0%|          | 0.00/7.78k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/4.47k [00:00<?, ?B/s]

Downloading and preparing dataset glue/sst2 (download: 7.09 MiB, generated: 4.81 MiB, post-processed: Unknown size, total: 11.90 MiB) to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


Downloading data:   0%|          | 0.00/7.44M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
# test_df = get_sem_eval_2017_task4_dfs()
# test_dfs['sem_eval_2017'] = test_df

test_df = get_fin_sent_analysis_dfs()
test_dfs['fin_sent'] = test_df

In [24]:
DATASET_NAME = "sst2"

In [25]:
MODEL_NAME = "bert-base-uncased"
MAX_LEN = 128
# TRAIN_BATCH_SIZE = 32
EVAL_BATCH_SIZE = 128
# SAVE_MODEL_PATH = "/content/drive/MyDrive/Representative Subset/Models"
# SAVE_LOGS_PATH = "/content/drive/MyDrive/Representative Subset/Logs"
# LEARNING_RATE = 5e-5
# EPOCHS = 3

In [26]:
SAVED_METRICS_PATH = f"/kaggle/input/sst2-models-bert/SST2-Models-BERT/run-metrics.json"
SAVED_MODELS_PATH = f"/kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models"

In [27]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [28]:
# Funtion to tokenize data
def tokenize_dataset(data):
    return tokenizer(data["sentence"],
                     max_length=MAX_LEN,
                     truncation=True,
                     padding="max_length"
                     )

In [29]:
def get_hf_dataset(df):
    data = Dataset.from_pandas(df)
    dataset = data.map(tokenize_dataset)
    return dataset

In [30]:
# Function to compute the metric
def compute_metrics(eval_pred):
    metric = evaluate.load("accuracy")
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return metric.compute(predictions=predictions, references=labels)

In [31]:
with open(SAVED_METRICS_PATH, 'r') as f:
    data = json.load(f)

model_sizes = data['variance_metrics']['subset_sizes']
model_sizes

[5387, 10774, 16161, 21548, 26935, 32322, 37709, 43096, 48483, 53870]

In [32]:
temp_training_args = TrainingArguments(
    output_dir='./results',
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    report_to='none'
)

In [33]:
def save_metrics(filePath, evaluation_metrics, model_sizes):
    metrics = {'evaluation_metrics': evaluation_metrics, 'model_sizes': model_sizes}
    with open(filePath, "w") as outfile:
        json.dump(metrics, outfile)

In [34]:
for key, test_df in test_dfs.items():
    if key != DATASET_NAME:
        TEST_DATASET_NAME = key
        print(f"Testing for dataset: {TEST_DATASET_NAME} with {len(test_df)} samples.")
        test_dataset = get_hf_dataset(test_df)
        evaluation_metrics = []
        for size in model_sizes:
            print(f"Loading model from: {SAVED_MODELS_PATH}/model-{size}")
            model = AutoModelForSequenceClassification.from_pretrained(f"{SAVED_MODELS_PATH}/model-{size}").to(device)
            trainer = Trainer(
                model=model,                         # the instantiated 🤗 Transformers model to be trained
                args=temp_training_args,                  # training arguments, defined above
                compute_metrics=compute_metrics     # the callback that computes metrics of interest
            )

            # Evaluate the model
            eval_result = trainer.evaluate(eval_dataset=test_dataset)
            print(eval_result)
            evaluation_metrics.append(eval_result)

            del model
            gc.collect()
            
        save_metrics(f"/kaggle/working/eval-metrics-{TEST_DATASET_NAME}.json", evaluation_metrics, model_sizes)

Testing for dataset: rottent_tomatoes with 1066 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/1066 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

{'eval_loss': 0.37482669949531555, 'eval_accuracy': 0.8424015009380863, 'eval_runtime': 7.3482, 'eval_samples_per_second': 145.07, 'eval_steps_per_second': 1.225}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.3914858400821686, 'eval_accuracy': 0.848968105065666, 'eval_runtime': 5.1046, 'eval_samples_per_second': 208.833, 'eval_steps_per_second': 1.763}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.40578508377075195, 'eval_accuracy': 0.8696060037523452, 'eval_runtime': 5.1175, 'eval_samples_per_second': 208.303, 'eval_steps_per_second': 1.759}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 0.3995240032672882, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 5.1184, 'eval_samples_per_second': 208.266, 'eval_steps_per_second': 1.758}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.37753501534461975, 'eval_accuracy': 0.8780487804878049, 'eval_runtime': 5.1723, 'eval_samples_per_second': 206.098, 'eval_steps_per_second': 1.74}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.33704182505607605, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 5.1925, 'eval_samples_per_second': 205.296, 'eval_steps_per_second': 1.733}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.3550611436367035, 'eval_accuracy': 0.8883677298311444, 'eval_runtime': 5.2705, 'eval_samples_per_second': 202.257, 'eval_steps_per_second': 1.708}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 0.3563624620437622, 'eval_accuracy': 0.8836772983114447, 'eval_runtime': 5.1757, 'eval_samples_per_second': 205.963, 'eval_steps_per_second': 1.739}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 0.42639783024787903, 'eval_accuracy': 0.8986866791744841, 'eval_runtime': 5.1841, 'eval_samples_per_second': 205.627, 'eval_steps_per_second': 1.736}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 0.46985191106796265, 'eval_accuracy': 0.8893058161350844, 'eval_runtime': 5.1911, 'eval_samples_per_second': 205.351, 'eval_steps_per_second': 1.734}
Testing for dataset: yelp with 38000 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/38000 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.34819650650024414, 'eval_accuracy': 0.839, 'eval_runtime': 165.1581, 'eval_samples_per_second': 230.083, 'eval_steps_per_second': 1.798}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.27687373757362366, 'eval_accuracy': 0.8837894736842106, 'eval_runtime': 165.4571, 'eval_samples_per_second': 229.667, 'eval_steps_per_second': 1.795}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.320365846157074, 'eval_accuracy': 0.8729736842105263, 'eval_runtime': 165.2909, 'eval_samples_per_second': 229.898, 'eval_steps_per_second': 1.797}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 0.3201858699321747, 'eval_accuracy': 0.8746578947368421, 'eval_runtime': 165.2222, 'eval_samples_per_second': 229.993, 'eval_steps_per_second': 1.798}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.29109272360801697, 'eval_accuracy': 0.8827631578947368, 'eval_runtime': 165.3493, 'eval_samples_per_second': 229.817, 'eval_steps_per_second': 1.796}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.3336627185344696, 'eval_accuracy': 0.8620263157894736, 'eval_runtime': 165.9987, 'eval_samples_per_second': 228.917, 'eval_steps_per_second': 1.789}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.34826183319091797, 'eval_accuracy': 0.8569736842105263, 'eval_runtime': 165.4287, 'eval_samples_per_second': 229.706, 'eval_steps_per_second': 1.795}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 0.297839879989624, 'eval_accuracy': 0.8775, 'eval_runtime': 165.4943, 'eval_samples_per_second': 229.615, 'eval_steps_per_second': 1.795}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 0.34296345710754395, 'eval_accuracy': 0.8806315789473684, 'eval_runtime': 165.7715, 'eval_samples_per_second': 229.231, 'eval_steps_per_second': 1.792}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 0.37782591581344604, 'eval_accuracy': 0.8708684210526316, 'eval_runtime': 165.2068, 'eval_samples_per_second': 230.015, 'eval_steps_per_second': 1.798}
Testing for dataset: tweet_eval with 9909 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/9909 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.8560258746147156, 'eval_accuracy': 0.5178120900191745, 'eval_runtime': 43.4282, 'eval_samples_per_second': 228.17, 'eval_steps_per_second': 1.796}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.8587812185287476, 'eval_accuracy': 0.6013724896558684, 'eval_runtime': 43.4671, 'eval_samples_per_second': 227.966, 'eval_steps_per_second': 1.794}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 1.0205217599868774, 'eval_accuracy': 0.5640326975476839, 'eval_runtime': 43.7284, 'eval_samples_per_second': 226.603, 'eval_steps_per_second': 1.784}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 1.2258718013763428, 'eval_accuracy': 0.5467756584922797, 'eval_runtime': 43.6264, 'eval_samples_per_second': 227.133, 'eval_steps_per_second': 1.788}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.8993260264396667, 'eval_accuracy': 0.5974366737309517, 'eval_runtime': 43.3736, 'eval_samples_per_second': 228.457, 'eval_steps_per_second': 1.798}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.9741409420967102, 'eval_accuracy': 0.5376930063578564, 'eval_runtime': 43.5028, 'eval_samples_per_second': 227.778, 'eval_steps_per_second': 1.793}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 1.1830997467041016, 'eval_accuracy': 0.4980320920375416, 'eval_runtime': 43.5657, 'eval_samples_per_second': 227.449, 'eval_steps_per_second': 1.79}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 1.0208396911621094, 'eval_accuracy': 0.5453628014935917, 'eval_runtime': 43.4771, 'eval_samples_per_second': 227.913, 'eval_steps_per_second': 1.794}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 1.6781487464904785, 'eval_accuracy': 0.503078009889999, 'eval_runtime': 43.346, 'eval_samples_per_second': 228.602, 'eval_steps_per_second': 1.799}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 1.5774513483047485, 'eval_accuracy': 0.5429407609244121, 'eval_runtime': 43.396, 'eval_samples_per_second': 228.339, 'eval_steps_per_second': 1.797}
Testing for dataset: kindle with 12000 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/12000 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.44711101055145264, 'eval_accuracy': 0.7811666666666667, 'eval_runtime': 52.3655, 'eval_samples_per_second': 229.159, 'eval_steps_per_second': 1.795}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.4592876434326172, 'eval_accuracy': 0.8093333333333333, 'eval_runtime': 52.4295, 'eval_samples_per_second': 228.879, 'eval_steps_per_second': 1.793}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.47016429901123047, 'eval_accuracy': 0.83, 'eval_runtime': 52.4408, 'eval_samples_per_second': 228.83, 'eval_steps_per_second': 1.792}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 0.4619008004665375, 'eval_accuracy': 0.8361666666666666, 'eval_runtime': 52.5911, 'eval_samples_per_second': 228.175, 'eval_steps_per_second': 1.787}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.4649370014667511, 'eval_accuracy': 0.8241666666666667, 'eval_runtime': 52.3769, 'eval_samples_per_second': 229.108, 'eval_steps_per_second': 1.795}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.47136813402175903, 'eval_accuracy': 0.81975, 'eval_runtime': 52.406, 'eval_samples_per_second': 228.981, 'eval_steps_per_second': 1.794}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.5288430452346802, 'eval_accuracy': 0.79675, 'eval_runtime': 52.4156, 'eval_samples_per_second': 228.94, 'eval_steps_per_second': 1.793}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 0.4218575060367584, 'eval_accuracy': 0.8374166666666667, 'eval_runtime': 52.4628, 'eval_samples_per_second': 228.733, 'eval_steps_per_second': 1.792}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 0.5921893119812012, 'eval_accuracy': 0.8280833333333333, 'eval_runtime': 52.4123, 'eval_samples_per_second': 228.954, 'eval_steps_per_second': 1.793}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 0.6845782995223999, 'eval_accuracy': 0.8075, 'eval_runtime': 52.1574, 'eval_samples_per_second': 230.073, 'eval_steps_per_second': 1.802}
Testing for dataset: twitter_rd with 53879 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/53879 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.9133320450782776, 'eval_accuracy': 0.5456300228289315, 'eval_runtime': 232.6341, 'eval_samples_per_second': 231.604, 'eval_steps_per_second': 1.81}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.871715247631073, 'eval_accuracy': 0.6200189313090443, 'eval_runtime': 232.093, 'eval_samples_per_second': 232.144, 'eval_steps_per_second': 1.814}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.9117639660835266, 'eval_accuracy': 0.6089385474860335, 'eval_runtime': 232.064, 'eval_samples_per_second': 232.173, 'eval_steps_per_second': 1.814}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 1.126426339149475, 'eval_accuracy': 0.574490989068097, 'eval_runtime': 232.2936, 'eval_samples_per_second': 231.944, 'eval_steps_per_second': 1.812}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.8653586506843567, 'eval_accuracy': 0.6130588912192134, 'eval_runtime': 232.5788, 'eval_samples_per_second': 231.659, 'eval_steps_per_second': 1.81}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.8528768420219421, 'eval_accuracy': 0.5926984539430947, 'eval_runtime': 232.4734, 'eval_samples_per_second': 231.764, 'eval_steps_per_second': 1.811}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.9979166984558105, 'eval_accuracy': 0.5679763915440152, 'eval_runtime': 232.5173, 'eval_samples_per_second': 231.72, 'eval_steps_per_second': 1.811}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 1.0132575035095215, 'eval_accuracy': 0.5691085580652945, 'eval_runtime': 232.1354, 'eval_samples_per_second': 232.102, 'eval_steps_per_second': 1.814}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 1.4684531688690186, 'eval_accuracy': 0.5589561795875945, 'eval_runtime': 231.9665, 'eval_samples_per_second': 232.271, 'eval_steps_per_second': 1.815}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 1.3788933753967285, 'eval_accuracy': 0.5738599454332857, 'eval_runtime': 232.0918, 'eval_samples_per_second': 232.145, 'eval_steps_per_second': 1.814}
Testing for dataset: reddit with 24107 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/24107 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.9570093154907227, 'eval_accuracy': 0.5243290330609367, 'eval_runtime': 104.2029, 'eval_samples_per_second': 231.347, 'eval_steps_per_second': 1.814}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.953726053237915, 'eval_accuracy': 0.5911975774671258, 'eval_runtime': 104.4716, 'eval_samples_per_second': 230.752, 'eval_steps_per_second': 1.809}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 1.012404441833496, 'eval_accuracy': 0.5814078898245323, 'eval_runtime': 104.0661, 'eval_samples_per_second': 231.651, 'eval_steps_per_second': 1.816}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 1.2105385065078735, 'eval_accuracy': 0.5664744679968474, 'eval_runtime': 104.3085, 'eval_samples_per_second': 231.113, 'eval_steps_per_second': 1.812}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.9769210815429688, 'eval_accuracy': 0.5862612519185298, 'eval_runtime': 104.5075, 'eval_samples_per_second': 230.672, 'eval_steps_per_second': 1.808}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.9520117044448853, 'eval_accuracy': 0.5691292985439914, 'eval_runtime': 104.3537, 'eval_samples_per_second': 231.012, 'eval_steps_per_second': 1.811}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 1.0877220630645752, 'eval_accuracy': 0.5525780893516407, 'eval_runtime': 104.4313, 'eval_samples_per_second': 230.841, 'eval_steps_per_second': 1.81}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 1.1092225313186646, 'eval_accuracy': 0.5482639897125317, 'eval_runtime': 104.4526, 'eval_samples_per_second': 230.794, 'eval_steps_per_second': 1.809}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 1.6245300769805908, 'eval_accuracy': 0.5401750528892023, 'eval_runtime': 104.3718, 'eval_samples_per_second': 230.972, 'eval_steps_per_second': 1.811}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 1.5489519834518433, 'eval_accuracy': 0.5511262288961712, 'eval_runtime': 104.4738, 'eval_samples_per_second': 230.747, 'eval_steps_per_second': 1.809}
Testing for dataset: imdb with 25000 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/25000 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.4317333698272705, 'eval_accuracy': 0.79568, 'eval_runtime': 108.1863, 'eval_samples_per_second': 231.083, 'eval_steps_per_second': 1.812}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.3952668607234955, 'eval_accuracy': 0.82452, 'eval_runtime': 108.2893, 'eval_samples_per_second': 230.863, 'eval_steps_per_second': 1.81}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.4484218657016754, 'eval_accuracy': 0.82716, 'eval_runtime': 108.1239, 'eval_samples_per_second': 231.216, 'eval_steps_per_second': 1.813}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 0.44155576825141907, 'eval_accuracy': 0.83672, 'eval_runtime': 108.0751, 'eval_samples_per_second': 231.321, 'eval_steps_per_second': 1.814}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.40754956007003784, 'eval_accuracy': 0.83892, 'eval_runtime': 108.192, 'eval_samples_per_second': 231.071, 'eval_steps_per_second': 1.812}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.4302038252353668, 'eval_accuracy': 0.8244, 'eval_runtime': 107.9671, 'eval_samples_per_second': 231.552, 'eval_steps_per_second': 1.815}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.44596067070961, 'eval_accuracy': 0.819, 'eval_runtime': 107.9745, 'eval_samples_per_second': 231.536, 'eval_steps_per_second': 1.815}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 0.41983094811439514, 'eval_accuracy': 0.831, 'eval_runtime': 108.0014, 'eval_samples_per_second': 231.479, 'eval_steps_per_second': 1.815}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 0.46783626079559326, 'eval_accuracy': 0.842, 'eval_runtime': 108.1101, 'eval_samples_per_second': 231.246, 'eval_steps_per_second': 1.813}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 0.5100569725036621, 'eval_accuracy': 0.83844, 'eval_runtime': 108.1946, 'eval_samples_per_second': 231.065, 'eval_steps_per_second': 1.812}
Testing for dataset: fin_sent with 2712 samples.


  if _pandas_api.is_sparse(col):


  0%|          | 0/2712 [00:00<?, ?ex/s]

Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-5387


{'eval_loss': 0.6500097513198853, 'eval_accuracy': 0.6688790560471977, 'eval_runtime': 12.2482, 'eval_samples_per_second': 221.42, 'eval_steps_per_second': 1.796}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-10774


{'eval_loss': 0.5726127624511719, 'eval_accuracy': 0.7433628318584071, 'eval_runtime': 12.2047, 'eval_samples_per_second': 222.209, 'eval_steps_per_second': 1.803}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-16161


{'eval_loss': 0.6176102161407471, 'eval_accuracy': 0.758480825958702, 'eval_runtime': 12.2263, 'eval_samples_per_second': 221.817, 'eval_steps_per_second': 1.799}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-21548


{'eval_loss': 0.7305505871772766, 'eval_accuracy': 0.726401179941003, 'eval_runtime': 12.2428, 'eval_samples_per_second': 221.517, 'eval_steps_per_second': 1.797}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-26935


{'eval_loss': 0.5881152749061584, 'eval_accuracy': 0.7433628318584071, 'eval_runtime': 12.3674, 'eval_samples_per_second': 219.286, 'eval_steps_per_second': 1.779}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-32322


{'eval_loss': 0.7479416131973267, 'eval_accuracy': 0.6242625368731564, 'eval_runtime': 12.2364, 'eval_samples_per_second': 221.633, 'eval_steps_per_second': 1.798}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-37709


{'eval_loss': 0.8969504833221436, 'eval_accuracy': 0.5792772861356932, 'eval_runtime': 12.234, 'eval_samples_per_second': 221.677, 'eval_steps_per_second': 1.798}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-43096


{'eval_loss': 0.6516368985176086, 'eval_accuracy': 0.6980088495575221, 'eval_runtime': 12.4315, 'eval_samples_per_second': 218.156, 'eval_steps_per_second': 1.77}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-48483


{'eval_loss': 0.9179368019104004, 'eval_accuracy': 0.6670353982300885, 'eval_runtime': 12.207, 'eval_samples_per_second': 222.167, 'eval_steps_per_second': 1.802}
Loading model from: /kaggle/input/sst2-models-bert/SST2-Models-BERT/Subset-Models/model-53870


{'eval_loss': 0.9019126296043396, 'eval_accuracy': 0.7035398230088495, 'eval_runtime': 12.3625, 'eval_samples_per_second': 219.373, 'eval_steps_per_second': 1.78}
