In [2]:
!pip install simpletransformers



In [3]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset, DataLoader
import torch
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import logging
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [4]:
model_args = ClassificationArgs(
    num_train_epochs=6,
    overwrite_output_dir=True,
    output_dir="/content/output/",
)

# Models We Will Use:
# Model 1: BERT-Tiny fine-tuned on on sms_spam dataset for spam detection
model1 = ClassificationModel(
    'bert',
    'prajjwal1/bert-tiny',
    args=model_args
)

# Model 2: RoBERTa based Spam Message Detection
model2 = ClassificationModel(
    'roberta',
    'roberta-base',
    args=model_args
)

# Model 3: A fine-tuned version of distilbert-base-uncased on the sms_spam dataset
model3 = ClassificationModel(
    'distilbert',
    'distilbert-base-uncased',
    args=model_args
)

data1 = pd.read_csv('spam_assassin.csv')
data2 = pd.read_csv('spam_ham_dataset.csv')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [5]:
data2 = data2.drop(columns=['label'])
data2 = data2.drop(columns=['Unnamed: 0'])
data2.head()


Unnamed: 0,text,label_num
0,Subject: enron methanol ; meter # : 988291\r\n...,0
1,"Subject: hpl nom for january 9 , 2001\r\n( see...",0
2,"Subject: neon retreat\r\nho ho ho , we ' re ar...",0
3,"Subject: photoshop , windows , office . cheap ...",1
4,Subject: re : indian springs\r\nthis deal is t...,0


In [6]:
all_models= [model1, model2, model3]
all_models_names= ["BERT-Tiny", "Roberta", "Distilbert"]
datasets = [data1, data2]
datasets_names = ["Spam_Assasin", "spam2_ds"]

In [7]:
names= []
results= []
wong_pred= []
eval_data_list= []
for idx_ds, data in enumerate(datasets):
    data.columns = ["text", "labels"]
    train_data, eval_data = train_test_split(data, test_size=0.3, random_state=42, stratify=data['labels'])
    eval_data_list.append(eval_data)
    for idx_md, model in enumerate(all_models):
        model.args.output_dir= f'/content/model_checkpoints/{all_models_names[idx_md]}_{datasets_names[idx_ds]}'

        logging.basicConfig(level=logging.INFO)
        transformers_logger = logging.getLogger("transformers")
        transformers_logger.setLevel(logging.WARNING)

        model.train_model(train_data)
        result, model_outputs, wrong_predictions = model.eval_model(eval_data)
        names.append(all_models_names[idx_md]+ " + " + datasets_names[idx_ds] + ": ")
        results.append("Evaluation Results: " +  str(result['accuracy']))
        wong_pred.append("Number of wrong predictions: " + str(len(wrong_predictions)))

for item in range(len(names)):
    print(names[item])
    print(results[item])
    print(wong_pred[item])
    print()

  self.pid = os.fork()


  0%|          | 0/8 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/18 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/8 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/18 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/8 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/508 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/18 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/7 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/453 [00:00<?, ?it/s]



Running Epoch 2 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/7 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/453 [00:00<?, ?it/s]



Running Epoch 2 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/7 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/453 [00:00<?, ?it/s]



Running Epoch 2 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

Running Epoch 6 of 6:   0%|          | 0/453 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

BERT-Tiny + Spam_Assasin: 
Evaluation Results: 0.953421506612996
Number of wrong predictions: 81

Roberta + Spam_Assasin: 
Evaluation Results: 0.9660724554341575
Number of wrong predictions: 59

Distilbert + Spam_Assasin: 
Evaluation Results: 0.9666474985623922
Number of wrong predictions: 58

BERT-Tiny + spam2_ds: 
Evaluation Results: 0.9768041237113402
Number of wrong predictions: 36

Roberta + spam2_ds: 
Evaluation Results: 0.9858247422680413
Number of wrong predictions: 22

Distilbert + spam2_ds: 
Evaluation Results: 0.9858247422680413
Number of wrong predictions: 22



In [8]:
# Ensembling
import os
models = [
    {"type": "bert", "name": "/content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6"},
    {"type": "bert", "name": "/content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6"},
    {"type": "distilbert", "name": "/content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6"},
    {"type": "distilbert", "name": "/content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6"},
    {"type": "roberta", "name": "/content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6"},
    {"type": "roberta", "name": "/content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6"}
]
models_list_comprehensive = []

for each_model in models:
  dir= os.path.dirname(f'{each_model["name"]}/training_args.bin')
  model_args = ClassificationArgs()
  model_args.load(dir)

  checkpoint_path = each_model["name"]

  m= ClassificationModel(
    model_type=each_model["type"],
    model_name=checkpoint_path,
    args=model_args,
    use_cuda=True
  )
  models_list_comprehensive.append(m)


In [9]:
# Voting (soft) Ensemble
data1 = pd.read_csv('spam_assassin.csv')
data2 = pd.read_csv('spam_ham_dataset.csv')

data2 = data2.drop(columns=['label'])
data2 = data2.drop(columns=['Unnamed: 0'])

data1.columns = ["text", "labels"]
data2.columns = ["text", "labels"]

def ensemble_predictions(eval_data):
    pred1, _ = models_list_comprehensive[0].predict(eval_data)
    pred2, _ = models_list_comprehensive[1].predict(eval_data)
    pred3, _ = models_list_comprehensive[2].predict(eval_data)
    pred4, _ = models_list_comprehensive[3].predict(eval_data)
    pred5, _ = models_list_comprehensive[4].predict(eval_data)
    pred6, _ = models_list_comprehensive[5].predict(eval_data)

    # for p in pred1:
    #   print(p)

    preds_df = pd.DataFrame({
        'pred1_spam': pred1,
        'pred2_spam': pred2,
        'pred3_spam': pred3,
        'pred4_spam': pred4,
        'pred5_spam': pred5,
        'pred6_spam': pred6
    })

    preds_df['ave_prob'] = preds_df[['pred1_spam', 'pred2_spam', 'pred3_spam', 'pred4_spam', 'pred5_spam', 'pred6_spam']].mean(axis=1)
    preds_df['final_pred'] = (preds_df['ave_prob'] > 0.5).astype(int)
    return preds_df['final_pred']

def calculate_accuracy(df):
    df['predicted_label'] = ensemble_predictions(df['text'].tolist())
    correct_predictions = (df['predicted_label'] == df['labels']).sum()
    total_predictions = len(df)
    accuracy = correct_predictions / total_predictions
    return accuracy





In [11]:
accuries_on_test= []
# accuries_on_test.append(calculate_accuracy(eval_data_list[0].copy()))
# accuries_on_test.append(calculate_accuracy(eval_data_list[1].copy()))


accuries_on_test.append(calculate_accuracy(data1.copy()))
accuries_on_test.append(calculate_accuracy(data2.copy()))

# Accuracy calculation on Spam Assassin Eval Data with soft ensembling- voting
print(f'Spam Assassin Eval: {accuries_on_test[0]}')
# Accuracy calculation on Spam Ham Eval Data with soft ensembling- voting
print(f'Spam Ham Eval: {accuries_on_test[1]}')


  self.pid = os.fork()


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Spam Assassin Eval: 0.9734299516908212
Spam Ham Eval: 0.9899439180042545


In [10]:
# Weighted Ensemble Based on Wrong predictions

data1 = pd.read_csv('spam_assassin.csv')
data2 = pd.read_csv('spam_ham_dataset.csv')

data2 = data2.drop(columns=['label'])
data2 = data2.drop(columns=['Unnamed: 0'])

data1.columns = ["text", "labels"]
data2.columns = ["text", "labels"]

def ensemble_predictions2(eval_data):
    pred1, _ = models_list_comprehensive[0].predict(eval_data)
    pred2, _ = models_list_comprehensive[1].predict(eval_data)
    pred3, _ = models_list_comprehensive[2].predict(eval_data)
    pred4, _ = models_list_comprehensive[3].predict(eval_data)
    pred5, _ = models_list_comprehensive[4].predict(eval_data)
    pred6, _ = models_list_comprehensive[5].predict(eval_data)

    # for p in pred1:
    #   print(p)

    preds_df = pd.DataFrame({
        'pred1_spam': pred1,
        'pred2_spam': pred2,
        'pred3_spam': pred3,
        'pred4_spam': pred4,
        'pred5_spam': pred5,
        'pred6_spam': pred6
    })

    preds_df['ave_prob'] = preds_df['pred1_spam'] * 0.0869 +  preds_df['pred2_spam'] * 0.1117 + preds_df['pred3_spam'] * 0.1026 +  preds_df['pred4_spam'] * 0.1738 + preds_df['pred5_spam'] * 0.2844 + preds_df['pred6_spam'] * 0.2406
    preds_df['final_pred'] = (preds_df['ave_prob'] > 0.5).astype(int)
    return preds_df['final_pred']

def calculate_accuracy2(df):
    df['predicted_label'] = ensemble_predictions(df['text'].tolist())
    correct_predictions = (df['predicted_label'] == df['labels']).sum()
    total_predictions = len(df)
    accuracy = correct_predictions / total_predictions
    return accuracy

In [11]:
accuries_on_test= []
# accuries_on_test.append(calculate_accuracy(eval_data_list[0].copy()))
# accuries_on_test.append(calculate_accuracy(eval_data_list[1].copy()))


accuries_on_test.append(calculate_accuracy2(data1.copy()))
accuries_on_test.append(calculate_accuracy2(data2.copy()))

# Accuracy calculation on Spam Assassin Eval Data with weighted ensembling based on wrong predictions
print(f'Spam Assassin Eval: {accuries_on_test[0]}')
# Accuracy calculation on Spam Ham Eval Data with weighted ensembling based on wrong predictions
print(f'Spam Ham Eval: {accuries_on_test[1]}')


  self.pid = os.fork()


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Spam Assassin Eval: 0.9734299516908212
Spam Ham Eval: 0.9899439180042545


In [14]:
accuries_on_test= []
accuries_on_test.append(calculate_accuracy(data1.copy()))
accuries_on_test.append(calculate_accuracy(data2.copy()))

# Accuracy calculation on Clean Spam Ham
print(f'Spam Ham Full: {accuries_on_test[0]}')
# Accuracy calculation on Clean Spam Ham Eval Data
print(f'Spam Ham Eval: {accuries_on_test[1]}')

  self.pid = os.fork()


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  self.pid = os.fork()


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Spam Ham Full: 0.9734299516908212
Spam Ham Eval: 0.9899439180042545


In [None]:
!zip -r vBTSH.zip /content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6

  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/vocab.txt (deflated 53%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/config.json (deflated 49%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/tokenizer_config.json (deflated 75%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/optimizer.pt (deflated 56%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/tokenizer.json (deflated 71%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/model.safetensors (deflated 7%)
  adding: content/model_checkpoints/BERT-Tiny_spam2_ds/checkpoint_2718_epoch_6/scheduler.pt (deflated 56%)
  adding: content/model_checkpoints/

In [None]:
!zip -r vBTSA.zip /content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6

  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/vocab.txt (deflated 53%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/config.json (deflated 49%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer_config.json (deflated 75%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/optimizer.pt (deflated 82%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer.json (deflated 71%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/model.safetensors (deflated 7%)
  adding: content/model_checkpoints/BERT-Tiny_Spam_Assasin/checkpoint_3048_epoch_6/scheduler.pt (deflated 56%)


In [None]:
!zip -r vDSA.zip /content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6

  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/vocab.txt (deflated 53%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/config.json (deflated 46%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer_config.json (deflated 76%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/optimizer.pt (deflated 37%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer.json (deflated 71%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/model.safetensors (deflated 8%)
  adding: content/model_checkpoints/Distilbert_Spam_Assasin/checkpoint_3048_epoch_6/scheduler.pt (defla

In [None]:
!zip -r vDSH.zip /content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6

  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/vocab.txt (deflated 53%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/config.json (deflated 46%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/tokenizer_config.json (deflated 76%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/optimizer.pt (deflated 27%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/tokenizer.json (deflated 71%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/model.safetensors (deflated 8%)
  adding: content/model_checkpoints/Distilbert_spam2_ds/checkpoint_2718_epoch_6/scheduler.pt (deflated 56%)
  adding: content/model_che

In [None]:
!zip -r vRSH.zip /content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6

  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/config.json (deflated 51%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/tokenizer_config.json (deflated 76%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/vocab.json (deflated 59%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/optimizer.pt (deflated 29%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/tokenizer.json (deflated 72%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/model.safetensors (deflated 14%)
  adding: content/model_checkpoints/Roberta_spam2_ds/checkpoint_2718_epoch_6/scheduler.pt (deflated 56%)
  adding: content/model_checkpoints/Roberta_spam2_ds

In [None]:
!zip -r vRSA.zip /content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6

  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/ (stored 0%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/config.json (deflated 51%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer_config.json (deflated 76%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/training_args.bin (deflated 51%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/vocab.json (deflated 59%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/optimizer.pt (deflated 34%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/tokenizer.json (deflated 72%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/model.safetensors (deflated 16%)
  adding: content/model_checkpoints/Roberta_Spam_Assasin/checkpoint_3048_epoch_6/scheduler.pt (deflated 56%)
  adding: conten