In [16]:
pip install -r requirements.txt

Collecting tensorflow (from -r requirements.txt (line 4))
  Using cached tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Using cached tensorflow-2.18.0-cp312-cp312-win_amd64.whl (7.5 kB)
Installing collected packages: tensorflow
Successfully installed tensorflow-2.18.0
Note: you may need to restart the kernel to use updated packages.


In [18]:
pip install tf-keras

Collecting tf-kerasNote: you may need to restart the kernel to use updated packages.

  Using cached tf_keras-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Using cached tf_keras-2.18.0-py3-none-any.whl (1.7 MB)
Installing collected packages: tf-keras
Successfully installed tf-keras-2.18.0


In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification





In [24]:
email_ds = pd.read_csv('./model/datasets/emails_dataset.csv')
email_ds.head()

Unnamed: 0,text,spam
0,Subject: enron methanol ; meter # : 988291\r\n...,0
1,"Subject: hpl nom for january 9 , 2001\r\n( see...",0
2,"Subject: neon retreat\r\nho ho ho , we ' re ar...",0
3,"Subject: photoshop , windows , office . cheap ...",1
4,Subject: re : indian springs\r\nthis deal is t...,0


In [25]:
X_train, X_test, y_train, y_test = train_test_split(email_ds['text'], email_ds['spam'], test_size=0.2, random_state=42)

In [22]:
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
tokenizer.save_pretrained("./model/email_spam_model")

('email_spam_model\\tokenizer_config.json',
 'email_spam_model\\special_tokens_map.json',
 'email_spam_model\\vocab.txt',
 'email_spam_model\\added_tokens.json',
 'email_spam_model\\tokenizer.json')

In [27]:
train_encodings = tokenizer(X_train.tolist(), truncation=True, padding=True)
test_encodings = tokenizer(X_test.tolist(), truncation=True, padding=True)

In [29]:
# Create PyTorch Dataset
class EmailDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [45]:
train_dataset = EmailDataset(train_encodings, y_train.tolist())
test_dataset = EmailDataset(test_encodings, y_test.tolist())
test_dataset[0]

{'input_ids': tensor([  101,  3395,  1024,  2128,  1024,  2034,  6959,  1011, 16060, 22684,
          1041,  1004,  1052,  2047,  3066,  2193,  2003, 22777, 18139,  2487,
          6294,  3044,  1030, 14925,  2102,  6021,  1013,  2676,  1013,  2456,
          6185,  1024,  2484,  7610,  2000,  1024,  3419, 28794,  1013, 13058,
          1013,  4372,  4948,  1030,  4372,  4948,  1010,  7628, 11527,  2015,
          1013,  7570,  2226,  1013, 14925,  2102,  1030, 14925,  2102, 10507,
          1024,  3395,  1024,  2128,  1024,  2034,  6959,  1011, 16060, 22684,
          1041,  1004,  1052,  3419,  1024,  2052,  2017,  3531,  2275,  2039,
          2178,  2028,  3204,  7281,  2005,  1996,  2917,  1006,  2197,  3204,
          2001, 19235, 28311,  2509,  1007,  1029,  7628,  1024,  2566,  9606,
          1005,  1055,  3602,  3531,  2079,  2025,  4604,  2041,  2151,  3259,
          2006,  2023,  1012,  2057,  5987,  1996,  3259,  2306,  1037,  2154,
          2030,  2061,  1012,  3531,  2

In [32]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./model/results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

In [33]:
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/70 [02:58<?, ?it/s]
  1%|          | 10/1090 [01:08<2:05:03,  6.95s/it]

{'loss': 0.7018, 'grad_norm': 2.616570472717285, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.02}


  2%|▏         | 20/1090 [02:23<2:13:03,  7.46s/it]

{'loss': 0.6823, 'grad_norm': 1.4500535726547241, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.04}


  3%|▎         | 30/1090 [03:38<2:11:28,  7.44s/it]

{'loss': 0.6478, 'grad_norm': 1.1301486492156982, 'learning_rate': 3e-06, 'epoch': 0.06}


  4%|▎         | 40/1090 [04:52<2:09:24,  7.40s/it]

{'loss': 0.623, 'grad_norm': 1.2964814901351929, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.07}


  5%|▍         | 50/1090 [06:03<2:05:35,  7.25s/it]

{'loss': 0.5482, 'grad_norm': 2.8508880138397217, 'learning_rate': 5e-06, 'epoch': 0.09}


  6%|▌         | 60/1090 [07:17<2:07:09,  7.41s/it]

{'loss': 0.5202, 'grad_norm': 1.7294577360153198, 'learning_rate': 6e-06, 'epoch': 0.11}


  6%|▋         | 70/1090 [08:28<2:01:49,  7.17s/it]

{'loss': 0.4657, 'grad_norm': 1.7442193031311035, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.13}


  7%|▋         | 80/1090 [09:40<1:59:46,  7.12s/it]

{'loss': 0.4325, 'grad_norm': 2.0070817470550537, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.15}


  8%|▊         | 90/1090 [10:51<1:59:07,  7.15s/it]

{'loss': 0.3386, 'grad_norm': 3.619962215423584, 'learning_rate': 9e-06, 'epoch': 0.17}


  9%|▉         | 100/1090 [12:03<1:57:41,  7.13s/it]

{'loss': 0.2752, 'grad_norm': 2.385684013366699, 'learning_rate': 1e-05, 'epoch': 0.18}


 10%|█         | 110/1090 [13:16<2:00:41,  7.39s/it]

{'loss': 0.2219, 'grad_norm': 1.5647664070129395, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.2}


 11%|█         | 120/1090 [14:30<1:57:52,  7.29s/it]

{'loss': 0.2214, 'grad_norm': 1.3601247072219849, 'learning_rate': 1.2e-05, 'epoch': 0.22}


 12%|█▏        | 130/1090 [15:44<1:57:10,  7.32s/it]

{'loss': 0.1846, 'grad_norm': 0.662755012512207, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.24}


 13%|█▎        | 140/1090 [16:58<1:59:16,  7.53s/it]

{'loss': 0.2091, 'grad_norm': 7.288661003112793, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.26}


 14%|█▍        | 150/1090 [18:13<1:56:49,  7.46s/it]

{'loss': 0.1104, 'grad_norm': 2.9848265647888184, 'learning_rate': 1.5e-05, 'epoch': 0.28}


 15%|█▍        | 160/1090 [19:27<1:55:20,  7.44s/it]

{'loss': 0.1221, 'grad_norm': 4.778802871704102, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.29}


 16%|█▌        | 170/1090 [20:41<1:51:33,  7.28s/it]

{'loss': 0.0925, 'grad_norm': 0.27340999245643616, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.31}


 17%|█▋        | 180/1090 [21:53<1:49:15,  7.20s/it]

{'loss': 0.0655, 'grad_norm': 1.6913907527923584, 'learning_rate': 1.8e-05, 'epoch': 0.33}


 17%|█▋        | 190/1090 [23:06<1:46:32,  7.10s/it]

{'loss': 0.0882, 'grad_norm': 5.564198970794678, 'learning_rate': 1.9e-05, 'epoch': 0.35}


 18%|█▊        | 200/1090 [24:19<1:47:19,  7.24s/it]

{'loss': 0.0697, 'grad_norm': 2.355030059814453, 'learning_rate': 2e-05, 'epoch': 0.37}


 19%|█▉        | 210/1090 [25:31<1:45:24,  7.19s/it]

{'loss': 0.0867, 'grad_norm': 1.0826959609985352, 'learning_rate': 2.1e-05, 'epoch': 0.39}


 20%|██        | 220/1090 [26:44<1:45:09,  7.25s/it]

{'loss': 0.0797, 'grad_norm': 8.970026969909668, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.4}


 21%|██        | 230/1090 [27:57<1:45:46,  7.38s/it]

{'loss': 0.204, 'grad_norm': 0.7986243367195129, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.42}


 22%|██▏       | 240/1090 [29:09<1:41:54,  7.19s/it]

{'loss': 0.0859, 'grad_norm': 2.6408474445343018, 'learning_rate': 2.4e-05, 'epoch': 0.44}


 23%|██▎       | 250/1090 [30:21<1:41:29,  7.25s/it]

{'loss': 0.1281, 'grad_norm': 11.924275398254395, 'learning_rate': 2.5e-05, 'epoch': 0.46}


 24%|██▍       | 260/1090 [31:33<1:38:33,  7.12s/it]

{'loss': 0.2396, 'grad_norm': 2.4409587383270264, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.48}


 25%|██▍       | 270/1090 [32:45<1:37:10,  7.11s/it]

{'loss': 0.0388, 'grad_norm': 0.22552728652954102, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.5}


 26%|██▌       | 280/1090 [33:55<1:34:52,  7.03s/it]

{'loss': 0.049, 'grad_norm': 0.10239113867282867, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.51}


 27%|██▋       | 290/1090 [35:11<1:39:59,  7.50s/it]

{'loss': 0.0959, 'grad_norm': 0.15186285972595215, 'learning_rate': 2.9e-05, 'epoch': 0.53}


 28%|██▊       | 300/1090 [36:23<1:33:06,  7.07s/it]

{'loss': 0.0861, 'grad_norm': 0.43932726979255676, 'learning_rate': 3e-05, 'epoch': 0.55}


 28%|██▊       | 310/1090 [37:35<1:34:01,  7.23s/it]

{'loss': 0.0507, 'grad_norm': 0.08018084615468979, 'learning_rate': 3.1e-05, 'epoch': 0.57}


 29%|██▉       | 320/1090 [38:47<1:31:32,  7.13s/it]

{'loss': 0.1784, 'grad_norm': 6.894588947296143, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.59}


 30%|███       | 330/1090 [39:58<1:30:02,  7.11s/it]

{'loss': 0.088, 'grad_norm': 4.4864020347595215, 'learning_rate': 3.3e-05, 'epoch': 0.61}


 31%|███       | 340/1090 [41:09<1:28:27,  7.08s/it]

{'loss': 0.0518, 'grad_norm': 1.8154315948486328, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.62}


 32%|███▏      | 350/1090 [42:20<1:27:52,  7.12s/it]

{'loss': 0.0265, 'grad_norm': 12.179049491882324, 'learning_rate': 3.5e-05, 'epoch': 0.64}


 33%|███▎      | 360/1090 [43:31<1:26:23,  7.10s/it]

{'loss': 0.0679, 'grad_norm': 0.04191020503640175, 'learning_rate': 3.6e-05, 'epoch': 0.66}


 34%|███▍      | 370/1090 [44:41<1:23:50,  6.99s/it]

{'loss': 0.1151, 'grad_norm': 23.45888900756836, 'learning_rate': 3.7e-05, 'epoch': 0.68}


 35%|███▍      | 380/1090 [45:51<1:23:03,  7.02s/it]

{'loss': 0.136, 'grad_norm': 4.181308269500732, 'learning_rate': 3.8e-05, 'epoch': 0.7}


 36%|███▌      | 390/1090 [47:03<1:23:55,  7.19s/it]

{'loss': 0.039, 'grad_norm': 4.791146755218506, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.72}


 37%|███▋      | 400/1090 [48:15<1:23:40,  7.28s/it]

{'loss': 0.0225, 'grad_norm': 32.59806442260742, 'learning_rate': 4e-05, 'epoch': 0.73}


 38%|███▊      | 410/1090 [49:30<1:24:11,  7.43s/it]

{'loss': 0.0787, 'grad_norm': 0.13664129376411438, 'learning_rate': 4.1e-05, 'epoch': 0.75}


 39%|███▊      | 420/1090 [50:48<1:30:24,  8.10s/it]

{'loss': 0.0303, 'grad_norm': 0.20217449963092804, 'learning_rate': 4.2e-05, 'epoch': 0.77}


 39%|███▉      | 430/1090 [52:05<1:23:09,  7.56s/it]

{'loss': 0.1098, 'grad_norm': 0.03950214385986328, 'learning_rate': 4.3e-05, 'epoch': 0.79}


 40%|████      | 440/1090 [53:21<1:23:15,  7.69s/it]

{'loss': 0.079, 'grad_norm': 21.04144859313965, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.81}


 41%|████▏     | 450/1090 [54:38<1:21:01,  7.60s/it]

{'loss': 0.0502, 'grad_norm': 0.34277281165122986, 'learning_rate': 4.5e-05, 'epoch': 0.83}


 42%|████▏     | 460/1090 [55:53<1:19:01,  7.53s/it]

{'loss': 0.0156, 'grad_norm': 7.462057113647461, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.84}


 43%|████▎     | 470/1090 [57:10<1:20:28,  7.79s/it]

{'loss': 0.0686, 'grad_norm': 0.07675110548734665, 'learning_rate': 4.7e-05, 'epoch': 0.86}


 44%|████▍     | 480/1090 [58:28<1:17:26,  7.62s/it]

{'loss': 0.0983, 'grad_norm': 0.06784369051456451, 'learning_rate': 4.8e-05, 'epoch': 0.88}


 45%|████▍     | 490/1090 [59:43<1:15:17,  7.53s/it]

{'loss': 0.0874, 'grad_norm': 3.6417031288146973, 'learning_rate': 4.9e-05, 'epoch': 0.9}


 46%|████▌     | 500/1090 [1:00:59<1:14:15,  7.55s/it]

{'loss': 0.1014, 'grad_norm': 6.946353912353516, 'learning_rate': 5e-05, 'epoch': 0.92}


 47%|████▋     | 510/1090 [1:02:21<1:15:06,  7.77s/it]

{'loss': 0.0537, 'grad_norm': 5.706919193267822, 'learning_rate': 4.915254237288136e-05, 'epoch': 0.94}


 48%|████▊     | 520/1090 [1:03:36<1:10:07,  7.38s/it]

{'loss': 0.1805, 'grad_norm': 10.499643325805664, 'learning_rate': 4.8305084745762714e-05, 'epoch': 0.95}


 49%|████▊     | 530/1090 [1:04:49<1:08:41,  7.36s/it]

{'loss': 0.0335, 'grad_norm': 8.50383186340332, 'learning_rate': 4.745762711864407e-05, 'epoch': 0.97}


 50%|████▉     | 540/1090 [1:06:03<1:07:40,  7.38s/it]

{'loss': 0.0729, 'grad_norm': 11.516900062561035, 'learning_rate': 4.6610169491525425e-05, 'epoch': 0.99}


 50%|█████     | 550/1090 [1:07:17<1:06:45,  7.42s/it]

{'loss': 0.016, 'grad_norm': 16.39923667907715, 'learning_rate': 4.5762711864406784e-05, 'epoch': 1.01}


 51%|█████▏    | 560/1090 [1:08:31<1:04:33,  7.31s/it]

{'loss': 0.0012, 'grad_norm': 0.018616262823343277, 'learning_rate': 4.491525423728814e-05, 'epoch': 1.03}


 52%|█████▏    | 570/1090 [1:09:46<1:05:45,  7.59s/it]

{'loss': 0.0246, 'grad_norm': 0.013836686499416828, 'learning_rate': 4.4067796610169495e-05, 'epoch': 1.05}


 53%|█████▎    | 580/1090 [1:11:00<1:02:41,  7.38s/it]

{'loss': 0.0588, 'grad_norm': 0.010019143112003803, 'learning_rate': 4.3220338983050854e-05, 'epoch': 1.06}


 54%|█████▍    | 590/1090 [1:12:14<1:01:12,  7.35s/it]

{'loss': 0.0355, 'grad_norm': 0.011815634556114674, 'learning_rate': 4.2372881355932206e-05, 'epoch': 1.08}


 55%|█████▌    | 600/1090 [1:13:25<55:34,  6.81s/it]  

{'loss': 0.0021, 'grad_norm': 0.024275578558444977, 'learning_rate': 4.152542372881356e-05, 'epoch': 1.1}


 56%|█████▌    | 610/1090 [1:14:32<53:20,  6.67s/it]

{'loss': 0.0418, 'grad_norm': 0.023586010560393333, 'learning_rate': 4.067796610169492e-05, 'epoch': 1.12}


 57%|█████▋    | 620/1090 [1:15:42<54:57,  7.02s/it]

{'loss': 0.0226, 'grad_norm': 0.51836097240448, 'learning_rate': 3.983050847457627e-05, 'epoch': 1.14}


 58%|█████▊    | 630/1090 [1:16:49<52:07,  6.80s/it]

{'loss': 0.013, 'grad_norm': 0.053257424384355545, 'learning_rate': 3.898305084745763e-05, 'epoch': 1.16}


 59%|█████▊    | 640/1090 [1:18:05<56:46,  7.57s/it]  

{'loss': 0.0008, 'grad_norm': 0.015742238610982895, 'learning_rate': 3.813559322033898e-05, 'epoch': 1.17}


 60%|█████▉    | 650/1090 [1:19:18<53:38,  7.31s/it]

{'loss': 0.0838, 'grad_norm': 10.177223205566406, 'learning_rate': 3.728813559322034e-05, 'epoch': 1.19}


 61%|██████    | 660/1090 [1:20:31<52:24,  7.31s/it]

{'loss': 0.0292, 'grad_norm': 8.481857299804688, 'learning_rate': 3.644067796610169e-05, 'epoch': 1.21}


 61%|██████▏   | 670/1090 [1:21:46<53:52,  7.70s/it]

{'loss': 0.0215, 'grad_norm': 3.3174996376037598, 'learning_rate': 3.559322033898305e-05, 'epoch': 1.23}


 62%|██████▏   | 680/1090 [1:23:00<50:10,  7.34s/it]

{'loss': 0.0099, 'grad_norm': 0.0672106146812439, 'learning_rate': 3.474576271186441e-05, 'epoch': 1.25}


 63%|██████▎   | 690/1090 [1:24:13<48:39,  7.30s/it]

{'loss': 0.0121, 'grad_norm': 0.012930461205542088, 'learning_rate': 3.389830508474576e-05, 'epoch': 1.27}


 64%|██████▍   | 700/1090 [1:25:26<47:37,  7.33s/it]

{'loss': 0.0591, 'grad_norm': 0.02857023850083351, 'learning_rate': 3.305084745762712e-05, 'epoch': 1.28}


 65%|██████▌   | 710/1090 [1:26:39<46:23,  7.33s/it]

{'loss': 0.1132, 'grad_norm': 12.443567276000977, 'learning_rate': 3.2203389830508473e-05, 'epoch': 1.3}


 66%|██████▌   | 720/1090 [1:27:52<45:14,  7.34s/it]

{'loss': 0.0111, 'grad_norm': 0.3938584625720978, 'learning_rate': 3.135593220338983e-05, 'epoch': 1.32}


 67%|██████▋   | 730/1090 [1:29:05<43:55,  7.32s/it]

{'loss': 0.0011, 'grad_norm': 0.015520231798291206, 'learning_rate': 3.050847457627119e-05, 'epoch': 1.34}


 68%|██████▊   | 740/1090 [1:30:20<44:32,  7.63s/it]

{'loss': 0.0355, 'grad_norm': 0.0244954451918602, 'learning_rate': 2.9661016949152544e-05, 'epoch': 1.36}


 69%|██████▉   | 750/1090 [1:31:33<41:24,  7.31s/it]

{'loss': 0.001, 'grad_norm': 0.012760709971189499, 'learning_rate': 2.88135593220339e-05, 'epoch': 1.38}


 70%|██████▉   | 760/1090 [1:32:46<40:00,  7.28s/it]

{'loss': 0.1479, 'grad_norm': 4.254879474639893, 'learning_rate': 2.7966101694915255e-05, 'epoch': 1.39}


 71%|███████   | 770/1090 [1:34:01<40:34,  7.61s/it]

{'loss': 0.0013, 'grad_norm': 0.03529846668243408, 'learning_rate': 2.711864406779661e-05, 'epoch': 1.41}


 72%|███████▏  | 780/1090 [1:35:16<38:04,  7.37s/it]

{'loss': 0.0039, 'grad_norm': 0.015308834612369537, 'learning_rate': 2.627118644067797e-05, 'epoch': 1.43}


 72%|███████▏  | 790/1090 [1:36:29<36:21,  7.27s/it]

{'loss': 0.0223, 'grad_norm': 0.8675014972686768, 'learning_rate': 2.5423728813559322e-05, 'epoch': 1.45}


 73%|███████▎  | 800/1090 [1:37:43<35:48,  7.41s/it]

{'loss': 0.0245, 'grad_norm': 5.718473434448242, 'learning_rate': 2.457627118644068e-05, 'epoch': 1.47}


 74%|███████▍  | 810/1090 [1:38:58<34:39,  7.43s/it]

{'loss': 0.0251, 'grad_norm': 0.6613492965698242, 'learning_rate': 2.3728813559322036e-05, 'epoch': 1.49}


 75%|███████▌  | 820/1090 [1:40:11<32:54,  7.31s/it]

{'loss': 0.0404, 'grad_norm': 0.00762080866843462, 'learning_rate': 2.2881355932203392e-05, 'epoch': 1.5}


 76%|███████▌  | 830/1090 [1:41:23<30:54,  7.13s/it]

{'loss': 0.0124, 'grad_norm': 0.32322460412979126, 'learning_rate': 2.2033898305084748e-05, 'epoch': 1.52}


 77%|███████▋  | 840/1090 [1:42:36<30:28,  7.32s/it]

{'loss': 0.0008, 'grad_norm': 0.031235724687576294, 'learning_rate': 2.1186440677966103e-05, 'epoch': 1.54}


 78%|███████▊  | 850/1090 [1:43:51<30:08,  7.53s/it]

{'loss': 0.0022, 'grad_norm': 0.04911212995648384, 'learning_rate': 2.033898305084746e-05, 'epoch': 1.56}


 79%|███████▉  | 860/1090 [1:45:04<27:55,  7.29s/it]

{'loss': 0.0024, 'grad_norm': 0.006674684584140778, 'learning_rate': 1.9491525423728814e-05, 'epoch': 1.58}


 80%|███████▉  | 870/1090 [1:46:17<26:51,  7.32s/it]

{'loss': 0.0125, 'grad_norm': 0.01004003919661045, 'learning_rate': 1.864406779661017e-05, 'epoch': 1.6}


 81%|████████  | 880/1090 [1:47:31<26:06,  7.46s/it]

{'loss': 0.0098, 'grad_norm': 0.29049360752105713, 'learning_rate': 1.7796610169491526e-05, 'epoch': 1.61}


 82%|████████▏ | 890/1090 [1:48:38<22:30,  6.75s/it]

{'loss': 0.0253, 'grad_norm': 0.0066713192500174046, 'learning_rate': 1.694915254237288e-05, 'epoch': 1.63}


 83%|████████▎ | 900/1090 [1:49:52<23:11,  7.32s/it]

{'loss': 0.0265, 'grad_norm': 0.037539899349212646, 'learning_rate': 1.6101694915254237e-05, 'epoch': 1.65}


 83%|████████▎ | 910/1090 [1:51:05<21:43,  7.24s/it]

{'loss': 0.0004, 'grad_norm': 0.0066130440682172775, 'learning_rate': 1.5254237288135596e-05, 'epoch': 1.67}


 84%|████████▍ | 920/1090 [1:52:18<21:05,  7.44s/it]

{'loss': 0.0015, 'grad_norm': 0.0094094043597579, 'learning_rate': 1.440677966101695e-05, 'epoch': 1.69}


 85%|████████▌ | 930/1090 [1:53:31<19:29,  7.31s/it]

{'loss': 0.031, 'grad_norm': 14.157710075378418, 'learning_rate': 1.3559322033898305e-05, 'epoch': 1.71}


 86%|████████▌ | 940/1090 [1:54:44<18:24,  7.36s/it]

{'loss': 0.0474, 'grad_norm': 0.005907065235078335, 'learning_rate': 1.2711864406779661e-05, 'epoch': 1.72}


 87%|████████▋ | 950/1090 [1:55:57<16:57,  7.27s/it]

{'loss': 0.0014, 'grad_norm': 0.1195366308093071, 'learning_rate': 1.1864406779661018e-05, 'epoch': 1.74}


 88%|████████▊ | 960/1090 [1:57:10<15:47,  7.29s/it]

{'loss': 0.0005, 'grad_norm': 0.006805360782891512, 'learning_rate': 1.1016949152542374e-05, 'epoch': 1.76}


 89%|████████▉ | 970/1090 [1:58:23<14:29,  7.25s/it]

{'loss': 0.0394, 'grad_norm': 0.15279391407966614, 'learning_rate': 1.016949152542373e-05, 'epoch': 1.78}


 90%|████████▉ | 980/1090 [1:59:36<13:15,  7.23s/it]

{'loss': 0.0222, 'grad_norm': 0.3049929440021515, 'learning_rate': 9.322033898305085e-06, 'epoch': 1.8}


 91%|█████████ | 990/1090 [2:00:48<12:09,  7.30s/it]

{'loss': 0.0004, 'grad_norm': 0.005016120616346598, 'learning_rate': 8.47457627118644e-06, 'epoch': 1.82}


 92%|█████████▏| 1000/1090 [2:02:00<10:50,  7.22s/it]

{'loss': 0.0506, 'grad_norm': 0.010985421016812325, 'learning_rate': 7.627118644067798e-06, 'epoch': 1.83}


 93%|█████████▎| 1010/1090 [2:03:20<10:07,  7.59s/it]

{'loss': 0.0108, 'grad_norm': 0.06705741584300995, 'learning_rate': 6.779661016949153e-06, 'epoch': 1.85}


 94%|█████████▎| 1020/1090 [2:04:35<08:46,  7.52s/it]

{'loss': 0.0004, 'grad_norm': 0.0053415363654494286, 'learning_rate': 5.932203389830509e-06, 'epoch': 1.87}


 94%|█████████▍| 1030/1090 [2:05:48<07:17,  7.29s/it]

{'loss': 0.0004, 'grad_norm': 0.0054967934265732765, 'learning_rate': 5.084745762711865e-06, 'epoch': 1.89}


 95%|█████████▌| 1040/1090 [2:07:00<06:01,  7.23s/it]

{'loss': 0.0289, 'grad_norm': 0.004431853070855141, 'learning_rate': 4.23728813559322e-06, 'epoch': 1.91}


 96%|█████████▋| 1050/1090 [2:08:13<04:48,  7.21s/it]

{'loss': 0.0408, 'grad_norm': 0.005802992265671492, 'learning_rate': 3.3898305084745763e-06, 'epoch': 1.93}


 97%|█████████▋| 1060/1090 [2:09:24<03:31,  7.06s/it]

{'loss': 0.002, 'grad_norm': 0.007020679768174887, 'learning_rate': 2.5423728813559323e-06, 'epoch': 1.94}


 98%|█████████▊| 1070/1090 [2:10:34<02:18,  6.92s/it]

{'loss': 0.0003, 'grad_norm': 0.004613779950886965, 'learning_rate': 1.6949152542372882e-06, 'epoch': 1.96}


 99%|█████████▉| 1080/1090 [2:11:42<01:04,  6.46s/it]

{'loss': 0.0006, 'grad_norm': 0.003217482240870595, 'learning_rate': 8.474576271186441e-07, 'epoch': 1.98}


100%|██████████| 1090/1090 [2:12:45<00:00,  6.24s/it]

{'loss': 0.0004, 'grad_norm': 0.005734020844101906, 'learning_rate': 0.0, 'epoch': 2.0}


100%|██████████| 1090/1090 [2:12:47<00:00,  7.31s/it]

{'train_runtime': 7967.2003, 'train_samples_per_second': 2.189, 'train_steps_per_second': 0.137, 'train_loss': 0.0989107018135864, 'epoch': 2.0}





TrainOutput(global_step=1090, training_loss=0.0989107018135864, metrics={'train_runtime': 7967.2003, 'train_samples_per_second': 2.189, 'train_steps_per_second': 0.137, 'total_flos': 2309966497763328.0, 'train_loss': 0.0989107018135864, 'epoch': 2.0})

In [34]:
trainer.evaluate()

100%|██████████| 137/137 [03:25<00:00,  1.50s/it]


{'eval_loss': 0.017079537734389305,
 'eval_runtime': 207.1078,
 'eval_samples_per_second': 10.526,
 'eval_steps_per_second': 0.661,
 'epoch': 2.0}

In [35]:
trainer.predict(test_dataset)

100%|██████████| 137/137 [03:32<00:00,  1.55s/it]


PredictionOutput(predictions=array([[ 4.91339  , -4.7248683],
       [-3.5898714,  3.8752549],
       [-3.5908644,  3.9313364],
       ...,
       [-3.383893 ,  3.6332774],
       [ 2.9832778, -2.8182068],
       [ 4.9017305, -4.6804433]], dtype=float32), label_ids=array([0, 1, 1, ..., 1, 0, 0], dtype=int64), metrics={'test_loss': 0.017079537734389305, 'test_runtime': 214.3257, 'test_samples_per_second': 10.171, 'test_steps_per_second': 0.639})

In [36]:
output=trainer.predict(test_dataset)[1]

100%|██████████| 137/137 [03:24<00:00,  1.49s/it]


In [37]:
from sklearn.metrics import confusion_matrix

cm=confusion_matrix(y_test,output)
cm

array([[1596,    0],
       [   0,  584]], dtype=int64)

In [42]:
import numpy as np
prediction_output = trainer.predict(test_dataset)
predicted_labels = np.argmax(prediction_output.predictions, axis=-1)


100%|██████████| 137/137 [03:15<00:00,  1.43s/it]


In [43]:
from sklearn.metrics import accuracy_score, f1_score
acc = accuracy_score(prediction_output.label_ids, predicted_labels)
f1 = f1_score(prediction_output.label_ids, predicted_labels, average='weighted')
print(f"Accuracy: {acc:.2f}, F1: {f1:.2f}")

Accuracy: 0.99, F1: 0.99


In [44]:
# Evaluate on the test dataset (which contains labels)
eval_results = trainer.evaluate(eval_dataset=test_dataset)
print(eval_results)


100%|██████████| 137/137 [03:06<00:00,  1.36s/it]

{'eval_loss': 0.017079537734389305, 'eval_runtime': 187.754, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 0.73, 'epoch': 2.0}





In [48]:
trainer.save_model('./model/email_spam_model')

In [23]:
pip install huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [24]:
from huggingface_hub import login

login(token="token")  # Lấy token từ https://huggingface.co/settings/tokens

In [25]:
from huggingface_hub import upload_folder

repo_id = "jamesnq/email_spam_model"  # Tên repo trên Hugging Face
local_folder = "email_spam_model"  # Thư mục chứa model đã cập nhật

upload_folder(folder_path=local_folder, repo_id=repo_id, repo_type="model")


training_args.bin: 100%|██████████| 5.18k/5.18k [00:01<00:00, 4.79kB/s]


CommitInfo(commit_url='https://huggingface.co/jamesnq/email_spam_model/commit/dda7c1ea14d7bbb37cf0d088ad8ea5db63bfafbe', commit_message='Upload folder using huggingface_hub', commit_description='', oid='dda7c1ea14d7bbb37cf0d088ad8ea5db63bfafbe', pr_url=None, repo_url=RepoUrl('https://huggingface.co/jamesnq/email_spam_model', endpoint='https://huggingface.co', repo_type='model', repo_id='jamesnq/email_spam_model'), pr_revision=None, pr_num=None)