In [1]:
import torch
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

# Set to True when running on Kaggle; it only switches which CSV path is read below.
kaggle = False
# Hugging Face checkpoint name used to load both the tokenizer and the classifier.
model_path = "microsoft/deberta-base"

In [2]:
# Choose the CSV location for the current environment.
# NOTE(review): the Kaggle path names a "clickbait" dataset, while the frame used in this
# notebook has id / keyword / location / text / target columns — confirm the Kaggle path.
if kaggle:
    path = '/kaggle/input/clickbait-dataset/clickbait_data.csv'
else:
    path = 'train.csv'

df = pd.read_csv(path)
df

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1
...,...,...,...,...,...
7608,10869,,,Two giant cranes holding a bridge collapse int...,1
7609,10870,,,@aria_ahrary @TheTawniest The out of control w...,1
7610,10871,,,M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...,1
7611,10872,,,Police investigating after an e-bike collided ...,1


In [3]:
# Build one model-input string per row:
#   "KEYWORD: <keyword> | LOCATION: <location> | TEXT: <text>"
# The KEYWORD / LOCATION segments are left out entirely when the value is missing, and
# "%20" inside keywords is decoded to a space.
# This is done with vectorised string operations rather than a per-row `df.loc[i, ...]`
# loop: it does not depend on the index being 0..n-1, and a missing `text` becomes the
# string "nan" instead of raising on str + float concatenation.
keyword_part = (
    'KEYWORD: ' + df['keyword'].astype(str).str.replace('%20', ' ', regex=False) + ' | '
).where(df['keyword'].notna(), '')
location_part = (
    'LOCATION: ' + df['location'].astype(str) + ' | '
).where(df['location'].notna(), '')

df = (
    df.assign(combined=keyword_part + location_part + 'TEXT: ' + df['text'].astype(str))
    # Drop all but combined and target
    .drop(columns=['id', 'keyword', 'location', 'text'])
    # Rename target to label
    .rename(columns={'target': 'label'})
)
df.sample(30)


  0%|          | 0/7613 [00:00<?, ?it/s]

Unnamed: 0,label,combined
7334,1,"KEYWORD: wildfire | LOCATION: Ashland, Oregon ..."
813,0,KEYWORD: blizzard | TEXT: Amazon Deal - wait o...
5486,0,KEYWORD: quarantine | TEXT: Yet another compan...
6596,1,KEYWORD: terrorism | TEXT: @RobPulseNews @huyo...
2886,0,KEYWORD: drought | LOCATION: At Work | TEXT: M...
6909,0,KEYWORD: traumatised | TEXT: @brookesddl I am ...
759,0,KEYWORD: blew up | LOCATION: Waterford MI | TE...
6410,1,KEYWORD: suicide bomber | TEXT: Suicide bomber...
7103,0,KEYWORD: violent storm | LOCATION: Milky Way g...
6814,0,KEYWORD: trapped | LOCATION: call me peach or ...


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. Stratifying on the label keeps the class
# ratio the same in both parts; the fixed random_state makes the split repeatable.
train_df, valid_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [5]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

# Number of encoder layers, counted from the input side, whose weights stay frozen.
N_FROZEN_LAYERS = 6

# The checkpoint does not contain a classification head, so its weights are randomly
# initialised when the model is loaded. Seed torch first so that the initialisation
# (and therefore the training run) is repeatable.
torch.manual_seed(42)

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Freeze the embeddings and the first N_FROZEN_LAYERS encoder layers; every other
# parameter keeps requires_grad=True and is updated during fine-tuning.
frozen_modules = [model.deberta.embeddings, *model.deberta.encoder.layer[:N_FROZEN_LAYERS]]
for module in frozen_modules:
    for param in module.parameters():
        param.requires_grad = False


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
from datasets import Dataset


def _tokenize_batch(batch):
    """Tokenize the `combined` strings of a batch, padding/truncating each to 100 tokens."""
    return tokenizer(batch['combined'], padding='max_length', truncation=True, max_length=100)


# Wrap the pandas splits as `datasets.Dataset` objects, then add the tokenizer outputs.
train_data = Dataset.from_pandas(train_df)
valid_data = Dataset.from_pandas(valid_df)

train_ds = train_data.map(_tokenize_batch, batched=True)
valid_ds = valid_data.map(_tokenize_batch, batched=True)

Map:   0%|          | 0/6090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1523 [00:00<?, ? examples/s]

In [7]:
# Spot-check one tokenized training example (position 99) to see the fields the tokenizer added.
train_ds[99]

{'label': 0,
 'combined': 'KEYWORD: explode | TEXT: Learn How I Gained Access To The Secrets Of The Top Earners &amp; Used Them To Explode My Home Business Here: http://t.co/SGXP1U5OL1 Please #RT',
 '__index_level_0__': 3398,
 'input_ids': [1,
  33754,
  771,
  11200,
  35,
  25931,
  1721,
  36367,
  35,
  10900,
  1336,
  38,
  48564,
  8076,
  598,
  20,
  34702,
  1525,
  20,
  3107,
  7535,
  268,
  359,
  3914,
  131,
  30374,
  28595,
  598,
  16161,
  4636,
  1308,
  2193,
  2090,
  1398,
  35,
  2054,
  640,
  90,
  4,
  876,
  73,
  33020,
  29269,
  134,
  791,
  245,
  3384,
  134,
  3401,
  849,
  13963,
  2,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
 

In [8]:
from transformers import Trainer, TrainingArguments



def accuracy(pred):
    """Compute classification accuracy for a prediction object.

    Args:
        pred: object exposing `predictions` (array of per-class scores, classes on the
            last axis) and `label_ids` (array of true class ids).

    Returns:
        dict with a single key, "accuracy": the fraction of examples whose
        highest-scoring class equals the true label.
    """
    predicted_classes = pred.predictions.argmax(axis=-1)
    is_correct = predicted_classes == pred.label_ids
    return {"accuracy": is_correct.mean()}

# Settings for the fine-tuning run, grouped by concern.
training_config = {
    # --- optimisation ---
    "num_train_epochs": 3,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "warmup_steps": 500,
    "weight_decay": 0.01,
    # --- output locations and logging ---
    "output_dir": "./results",
    "logging_dir": "./logs",
    "logging_steps": 10,
    "report_to": "none",
    # --- evaluation and checkpointing (evaluate every 100 steps, save every 500) ---
    "evaluation_strategy": "steps",
    "save_strategy": "steps",
    "eval_steps": 100,
    "save_steps": 500,
    "save_total_limit": 3,
    # --- best-model selection: higher validation accuracy is better ---
    "load_best_model_at_end": True,
    "metric_for_best_model": "accuracy",
    "greater_is_better": True,
}
training_args = TrainingArguments(**training_config)

# Tie together the model, the tokenized splits and the metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    tokenizer=tokenizer,
    compute_metrics=accuracy,
)

In [9]:
# Run fine-tuning with the Trainer configured above; the returned TrainOutput is shown as the cell result.
trainer.train()



  0%|          | 0/2286 [00:00<?, ?it/s]

You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.6867, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.01}
{'loss': 0.6967, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.03}
{'loss': 0.6941, 'learning_rate': 3e-06, 'epoch': 0.04}
{'loss': 0.6909, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.05}
{'loss': 0.6968, 'learning_rate': 5e-06, 'epoch': 0.07}
{'loss': 0.6884, 'learning_rate': 6e-06, 'epoch': 0.08}
{'loss': 0.6737, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.09}
{'loss': 0.6911, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.1}
{'loss': 0.6879, 'learning_rate': 9e-06, 'epoch': 0.12}
{'loss': 0.6898, 'learning_rate': 1e-05, 'epoch': 0.13}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.6101089715957642, 'eval_accuracy': 0.7202889034799738, 'eval_runtime': 2.5621, 'eval_samples_per_second': 594.425, 'eval_steps_per_second': 74.547, 'epoch': 0.13}
{'loss': 0.581, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.14}
{'loss': 0.6545, 'learning_rate': 1.2e-05, 'epoch': 0.16}
{'loss': 0.4561, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.17}
{'loss': 0.4403, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.18}
{'loss': 0.4773, 'learning_rate': 1.5e-05, 'epoch': 0.2}
{'loss': 0.4597, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.21}
{'loss': 0.3746, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.22}
{'loss': 0.5288, 'learning_rate': 1.8e-05, 'epoch': 0.24}
{'loss': 0.4762, 'learning_rate': 1.9e-05, 'epoch': 0.25}
{'loss': 0.4646, 'learning_rate': 2e-05, 'epoch': 0.26}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5162909626960754, 'eval_accuracy': 0.7623112278397899, 'eval_runtime': 2.5191, 'eval_samples_per_second': 604.583, 'eval_steps_per_second': 75.821, 'epoch': 0.26}
{'loss': 0.3327, 'learning_rate': 2.1e-05, 'epoch': 0.28}
{'loss': 0.5107, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.29}
{'loss': 0.5454, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.3}
{'loss': 0.4657, 'learning_rate': 2.4e-05, 'epoch': 0.31}
{'loss': 0.4309, 'learning_rate': 2.5e-05, 'epoch': 0.33}
{'loss': 0.4975, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.34}
{'loss': 0.4926, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.35}
{'loss': 0.384, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.37}
{'loss': 0.5883, 'learning_rate': 2.9e-05, 'epoch': 0.38}
{'loss': 0.3986, 'learning_rate': 3e-05, 'epoch': 0.39}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4380653202533722, 'eval_accuracy': 0.8286277084701248, 'eval_runtime': 2.5121, 'eval_samples_per_second': 606.268, 'eval_steps_per_second': 76.032, 'epoch': 0.39}
{'loss': 0.4422, 'learning_rate': 3.1e-05, 'epoch': 0.41}
{'loss': 0.5154, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.42}
{'loss': 0.4622, 'learning_rate': 3.3e-05, 'epoch': 0.43}
{'loss': 0.3556, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.45}
{'loss': 0.3772, 'learning_rate': 3.5e-05, 'epoch': 0.46}
{'loss': 0.4709, 'learning_rate': 3.6e-05, 'epoch': 0.47}
{'loss': 0.6709, 'learning_rate': 3.7e-05, 'epoch': 0.49}
{'loss': 0.3587, 'learning_rate': 3.8e-05, 'epoch': 0.5}
{'loss': 0.3905, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.51}
{'loss': 0.6514, 'learning_rate': 4e-05, 'epoch': 0.52}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.3976203203201294, 'eval_accuracy': 0.8378200919238346, 'eval_runtime': 2.531, 'eval_samples_per_second': 601.731, 'eval_steps_per_second': 75.463, 'epoch': 0.52}
{'loss': 0.4708, 'learning_rate': 4.1e-05, 'epoch': 0.54}
{'loss': 0.463, 'learning_rate': 4.2e-05, 'epoch': 0.55}
{'loss': 0.4424, 'learning_rate': 4.3e-05, 'epoch': 0.56}
{'loss': 0.4615, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.58}
{'loss': 0.3978, 'learning_rate': 4.5e-05, 'epoch': 0.59}
{'loss': 0.2917, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.6}
{'loss': 0.6186, 'learning_rate': 4.7e-05, 'epoch': 0.62}
{'loss': 0.5564, 'learning_rate': 4.8e-05, 'epoch': 0.63}
{'loss': 0.4497, 'learning_rate': 4.9e-05, 'epoch': 0.64}
{'loss': 0.4992, 'learning_rate': 5e-05, 'epoch': 0.66}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5079574584960938, 'eval_accuracy': 0.8233749179251477, 'eval_runtime': 2.5121, 'eval_samples_per_second': 606.258, 'eval_steps_per_second': 76.031, 'epoch': 0.66}
{'loss': 0.4899, 'learning_rate': 4.972004479283315e-05, 'epoch': 0.67}
{'loss': 0.2801, 'learning_rate': 4.944008958566629e-05, 'epoch': 0.68}
{'loss': 0.5072, 'learning_rate': 4.916013437849945e-05, 'epoch': 0.7}
{'loss': 0.4846, 'learning_rate': 4.888017917133259e-05, 'epoch': 0.71}
{'loss': 0.4825, 'learning_rate': 4.860022396416574e-05, 'epoch': 0.72}
{'loss': 0.4619, 'learning_rate': 4.832026875699888e-05, 'epoch': 0.73}
{'loss': 0.3868, 'learning_rate': 4.804031354983203e-05, 'epoch': 0.75}
{'loss': 0.4798, 'learning_rate': 4.7760358342665176e-05, 'epoch': 0.76}
{'loss': 0.4533, 'learning_rate': 4.7480403135498324e-05, 'epoch': 0.77}
{'loss': 0.4233, 'learning_rate': 4.720044792833147e-05, 'epoch': 0.79}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.49879950284957886, 'eval_accuracy': 0.8089297439264609, 'eval_runtime': 2.5287, 'eval_samples_per_second': 602.275, 'eval_steps_per_second': 75.532, 'epoch': 0.79}
{'loss': 0.5045, 'learning_rate': 4.6920492721164614e-05, 'epoch': 0.8}
{'loss': 0.441, 'learning_rate': 4.664053751399776e-05, 'epoch': 0.81}
{'loss': 0.4067, 'learning_rate': 4.6360582306830904e-05, 'epoch': 0.83}
{'loss': 0.5441, 'learning_rate': 4.608062709966405e-05, 'epoch': 0.84}
{'loss': 0.4502, 'learning_rate': 4.580067189249721e-05, 'epoch': 0.85}
{'loss': 0.3722, 'learning_rate': 4.552071668533035e-05, 'epoch': 0.87}
{'loss': 0.3865, 'learning_rate': 4.52407614781635e-05, 'epoch': 0.88}
{'loss': 0.4836, 'learning_rate': 4.496080627099664e-05, 'epoch': 0.89}
{'loss': 0.2994, 'learning_rate': 4.468085106382979e-05, 'epoch': 0.91}
{'loss': 0.5978, 'learning_rate': 4.4400895856662936e-05, 'epoch': 0.92}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4200180470943451, 'eval_accuracy': 0.8358502954694682, 'eval_runtime': 2.5354, 'eval_samples_per_second': 600.691, 'eval_steps_per_second': 75.333, 'epoch': 0.92}
{'loss': 0.4619, 'learning_rate': 4.4120940649496084e-05, 'epoch': 0.93}
{'loss': 0.3431, 'learning_rate': 4.384098544232923e-05, 'epoch': 0.94}
{'loss': 0.4761, 'learning_rate': 4.3561030235162374e-05, 'epoch': 0.96}
{'loss': 0.4312, 'learning_rate': 4.328107502799552e-05, 'epoch': 0.97}
{'loss': 0.3064, 'learning_rate': 4.3001119820828664e-05, 'epoch': 0.98}
{'loss': 0.4537, 'learning_rate': 4.272116461366182e-05, 'epoch': 1.0}
{'loss': 0.4773, 'learning_rate': 4.244120940649496e-05, 'epoch': 1.01}
{'loss': 0.4296, 'learning_rate': 4.216125419932811e-05, 'epoch': 1.02}
{'loss': 0.4884, 'learning_rate': 4.188129899216126e-05, 'epoch': 1.04}
{'loss': 0.473, 'learning_rate': 4.16013437849944e-05, 'epoch': 1.05}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.46813490986824036, 'eval_accuracy': 0.835193696651346, 'eval_runtime': 2.5311, 'eval_samples_per_second': 601.705, 'eval_steps_per_second': 75.46, 'epoch': 1.05}
{'loss': 0.3744, 'learning_rate': 4.1321388577827555e-05, 'epoch': 1.06}
{'loss': 0.3843, 'learning_rate': 4.1041433370660696e-05, 'epoch': 1.08}
{'loss': 0.2194, 'learning_rate': 4.0761478163493845e-05, 'epoch': 1.09}
{'loss': 0.5468, 'learning_rate': 4.0481522956326986e-05, 'epoch': 1.1}
{'loss': 0.2456, 'learning_rate': 4.0201567749160135e-05, 'epoch': 1.12}
{'loss': 0.6135, 'learning_rate': 3.992161254199328e-05, 'epoch': 1.13}
{'loss': 0.4363, 'learning_rate': 3.964165733482643e-05, 'epoch': 1.14}
{'loss': 0.412, 'learning_rate': 3.936170212765958e-05, 'epoch': 1.15}
{'loss': 0.3602, 'learning_rate': 3.908174692049272e-05, 'epoch': 1.17}
{'loss': 0.4199, 'learning_rate': 3.880179171332587e-05, 'epoch': 1.18}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5599443912506104, 'eval_accuracy': 0.8108995403808273, 'eval_runtime': 2.5361, 'eval_samples_per_second': 600.517, 'eval_steps_per_second': 75.311, 'epoch': 1.18}
{'loss': 0.4073, 'learning_rate': 3.852183650615902e-05, 'epoch': 1.19}
{'loss': 0.522, 'learning_rate': 3.824188129899216e-05, 'epoch': 1.21}
{'loss': 0.4615, 'learning_rate': 3.7961926091825315e-05, 'epoch': 1.22}
{'loss': 0.4375, 'learning_rate': 3.7681970884658456e-05, 'epoch': 1.23}
{'loss': 0.4912, 'learning_rate': 3.7402015677491605e-05, 'epoch': 1.25}
{'loss': 0.3846, 'learning_rate': 3.7122060470324746e-05, 'epoch': 1.26}
{'loss': 0.281, 'learning_rate': 3.6842105263157895e-05, 'epoch': 1.27}
{'loss': 0.4491, 'learning_rate': 3.656215005599104e-05, 'epoch': 1.29}
{'loss': 0.4253, 'learning_rate': 3.628219484882419e-05, 'epoch': 1.3}
{'loss': 0.4873, 'learning_rate': 3.600223964165734e-05, 'epoch': 1.31}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.41114234924316406, 'eval_accuracy': 0.8424162836506894, 'eval_runtime': 2.7059, 'eval_samples_per_second': 562.837, 'eval_steps_per_second': 70.586, 'epoch': 1.31}
{'loss': 0.455, 'learning_rate': 3.572228443449048e-05, 'epoch': 1.33}
{'loss': 0.4336, 'learning_rate': 3.544232922732363e-05, 'epoch': 1.34}
{'loss': 0.3828, 'learning_rate': 3.516237402015677e-05, 'epoch': 1.35}
{'loss': 0.4497, 'learning_rate': 3.4882418812989927e-05, 'epoch': 1.36}
{'loss': 0.3858, 'learning_rate': 3.460246360582307e-05, 'epoch': 1.38}
{'loss': 0.3565, 'learning_rate': 3.4322508398656217e-05, 'epoch': 1.39}
{'loss': 0.2249, 'learning_rate': 3.4042553191489365e-05, 'epoch': 1.4}
{'loss': 0.3397, 'learning_rate': 3.3762597984322507e-05, 'epoch': 1.42}
{'loss': 0.3823, 'learning_rate': 3.348264277715566e-05, 'epoch': 1.43}
{'loss': 0.3233, 'learning_rate': 3.32026875699888e-05, 'epoch': 1.44}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4819819927215576, 'eval_accuracy': 0.8378200919238346, 'eval_runtime': 2.7157, 'eval_samples_per_second': 560.807, 'eval_steps_per_second': 70.331, 'epoch': 1.44}
{'loss': 0.2811, 'learning_rate': 3.292273236282195e-05, 'epoch': 1.46}
{'loss': 0.3931, 'learning_rate': 3.264277715565509e-05, 'epoch': 1.47}
{'loss': 0.4512, 'learning_rate': 3.236282194848824e-05, 'epoch': 1.48}
{'loss': 0.3635, 'learning_rate': 3.208286674132139e-05, 'epoch': 1.5}
{'loss': 0.5091, 'learning_rate': 3.180291153415454e-05, 'epoch': 1.51}
{'loss': 0.2733, 'learning_rate': 3.152295632698769e-05, 'epoch': 1.52}
{'loss': 0.3792, 'learning_rate': 3.124300111982083e-05, 'epoch': 1.54}
{'loss': 0.4004, 'learning_rate': 3.096304591265398e-05, 'epoch': 1.55}
{'loss': 0.4419, 'learning_rate': 3.0683090705487125e-05, 'epoch': 1.56}
{'loss': 0.4612, 'learning_rate': 3.0403135498320267e-05, 'epoch': 1.57}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.40145766735076904, 'eval_accuracy': 0.8470124753775443, 'eval_runtime': 2.7135, 'eval_samples_per_second': 561.262, 'eval_steps_per_second': 70.388, 'epoch': 1.57}
{'loss': 0.3761, 'learning_rate': 3.012318029115342e-05, 'epoch': 1.59}
{'loss': 0.4514, 'learning_rate': 2.9843225083986563e-05, 'epoch': 1.6}
{'loss': 0.5174, 'learning_rate': 2.9563269876819712e-05, 'epoch': 1.61}
{'loss': 0.4205, 'learning_rate': 2.9283314669652857e-05, 'epoch': 1.63}
{'loss': 0.396, 'learning_rate': 2.9003359462486002e-05, 'epoch': 1.64}
{'loss': 0.3486, 'learning_rate': 2.8723404255319154e-05, 'epoch': 1.65}
{'loss': 0.3828, 'learning_rate': 2.84434490481523e-05, 'epoch': 1.67}
{'loss': 0.3219, 'learning_rate': 2.8163493840985444e-05, 'epoch': 1.68}
{'loss': 0.4823, 'learning_rate': 2.788353863381859e-05, 'epoch': 1.69}
{'loss': 0.28, 'learning_rate': 2.7603583426651737e-05, 'epoch': 1.71}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.3911258280277252, 'eval_accuracy': 0.8476690741956664, 'eval_runtime': 2.6514, 'eval_samples_per_second': 574.413, 'eval_steps_per_second': 72.037, 'epoch': 1.71}
{'loss': 0.4227, 'learning_rate': 2.7323628219484882e-05, 'epoch': 1.72}
{'loss': 0.2131, 'learning_rate': 2.7043673012318034e-05, 'epoch': 1.73}
{'loss': 0.5427, 'learning_rate': 2.676371780515118e-05, 'epoch': 1.75}
{'loss': 0.2978, 'learning_rate': 2.6483762597984324e-05, 'epoch': 1.76}
{'loss': 0.5097, 'learning_rate': 2.620380739081747e-05, 'epoch': 1.77}
{'loss': 0.5177, 'learning_rate': 2.5923852183650617e-05, 'epoch': 1.78}
{'loss': 0.4227, 'learning_rate': 2.5643896976483762e-05, 'epoch': 1.8}
{'loss': 0.362, 'learning_rate': 2.5363941769316914e-05, 'epoch': 1.81}
{'loss': 0.4107, 'learning_rate': 2.508398656215006e-05, 'epoch': 1.82}
{'loss': 0.4719, 'learning_rate': 2.4804031354983204e-05, 'epoch': 1.84}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.3741321265697479, 'eval_accuracy': 0.8463558765594222, 'eval_runtime': 2.7141, 'eval_samples_per_second': 561.143, 'eval_steps_per_second': 70.373, 'epoch': 1.84}
{'loss': 0.3845, 'learning_rate': 2.452407614781635e-05, 'epoch': 1.85}
{'loss': 0.2462, 'learning_rate': 2.4244120940649497e-05, 'epoch': 1.86}
{'loss': 0.3949, 'learning_rate': 2.3964165733482642e-05, 'epoch': 1.88}
{'loss': 0.373, 'learning_rate': 2.368421052631579e-05, 'epoch': 1.89}
{'loss': 0.2536, 'learning_rate': 2.340425531914894e-05, 'epoch': 1.9}
{'loss': 0.4185, 'learning_rate': 2.3124300111982084e-05, 'epoch': 1.92}
{'loss': 0.2282, 'learning_rate': 2.2844344904815232e-05, 'epoch': 1.93}
{'loss': 0.4718, 'learning_rate': 2.2564389697648377e-05, 'epoch': 1.94}
{'loss': 0.4447, 'learning_rate': 2.2284434490481522e-05, 'epoch': 1.96}
{'loss': 0.3968, 'learning_rate': 2.200447928331467e-05, 'epoch': 1.97}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.41173121333122253, 'eval_accuracy': 0.8476690741956664, 'eval_runtime': 2.8111, 'eval_samples_per_second': 541.787, 'eval_steps_per_second': 67.946, 'epoch': 1.97}
{'loss': 0.3765, 'learning_rate': 2.172452407614782e-05, 'epoch': 1.98}
{'loss': 0.2995, 'learning_rate': 2.1444568868980964e-05, 'epoch': 1.99}
{'loss': 0.3153, 'learning_rate': 2.1164613661814112e-05, 'epoch': 2.01}
{'loss': 0.241, 'learning_rate': 2.0884658454647257e-05, 'epoch': 2.02}
{'loss': 0.2604, 'learning_rate': 2.0604703247480402e-05, 'epoch': 2.03}
{'loss': 0.4166, 'learning_rate': 2.032474804031355e-05, 'epoch': 2.05}
{'loss': 0.511, 'learning_rate': 2.0044792833146696e-05, 'epoch': 2.06}
{'loss': 0.3425, 'learning_rate': 1.9764837625979844e-05, 'epoch': 2.07}
{'loss': 0.3853, 'learning_rate': 1.9484882418812992e-05, 'epoch': 2.09}
{'loss': 0.3815, 'learning_rate': 1.9204927211646137e-05, 'epoch': 2.1}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4390185475349426, 'eval_accuracy': 0.8345370978332239, 'eval_runtime': 2.5639, 'eval_samples_per_second': 594.026, 'eval_steps_per_second': 74.497, 'epoch': 2.1}
{'loss': 0.3104, 'learning_rate': 1.8924972004479286e-05, 'epoch': 2.11}
{'loss': 0.4301, 'learning_rate': 1.864501679731243e-05, 'epoch': 2.13}
{'loss': 0.1812, 'learning_rate': 1.8365061590145576e-05, 'epoch': 2.14}
{'loss': 0.3526, 'learning_rate': 1.8085106382978724e-05, 'epoch': 2.15}
{'loss': 0.3928, 'learning_rate': 1.7805151175811872e-05, 'epoch': 2.17}
{'loss': 0.3249, 'learning_rate': 1.7525195968645017e-05, 'epoch': 2.18}
{'loss': 0.1966, 'learning_rate': 1.7245240761478166e-05, 'epoch': 2.19}
{'loss': 0.3563, 'learning_rate': 1.696528555431131e-05, 'epoch': 2.2}
{'loss': 0.3502, 'learning_rate': 1.6685330347144456e-05, 'epoch': 2.22}
{'loss': 0.2476, 'learning_rate': 1.6405375139977604e-05, 'epoch': 2.23}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4626925587654114, 'eval_accuracy': 0.8417596848325674, 'eval_runtime': 2.5557, 'eval_samples_per_second': 595.923, 'eval_steps_per_second': 74.735, 'epoch': 2.23}
{'loss': 0.2861, 'learning_rate': 1.612541993281075e-05, 'epoch': 2.24}
{'loss': 0.2364, 'learning_rate': 1.5845464725643898e-05, 'epoch': 2.26}
{'loss': 0.1871, 'learning_rate': 1.5565509518477046e-05, 'epoch': 2.27}
{'loss': 0.2141, 'learning_rate': 1.528555431131019e-05, 'epoch': 2.28}
{'loss': 0.2472, 'learning_rate': 1.500559910414334e-05, 'epoch': 2.3}
{'loss': 0.3617, 'learning_rate': 1.4725643896976484e-05, 'epoch': 2.31}
{'loss': 0.5243, 'learning_rate': 1.4445688689809631e-05, 'epoch': 2.32}
{'loss': 0.2934, 'learning_rate': 1.416573348264278e-05, 'epoch': 2.34}
{'loss': 0.312, 'learning_rate': 1.3885778275475924e-05, 'epoch': 2.35}
{'loss': 0.3107, 'learning_rate': 1.3605823068309071e-05, 'epoch': 2.36}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4533139169216156, 'eval_accuracy': 0.8345370978332239, 'eval_runtime': 2.5682, 'eval_samples_per_second': 593.032, 'eval_steps_per_second': 74.372, 'epoch': 2.36}
{'loss': 0.2472, 'learning_rate': 1.332586786114222e-05, 'epoch': 2.38}
{'loss': 0.3635, 'learning_rate': 1.3045912653975364e-05, 'epoch': 2.39}
{'loss': 0.2863, 'learning_rate': 1.2765957446808511e-05, 'epoch': 2.4}
{'loss': 0.3207, 'learning_rate': 1.2486002239641658e-05, 'epoch': 2.41}
{'loss': 0.226, 'learning_rate': 1.2206047032474804e-05, 'epoch': 2.43}
{'loss': 0.2908, 'learning_rate': 1.1926091825307951e-05, 'epoch': 2.44}
{'loss': 0.3429, 'learning_rate': 1.16461366181411e-05, 'epoch': 2.45}
{'loss': 0.3001, 'learning_rate': 1.1366181410974244e-05, 'epoch': 2.47}
{'loss': 0.5673, 'learning_rate': 1.1086226203807391e-05, 'epoch': 2.48}
{'loss': 0.2406, 'learning_rate': 1.0806270996640538e-05, 'epoch': 2.49}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4416338801383972, 'eval_accuracy': 0.8417596848325674, 'eval_runtime': 2.7432, 'eval_samples_per_second': 555.198, 'eval_steps_per_second': 69.628, 'epoch': 2.49}
{'loss': 0.3175, 'learning_rate': 1.0526315789473684e-05, 'epoch': 2.51}
{'loss': 0.2955, 'learning_rate': 1.0246360582306831e-05, 'epoch': 2.52}
{'loss': 0.3863, 'learning_rate': 9.966405375139978e-06, 'epoch': 2.53}
{'loss': 0.373, 'learning_rate': 9.686450167973126e-06, 'epoch': 2.55}
{'loss': 0.3892, 'learning_rate': 9.406494960806271e-06, 'epoch': 2.56}
{'loss': 0.2138, 'learning_rate': 9.126539753639418e-06, 'epoch': 2.57}
{'loss': 0.3576, 'learning_rate': 8.846584546472565e-06, 'epoch': 2.59}
{'loss': 0.3619, 'learning_rate': 8.566629339305711e-06, 'epoch': 2.6}
{'loss': 0.2299, 'learning_rate': 8.286674132138858e-06, 'epoch': 2.61}
{'loss': 0.324, 'learning_rate': 8.006718924972005e-06, 'epoch': 2.62}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4463840425014496, 'eval_accuracy': 0.8384766907419566, 'eval_runtime': 2.5461, 'eval_samples_per_second': 598.176, 'eval_steps_per_second': 75.017, 'epoch': 2.62}
{'loss': 0.2128, 'learning_rate': 7.726763717805153e-06, 'epoch': 2.64}
{'loss': 0.3482, 'learning_rate': 7.446808510638298e-06, 'epoch': 2.65}
{'loss': 0.2295, 'learning_rate': 7.166853303471445e-06, 'epoch': 2.66}
{'loss': 0.3579, 'learning_rate': 6.886898096304592e-06, 'epoch': 2.68}
{'loss': 0.2569, 'learning_rate': 6.606942889137738e-06, 'epoch': 2.69}
{'loss': 0.3353, 'learning_rate': 6.326987681970885e-06, 'epoch': 2.7}
{'loss': 0.4472, 'learning_rate': 6.047032474804032e-06, 'epoch': 2.72}
{'loss': 0.2058, 'learning_rate': 5.767077267637178e-06, 'epoch': 2.73}
{'loss': 0.4826, 'learning_rate': 5.487122060470325e-06, 'epoch': 2.74}
{'loss': 0.2569, 'learning_rate': 5.207166853303471e-06, 'epoch': 2.76}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4711506962776184, 'eval_accuracy': 0.8365068942875903, 'eval_runtime': 2.6395, 'eval_samples_per_second': 577.003, 'eval_steps_per_second': 72.362, 'epoch': 2.76}
{'loss': 0.2897, 'learning_rate': 4.927211646136618e-06, 'epoch': 2.77}
{'loss': 0.2613, 'learning_rate': 4.647256438969766e-06, 'epoch': 2.78}
{'loss': 0.1884, 'learning_rate': 4.3673012318029114e-06, 'epoch': 2.8}
{'loss': 0.3324, 'learning_rate': 4.087346024636059e-06, 'epoch': 2.81}
{'loss': 0.3748, 'learning_rate': 3.8073908174692052e-06, 'epoch': 2.82}
{'loss': 0.4369, 'learning_rate': 3.5274356103023515e-06, 'epoch': 2.83}
{'loss': 0.3165, 'learning_rate': 3.2474804031354986e-06, 'epoch': 2.85}
{'loss': 0.3057, 'learning_rate': 2.9675251959686453e-06, 'epoch': 2.86}
{'loss': 0.2918, 'learning_rate': 2.6875699888017915e-06, 'epoch': 2.87}
{'loss': 0.2466, 'learning_rate': 2.4076147816349386e-06, 'epoch': 2.89}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4593885838985443, 'eval_accuracy': 0.8443860801050558, 'eval_runtime': 2.5544, 'eval_samples_per_second': 596.236, 'eval_steps_per_second': 74.774, 'epoch': 2.89}
{'loss': 0.442, 'learning_rate': 2.1276595744680853e-06, 'epoch': 2.9}
{'loss': 0.3417, 'learning_rate': 1.847704367301232e-06, 'epoch': 2.91}
{'loss': 0.3646, 'learning_rate': 1.5677491601343787e-06, 'epoch': 2.93}
{'loss': 0.4191, 'learning_rate': 1.2877939529675251e-06, 'epoch': 2.94}
{'loss': 0.3305, 'learning_rate': 1.0078387458006718e-06, 'epoch': 2.95}
{'loss': 0.2295, 'learning_rate': 7.278835386338187e-07, 'epoch': 2.97}
{'loss': 0.2297, 'learning_rate': 4.479283314669653e-07, 'epoch': 2.98}
{'loss': 0.369, 'learning_rate': 1.6797312430011197e-07, 'epoch': 2.99}
{'train_runtime': 155.5912, 'train_samples_per_second': 117.423, 'train_steps_per_second': 14.692, 'train_loss': 0.4024325771907496, 'epoch': 3.0}


TrainOutput(global_step=2286, training_loss=0.4024325771907496, metrics={'train_runtime': 155.5912, 'train_samples_per_second': 117.423, 'train_steps_per_second': 14.692, 'train_loss': 0.4024325771907496, 'epoch': 3.0})

In [10]:
# Read the test set once; this frame also supplies the ids for the submission file.
df_submission_test_data = pd.read_csv('test.csv')

# Build the model input with the same layout the training rows use:
#   "KEYWORD: <keyword> | LOCATION: <location> | TEXT: <text>"
# The KEYWORD / LOCATION segments are omitted when the value is missing (instead of being
# written as the literal string "nan"), and "%20" in keywords is decoded to a space.
# That way the model sees test inputs formatted exactly like its training inputs.
keyword_part = (
    'KEYWORD: '
    + df_submission_test_data['keyword'].astype(str).str.replace('%20', ' ', regex=False)
    + ' | '
).where(df_submission_test_data['keyword'].notna(), '')
location_part = (
    'LOCATION: ' + df_submission_test_data['location'].astype(str) + ' | '
).where(df_submission_test_data['location'].notna(), '')

df_test = df_submission_test_data.assign(
    combined=keyword_part + location_part + 'TEXT: ' + df_submission_test_data['text'].astype(str)
)
df_test = df_test.drop(columns=['id', 'keyword', 'location', 'text'])
# Has no effect unless test.csv carries a `target` column.
df_test = df_test.rename(columns={'target': 'label'})

test_data = Dataset.from_pandas(df_test)

test_ds = test_data.map(lambda e: tokenizer(e['combined'], padding='max_length', truncation=True, max_length=100), batched=True)

model.eval()

# Keep the raw prediction output and the class ids under separate names.
test_output = trainer.predict(test_ds)
preds = test_output.predictions.argmax(-1)

# Write the submission file: one predicted class per test id.
df_submission_test_data["target"] = preds
df_submission_test_data[["id", "target"]].to_csv("submission.csv", index=False)


Map:   0%|          | 0/3263 [00:00<?, ? examples/s]

  0%|          | 0/408 [00:00<?, ?it/s]

In [None]:
# Author's note: 83,5 (presumably the resulting score, ~83.5% — TODO confirm which metric and split)