In [25]:
import torch
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

kaggle=False
model_path="microsoft/deberta-base"

In [26]:
path='/kaggle/input/clickbait-dataset/clickbait_data.csv' if kaggle else 'train.csv'

df=pd.read_csv(path)
df

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1
...,...,...,...,...,...
7608,10869,,,Two giant cranes holding a bridge collapse int...,1
7609,10870,,,@aria_ahrary @TheTawniest The out of control w...,1
7610,10871,,,M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...,1
7611,10872,,,Police investigating after an e-bike collided ...,1


In [27]:
for i in tqdm(range(len(df))):
    combined=""
    if str(df.loc[i, 'keyword']) != 'nan':
        combined += 'KEYWORD: ' + str(df.loc[i, 'keyword']).replace('%20', ' ') + ' | '
    if str(df.loc[i, 'location']) != 'nan':
        combined += 'LOCATION: ' + str(df.loc[i, 'location']) + ' | '
    combined += 'TEXT: ' + df.loc[i, 'text']
    df.loc[i, 'combined'] = combined

# Drop all but combined and target
df = df.drop(columns=['id', 'keyword', 'location', 'text'])

#rename target to label
df = df.rename(columns={'target': 'label'})
df.sample(30)


  0%|          | 0/7613 [00:00<?, ?it/s]

Unnamed: 0,label,combined
2644,1,KEYWORD: destruction | TEXT: So you have a new...
2227,0,KEYWORD: deluge | TEXT: The f$&amp;@ing things...
5448,1,KEYWORD: police | LOCATION: UK | TEXT: DT @geo...
132,0,KEYWORD: aftershock | TEXT: Aftershock back to...
6845,0,"KEYWORD: trauma | LOCATION: Montgomery County,..."
5559,0,KEYWORD: rainstorm | TEXT: @Calum5SOS you look...
1765,1,KEYWORD: collision | TEXT: my favorite lady ca...
1817,1,KEYWORD: crashed | TEXT: @brianroemmele UX fai...
6810,0,"KEYWORD: tragedy | LOCATION: Los Angeles, CA |..."
4398,1,"KEYWORD: hijacking | LOCATION: Athens,Greece |..."


In [28]:
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])

In [29]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained(model_path)
model=AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'pooler.dense.weight', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [30]:
from datasets import Dataset

train_data=Dataset.from_pandas(train_df)
valid_data=Dataset.from_pandas(valid_df)

train_ds=train_data.map(lambda e: tokenizer(e['combined'], padding='max_length', truncation=True, max_length=100), batched=True)
valid_ds=valid_data.map(lambda e: tokenizer(e['combined'], padding='max_length', truncation=True, max_length=100), batched=True)

Map:   0%|          | 0/6090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1523 [00:00<?, ? examples/s]

In [31]:
train_ds[99]

{'label': 0,
 'combined': 'KEYWORD: explode | TEXT: Learn How I Gained Access To The Secrets Of The Top Earners &amp; Used Them To Explode My Home Business Here: http://t.co/SGXP1U5OL1 Please #RT',
 '__index_level_0__': 3398,
 'input_ids': [1,
  33754,
  771,
  11200,
  35,
  25931,
  1721,
  36367,
  35,
  10900,
  1336,
  38,
  48564,
  8076,
  598,
  20,
  34702,
  1525,
  20,
  3107,
  7535,
  268,
  359,
  3914,
  131,
  30374,
  28595,
  598,
  16161,
  4636,
  1308,
  2193,
  2090,
  1398,
  35,
  2054,
  640,
  90,
  4,
  876,
  73,
  33020,
  29269,
  134,
  791,
  245,
  3384,
  134,
  3401,
  849,
  13963,
  2,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
 

In [32]:
from transformers import Trainer, TrainingArguments



def accuracy(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps=100,
    save_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    save_total_limit=3,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    tokenizer=tokenizer,
    compute_metrics=accuracy
)

In [33]:
trainer.train()



  0%|          | 0/2286 [00:00<?, ?it/s]

You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.6873, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.01}
{'loss': 0.694, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.03}
{'loss': 0.6936, 'learning_rate': 3e-06, 'epoch': 0.04}
{'loss': 0.6895, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.05}
{'loss': 0.6921, 'learning_rate': 5e-06, 'epoch': 0.07}
{'loss': 0.6833, 'learning_rate': 6e-06, 'epoch': 0.08}
{'loss': 0.6646, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.09}
{'loss': 0.638, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.1}
{'loss': 0.5671, 'learning_rate': 9e-06, 'epoch': 0.12}
{'loss': 0.5279, 'learning_rate': 1e-05, 'epoch': 0.13}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4602625370025635, 'eval_accuracy': 0.8003939592908733, 'eval_runtime': 2.5089, 'eval_samples_per_second': 607.049, 'eval_steps_per_second': 76.13, 'epoch': 0.13}
{'loss': 0.4104, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.14}
{'loss': 0.767, 'learning_rate': 1.2e-05, 'epoch': 0.16}
{'loss': 0.4634, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.17}
{'loss': 0.4763, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.18}
{'loss': 0.5841, 'learning_rate': 1.5e-05, 'epoch': 0.2}
{'loss': 0.521, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.21}
{'loss': 0.4193, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.22}
{'loss': 0.5349, 'learning_rate': 1.8e-05, 'epoch': 0.24}
{'loss': 0.4247, 'learning_rate': 1.9e-05, 'epoch': 0.25}
{'loss': 0.5027, 'learning_rate': 2e-05, 'epoch': 0.26}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.575931966304779, 'eval_accuracy': 0.7918581746552856, 'eval_runtime': 2.5059, 'eval_samples_per_second': 607.758, 'eval_steps_per_second': 76.219, 'epoch': 0.26}
{'loss': 0.391, 'learning_rate': 2.1e-05, 'epoch': 0.28}
{'loss': 0.5973, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.29}
{'loss': 0.455, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.3}
{'loss': 0.4524, 'learning_rate': 2.4e-05, 'epoch': 0.31}
{'loss': 0.4177, 'learning_rate': 2.5e-05, 'epoch': 0.33}
{'loss': 0.484, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.34}
{'loss': 0.5016, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.35}
{'loss': 0.5183, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.37}
{'loss': 0.5671, 'learning_rate': 2.9e-05, 'epoch': 0.38}
{'loss': 0.3881, 'learning_rate': 3e-05, 'epoch': 0.39}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4476679563522339, 'eval_accuracy': 0.8292843072882469, 'eval_runtime': 2.4882, 'eval_samples_per_second': 612.088, 'eval_steps_per_second': 76.762, 'epoch': 0.39}
{'loss': 0.4957, 'learning_rate': 3.1e-05, 'epoch': 0.41}
{'loss': 0.4384, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.42}
{'loss': 0.6044, 'learning_rate': 3.3e-05, 'epoch': 0.43}
{'loss': 0.424, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.45}
{'loss': 0.43, 'learning_rate': 3.5e-05, 'epoch': 0.46}
{'loss': 0.5227, 'learning_rate': 3.6e-05, 'epoch': 0.47}
{'loss': 0.6213, 'learning_rate': 3.7e-05, 'epoch': 0.49}
{'loss': 0.4786, 'learning_rate': 3.8e-05, 'epoch': 0.5}
{'loss': 0.3107, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.51}
{'loss': 0.7579, 'learning_rate': 4e-05, 'epoch': 0.52}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4627667963504791, 'eval_accuracy': 0.8115561391989494, 'eval_runtime': 2.4909, 'eval_samples_per_second': 611.428, 'eval_steps_per_second': 76.679, 'epoch': 0.52}
{'loss': 0.4655, 'learning_rate': 4.1e-05, 'epoch': 0.54}
{'loss': 0.3937, 'learning_rate': 4.2e-05, 'epoch': 0.55}
{'loss': 0.44, 'learning_rate': 4.3e-05, 'epoch': 0.56}
{'loss': 0.4896, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.58}
{'loss': 0.5156, 'learning_rate': 4.5e-05, 'epoch': 0.59}
{'loss': 0.2882, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.6}
{'loss': 0.4719, 'learning_rate': 4.7e-05, 'epoch': 0.62}
{'loss': 0.5544, 'learning_rate': 4.8e-05, 'epoch': 0.63}
{'loss': 0.567, 'learning_rate': 4.9e-05, 'epoch': 0.64}
{'loss': 0.6247, 'learning_rate': 5e-05, 'epoch': 0.66}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5426943302154541, 'eval_accuracy': 0.8174655285620486, 'eval_runtime': 2.4885, 'eval_samples_per_second': 612.005, 'eval_steps_per_second': 76.752, 'epoch': 0.66}
{'loss': 0.6646, 'learning_rate': 4.972004479283315e-05, 'epoch': 0.67}
{'loss': 0.7138, 'learning_rate': 4.944008958566629e-05, 'epoch': 0.68}
{'loss': 0.7118, 'learning_rate': 4.916013437849945e-05, 'epoch': 0.7}
{'loss': 0.5449, 'learning_rate': 4.888017917133259e-05, 'epoch': 0.71}
{'loss': 0.5149, 'learning_rate': 4.860022396416574e-05, 'epoch': 0.72}
{'loss': 0.5394, 'learning_rate': 4.832026875699888e-05, 'epoch': 0.73}
{'loss': 0.5327, 'learning_rate': 4.804031354983203e-05, 'epoch': 0.75}
{'loss': 0.5339, 'learning_rate': 4.7760358342665176e-05, 'epoch': 0.76}
{'loss': 0.6724, 'learning_rate': 4.7480403135498324e-05, 'epoch': 0.77}
{'loss': 0.5715, 'learning_rate': 4.720044792833147e-05, 'epoch': 0.79}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5172219276428223, 'eval_accuracy': 0.7419566644780039, 'eval_runtime': 2.5021, 'eval_samples_per_second': 608.678, 'eval_steps_per_second': 76.335, 'epoch': 0.79}
{'loss': 0.5672, 'learning_rate': 4.6920492721164614e-05, 'epoch': 0.8}
{'loss': 0.5076, 'learning_rate': 4.664053751399776e-05, 'epoch': 0.81}
{'loss': 0.5317, 'learning_rate': 4.6360582306830904e-05, 'epoch': 0.83}
{'loss': 0.5736, 'learning_rate': 4.608062709966405e-05, 'epoch': 0.84}
{'loss': 0.6675, 'learning_rate': 4.580067189249721e-05, 'epoch': 0.85}
{'loss': 0.5369, 'learning_rate': 4.552071668533035e-05, 'epoch': 0.87}
{'loss': 0.5085, 'learning_rate': 4.52407614781635e-05, 'epoch': 0.88}
{'loss': 0.5845, 'learning_rate': 4.496080627099664e-05, 'epoch': 0.89}
{'loss': 0.4196, 'learning_rate': 4.468085106382979e-05, 'epoch': 0.91}
{'loss': 0.5945, 'learning_rate': 4.4400895856662936e-05, 'epoch': 0.92}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5045973062515259, 'eval_accuracy': 0.7905449770190414, 'eval_runtime': 2.5096, 'eval_samples_per_second': 606.86, 'eval_steps_per_second': 76.107, 'epoch': 0.92}
{'loss': 0.4841, 'learning_rate': 4.4120940649496084e-05, 'epoch': 0.93}
{'loss': 0.5378, 'learning_rate': 4.384098544232923e-05, 'epoch': 0.94}
{'loss': 0.5918, 'learning_rate': 4.3561030235162374e-05, 'epoch': 0.96}
{'loss': 0.4601, 'learning_rate': 4.328107502799552e-05, 'epoch': 0.97}
{'loss': 0.3048, 'learning_rate': 4.3001119820828664e-05, 'epoch': 0.98}
{'loss': 0.5952, 'learning_rate': 4.272116461366182e-05, 'epoch': 1.0}
{'loss': 0.6211, 'learning_rate': 4.244120940649496e-05, 'epoch': 1.01}
{'loss': 0.6546, 'learning_rate': 4.216125419932811e-05, 'epoch': 1.02}
{'loss': 0.6272, 'learning_rate': 4.188129899216126e-05, 'epoch': 1.04}
{'loss': 0.6272, 'learning_rate': 4.16013437849944e-05, 'epoch': 1.05}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5293205976486206, 'eval_accuracy': 0.8108995403808273, 'eval_runtime': 2.5065, 'eval_samples_per_second': 607.63, 'eval_steps_per_second': 76.203, 'epoch': 1.05}
{'loss': 0.5107, 'learning_rate': 4.1321388577827555e-05, 'epoch': 1.06}
{'loss': 0.4419, 'learning_rate': 4.1041433370660696e-05, 'epoch': 1.08}
{'loss': 0.5209, 'learning_rate': 4.0761478163493845e-05, 'epoch': 1.09}
{'loss': 0.5502, 'learning_rate': 4.0481522956326986e-05, 'epoch': 1.1}
{'loss': 0.3597, 'learning_rate': 4.0201567749160135e-05, 'epoch': 1.12}
{'loss': 0.5157, 'learning_rate': 3.992161254199328e-05, 'epoch': 1.13}
{'loss': 0.4006, 'learning_rate': 3.964165733482643e-05, 'epoch': 1.14}
{'loss': 0.7935, 'learning_rate': 3.936170212765958e-05, 'epoch': 1.15}
{'loss': 0.4403, 'learning_rate': 3.908174692049272e-05, 'epoch': 1.17}
{'loss': 0.4501, 'learning_rate': 3.880179171332587e-05, 'epoch': 1.18}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4992622137069702, 'eval_accuracy': 0.8036769533814839, 'eval_runtime': 2.4694, 'eval_samples_per_second': 616.737, 'eval_steps_per_second': 77.345, 'epoch': 1.18}
{'loss': 0.4428, 'learning_rate': 3.852183650615902e-05, 'epoch': 1.19}
{'loss': 0.5685, 'learning_rate': 3.824188129899216e-05, 'epoch': 1.21}
{'loss': 0.4526, 'learning_rate': 3.7961926091825315e-05, 'epoch': 1.22}
{'loss': 0.4734, 'learning_rate': 3.7681970884658456e-05, 'epoch': 1.23}
{'loss': 0.5283, 'learning_rate': 3.7402015677491605e-05, 'epoch': 1.25}
{'loss': 0.5293, 'learning_rate': 3.7122060470324746e-05, 'epoch': 1.26}
{'loss': 0.4671, 'learning_rate': 3.6842105263157895e-05, 'epoch': 1.27}
{'loss': 0.5012, 'learning_rate': 3.656215005599104e-05, 'epoch': 1.29}
{'loss': 0.5348, 'learning_rate': 3.628219484882419e-05, 'epoch': 1.3}
{'loss': 0.5624, 'learning_rate': 3.600223964165734e-05, 'epoch': 1.31}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.5331447720527649, 'eval_accuracy': 0.7669074195666448, 'eval_runtime': 2.4861, 'eval_samples_per_second': 612.595, 'eval_steps_per_second': 76.826, 'epoch': 1.31}
{'loss': 0.5965, 'learning_rate': 3.572228443449048e-05, 'epoch': 1.33}
{'loss': 0.5916, 'learning_rate': 3.544232922732363e-05, 'epoch': 1.34}
{'loss': 0.4532, 'learning_rate': 3.516237402015677e-05, 'epoch': 1.35}
{'loss': 0.5503, 'learning_rate': 3.4882418812989927e-05, 'epoch': 1.36}
{'loss': 0.7325, 'learning_rate': 3.460246360582307e-05, 'epoch': 1.38}
{'loss': 0.4957, 'learning_rate': 3.4322508398656217e-05, 'epoch': 1.39}
{'loss': 0.4261, 'learning_rate': 3.4042553191489365e-05, 'epoch': 1.4}
{'loss': 0.6446, 'learning_rate': 3.3762597984322507e-05, 'epoch': 1.42}
{'loss': 0.5454, 'learning_rate': 3.348264277715566e-05, 'epoch': 1.43}
{'loss': 0.4414, 'learning_rate': 3.32026875699888e-05, 'epoch': 1.44}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.46069371700286865, 'eval_accuracy': 0.8200919238345371, 'eval_runtime': 2.4698, 'eval_samples_per_second': 616.653, 'eval_steps_per_second': 77.335, 'epoch': 1.44}
{'loss': 0.3599, 'learning_rate': 3.292273236282195e-05, 'epoch': 1.46}
{'loss': 0.4826, 'learning_rate': 3.264277715565509e-05, 'epoch': 1.47}
{'loss': 0.5929, 'learning_rate': 3.236282194848824e-05, 'epoch': 1.48}
{'loss': 0.4745, 'learning_rate': 3.208286674132139e-05, 'epoch': 1.5}
{'loss': 0.5821, 'learning_rate': 3.180291153415454e-05, 'epoch': 1.51}
{'loss': 0.4296, 'learning_rate': 3.152295632698769e-05, 'epoch': 1.52}
{'loss': 0.4041, 'learning_rate': 3.124300111982083e-05, 'epoch': 1.54}
{'loss': 0.4749, 'learning_rate': 3.096304591265398e-05, 'epoch': 1.55}
{'loss': 0.4484, 'learning_rate': 3.0683090705487125e-05, 'epoch': 1.56}
{'loss': 0.4927, 'learning_rate': 3.0403135498320267e-05, 'epoch': 1.57}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.46368300914764404, 'eval_accuracy': 0.8154957321076822, 'eval_runtime': 2.5345, 'eval_samples_per_second': 600.91, 'eval_steps_per_second': 75.36, 'epoch': 1.57}
{'loss': 0.4728, 'learning_rate': 3.012318029115342e-05, 'epoch': 1.59}
{'loss': 0.4878, 'learning_rate': 2.9843225083986563e-05, 'epoch': 1.6}
{'loss': 0.6011, 'learning_rate': 2.9563269876819712e-05, 'epoch': 1.61}
{'loss': 0.494, 'learning_rate': 2.9283314669652857e-05, 'epoch': 1.63}
{'loss': 0.5616, 'learning_rate': 2.9003359462486002e-05, 'epoch': 1.64}
{'loss': 0.374, 'learning_rate': 2.8723404255319154e-05, 'epoch': 1.65}
{'loss': 0.297, 'learning_rate': 2.84434490481523e-05, 'epoch': 1.67}
{'loss': 0.5574, 'learning_rate': 2.8163493840985444e-05, 'epoch': 1.68}
{'loss': 0.4191, 'learning_rate': 2.788353863381859e-05, 'epoch': 1.69}
{'loss': 0.4162, 'learning_rate': 2.7603583426651737e-05, 'epoch': 1.71}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4861367344856262, 'eval_accuracy': 0.8049901510177282, 'eval_runtime': 2.5101, 'eval_samples_per_second': 606.746, 'eval_steps_per_second': 76.092, 'epoch': 1.71}
{'loss': 0.4991, 'learning_rate': 2.7323628219484882e-05, 'epoch': 1.72}
{'loss': 0.3479, 'learning_rate': 2.7043673012318034e-05, 'epoch': 1.73}
{'loss': 0.5534, 'learning_rate': 2.676371780515118e-05, 'epoch': 1.75}
{'loss': 0.4781, 'learning_rate': 2.6483762597984324e-05, 'epoch': 1.76}
{'loss': 0.4593, 'learning_rate': 2.620380739081747e-05, 'epoch': 1.77}
{'loss': 0.5344, 'learning_rate': 2.5923852183650617e-05, 'epoch': 1.78}
{'loss': 0.4558, 'learning_rate': 2.5643896976483762e-05, 'epoch': 1.8}
{'loss': 0.4778, 'learning_rate': 2.5363941769316914e-05, 'epoch': 1.81}
{'loss': 0.3903, 'learning_rate': 2.508398656215006e-05, 'epoch': 1.82}
{'loss': 0.4687, 'learning_rate': 2.4804031354983204e-05, 'epoch': 1.84}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4752233326435089, 'eval_accuracy': 0.8319107025607354, 'eval_runtime': 2.5123, 'eval_samples_per_second': 606.221, 'eval_steps_per_second': 76.026, 'epoch': 1.84}
{'loss': 0.5354, 'learning_rate': 2.452407614781635e-05, 'epoch': 1.85}
{'loss': 0.3669, 'learning_rate': 2.4244120940649497e-05, 'epoch': 1.86}
{'loss': 0.3944, 'learning_rate': 2.3964165733482642e-05, 'epoch': 1.88}
{'loss': 0.4151, 'learning_rate': 2.368421052631579e-05, 'epoch': 1.89}
{'loss': 0.3, 'learning_rate': 2.340425531914894e-05, 'epoch': 1.9}
{'loss': 0.5225, 'learning_rate': 2.3124300111982084e-05, 'epoch': 1.92}
{'loss': 0.3276, 'learning_rate': 2.2844344904815232e-05, 'epoch': 1.93}
{'loss': 0.4129, 'learning_rate': 2.2564389697648377e-05, 'epoch': 1.94}
{'loss': 0.6279, 'learning_rate': 2.2284434490481522e-05, 'epoch': 1.96}
{'loss': 0.472, 'learning_rate': 2.200447928331467e-05, 'epoch': 1.97}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.46065449714660645, 'eval_accuracy': 0.8345370978332239, 'eval_runtime': 2.3819, 'eval_samples_per_second': 639.412, 'eval_steps_per_second': 80.189, 'epoch': 1.97}
{'loss': 0.3398, 'learning_rate': 2.172452407614782e-05, 'epoch': 1.98}
{'loss': 0.3418, 'learning_rate': 2.1444568868980964e-05, 'epoch': 1.99}
{'loss': 0.702, 'learning_rate': 2.1164613661814112e-05, 'epoch': 2.01}
{'loss': 0.3303, 'learning_rate': 2.0884658454647257e-05, 'epoch': 2.02}
{'loss': 0.3326, 'learning_rate': 2.0604703247480402e-05, 'epoch': 2.03}
{'loss': 0.6649, 'learning_rate': 2.032474804031355e-05, 'epoch': 2.05}
{'loss': 0.4539, 'learning_rate': 2.0044792833146696e-05, 'epoch': 2.06}
{'loss': 0.4323, 'learning_rate': 1.9764837625979844e-05, 'epoch': 2.07}
{'loss': 0.57, 'learning_rate': 1.9484882418812992e-05, 'epoch': 2.09}
{'loss': 0.4206, 'learning_rate': 1.9204927211646137e-05, 'epoch': 2.1}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.45058661699295044, 'eval_accuracy': 0.8305975049244911, 'eval_runtime': 2.4096, 'eval_samples_per_second': 632.064, 'eval_steps_per_second': 79.267, 'epoch': 2.1}
{'loss': 0.505, 'learning_rate': 1.8924972004479286e-05, 'epoch': 2.11}
{'loss': 0.4575, 'learning_rate': 1.864501679731243e-05, 'epoch': 2.13}
{'loss': 0.3137, 'learning_rate': 1.8365061590145576e-05, 'epoch': 2.14}
{'loss': 0.443, 'learning_rate': 1.8085106382978724e-05, 'epoch': 2.15}
{'loss': 0.4761, 'learning_rate': 1.7805151175811872e-05, 'epoch': 2.17}
{'loss': 0.4271, 'learning_rate': 1.7525195968645017e-05, 'epoch': 2.18}
{'loss': 0.4981, 'learning_rate': 1.7245240761478166e-05, 'epoch': 2.19}
{'loss': 0.5141, 'learning_rate': 1.696528555431131e-05, 'epoch': 2.2}
{'loss': 0.4324, 'learning_rate': 1.6685330347144456e-05, 'epoch': 2.22}
{'loss': 0.4326, 'learning_rate': 1.6405375139977604e-05, 'epoch': 2.23}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4625377655029297, 'eval_accuracy': 0.8063033486539725, 'eval_runtime': 2.4039, 'eval_samples_per_second': 633.562, 'eval_steps_per_second': 79.455, 'epoch': 2.23}
{'loss': 0.3405, 'learning_rate': 1.612541993281075e-05, 'epoch': 2.24}
{'loss': 0.3782, 'learning_rate': 1.5845464725643898e-05, 'epoch': 2.26}
{'loss': 0.3218, 'learning_rate': 1.5565509518477046e-05, 'epoch': 2.27}
{'loss': 0.3381, 'learning_rate': 1.528555431131019e-05, 'epoch': 2.28}
{'loss': 0.4586, 'learning_rate': 1.500559910414334e-05, 'epoch': 2.3}
{'loss': 0.5744, 'learning_rate': 1.4725643896976484e-05, 'epoch': 2.31}
{'loss': 0.6632, 'learning_rate': 1.4445688689809631e-05, 'epoch': 2.32}
{'loss': 0.28, 'learning_rate': 1.416573348264278e-05, 'epoch': 2.34}
{'loss': 0.3138, 'learning_rate': 1.3885778275475924e-05, 'epoch': 2.35}
{'loss': 0.4468, 'learning_rate': 1.3605823068309071e-05, 'epoch': 2.36}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.526504397392273, 'eval_accuracy': 0.8023637557452397, 'eval_runtime': 2.8491, 'eval_samples_per_second': 534.557, 'eval_steps_per_second': 67.039, 'epoch': 2.36}
{'loss': 0.5548, 'learning_rate': 1.332586786114222e-05, 'epoch': 2.38}
{'loss': 0.3703, 'learning_rate': 1.3045912653975364e-05, 'epoch': 2.39}
{'loss': 0.3697, 'learning_rate': 1.2765957446808511e-05, 'epoch': 2.4}
{'loss': 0.4193, 'learning_rate': 1.2486002239641658e-05, 'epoch': 2.41}
{'loss': 0.3878, 'learning_rate': 1.2206047032474804e-05, 'epoch': 2.43}
{'loss': 0.4554, 'learning_rate': 1.1926091825307951e-05, 'epoch': 2.44}
{'loss': 0.4352, 'learning_rate': 1.16461366181411e-05, 'epoch': 2.45}
{'loss': 0.3951, 'learning_rate': 1.1366181410974244e-05, 'epoch': 2.47}
{'loss': 0.5697, 'learning_rate': 1.1086226203807391e-05, 'epoch': 2.48}
{'loss': 0.3439, 'learning_rate': 1.0806270996640538e-05, 'epoch': 2.49}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.47232168912887573, 'eval_accuracy': 0.8220617202889035, 'eval_runtime': 2.8518, 'eval_samples_per_second': 534.052, 'eval_steps_per_second': 66.976, 'epoch': 2.49}
{'loss': 0.4694, 'learning_rate': 1.0526315789473684e-05, 'epoch': 2.51}
{'loss': 0.4249, 'learning_rate': 1.0246360582306831e-05, 'epoch': 2.52}
{'loss': 0.4523, 'learning_rate': 9.966405375139978e-06, 'epoch': 2.53}
{'loss': 0.4007, 'learning_rate': 9.686450167973126e-06, 'epoch': 2.55}
{'loss': 0.418, 'learning_rate': 9.406494960806271e-06, 'epoch': 2.56}
{'loss': 0.3259, 'learning_rate': 9.126539753639418e-06, 'epoch': 2.57}
{'loss': 0.4647, 'learning_rate': 8.846584546472565e-06, 'epoch': 2.59}
{'loss': 0.3039, 'learning_rate': 8.566629339305711e-06, 'epoch': 2.6}
{'loss': 0.3813, 'learning_rate': 8.286674132138858e-06, 'epoch': 2.61}
{'loss': 0.411, 'learning_rate': 8.006718924972005e-06, 'epoch': 2.62}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4852442443370819, 'eval_accuracy': 0.8200919238345371, 'eval_runtime': 2.7415, 'eval_samples_per_second': 555.529, 'eval_steps_per_second': 69.669, 'epoch': 2.62}
{'loss': 0.4487, 'learning_rate': 7.726763717805153e-06, 'epoch': 2.64}
{'loss': 0.4371, 'learning_rate': 7.446808510638298e-06, 'epoch': 2.65}
{'loss': 0.4288, 'learning_rate': 7.166853303471445e-06, 'epoch': 2.66}
{'loss': 0.4273, 'learning_rate': 6.886898096304592e-06, 'epoch': 2.68}
{'loss': 0.284, 'learning_rate': 6.606942889137738e-06, 'epoch': 2.69}
{'loss': 0.4494, 'learning_rate': 6.326987681970885e-06, 'epoch': 2.7}
{'loss': 0.6459, 'learning_rate': 6.047032474804032e-06, 'epoch': 2.72}
{'loss': 0.332, 'learning_rate': 5.767077267637178e-06, 'epoch': 2.73}
{'loss': 0.5336, 'learning_rate': 5.487122060470325e-06, 'epoch': 2.74}
{'loss': 0.2934, 'learning_rate': 5.207166853303471e-06, 'epoch': 2.76}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.4370654225349426, 'eval_accuracy': 0.835193696651346, 'eval_runtime': 2.5035, 'eval_samples_per_second': 608.353, 'eval_steps_per_second': 76.294, 'epoch': 2.76}
{'loss': 0.3705, 'learning_rate': 4.927211646136618e-06, 'epoch': 2.77}
{'loss': 0.3648, 'learning_rate': 4.647256438969766e-06, 'epoch': 2.78}
{'loss': 0.2407, 'learning_rate': 4.3673012318029114e-06, 'epoch': 2.8}
{'loss': 0.3872, 'learning_rate': 4.087346024636059e-06, 'epoch': 2.81}
{'loss': 0.3959, 'learning_rate': 3.8073908174692052e-06, 'epoch': 2.82}
{'loss': 0.428, 'learning_rate': 3.5274356103023515e-06, 'epoch': 2.83}
{'loss': 0.3743, 'learning_rate': 3.2474804031354986e-06, 'epoch': 2.85}
{'loss': 0.3938, 'learning_rate': 2.9675251959686453e-06, 'epoch': 2.86}
{'loss': 0.3283, 'learning_rate': 2.6875699888017915e-06, 'epoch': 2.87}
{'loss': 0.4209, 'learning_rate': 2.4076147816349386e-06, 'epoch': 2.89}


  0%|          | 0/191 [00:00<?, ?it/s]

{'eval_loss': 0.44819602370262146, 'eval_accuracy': 0.8286277084701248, 'eval_runtime': 2.4915, 'eval_samples_per_second': 611.286, 'eval_steps_per_second': 76.662, 'epoch': 2.89}
{'loss': 0.5159, 'learning_rate': 2.1276595744680853e-06, 'epoch': 2.9}
{'loss': 0.3155, 'learning_rate': 1.847704367301232e-06, 'epoch': 2.91}
{'loss': 0.5238, 'learning_rate': 1.5677491601343787e-06, 'epoch': 2.93}
{'loss': 0.5116, 'learning_rate': 1.2877939529675251e-06, 'epoch': 2.94}
{'loss': 0.347, 'learning_rate': 1.0078387458006718e-06, 'epoch': 2.95}
{'loss': 0.3027, 'learning_rate': 7.278835386338187e-07, 'epoch': 2.97}
{'loss': 0.2201, 'learning_rate': 4.479283314669653e-07, 'epoch': 2.98}
{'loss': 0.4723, 'learning_rate': 1.6797312430011197e-07, 'epoch': 2.99}
{'train_runtime': 183.9435, 'train_samples_per_second': 99.324, 'train_steps_per_second': 12.428, 'train_loss': 0.4819971496664633, 'epoch': 3.0}


TrainOutput(global_step=2286, training_loss=0.4819971496664633, metrics={'train_runtime': 183.9435, 'train_samples_per_second': 99.324, 'train_steps_per_second': 12.428, 'train_loss': 0.4819971496664633, 'epoch': 3.0})

In [39]:
df_test=pd.read_csv('test.csv')
df_test['combined'] = 'KEYWORD: ' + df_test['keyword'].astype(str) + ' | LOCATION: ' + df_test['location'].astype(str) + ' | TEXT: ' + df_test['text'].astype(str)
df_test = df_test.drop(columns=['id', 'keyword', 'location', 'text'])
df_test = df_test.rename(columns={'target': 'label'})


test_data=Dataset.from_pandas(df_test)

test_ds=test_data.map(lambda e: tokenizer(e['combined'], padding='max_length', truncation=True, max_length=100), batched=True)

model.eval()
preds=[]

preds=trainer.predict(test_ds)
preds=preds.predictions.argmax(-1)
df_submission_test_data=pd.read_csv('test.csv')
df_submission_test_data["target"]=preds
df_submission_test_data[["id", "target"]].to_csv("submission.csv", index=False)


Map:   0%|          | 0/3263 [00:00<?, ? examples/s]

  0%|          | 0/408 [00:00<?, ?it/s]

In [None]:
#81%