In [1]:
from datasets import load_dataset

# Load the review CSV as a single "train" split.
# The path is a raw string literal so that the Windows separators are never
# interpreted as escape sequences ("\d" and "\H" are invalid escapes that only
# produced the intended text by accident and trigger a warning on newer Pythons).
review_dataset = load_dataset(
    "csv",
    data_files=r"..\datasets\Hindi Product Review.csv",
    split="train",
)

Found cached dataset csv (C:/Users/arifa/.cache/huggingface/datasets/csv/default-67df0aacd0686b67/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


In [2]:
# Display the dataset summary (column names and number of rows).
review_dataset

Dataset({
    features: ['Text', 'Label'],
    num_rows: 3296
})

In [3]:
# Switch the dataset's output format to pandas so that the following cells can
# use pandas helpers (value_counts, .str) on its columns.
review_dataset.set_format("pandas")

In [4]:
# Count how often each distinct value occurs in the "Label" column.
# NOTE(review): the counts displayed in the next cell contain a stray "Label"
# value next to neutral/positive/negative, so num_labels includes it as well --
# confirm whether the CSV holds repeated header rows.
label_counts = review_dataset["Label"].value_counts()
num_labels = len(label_counts)

In [5]:
# Display the per-value counts of the "Label" column computed above.
label_counts

Label
neutral     1424
positive    1413
negative     457
Label          2
Name: count, dtype: int64

In [6]:
# Length of every review text, then the largest of those lengths.
# NOTE: str.len() counts characters, not tokenizer tokens, despite the name.
text_lengths = review_dataset['Text'].str.len()
max_token_length = max(text_lengths)
max_token_length

114

In [7]:
# Undo the pandas output format selected earlier before the dataset is tokenized.
review_dataset.reset_format()

In [8]:
from transformers import set_seed

# Fix the random seed through transformers.set_seed so runs are repeatable.
set_seed(30)

In [9]:
from transformers import BertForSequenceClassification, BertConfig, CharacterBertModel, CharacterBertTokenizer

#### LOADING BERT FOR CLASSIFICATION ####

# Take the bert-base-uncased configuration and request a two-way output head.
# The classifier is then constructed from that configuration only; no
# pretrained weights are loaded by this constructor call.
config = BertConfig.from_pretrained(
    'bert-base-uncased',
    num_labels=2,  # binary classification
)
model = BertForSequenceClassification(config)

In [10]:
#### REPLACING BERT WITH CHARACTER_BERT ####

# Load the pretrained CharacterBERT encoder from the local checkpoint folder and
# use it as the encoder of the classification model built above.
# The path is a raw string literal: the original relied on "\D", "\C", "\H" and
# "\c" being invalid escape sequences that Python happens to keep verbatim
# (with a warning on newer versions). The resulting path text is unchanged.
character_bert_model = CharacterBertModel.from_pretrained(
    r"E:\Documents\Character Bert\Hate Speech\character-bert-hindi")
model.bert = character_bert_model

Some weights of the model checkpoint at E:\Documents\Character Bert\Hate Speech\character-bert-hindi were not used when initializing CharacterBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing CharacterBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CharacterBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
# Tokenizer used to encode the review text for the model. Both normalisation
# options are passed as None rather than an explicit True/False.
tokenizer = CharacterBertTokenizer(
    do_lower_case=None,
    strip_accents=None,
)

In [12]:
def tokenize_function(example):
    """Tokenize the ``"Text"`` field of ``example`` with the notebook's tokenizer.

    Truncation is enabled; whatever the tokenizer returns is handed back
    unchanged.
    """
    texts = example["Text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

In [13]:
from transformers import DataCollatorWithPadding

# Collator that pads the examples of a batch, using the same tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Tokenize all reviews in batches and drop the raw "Text" column, so that only
# the label and the tokenizer outputs remain.
tokenized_dataset = review_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["Text"],
)

Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-eee8e2d03c0c4a6d.arrow


In [14]:
# Display the columns and row count after tokenization.
tokenized_dataset

Dataset({
    features: ['Label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 3296
})

In [15]:
# Keep only the rows used for binary sentiment classification: drop rows whose
# label is the literal string "Label" and drop all "neutral" reviews.
# One pass is enough (the former first pass was fully covered by the second),
# and the predicate now returns a real boolean instead of "the row or None".
tokenized_dataset = tokenized_dataset.filter(
    lambda example: example["Label"] not in ("Label", "neutral")
)

Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-942158a257179d18.arrow
Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-ded00301aa5bbdc4.arrow


In [16]:
# Display the columns and row count after filtering.
tokenized_dataset

Dataset({
    features: ['Label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 1870
})

In [17]:
def assign_label(example):
    """Replace the string in ``example['Label']`` with its integer class id.

    "positive" becomes 0 and "negative" becomes 1. The example is modified in
    place and returned. Any other label (for instance "neutral") raises
    ``KeyError``.
    """
    label_to_id = {"positive": 0, "negative": 1}
    text_label = example['Label']
    example['Label'] = label_to_id[text_label]
    return example

In [18]:
# Turn the string labels into integer ids, then expose them under the column
# name "labels".
tokenized_dataset = (
    tokenized_dataset
    .map(assign_label)
    .rename_column("Label", "labels")
)
# From here on the dataset returns torch tensors.
tokenized_dataset.set_format("torch")
tokenized_dataset.column_names

Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-441ba4da58a9432c.arrow


['labels', 'input_ids', 'token_type_ids', 'attention_mask']

In [19]:
# Sanity check: collate the first 20 tokenized examples into one padded batch
# and print every row decoded back to text.
# (The bare `samples` expression that used to sit here was a no-op: only the
# last expression of a cell is displayed.)
samples = [tokenized_dataset[i] for i in range(20)]

for chunk in data_collator(samples)["input_ids"]:
    print(f"\n'>>> {tokenizer.decode(chunk)}'")


'>>> [CLS] गेम कई बार मुश्किल मालूम देता है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] मल्टीप्लेयर में छोटी स्टोरीलाइन है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] दिखने में बड़ा, पर वजन में हल्का और हाथ से आसानी से फिसलता नहीं । [SEP] [PAD] [PAD] [PAD]'

'>>> [CLS] देवघर शांति और भाईचारे का प्रतीक है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] सैमसंग के टचविज़ यूजर इंटरफ़ेस में कस्टमाइजेशन के लिए पर्याप्त स्कोप है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] कुलमिलाकर कहा जा सकता है कि एपल वॉच को इस्तेमाल करने में खुशी होती है । [SEP] [PAD] [PAD] [PAD]'

'>>> [CLS] किकस्टैंड से इसकी उपयोगिता और भी ज्यादा हो जाती है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] फुजीफिल्म मिनी 50एस सेल्फी टाइमर के साथ आया है । [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

'>>> [CLS] पॉपुला सिंगर अंकित तिवारी भी न

In [20]:
# Show the first 20 rows of the unfiltered source dataset.
review_dataset[:20]

{'Text': ['गेम कई बार मुश्किल मालूम देता है ।',
  'मल्टीप्लेयर में छोटी स्टोरीलाइन है ।',
  'फेसबुक का सिक्योरिटी चैकअप फीचर पॉपअप की तरह यूजर्स को दिखाइ देगा ।',
  'दिखने में बड़ा , पर वजन में हल्का और हाथ से आसानी से फिसलता नहीं ।',
  'ये दोनों टैबलेट एंड्रॉइड के 4.2.2 वर्जन ऑपरेटिंग सिस्टम पर काम करेंगे ।',
  'इस टेबलेट में सामने की तरफ नीचे दो स्पीकर्स हैं ।',
  'देवघर शांति और भाईचारे का प्रतीक है ।',
  'सैमसंग के टचविज़ यूजर इंटरफ़ेस में कस्टमाइजेशन के लिए पर्याप्त स्कोप है ।',
  'कुलमिलाकर कहा जा सकता है कि एपल वॉच को इस्तेमाल करने में खुशी होती है ।',
  'किकस्टैंड से इसकी उपयोगिता और भी ज्यादा हो जाती है ।',
  'Micromax Canvas Juice2 में 5 इंच की एचडी डिस्पले दी गई है ।',
  'लेकिन परफैक्ट नहीं है ।',
  'फुजीफिल्म मिनी 50एस सेल्फी टाइमर के साथ आया है ।',
  'योमकेश बताता है कि वह भागलपुर - मुंगेर से आया है ।',
  'पॉपुला सिंगर अंकित तिवारी भी निराश करते हैं ।',
  'छोटी भूमिका में सिद्धांत कपूर ध्यान खींचते हैं ।',
  'यह देख लगता है कि फिल्ममेकर ने क्या सोचकर फिल्म बनाई ।',
  'ऐसा लग

In [21]:
# downsampled_dataset = tokenized_dataset.train_test_split(
#     train_size=0.8, seed=42
# )
# downsampled_dataset

# Name of the column that holds the integer class ids.
stratify_column_name = "labels"

# Encode the label column as a class-label feature, then hold out 20% of the
# rows as the test split using a fixed seed.
# NOTE: stratify_by_column is not passed to train_test_split, so this is a
# seeded random split and not a stratified one.
label_encoded_dataset = tokenized_dataset.class_encode_column(stratify_column_name)
downsampled_dataset = label_encoded_dataset.train_test_split(test_size=0.2, seed=42)

Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-cba559c0bf14c731.arrow
Loading cached processed dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-5c238bcfb4c59386.arrow
Loading cached split indices for dataset at C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-fa645cad05ac9250.arrow and C:\Users\arifa\.cache\huggingface\datasets\csv\default-67df0aacd0686b67\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-7e2a72cc405e9b06.arrow


In [22]:
# Display the train/test splits and their row counts.
downsampled_dataset

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1496
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 374
    })
})

In [23]:
from torch.utils.data import DataLoader
# Mini-batch size shared by both loaders.
batch_size = 32

# Both loaders pad their batches through data_collator; only the training
# loader shuffles.
loader_kwargs = dict(batch_size=batch_size, collate_fn=data_collator)
train_dataloader = DataLoader(downsampled_dataset["train"], shuffle=True, **loader_kwargs)
eval_dataloader = DataLoader(downsampled_dataset["test"], **loader_kwargs)

In [24]:
# Pull one batch from the training loader and show the shape of each tensor in it.
batch = next(iter(train_dataloader))
{name: tensor.shape for name, tensor in batch.items()}

{'labels': torch.Size([32]),
 'input_ids': torch.Size([32, 21, 50]),
 'token_type_ids': torch.Size([32, 21]),
 'attention_mask': torch.Size([32, 21])}

In [25]:
import torch
# Smoke test: one forward pass on the sample batch with gradients disabled,
# printing the loss and the shape of the logits.
with torch.no_grad():
    outputs = model(**batch)
    loss, logits = outputs.loss, outputs.logits
    print(loss, logits.shape)

tensor(0.7312) torch.Size([32, 2])


In [26]:
from transformers import AdamW

# AdamW over all model parameters.
# NOTE: the Trainer built later in this notebook is not given this optimizer
# (no `optimizers=` argument is passed to it).
learning_rate = 4e-5
optimizer = AdamW(model.parameters(), lr=learning_rate)

In [27]:
from transformers import get_scheduler

# Linear learning-rate schedule without warmup, spanning every optimizer step
# of num_epochs passes over the training loader.
num_epochs = 3
steps_per_epoch = len(train_dataloader)
num_training_steps = num_epochs * steps_per_epoch

lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)
print(num_training_steps)

141


In [28]:
import torch

# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
device

device(type='cuda')

In [29]:
import numpy as np
import evaluate

# Load the "f1" metric from the evaluate library; used by compute_metrics below.
metric_fun = evaluate.load("f1")

def compute_metrics(eval_preds):
    """Compute the macro-averaged F1 score for one evaluation pass.

    Args:
        eval_preds: a pair ``(logits, labels)``. The predicted class of each
            row is the argmax of the logits over the last axis.

    Returns:
        A dict with the single key ``"f1"``.
    """
    logits, labels = eval_preds
    predicted_classes = np.argmax(logits, axis=-1)
    result = metric_fun.compute(
        predictions=predicted_classes,
        references=labels,
        average='macro',
    )
    return {"f1": result["f1"]}

In [30]:
# Disable Weights & Biases logging through the WANDB_DISABLED environment variable.
# NOTE(review): the library output further down reports this variable as
# deprecated and points to the `report_to` training argument instead.
import os
os.environ["WANDB_DISABLED"] = "true"

In [31]:
from transformers import TrainingArguments

batch_size = 32
# Log the training loss roughly once per epoch: one log entry every
# (number of training examples // batch size) optimizer steps.
logging_steps = len(downsampled_dataset["train"]) // batch_size


training_args = TrainingArguments(
    # The string "none" switches every logging integration off. Passing the
    # Python value None leaves the library default in place instead, which is
    # not what the surrounding cells intend.
    report_to="none",
    output_dir="models/bert-unigram-bengali-classifier",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",  # evaluate at the end of every epoch
    save_strategy="no",           # never write checkpoints
    learning_rate=4e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=8,
    # Mixed precision needs a CUDA device. Enable it only when one is present
    # so the notebook still runs under the CPU fallback chosen for `device`.
    fp16=torch.cuda.is_available(),
    logging_steps=logging_steps,
)

Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [32]:
from transformers import Trainer

# Trainer for the single train/test split: trains on the "train" split,
# evaluates on the "test" split and reports compute_metrics at each evaluation.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=downsampled_dataset["train"],
    eval_dataset=downsampled_dataset["test"],
    compute_metrics=compute_metrics,
)

Using amp half precision backend


In [33]:
# Fine-tune the model with the Trainer configured above.
trainer.train()

***** Running training *****
  Num examples = 1496
  Num Epochs = 8
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 376


Epoch,Training Loss,Validation Loss,F1
1,0.424,0.288279,0.834759
2,0.3334,0.550389,0.674577
3,0.2049,0.270864,0.816869
4,0.1176,0.304385,0.829833
5,0.0512,0.340693,0.870568
6,0.0133,0.455256,0.871388
7,0.0012,0.477239,0.869168
8,0.0005,0.472146,0.875047


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=376, training_loss=0.1402231955744742, metrics={'train_runtime': 51.0304, 'train_samples_per_second': 234.527, 'train_steps_per_second': 7.368, 'total_flos': 7983611613278400.0, 'train_loss': 0.1402231955744742, 'epoch': 8.0})

In [34]:
# Evaluate the trained model on the Trainer's evaluation dataset.
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


{'eval_loss': 0.4721461534500122,
 'eval_f1': 0.8750471510017566,
 'eval_runtime': 0.719,
 'eval_samples_per_second': 520.164,
 'eval_steps_per_second': 16.69,
 'epoch': 8.0}

In [46]:
# Collect the F1 score of every evaluation recorded in the trainer's log history.
batch_f1 = [
    entry['eval_f1']
    for entry in trainer.state.log_history
    if 'eval_f1' in entry.keys()
]

In [47]:
# Highest evaluation F1 among the recorded evaluations.
max(batch_f1)

0.8750471510017566

In [90]:
# trainer.save_model()

In [91]:
import torch

# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
device

device(type='cuda')

In [54]:
scores = list()
import numpy as np
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold

from datasets import load_dataset, DatasetDict

# 5-fold stratified cross-validation: every fold fine-tunes a freshly built
# model and records a macro-F1 score on its validation part.
folds = StratifiedKFold(n_splits=5)

# Shuffle once with a fixed seed (StratifiedKFold itself is not asked to shuffle).
tokenized_dataset = tokenized_dataset.shuffle(seed=42)

# Only the labels drive the stratification; the feature argument is just a
# placeholder with the right number of rows.
splits = folds.split(np.zeros(tokenized_dataset.num_rows), tokenized_dataset["labels"])

# Identical for every fold, so it is set once outside the loop.
batch_size = 32

for train_idxs, val_idxs in splits:
    fold_dataset = DatasetDict({
        "train": tokenized_dataset.select(train_idxs),
        "validation": tokenized_dataset.select(val_idxs),
    })

    #### LOADING BERT FOR CLASSIFICATION ####

    config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)  # binary classification
    model = BertForSequenceClassification(config=config)

    #### REPLACING BERT WITH CHARACTER_BERT ####
    # Raw string literal so the Windows separators are never read as escape
    # sequences; the path text itself is unchanged.
    character_bert_model = CharacterBertModel.from_pretrained(
        r"E:\Documents\Character Bert\Hate Speech\character-bert-hindi")
    model.bert = character_bert_model
    model.to(device)

    # Log the training loss roughly once per epoch.
    logging_steps = len(fold_dataset["train"]) // batch_size

    training_args = TrainingArguments(
        # "none" (the string) disables all logging integrations; the Python
        # value None would leave the library default in place.
        report_to="none",
        output_dir="models/bert-unigram-bengali-classifier",
        overwrite_output_dir=True,
        evaluation_strategy="epoch",
        save_strategy="no",
        learning_rate=3e-5,
        weight_decay=0.01,
        warmup_ratio=0.1,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=6,
        # Mixed precision only when a CUDA device exists, so the CPU fallback
        # used for `device` keeps working.
        fp16=torch.cuda.is_available(),
        logging_steps=logging_steps,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=fold_dataset["train"],
        eval_dataset=fold_dataset["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # F1 of every per-epoch evaluation recorded for this fold.
    fold_f1 = [
        entry['eval_f1']
        for entry in trainer.state.log_history
        if 'eval_f1' in entry
    ]

    # NOTE(review): the fold score is the best epoch's F1, chosen on the same
    # validation data it is measured on, so the mean over folds is an
    # optimistic estimate.
    best_fold_f1 = max(fold_f1)
    scores.append(best_fold_f1)
    print(best_fold_f1)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\arifa/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file E:\Documents\Character Bert\Hate Speech\character-bert-hindi\co

Epoch,Training Loss,Validation Loss,F1
1,0.4599,0.311272,0.809783
2,0.2572,0.268307,0.827415
3,0.1316,0.340605,0.840986
4,0.0659,0.398271,0.844014
5,0.0239,0.445582,0.865162
6,0.0069,0.457299,0.869022


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




0.8690215899518225


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\arifa/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file E:\Documents\Character Bert\Hate Speech\character-bert-hindi\co

Epoch,Training Loss,Validation Loss,F1
1,0.4848,0.31525,0.793282
2,0.245,0.318047,0.816896
3,0.1699,0.341229,0.852777
4,0.0834,0.365985,0.870482
5,0.0323,0.463377,0.867332
6,0.0066,0.489336,0.845154


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




0.8704819575133328


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\arifa/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file E:\Documents\Character Bert\Hate Speech\character-bert-hindi\co

Epoch,Training Loss,Validation Loss,F1
1,0.4956,0.291182,0.851716
2,0.2447,0.239004,0.876959
3,0.1437,0.283424,0.876102
4,0.0617,0.322534,0.864099
5,0.0264,0.370907,0.858581
6,0.0043,0.379426,0.869459


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




0.8769586509629936


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\arifa/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file E:\Documents\Character Bert\Hate Speech\character-bert-hindi\co

Epoch,Training Loss,Validation Loss,F1
1,0.5152,0.313837,0.804316
2,0.2711,0.222222,0.887031
3,0.1541,0.213662,0.89909
4,0.0654,0.259062,0.881514
5,0.0149,0.267344,0.90891
6,0.0032,0.270313,0.91023


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




0.9102299457539245


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\arifa/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file E:\Documents\Character Bert\Hate Speech\character-bert-hindi\co

Epoch,Training Loss,Validation Loss,F1
1,0.4841,0.347914,0.801715
2,0.251,0.312556,0.834773
3,0.1422,0.360698,0.825059
4,0.0891,0.597304,0.76474
5,0.0363,0.515664,0.829108
6,0.0044,0.549329,0.814264


***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32
***** Running Evaluation *****
  Num examples = 374
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)




0.8347730455788771


In [55]:
# Display the score recorded for each of the folds.
scores

[0.8690215899518225,
 0.8704819575133328,
 0.8769586509629936,
 0.9102299457539245,
 0.8347730455788771]

In [56]:
# Arithmetic mean of the per-fold scores.
sum(scores)/len(scores)

0.87229303795219