<a href="https://colab.research.google.com/github/altayavci/papers-with-code/blob/dev/bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [96]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import os
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

In [76]:
pwd

'/content'

In [77]:
dataset = pd.read_csv("/content/data_tiny.csv")

In [78]:
texts, labels = dataset.text.values.tolist(), dataset.target.values.tolist()

In [79]:
labels[0]

'first hand'

In [80]:
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

In [81]:
labels[0]

0

In [82]:
tokenizer = BertTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

In [83]:
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
labels = torch.tensor(labels)

In [84]:
inputs_train, inputs_val, labels_train, labels_val = train_test_split(inputs.input_ids, labels, test_size=0.33)

In [85]:
train_dataset = TensorDataset(inputs_train, labels_train)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

In [86]:
val_dataset = TensorDataset(inputs_val, labels_val)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [87]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [88]:
model = BertForSequenceClassification.from_pretrained("dbmdz/bert-base-turkish-cased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [89]:
model = model.to(device)

In [90]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_function = torch.nn.CrossEntropyLoss()

In [91]:
epochs = 3

In [98]:
valid_loss_min = np.Inf
train_loss, train_acc, val_loss, val_acc = [], [], [], []

for epoch in range(epochs):

    running_loss, correct, total = 0.0, 0, 0
    model.train()
    for batch_inputs, batch_labels in tqdm(train_loader,total=len(train_loader)):
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)
        outputs = model(batch_inputs, labels=batch_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        preds = torch.argmax(outputs.logits, dim = 1)
        correct += torch.sum(preds == batch_labels).item()
        total += batch_labels.size(0)

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss / len(train_loader))


    val_true, val_pred = [], []
    running_loss, correct, total = 0.0, 0.0, 0.0
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(val_loader,total=len(val_loader)):
           batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)
           outputs = model(batch_inputs, labels=batch_labels)

           loss = outputs.loss
           running_loss += loss.item()
           preds = torch.argmax(outputs.logits, dim=1)

           val_true.extend(batch_labels.data.cpu().numpy())
           val_pred.extend(preds.data.cpu().numpy())

           correct += torch.sum(preds == batch_labels).item()
           total += batch_labels.size(0)

        val_acc.append(100 * correct / total)
        val_loss.append(running_loss / len(val_loader))


    network_learned = running_loss < valid_loss_min
    if network_learned:
            valid_loss_min = running_loss
            torch.save(model.state_dict(), os.path.join("/content/", 'best.pt'))
            print("Model has learned !\n")


            plt.figure(figsize = (15, 13), facecolor = 'silver', edgecolor = 'gray')

            cm = confusion_matrix(val_true, val_pred, labels=[0,1])
            ax= plt.subplot()
            sns.heatmap(cm, annot=True, fmt='g', ax=ax)
            ax.set_xlabel('Predicted labels')
            ax.set_ylabel('True labels')
            ax.set_title('Confusion Matrix')
            ax.xaxis.set_ticklabels(["first hand", "second hand"])
            ax.yaxis.set_ticklabels(["first hand", "second hand"])
            plt.savefig(os.path.join("/content/", 'confussion.png'))
            plt.close()

            plt.figure(figsize = (15, 12), facecolor = 'silver', edgecolor = 'gray')
            cr = classification_report(val_true, val_pred,
                                   target_names = ["first hand", "second hand"],
                                   output_dict = True)

            sns.heatmap(pd.DataFrame(cr).iloc[:-1, :].T, annot=True)
            plt.savefig(os.path.join("/content/", 'report.png'))
            plt.close()





100%|██████████| 92/92 [02:23<00:00,  1.56s/it]
100%|██████████| 46/46 [00:24<00:00,  1.88it/s]


Model has learned !



100%|██████████| 92/92 [02:23<00:00,  1.56s/it]
100%|██████████| 46/46 [00:24<00:00,  1.88it/s]
100%|██████████| 92/92 [02:23<00:00,  1.56s/it]
100%|██████████| 46/46 [00:24<00:00,  1.87it/s]


In [100]:
train_acc

[96.44079397672827, 97.80971937029432, 98.01505817932923]

In [101]:
train_loss

[0.10196706698701272, 0.06213869106874842, 0.05582108931696933]