# İnce Ayar
Bu kod, ön eğitimden geçmiş veya geçmemiş, farklı transformer modellerinin farklı şekillerde BOUN Twitter Veri Seti üzerinden analizini içeriyor.

Gerekli kütüphaneler

In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.utils import compute_class_weight
from sklearn.metrics import classification_report, f1_score, accuracy_score, recall_score
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup, AdamW, BertForPreTraining, BertForMaskedLM, BertConfig, AutoModel, AutoTokenizer
import torch
import numpy as np
import random
import math
import pandas as pd
from tqdm.notebook import tqdm
from collections import Counter
from datetime import datetime
import re
import os
import json
import sys
from utils import url_removal

### Gerekli konfigürasyonlar

In [2]:
embedding_layer = True   # Whether the encoder's embedding parameters stay trainable during fine-tuning
last_free_layer = 0      # Index of the first encoder layer left trainable; layers from here up to index 11 are unfrozen
learning_rate = 1e-5     # Learning rate passed to the optimizer
weight_decay = 0.01      # Weight decay passed to the optimizer
has_url_removal = True   # Apply `url_removal` to every sentence before tokenisation
optimizer_name = "AdamW" # Optimizer to build: "AdamW", "Adam" or "SGD"

pretrained_path = "None"  # Path of a checkpoint produced by pre-training; leave as 'None' to skip loading
portion = 1 # Fraction of the training split to use (passed to `feat_ext`)
transformers_model = "distilberturk" # Transformer model to use (mbert, berturk, distilberturk)
# Results are written to a folder named after the model/optimizer pair;
# the folder is created here if it does not exist yet.
results_folder = f"Results/{transformers_model}_{optimizer_name}/"
os.makedirs(results_folder, exist_ok=True)
results_filepath = os.path.join(results_folder, "results.json")

In [3]:
# Run on the first CUDA GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Report the selected device, the number of GPUs and the CUDA availability.
print(device)
print(torch.cuda.device_count())
print(torch.cuda.is_available())

cuda:0
1
True


### Model Detayı

In [4]:
class Sentiment_Net(nn.Module):
    """Transformer encoder followed by a 3-way linear classification head.

    The encoder checkpoint is selected by the module-level
    ``transformers_model`` setting.  The module-level ``embedding_layer`` and
    ``last_free_layer`` settings decide which encoder parameters remain
    trainable; every other encoder parameter is frozen.

    Raises:
        ValueError: if ``transformers_model`` is not a supported model name.
    """

    # Hub checkpoint for every supported ``transformers_model`` value.
    _CHECKPOINTS = {
        "distilberturk": 'dbmdz/distilbert-base-turkish-cased',
        "berturk": 'dbmdz/bert-base-turkish-cased',
        "mbert": 'bert-base-multilingual-cased',
    }
    # Parameter-name prefix of the numbered encoder layers for each model.
    _LAYER_PREFIX = {
        "distilberturk": 'transformer.layer.',
        "berturk": 'encoder.layer.',
        "mbert": 'encoder.layer.',
    }

    def __init__(self):
        super(Sentiment_Net, self).__init__()
        # Fail here with a clear message instead of leaving ``net_bert``
        # undefined and crashing later with an unrelated AttributeError.
        if transformers_model not in self._CHECKPOINTS:
            raise ValueError(
                "Unsupported transformers_model %r; expected one of %s"
                % (transformers_model, sorted(self._CHECKPOINTS))
            )
        self.net_bert = AutoModel.from_pretrained(self._CHECKPOINTS[transformers_model])

        # Substrings of parameter names that stay trainable.
        unfrozen_layers = ["classifier", "pooler"]
        if embedding_layer:
            unfrozen_layers.append('embedding')

        # Layer indices the model does not have simply match no parameter.
        # Matching is by substring, so e.g. '...layer.1' also matches
        # '...layer.10'; that is harmless because the range always contains
        # every higher index as well.
        layer_prefix = self._LAYER_PREFIX[transformers_model]
        for idx in range(last_free_layer, 12):
            unfrozen_layers.append(layer_prefix + str(idx))

        print(unfrozen_layers)
        for name, param in self.net_bert.named_parameters():
            trainable = any(layer in name for layer in unfrozen_layers)
            print("[%s]: %s" % ("FREE" if trainable else "FROZE", name))
            param.requires_grad = trainable

        # Size the head from the encoder configuration instead of assuming 768.
        self.fc1 = nn.Linear(self.net_bert.config.hidden_size, 3)

    def forward(self, x, attention):
        """Return the class logits for a batch.

        Args:
            x: Token ids, forwarded to the encoder as its first argument.
            attention: Mask forwarded to the encoder as ``attention_mask``.

        Returns:
            The output of the linear head applied to the hidden state at
            sequence position 0 of every example.
        """
        # Index the output instead of tuple-unpacking it: the number of
        # returned items differs between encoders, and dict-like model
        # outputs iterate over their keys rather than their tensors.
        hidden_states = self.net_bert(x, attention_mask=attention)[0]

        # Hidden state of the first token of every sequence.
        first_token_state = hidden_states[:, 0, :]

        return self.fc1(first_token_state)


def weight_reset(m):
    """Re-initialise ``m`` in place when it is a ``Conv2d`` or ``Linear`` layer.

    Modules of any other type are left untouched, so the function can be
    handed to ``nn.Module.apply`` to reset a whole network.
    """
    resettable_types = (nn.Conv2d, nn.Linear)
    if not isinstance(m, resettable_types):
        return
    m.reset_parameters()

# When this cell is re-run in the same session, a model from the previous run
# may still exist; reset its layers before it is replaced.  On a fresh run the
# name is not defined yet, which is the only failure expected here, so only
# NameError is ignored (a bare ``except`` would also hide real errors and
# swallow KeyboardInterrupt).
try:
    sentiment_net.apply(weight_reset)
    print('Sentiment Net resetlendi.')
except NameError:
    pass

# Build a fresh model and move it to the selected device.
sentiment_net = Sentiment_Net().to(device)
print('Sentiment Net tanımlandı.')

['classifier', 'pooler', 'embedding', 'transformer.layer.0', 'transformer.layer.1', 'transformer.layer.2', 'transformer.layer.3', 'transformer.layer.4', 'transformer.layer.5', 'transformer.layer.6', 'transformer.layer.7', 'transformer.layer.8', 'transformer.layer.9', 'transformer.layer.10', 'transformer.layer.11']
[FREE]: embeddings.word_embeddings.weight
[FREE]: embeddings.position_embeddings.weight
[FREE]: embeddings.LayerNorm.weight
[FREE]: embeddings.LayerNorm.bias
[FREE]: transformer.layer.0.attention.q_lin.weight
[FREE]: transformer.layer.0.attention.q_lin.bias
[FREE]: transformer.layer.0.attention.k_lin.weight
[FREE]: transformer.layer.0.attention.k_lin.bias
[FREE]: transformer.layer.0.attention.v_lin.weight
[FREE]: transformer.layer.0.attention.v_lin.bias
[FREE]: transformer.layer.0.attention.out_lin.weight
[FREE]: transformer.layer.0.attention.out_lin.bias
[FREE]: transformer.layer.0.sa_layer_norm.weight
[FREE]: transformer.layer.0.sa_layer_norm.bias
[FREE]: transformer.layer.

In [5]:
# Hub checkpoint of the tokenizer for every supported model name.
_TOKENIZER_CHECKPOINTS = {
    "distilberturk": 'dbmdz/distilbert-base-turkish-cased',
    "berturk": 'dbmdz/bert-base-turkish-cased',
    "mbert": 'bert-base-multilingual-cased',
}

# Fail loudly on an unknown name: otherwise `tokenizer` would stay undefined
# (or keep the value of an earlier run) and the error would only surface
# later, during feature extraction.
if transformers_model not in _TOKENIZER_CHECKPOINTS:
    raise ValueError(
        f"Unsupported transformers_model {transformers_model!r}; "
        f"expected one of {sorted(_TOKENIZER_CHECKPOINTS)}"
    )
tokenizer = AutoTokenizer.from_pretrained(_TOKENIZER_CHECKPOINTS[transformers_model])

Ön eğitimden geçmiş model ağırlıklarını yükleme

In [6]:
# Optionally initialise the encoder from a checkpoint saved during
# pre-training.  Nothing is loaded when `pretrained_path` does not exist.
if not os.path.exists(pretrained_path):
    print(f"{pretrained_path} yok. Ön eğitim verisi yüklenmiyor.")
else:
    checkpoint = torch.load(pretrained_path,  map_location=torch.device('cpu'))
    pretrained_state = checkpoint["model_state_dict"]

    remapped_state = {}
    for key, tensor in pretrained_state.items():
        if key.startswith("fc1"):
            # Head weights of the checkpoint are reported and left out.
            print(key)
        else:
            # Drop every "distilbert." fragment from the parameter name.
            remapped_state[key.replace("distilbert.", "")] = tensor

    # strict=False: names that do not line up with the model are ignored.
    sentiment_net.load_state_dict(remapped_state, strict=False)
    print(f"Ön eğitim verisi {pretrained_path}'ten yüklendi.")

None yok. Ön eğitim verisi yüklenmiyor.


Öznitelik çıkarma

In [7]:
def feat_ext(json_fp, portion=1, max_len=256):
    """Load a labelled JSON split and turn it into fixed-length model inputs.

    Args:
        json_fp: Path of a JSON file readable by ``pandas.read_json`` with a
            ``sentence`` column and a ``value`` column whose entries are
            ``"negative"``, ``"neutral"`` or ``"positive"``.
        portion: Fraction of rows to keep.  Rows are drawn with
            ``DataFrame.sample(frac=portion, random_state=10)``, so the row
            order is shuffled (reproducibly) even for ``portion=1``.
        max_len: Length every token sequence is padded or truncated to.

    Returns:
        A tuple ``(input_ids, attention_masks, labels)``:
        ``input_ids`` is a long tensor of shape ``(N, max_len)``,
        ``attention_masks`` a float tensor of the same shape holding 1.0 on
        real tokens and 0.0 on padding, and ``labels`` a long tensor of shape
        ``(N,)`` with negative=0, neutral=1, positive=2.

    Raises:
        KeyError: if a ``value`` entry is not one of the three known labels.
    """
    df = pd.read_json(json_fp).sample(frac=portion, random_state=10)
    mapping = {"negative":0, "neutral":1, "positive":2}
    y = [mapping[x] for x in df["value"]]

    sentences = list(df["sentence"])
    if has_url_removal:
        sentences = [url_removal(x) for x in sentences]

    # Use the tokenizer's own padding id when it defines one; fall back to 0.
    pad_id = getattr(tokenizer, "pad_token_id", None)
    if pad_id is None:
        pad_id = 0

    # Pre-filled buffers with explicit dtypes: rows start as pure padding and
    # are overwritten with the real tokens below.
    features = torch.full((len(sentences), max_len), pad_id, dtype=torch.long)
    attention_masks = torch.zeros((len(sentences), max_len), dtype=torch.float)
    for row, sentence in enumerate(tqdm(sentences)):
        token_ids = list(tokenizer.encode(sentence))
        if len(token_ids) > max_len:
            # Over-long inputs used to produce rows longer than max_len and
            # crash the final stacking.  Truncate instead, keeping the last
            # id so the closing token appended by `encode` survives.
            token_ids = token_ids[:max_len - 1] + token_ids[-1:]
        n_tokens = len(token_ids)
        features[row, :n_tokens] = torch.tensor(token_ids, dtype=torch.long)
        attention_masks[row, :n_tokens] = 1.0
    return features, attention_masks, torch.tensor(y, dtype=torch.long)

In [8]:
# Build token ids, attention masks and labels for the three BOUN splits.
# Only the training split is subsampled by `portion`; validation and test
# use the default of `feat_ext` (all rows).
X_train_feat, X_train_attention, y_train = feat_ext("../Veriler/BOUN/train.json", portion)
X_dev_feat, X_dev_attention, y_dev = feat_ext("../Veriler/BOUN/validation.json")
X_test_feat, X_test_attention, y_test = feat_ext("../Veriler/BOUN/test.json")

HBox(children=(FloatProgress(value=0.0, max=5733.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=639.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1592.0), HTML(value='')))




Optimizer ve sınıf ağırlıklarının ayarlanması

In [9]:
# Weight the loss by the "balanced" class weights of the training labels so
# that rarer classes count more.  `classes` is passed as an ndarray because
# scikit-learn versions that validate parameters reject a plain list here.
cw = compute_class_weight(class_weight="balanced", classes=np.array([0, 1, 2]), y=y_train.tolist())
criterion = nn.CrossEntropyLoss(torch.FloatTensor(cw).to(device))

# Build the optimizer selected by `optimizer_name`.
if optimizer_name == "AdamW":
    optimizer = AdamW(sentiment_net.parameters(), lr=learning_rate,  correct_bias=False, weight_decay=weight_decay)
elif optimizer_name == "Adam":
    optimizer = optim.Adam(sentiment_net.parameters(), lr=learning_rate, weight_decay=weight_decay)
elif optimizer_name == "SGD":
    optimizer = optim.SGD(sentiment_net.parameters(), lr=learning_rate, weight_decay=weight_decay)
else:
    # Without this branch an unknown name left `optimizer` undefined (or
    # stale from an earlier run) and the failure only showed up in training.
    raise ValueError(
        f"Unsupported optimizer_name {optimizer_name!r}; expected 'AdamW', 'Adam' or 'SGD'"
    )

## Eğitim Döngüsü

In [10]:
def _predict_logits(features, attention_masks, batch_size):
    """Return the logits of ``sentiment_net`` for a whole split.

    The split is fed through the model in consecutive mini-batches of
    ``batch_size`` rows with gradient tracking disabled; the per-batch logits
    are concatenated in input order and stay on ``device``.
    """
    batch_logits = []
    with torch.no_grad():
        for start in range(0, len(features), batch_size):
            stop = start + batch_size
            batch_ids = features[start:stop].to(device)
            batch_mask = attention_masks[start:stop].to(device)
            batch_logits.append(sentiment_net(batch_ids, batch_mask))
    return torch.cat(batch_logits, 0)


best_val_acc = 0
batch_size = 8
best_val_recall = 0
accumulation_steps = 8   # mini-batches whose gradients are summed per optimizer step
for epoch in range(10):
    running_loss = 0.0
    total_loss = 0.0
    total = 0
    correct = 0
    # Visit the training rows in a new random order every epoch.
    indices = np.arange(len(X_train_feat))
    np.random.shuffle(indices)
    train_outputs = torch.LongTensor([]).to(device)
    n_train_batches = math.ceil(len(X_train_feat)/batch_size)

    # `from_pretrained` hands back the encoder in evaluation mode, so dropout
    # is only active during training if train mode is requested explicitly.
    sentiment_net.train()
    # Start every epoch without gradients left over from an interrupted run.
    optimizer.zero_grad()
    for idx in range(n_train_batches):
        batch_indices = indices[idx*batch_size:(idx+1)*batch_size]
        inputs_0 = X_train_feat[batch_indices].to(device)
        input_attention = X_train_attention[batch_indices].to(device)
        labels = y_train[batch_indices].to(device)

        outputs = sentiment_net(inputs_0, input_attention)

        _, predicted = torch.max(outputs.data, 1)

        correct += (predicted == labels).sum().item()
        total += len(labels)
        train_outputs = torch.cat((train_outputs, predicted), 0)
        # Scale so that a full accumulation window averages its mini-batches.
        loss = criterion(outputs, labels) / accumulation_steps
        loss.backward()
        running_loss += loss.item()

        # Step after every full window AND after the last batch of the epoch.
        # Previously the gradients of a trailing partial window were never
        # applied in their own epoch and leaked into the next epoch's first
        # step; their loss was also missing from `total_loss`.
        is_last_batch = (idx+1) == n_train_batches
        if (idx+1) % accumulation_steps == 0 or is_last_batch:
            optimizer.step()
            optimizer.zero_grad()
            print('[%d_%d, %5d/%d] loss: %.3f accuracy: %.3f' %
                  (epoch + 1, math.ceil((idx + 1)/accumulation_steps), idx + 1, n_train_batches, running_loss, correct/total))
            total_loss += running_loss
            running_loss = 0.0

    train_acc = correct/total
    print(train_acc)

    # Evaluate with dropout disabled.
    sentiment_net.eval()

    # Labels and predictions are compared on the CPU, which is also what the
    # scikit-learn metrics need.
    train_labels = y_train[indices].cpu()
    train_predictions = train_outputs.cpu()
    test_labels = y_test.cpu()
    dev_labels = y_dev.cpu()

    _, predicted_test = torch.max(_predict_logits(X_test_feat, X_test_attention, batch_size), 1)
    predicted_test = predicted_test.cpu()
    test_acc = (predicted_test == test_labels).sum().item() / test_labels.size(0)

    _, predicted_dev = torch.max(_predict_logits(X_dev_feat, X_dev_attention, batch_size), 1)
    predicted_dev = predicted_dev.cpu()
    val_acc = (predicted_dev == dev_labels).sum().item() / dev_labels.size(0)

    # scikit-learn metrics take (y_true, y_pred).  With the arguments swapped,
    # as before, the value reported as macro recall is really macro precision.
    train_recall = recall_score(train_labels, train_predictions, average="macro")
    val_recall = recall_score(dev_labels, predicted_dev, average="macro")
    test_recall = recall_score(test_labels, predicted_test, average="macro")

    # Keep the results of the epoch with the best validation recall.
    if val_recall>best_val_recall:
        now = datetime.now()
        print(f'{now}  :  {val_recall} is higher than the best({best_val_recall}). Saving the results at {results_filepath}')
        # Uncomment the block below to also save the best model.
#         torch.save({
#                 'epoch': epoch+1,
#                 'model_state_dict': sentiment_net.state_dict(),
#                 'optimizer_state_dict': optimizer.state_dict(),
#                 'loss': total_loss
#                 }, f'Models/distilberturk_emoji_pair_class_rec_{test_recall}_acc_{accuracy_score(predicted_test.cpu(), y_test.cpu())}_{epoch+1}_sigmoid.pt')
        best_val_recall = val_recall
        results = {"Epoch":epoch+1, "Training Loss":total_loss, "Training Accuracy":train_acc, "Validation Accuracy":val_acc, "Test Accuracy":test_acc, "Train Recall":train_recall, "Validation Recall":val_recall, "Test Recall":test_recall}
        with open(results_filepath, "w") as f:
            json.dump(results, f)
    print('Epoch: ',epoch+1)
    print(f'Loss: {total_loss}, Training accuracy:{train_acc}, Validation accuracy:{val_acc}, Test accuracy:{test_acc}')
    print(f'Train accuracy: {accuracy_score(train_labels, train_predictions)}\t Train Recall:{train_recall}')
    print(f'Val accuracy: {accuracy_score(dev_labels, predicted_dev)}\t Val Recall:{val_recall}')
    print(f'Test accuracy: {accuracy_score(test_labels, predicted_test)}\t Test Recall:{test_recall}')

[1_1,     8/716] loss: 1.121 accuracy: 0.344
[1_2,    16/716] loss: 1.123 accuracy: 0.383
[1_3,    24/716] loss: 1.117 accuracy: 0.370
[1_4,    32/716] loss: 1.101 accuracy: 0.410
[1_5,    40/716] loss: 1.096 accuracy: 0.428
[1_6,    48/716] loss: 1.086 accuracy: 0.440
[1_7,    56/716] loss: 1.081 accuracy: 0.451
[1_8,    64/716] loss: 1.056 accuracy: 0.467
[1_9,    72/716] loss: 1.064 accuracy: 0.477
[1_10,    80/716] loss: 1.063 accuracy: 0.487
[1_11,    88/716] loss: 1.085 accuracy: 0.494
[1_12,    96/716] loss: 1.017 accuracy: 0.501
[1_13,   104/716] loss: 1.018 accuracy: 0.512
[1_14,   112/716] loss: 1.028 accuracy: 0.519
[1_15,   120/716] loss: 1.011 accuracy: 0.520
[1_16,   128/716] loss: 1.024 accuracy: 0.523
[1_17,   136/716] loss: 0.919 accuracy: 0.528
[1_18,   144/716] loss: 0.888 accuracy: 0.540
[1_19,   152/716] loss: 0.946 accuracy: 0.547
[1_20,   160/716] loss: 0.978 accuracy: 0.549
[1_21,   168/716] loss: 0.814 accuracy: 0.556
[1_22,   176/716] loss: 0.881 accuracy: 0.5

KeyboardInterrupt: 