In [1]:
#hugging
import datasets
from transformers import AutoTokenizer

#pytorch
import torch
from torch import nn
import torch.optim as optim
#from torchsummary import summary

import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")
else:
    print("No GPU available. Training will run on CPU.")

GPU: NVIDIA GeForce RTX 3050 is available.


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Processing Dataset

In [4]:
ds = datasets.load_dataset('google-research-datasets/go_emotions', 'raw')
CLASSES = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']

In [5]:
def get_X_y_from_ds(ds: datasets.dataset_dict, CLASSES: list[str], split: str ='train') -> tuple[list[str], list[int]]:
    y = []
    for c in CLASSES:
        y.append(ds[split][c])
    return ds[split]['text'], y

In [6]:
X, y = get_X_y_from_ds(ds, CLASSES)
X_train, X_test, y_train, y_test = train_test_split(X, np.array(y).T, train_size=0.7, shuffle=True)

# Preparing model

In [7]:
tokenizer = AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token



In [None]:
tf.kears.leayers.RNN(cell_type)

In [8]:
class RNN(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, vocab_size, num_layers, classes):
        super(RNN, self).__init__()

        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        #Хочет LxBxE_dims
        #При batch_first=True хочет BxLxE_dims
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers)

        # The linear layer that maps from hidden state space to tag space
        self.FC = nn.Linear(hidden_dim, classes)

                        #n_layers x B x H_out
    
    def forward(self, text):
        # text B x len
        embeddings = self.embeddings(text) # B x len x H(embedding_dim)
        #inp = embeddings.view(-1, len(text), self.embedding_dim) # L x B x H_in так нужно сделать, чтобы в LSTM кинуть
        inp = embeddings.permute(1, 0, 2)
        #self.hidden = self.init_hidden_state(text) #h_0 и c_0 n_layers x B x H_out

        lstm_out, (h, c) = self.lstm(inp) #out: L x B x Hout h_t: n_l x B x H_out
        out = lstm_out[-1]

        
        # In each timestep of an LSTM the input goes through a simple neural network and the output gets passed to the next timestep. The output out of function
        # out, (ht, ct) = self.lstm_nets(X)
        # contains a list of ALL outputs (i.e the output of the neural networks of every timestep). Yet, in classification, you mostly only really care about the LAST output. You can get it like this:
        # out = out[:, -1]
        # https://stackoverflow.com/questions/72667646/how-to-connect-a-lstm-layer-to-a-linear-layer-in-pytorch
        
        out = self.FC(out) 
        return out 
        

In [23]:
#E = 32
#H = 128
#len(tokenz) = 64
rnn = RNN(embedding_dim=64, 
          hidden_dim=64, 
          vocab_size=50257, 
          num_layers=1, 
          classes=28).to('cuda')

text = tokenizer(X[0:2], return_tensors='pt', padding='max_length', max_length=32, truncation=True)
x = rnn(text['input_ids'].to('cuda'))

# Train

In [24]:
#embedding_dim, hidden_dim, vocab_size, num_layers, classes
model = RNN(embedding_dim=128, 
            hidden_dim=128, 
            vocab_size=50257, 
            num_layers=4, 
            classes=28)
#criterion = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.to('cuda')

RNN(
  (embeddings): Embedding(50257, 128)
  (lstm): LSTM(128, 128, num_layers=4)
  (FC): Linear(in_features=128, out_features=28, bias=True)
)

In [25]:
def Accuracy(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        temp += sum(np.logical_and(y_true[i], y_pred[i])) / sum(np.logical_or(y_true[i], y_pred[i]))
    return temp / y_true.shape[0]

In [26]:
train_acc_list = []
val_acc_list = []
X_train1 = np.array(X_train)
y_train1 = np.array(y_train)
X_test1 = np.array(X_test)
y_test1 = np.array(y_test)

n_epochs = 20
rng = np.random.default_rng()


for epoch in range(n_epochs):  # loop over the dataset multiple times

    train_acc = 0
    val_acc = 0

    #train
    model.train(True)
    running_loss = 0.0
    for i in range(0, len(X_train1), 250):
        idx = rng.choice(len(X_train1), size=250, replace=False)
        X_i, y_i = X_train1[idx], y_train1[idx]
        X_i = tokenizer(X_i.tolist(), return_tensors='pt', padding='max_length', max_length=32, truncation=True)['input_ids']
        y_i = torch.Tensor(y_i).to('cuda')
                
        optimizer.zero_grad()

        # forward + backward + optimize
        y_pred = model(X_i.to('cuda'))
        loss = criterion(y_pred, y_i)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 40000 == 0:    # print every 10000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 80 :.2f}')
            running_loss = 0.0

        #calculate acc
        train_acc += Accuracy(y_i.cpu().numpy(), 
                              y_pred.cpu().detach().numpy())
    train_acc /= len(range(0, len(X_train1), 250))

    #validation
    model.eval()
    with torch.no_grad():
        for i in range(0, len(X_test1), 250):
            idx = rng.choice(len(X_test1), size=250, replace=False)
            X_i, y_i = X_test1[idx], y_test1[idx]
            X_i = tokenizer(X_i.tolist(), return_tensors='pt', padding='max_length', max_length=32, truncation=True)['input_ids']
            
            y_i = torch.Tensor(y_i).to('cuda')
            y_pred = model(X_i.to('cuda'))
            val_acc += Accuracy(y_i.cpu().numpy(), 
                                y_pred.cpu().detach().numpy())
    val_acc /= len(range(0, len(X_test1), 250))

    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)
    
    print('-'*40)
    print(f'Train accuracy {train_acc_list[-1]*100:.2f}%')
    print(f'Test accuracy {val_acc_list[-1]*100:.2f}%')
    print('-'*40)

print('Finished Training')

plt.plot(range(n_epochs), train_acc_list, c='r')
plt.plot(range(n_epochs), val_acc_list, c='b')

[1,     1] loss: 0.01
[1, 40001] loss: 0.38
[1, 80001] loss: 0.32
[1, 120001] loss: 0.31
----------------------------------------
Train accuracy 4.22%
Test accuracy 4.23%
----------------------------------------
[2,     1] loss: 0.00
[2, 40001] loss: 0.31
[2, 80001] loss: 0.31
[2, 120001] loss: 0.32
----------------------------------------
Train accuracy 4.22%
Test accuracy 4.21%
----------------------------------------
[3,     1] loss: 0.00
[3, 40001] loss: 0.32
[3, 80001] loss: 0.31
[3, 120001] loss: 0.31
----------------------------------------
Train accuracy 4.21%
Test accuracy 4.22%
----------------------------------------
[4,     1] loss: 0.00
[4, 40001] loss: 0.32
[4, 80001] loss: 0.32
[4, 120001] loss: 0.32
----------------------------------------
Train accuracy 4.22%
Test accuracy 4.22%
----------------------------------------
[5,     1] loss: 0.00
[5, 40001] loss: 0.32
[5, 80001] loss: 0.31
[5, 120001] loss: 0.31
----------------------------------------
Train accuracy 4.22%
T

KeyboardInterrupt: 