In [1]:
import torch
# Sanity check: show whether PyTorch can see a CUDA device before any GPU work is attempted.
torch.cuda.is_available()

True

In [2]:
from huggingface_hub import login
from dotenv import dotenv_values

# Read the Hugging Face access token from the local .env file and authenticate with it.
env_values = dotenv_values('.env')
login(token=env_values['HUGGING_FACE_TOKEN'])

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/hakatashi/.cache/huggingface/token
Login successful


In [3]:
from datasets import load_dataset

# Load every split of the bookmark dataset, keeping downloaded files under the given cache directory.
dataset = load_dataset(
    path="hakatashi/hakatashi-pixiv-bookmark-deepdanbooru-private",
    cache_dir='/mnt/f/.cache',
)

Found cached dataset parquet (/mnt/f/.cache/hakatashi___parquet/hakatashi--hakatashi-pixiv-bookmark-deepdanbooru-private-dc6bd44c53eea7d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Display the loaded DatasetDict: its splits, column names and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['key', 'tag_probs', 'class'],
        num_rows: 179121
    })
    validation: Dataset({
        features: ['key', 'tag_probs', 'class'],
        num_rows: 59708
    })
    test: Dataset({
        features: ['key', 'tag_probs', 'class'],
        num_rows: 59707
    })
})

In [5]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Have every split hand back torch tensors that are already placed on `device`.
torch_dataset = dataset.with_format(type='torch', device=device)

In [6]:
import torch.nn.functional as F

In [7]:
# Materialise the whole training split as tensors: input features and one-hot float targets.
train_split = torch_dataset['train']
x = train_split['tag_probs']
# Pin the one-hot width to the 3 outputs of the network. Without num_classes,
# F.one_hot sizes the encoding from the largest label present in the data, which
# would silently produce a narrower target if the highest class were absent.
y = F.one_hot(train_split['class'], num_classes=3).float()

In [8]:
%%time

import torch.nn as nn
import torch.optim as optim

class Network(nn.Module):
    """Fully connected classifier: three ReLU hidden layers (512, 128, 128) and a linear output.

    Args:
        in_features: Width of each input vector. Defaults to 6000.
        num_classes: Number of output scores per sample. Defaults to 3.
        layer_device: Device the layers are created on. When omitted, the
            notebook-level ``device`` variable is used, so ``Network()`` keeps
            its previous behaviour.
    """

    def __init__(self, in_features=6000, num_classes=3, layer_device=None):
        super().__init__()
        # Only read the notebook-level `device` when no explicit device was passed,
        # so the class can also be constructed where that global does not exist.
        if layer_device is None:
            layer_device = device
        # Attribute names are kept unchanged: they are the keys of the saved state_dict.
        self.middle1_layer = nn.Linear(in_features, 512, device=layer_device)
        self.middle2_layer = nn.Linear(512, 128, device=layer_device)
        self.middle3_layer = nn.Linear(128, 128, device=layer_device)
        self.out_layer = nn.Linear(128, num_classes, device=layer_device)

    def forward(self, x):
        """Return the raw output scores for a batch ``x`` (no activation on the last layer)."""
        x = F.relu(self.middle1_layer(x))
        x = F.relu(self.middle2_layer(x))
        x = F.relu(self.middle3_layer(x))
        x = self.out_layer(x)
        return x

# Seed the RNG so the random weight initialisation (the only stochastic step in this
# full-batch loop) is repeatable across kernel restarts.
torch.manual_seed(0)

NUM_STEPS = 30000  # number of full-batch gradient steps
LOG_EVERY = 100    # print the loss once every LOG_EVERY steps

network = Network()
optimizer = optim.SGD(network.parameters(), lr=0.01)
# Mean squared error between the raw network outputs and the one-hot targets in `y`.
criterion = nn.MSELoss()

for i in range(NUM_STEPS):
    optimizer.zero_grad()
    # Every iteration is a single gradient step computed on the entire training tensor `x`.
    output = network(x)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    if i % LOG_EVERY == 0:
        print(f'[Epoch {i}] Loss: {loss.item():.3f}')

[Epoch 0] Loss: 0.358
[Epoch 100] Loss: 0.187
[Epoch 200] Loss: 0.161
[Epoch 300] Loss: 0.158
[Epoch 400] Loss: 0.157
[Epoch 500] Loss: 0.156
[Epoch 600] Loss: 0.156
[Epoch 700] Loss: 0.155
[Epoch 800] Loss: 0.154
[Epoch 900] Loss: 0.153
[Epoch 1000] Loss: 0.152
[Epoch 1100] Loss: 0.151
[Epoch 1200] Loss: 0.149
[Epoch 1300] Loss: 0.148
[Epoch 1400] Loss: 0.145
[Epoch 1500] Loss: 0.143
[Epoch 1600] Loss: 0.140
[Epoch 1700] Loss: 0.137
[Epoch 1800] Loss: 0.133
[Epoch 1900] Loss: 0.129
[Epoch 2000] Loss: 0.125
[Epoch 2100] Loss: 0.120
[Epoch 2200] Loss: 0.116
[Epoch 2300] Loss: 0.113
[Epoch 2400] Loss: 0.110
[Epoch 2500] Loss: 0.108
[Epoch 2600] Loss: 0.106
[Epoch 2700] Loss: 0.104
[Epoch 2800] Loss: 0.103
[Epoch 2900] Loss: 0.102
[Epoch 3000] Loss: 0.101
[Epoch 3100] Loss: 0.101
[Epoch 3200] Loss: 0.100
[Epoch 3300] Loss: 0.099
[Epoch 3400] Loss: 0.099
[Epoch 3500] Loss: 0.098
[Epoch 3600] Loss: 0.098
[Epoch 3700] Loss: 0.098
[Epoch 3800] Loss: 0.097
[Epoch 3900] Loss: 0.097
[Epoch 4000]

In [9]:
# Write the network's state_dict (its parameters) to a file in the working directory.
torch.save(
    obj=network.state_dict(),
    f='torch-multiclass-onehot-shallow-network-multilayer',
)

In [10]:
# Pull the features and class labels of the held-out test split.
test_split = torch_dataset['test']
x_test, y_test = test_split['tag_probs'], test_split['class']

In [11]:
# Inference only: disable autograd so no computation graph is kept alive for the whole test split.
with torch.no_grad():
    y_test_predict = network(x_test)

In [12]:
# Predicted class = index of the largest output score in each row.
# argmax yields the same indices as torch.max(..., 1)[1]; detach() replaces the legacy
# `.data` accessor, and `_` (IPython's last-output variable) is no longer overwritten.
y_test_predict_class = y_test_predict.detach().argmax(dim=1)

In [13]:
from torcheval.metrics.functional import multiclass_accuracy, multiclass_confusion_matrix, multiclass_precision, multiclass_f1_score, multiclass_recall

# Score the predicted classes against the test labels. The three macro-averaged
# metrics share the same keyword arguments.
macro_kwargs = dict(average="macro", num_classes=3)

confusion = multiclass_confusion_matrix(y_test_predict_class, y_test, num_classes=3)
accuracy = multiclass_accuracy(y_test_predict_class, y_test)
precision = multiclass_precision(y_test_predict_class, y_test, **macro_kwargs)
recall = multiclass_recall(y_test_predict_class, y_test, **macro_kwargs)
f1 = multiclass_f1_score(y_test_predict_class, y_test, **macro_kwargs)

print('confusion_matrix:')
print(confusion)
print(f'accuracy_score: {accuracy}')
print(f'precision_score: {precision}')
print(f'recall_score: {recall}')
print(f'f1_score: {f1}')

confusion_matrix:
tensor([[37909,  2365,   424],
        [ 3071, 11244,   204],
        [ 1607,   756,  2127]], device='cuda:0')
accuracy_score: 0.8588607311248779
precision_score: 0.8149803876876831
recall_score: 0.7265412211418152
f1_score: 0.7586900591850281
