In [14]:
import torch
import numpy as np
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import transforms


In [3]:
X_train = np.load('images_train.npy')
X_test = np.load('images_test.npy')
y_train = np.load('labels_train.npy')
y_test = np.load('labels_test.npy')


In [5]:
def preprocess(images, labels):
    images = torch.tensor(images, dtype=torch.float32).permute(0, 3, 1, 2)  
    labels = torch.tensor(labels, dtype=torch.float32)
    return images, labels

X_train, y_train = preprocess(X_train, y_train)
X_test, y_test = preprocess(X_test, y_test)

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


# ViT

In [7]:
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', num_labels=y_train.shape[1], problem_type='multi_label_classification')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [8]:
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
criterion = torch.nn.BCEWithLogitsLoss()


In [9]:
epochs = 5
model.train()
for epoch in range(epochs):
    total_loss = 0
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.2404
Epoch 2, Loss: 0.1167
Epoch 3, Loss: 0.0715
Epoch 4, Loss: 0.0411
Epoch 5, Loss: 0.0245


In [11]:
model.eval()
preds, true_labels = [], []
with torch.no_grad():
    for batch in test_loader:
        inputs, targets = batch
        inputs = inputs.to(device)
        outputs = model(inputs).logits.cpu().numpy()
        preds.append(outputs)
        true_labels.append(targets.numpy())

y_pred = np.vstack(preds) > 0.5  
y_true = np.vstack(true_labels)

print(classification_report(y_true, y_pred, target_names=[str(i) for i in range(y_train.shape[1])]))


              precision    recall  f1-score   support

           0       0.94      0.85      0.89       570
           1       0.96      0.91      0.93      2366
           2       0.91      0.82      0.86       318
           3       0.90      0.86      0.88       273
           4       0.93      0.93      0.93      3099
           5       0.71      0.22      0.34       191
           6       0.91      0.80      0.85      1060
           7       0.89      0.87      0.88      1315
           8       0.58      0.54      0.56       452
           9       0.91      0.79      0.84      1833
          10       0.93      0.84      0.88       558
          11       0.82      0.85      0.83        68
          12       0.81      0.64      0.71       236
          13       0.72      0.76      0.74        82
          14       0.97      0.97      0.97       667

   micro avg       0.91      0.85      0.88     13088
   macro avg       0.86      0.78      0.81     13088
weighted avg       0.91   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# resnet и effnet

In [12]:
def get_model(model_name, num_classes):
    if model_name == "resnet":
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)  # Изменяем выходной слой
    elif model_name == "efficientnet":
        model = models.efficientnet_b3(pretrained=True)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)  # Изменяем выходной слой
    else:
        raise ValueError("'resnet' или 'efficientnet'")

    return model


In [15]:
model_name = "efficientnet"  
model = get_model(model_name, y_train.shape[1])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=5e-5)
criterion = nn.BCEWithLogitsLoss()



In [16]:
epochs = 5
model.train()
for epoch in range(epochs):
    total_loss = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.2617
Epoch 2, Loss: 0.1575
Epoch 3, Loss: 0.1206
Epoch 4, Loss: 0.0938
Epoch 5, Loss: 0.0737


In [18]:
model.eval()
preds, true_labels = [], []
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs).cpu().numpy()
        preds.append(outputs)
        true_labels.append(targets.numpy())

y_pred = np.vstack(preds) > 0.5  
y_true = np.vstack(true_labels)

print(classification_report(y_true, y_pred, target_names=[str(i) for i in range(y_train.shape[1])]))


              precision    recall  f1-score   support

           0       0.90      0.86      0.88       570
           1       0.95      0.94      0.95      2366
           2       0.88      0.78      0.83       318
           3       0.91      0.84      0.87       273
           4       0.92      0.94      0.93      3099
           5       0.67      0.09      0.17       191
           6       0.92      0.85      0.88      1060
           7       0.90      0.87      0.89      1315
           8       0.63      0.42      0.51       452
           9       0.88      0.82      0.85      1833
          10       0.89      0.91      0.90       558
          11       0.78      0.82      0.80        68
          12       0.77      0.74      0.75       236
          13       0.72      0.72      0.72        82
          14       0.98      0.98      0.98       667

   micro avg       0.91      0.87      0.89     13088
   macro avg       0.85      0.77      0.79     13088
weighted avg       0.90   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
model_name = "resnet"  
model = get_model(model_name, y_train.shape[1])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=5e-5)
criterion = nn.BCEWithLogitsLoss()

epochs = 5
model.train()
for epoch in range(epochs):
    total_loss = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")




Epoch 1, Loss: 0.1897
Epoch 2, Loss: 0.1098
Epoch 3, Loss: 0.0687
Epoch 4, Loss: 0.0441
Epoch 5, Loss: 0.0297


In [20]:
model.eval()
preds, true_labels = [], []
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs).cpu().numpy()
        preds.append(outputs)
        true_labels.append(targets.numpy())

y_pred = np.vstack(preds) > 0.5 
y_true = np.vstack(true_labels)

print(classification_report(y_true, y_pred, target_names=[str(i) for i in range(y_train.shape[1])]))


              precision    recall  f1-score   support

           0       0.93      0.87      0.90       570
           1       0.93      0.96      0.95      2366
           2       0.92      0.80      0.85       318
           3       0.86      0.90      0.88       273
           4       0.92      0.96      0.94      3099
           5       0.78      0.28      0.41       191
           6       0.89      0.88      0.89      1060
           7       0.88      0.89      0.89      1315
           8       0.57      0.44      0.50       452
           9       0.86      0.87      0.86      1833
          10       0.92      0.89      0.91       558
          11       0.86      0.65      0.74        68
          12       0.81      0.67      0.73       236
          13       0.82      0.65      0.72        82
          14       0.98      0.97      0.98       667

   micro avg       0.90      0.89      0.89     13088
   macro avg       0.86      0.78      0.81     13088
weighted avg       0.89   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
