In [3]:
import kagglehub
from sklearn.utils.validation import validate_data

# Download latest version
path = kagglehub.dataset_download("jessicali9530/stanford-dogs-dataset")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\festa\.cache\kagglehub\datasets\jessicali9530\stanford-dogs-dataset\versions\2


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from tqdm import tqdm
import PIL
from IPython.display import display, Image
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
import torchvision.models as models
import os
import pandas as pd
def fix_seed(seed = 39):
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  np.random.seed(seed)
  generator = torch.Generator().manual_seed(seed)
  return generator
generator = fix_seed()
path = os.path.join(path, "images","Images")
print(path)

C:\Users\festa\.cache\kagglehub\datasets\jessicali9530\stanford-dogs-dataset\versions\2\images\Images


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform = None):
        self.root_dir = root_dir
        self.transform = transform
        self.class_folders = sorted([d for d in os.listdir(self.root_dir) if os.path.isdir(os.path.join(self.root_dir, d))])
        self.class_to_idx = {cls : idx for idx, cls in enumerate(self.class_folders)}
        self.idx_to_class = {idx: '_'.join(cls.split('-')[1:]).capitalize() for idx, cls in enumerate(self.class_folders)}
        self.samples = []
        for class_name in self.class_folders:
            class_path = os.path.join(self.root_dir, class_name)
            for fname in os.listdir(class_path):
                if fname.lower().endswith((".jpg", ".jpeg", ".png")):
                    img_path = os.path.join(class_path, fname)
                    self.samples.append((img_path, self.class_to_idx[class_name]))
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        image = PIL.Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

In [7]:
train_transforms = T.Compose([
    T.RandomResizedCrop(224, scale=(0.8, 1.0)),
    T.RandomHorizontalFlip(),
    T.ColorJitter(0.2, 0.2, 0.2, 0.1),
    T.RandomRotation(20),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406],
                [0.229, 0.224, 0.225])
])

val_transforms = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406],
                [0.229, 0.224, 0.225])
])

In [8]:
full_dataset = CustomDataset(root_dir=path, transform=train_transforms)
mapping = full_dataset.idx_to_class
train_size = int(0.8 * len(full_dataset))
other_size = len(full_dataset) - train_size
train_dataset, other_dataset = random_split(full_dataset, [train_size, other_size])
valid_size = int(0.5 * len(other_dataset))
test_size = len(other_dataset) - valid_size
valid_dataset, test_dataset = random_split(other_dataset, [valid_size, test_size])
mapping

{0: 'Chihuahua',
 1: 'Japanese_spaniel',
 2: 'Maltese_dog',
 3: 'Pekinese',
 4: 'Shih_tzu',
 5: 'Blenheim_spaniel',
 6: 'Papillon',
 7: 'Toy_terrier',
 8: 'Rhodesian_ridgeback',
 9: 'Afghan_hound',
 10: 'Basset',
 11: 'Beagle',
 12: 'Bloodhound',
 13: 'Bluetick',
 14: 'Black_and_tan_coonhound',
 15: 'Walker_hound',
 16: 'English_foxhound',
 17: 'Redbone',
 18: 'Borzoi',
 19: 'Irish_wolfhound',
 20: 'Italian_greyhound',
 21: 'Whippet',
 22: 'Ibizan_hound',
 23: 'Norwegian_elkhound',
 24: 'Otterhound',
 25: 'Saluki',
 26: 'Scottish_deerhound',
 27: 'Weimaraner',
 28: 'Staffordshire_bullterrier',
 29: 'American_staffordshire_terrier',
 30: 'Bedlington_terrier',
 31: 'Border_terrier',
 32: 'Kerry_blue_terrier',
 33: 'Irish_terrier',
 34: 'Norfolk_terrier',
 35: 'Norwich_terrier',
 36: 'Yorkshire_terrier',
 37: 'Wire_haired_fox_terrier',
 38: 'Lakeland_terrier',
 39: 'Sealyham_terrier',
 40: 'Airedale',
 41: 'Cairn',
 42: 'Australian_terrier',
 43: 'Dandie_dinmont',
 44: 'Boston_bull',
 45:

In [21]:
import json
with open("mapping.json", "w") as f:

    json.dump(mapping,f)

In [9]:
batch_size = 64
epochs = 100
num_classes = 120

In [10]:
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = True)

In [11]:
base = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
for param in base.parameters():
    param.requires_grad = False  # заморозим фичи

in_features = base.fc.in_features
base.fc = nn.Sequential(
    nn.Linear(in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, num_classes)
)
model = base.to(device)
for name, param in model.named_parameters():
    if not name.startswith("fc."):
        param.requires_grad = False

In [12]:
model.load_state_dict(torch.load("best_model.pth", weights_only=True, map_location=torch.device("cuda")))

<All keys matched successfully>

In [13]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader), epochs=20)
criterion = nn.CrossEntropyLoss()

In [14]:
def train_epoch(model, criterion, optimizer, device, loader):
  model.train()
  curr_loss = 0.0
  total = 0
  correct = 0
  for images, labels in tqdm(loader, desc = 'Training'):
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    curr_loss += loss.item() * images.size(0)
    _, predicted = torch.max(outputs.data, 1)
    correct += (predicted == labels).sum().item()
    total += labels.size(0)
  epoch_loss = curr_loss / total
  total_loss = correct / total
  return epoch_loss, total_loss

In [15]:
def val_epoch(model, dataloader, criterion, device):
    model.eval()
    curr_loss = 0.0
    total = 0
    correct = 0
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc= "val"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            curr_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    epoch_loss = curr_loss/total
    epoch_acc = correct/total
    return epoch_loss, epoch_acc

In [89]:
# Размораживаю backbone
for name, param in model.named_parameters():
    if not name.startswith("fc."):
        param.requires_grad = True
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

In [90]:
print("start train")
train_losses, val_losses = [], []
train_acces, val_acces = [], []
best_val_acc = 0.0
for epoch in range(epochs):
    print(f"____EPOCH____: {epoch}")
    train_loss, train_acc = train_epoch(model, criterion, optimizer, device,  train_loader)
    train_losses.append(train_loss)
    train_acces.append(train_acc)

    val_loss, val_acc = val_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_acces.append(val_acc)

    print(f"train_loss: {train_loss}, train_acc: {train_acc}")
    print(f"val_loss: {val_loss}, val_acc: {val_acc}")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"Best model saved with validation accuracy: {best_val_acc:.4f}")

start train
____EPOCH____: 0


Training: 100%|██████████| 258/258 [02:30<00:00,  1.72it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.81it/s]


train_loss: 0.579579288543603, train_acc: 0.8239795918367347
val_loss: 0.4711752577645901, val_acc: 0.8551992225461613
Best model saved with validation accuracy: 0.8552
____EPOCH____: 1


Training: 100%|██████████| 258/258 [02:34<00:00,  1.67it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.4759619942964919, train_acc: 0.8504008746355685
val_loss: 0.4174714866080715, val_acc: 0.8615160349854227
Best model saved with validation accuracy: 0.8615
____EPOCH____: 2


Training: 100%|██████████| 258/258 [02:34<00:00,  1.67it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.42924317674108564, train_acc: 0.8624271137026239
val_loss: 0.40052746295697256, val_acc: 0.8683187560738581
Best model saved with validation accuracy: 0.8683
____EPOCH____: 3


Training: 100%|██████████| 258/258 [02:29<00:00,  1.72it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.78it/s]


train_loss: 0.4006931742669542, train_acc: 0.872631195335277
val_loss: 0.38134749044018884, val_acc: 0.8814382896015549
Best model saved with validation accuracy: 0.8814
____EPOCH____: 4


Training: 100%|██████████| 258/258 [02:34<00:00,  1.66it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.36861550277940736, train_acc: 0.8784013605442177
val_loss: 0.3744798196068774, val_acc: 0.8707482993197279
____EPOCH____: 5


Training: 100%|██████████| 258/258 [02:40<00:00,  1.61it/s]
val: 100%|██████████| 33/33 [00:12<00:00,  2.61it/s]


train_loss: 0.3493890361054877, train_acc: 0.8844144800777454
val_loss: 0.38107524134501075, val_acc: 0.8702623906705539
____EPOCH____: 6


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.81it/s]


train_loss: 0.3296210352255374, train_acc: 0.891399416909621
val_loss: 0.37131816809100937, val_acc: 0.8790087463556852
____EPOCH____: 7


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.3135487593790069, train_acc: 0.8969873663751214
val_loss: 0.35838218350452167, val_acc: 0.8877551020408163
Best model saved with validation accuracy: 0.8878
____EPOCH____: 8


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.79it/s]


train_loss: 0.2910494542706928, train_acc: 0.904944120505345
val_loss: 0.34580463938509176, val_acc: 0.8926141885325559
Best model saved with validation accuracy: 0.8926
____EPOCH____: 9


Training: 100%|██████████| 258/258 [02:34<00:00,  1.67it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.2729263093659203, train_acc: 0.9115646258503401
val_loss: 0.364964855670118, val_acc: 0.8775510204081632
____EPOCH____: 10


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.25933706520018474, train_acc: 0.9134475218658892
val_loss: 0.3553833037151424, val_acc: 0.8824101068999028
____EPOCH____: 11


Training: 100%|██████████| 258/258 [02:35<00:00,  1.66it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.52it/s]


train_loss: 0.23984988628949092, train_acc: 0.922740524781341
val_loss: 0.3491234626445423, val_acc: 0.8867832847424684
____EPOCH____: 12


Training: 100%|██████████| 258/258 [02:39<00:00,  1.61it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.52it/s]


train_loss: 0.23668691261053318, train_acc: 0.9217079689018465
val_loss: 0.33164035541207265, val_acc: 0.8882410106899903
____EPOCH____: 13


Training: 100%|██████████| 258/258 [02:40<00:00,  1.61it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.21694270442479785, train_acc: 0.9281462585034014
val_loss: 0.34826831211154036, val_acc: 0.880466472303207
____EPOCH____: 14


Training: 100%|██████████| 258/258 [02:37<00:00,  1.64it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.79it/s]


train_loss: 0.21574151146689935, train_acc: 0.9289965986394558
val_loss: 0.3499103115113095, val_acc: 0.8770651117589893
____EPOCH____: 15


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.82it/s]


train_loss: 0.1994000227707352, train_acc: 0.9336734693877551
val_loss: 0.36235078233787454, val_acc: 0.880466472303207
____EPOCH____: 16


Training: 100%|██████████| 258/258 [02:37<00:00,  1.64it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.19055609803273912, train_acc: 0.935617103984451
val_loss: 0.3571663820940505, val_acc: 0.8877551020408163
____EPOCH____: 17


Training: 100%|██████████| 258/258 [02:39<00:00,  1.61it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.18006470161548974, train_acc: 0.9410228377065112
val_loss: 0.34500170538909697, val_acc: 0.8858114674441205
____EPOCH____: 18


Training: 100%|██████████| 258/258 [02:36<00:00,  1.65it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.1725563365093589, train_acc: 0.9434523809523809
val_loss: 0.349508328502681, val_acc: 0.8824101068999028
____EPOCH____: 19


Training: 100%|██████████| 258/258 [02:29<00:00,  1.73it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.80it/s]


train_loss: 0.1609069354709082, train_acc: 0.9455174927113703
val_loss: 0.3421132321405225, val_acc: 0.8872691933916423
____EPOCH____: 20


Training: 100%|██████████| 258/258 [02:38<00:00,  1.62it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.51it/s]


train_loss: 0.1550906016857909, train_acc: 0.9491010689990281
val_loss: 0.3687499892853555, val_acc: 0.8794946550048591
____EPOCH____: 21


Training: 100%|██████████| 258/258 [02:40<00:00,  1.61it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.50it/s]


train_loss: 0.14641563135287042, train_acc: 0.952259475218659
val_loss: 0.3712997711087107, val_acc: 0.8751214771622935
____EPOCH____: 22


Training: 100%|██████████| 258/258 [02:39<00:00,  1.62it/s]
val: 100%|██████████| 33/33 [00:13<00:00,  2.52it/s]


train_loss: 0.14270744521978182, train_acc: 0.9518343051506317
val_loss: 0.3670051886805988, val_acc: 0.879980563654033
____EPOCH____: 23


Training: 100%|██████████| 258/258 [02:37<00:00,  1.64it/s]
val: 100%|██████████| 33/33 [00:11<00:00,  2.79it/s]


train_loss: 0.13378734123486705, train_acc: 0.9548104956268222
val_loss: 0.3943253467683078, val_acc: 0.8756073858114675
____EPOCH____: 24


Training:   3%|▎         | 8/258 [00:04<02:30,  1.67it/s]


KeyboardInterrupt: 

In [16]:
#TEST
val_epoch(model, test_loader, criterion, device)

val: 100%|██████████| 33/33 [00:15<00:00,  2.16it/s]


(0.22577127589834212, 0.9271137026239067)