# Лабораторная работа 1

In [1]:
# Install PyTorch and torchvision via a shell escape.
# NOTE(review): no versions are pinned, so a re-run may resolve to different releases.
!pip install torch torchvision

Collecting typing-extensions>=4.10.0 (from torch)
  Obtaining dependency information for typing-extensions>=4.10.0 from https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl.metadata
  Using cached typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Using cached typing_extensions-4.15.0-py3-none-any.whl (44 kB)
Installing collected packages: typing-extensions
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.7.1
    Uninstalling typing_extensions-4.7.1:
      Successfully uninstalled typing_extensions-4.7.1
Successfully installed typing-extensions-4.15.0


In [5]:
# Pin typing_extensions to 4.7.1.
# NOTE(review): the install log above shows torch requesting typing-extensions>=4.10.0
# and upgrading 4.7.1 -> 4.15.0; this cell reverts that upgrade — confirm the downgrade
# is intentional and that torch still imports afterwards.
!pip install typing_extensions==4.7.1 --upgrade



In [1]:
import os
import pandas as pd

In [2]:
root_dir = "confirmed_fronts"

# The dataset is laid out as <root>/<brand>/<year>/<image>; collect every
# year-level directory first, skipping stray files at either level.
year_dirs = [
    os.path.join(root_dir, brand, year)
    for brand in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, brand))
    for year in os.listdir(os.path.join(root_dir, brand))
    if os.path.isdir(os.path.join(root_dir, brand, year))
]

# File names are '$$'-separated; the fourth field is used as the colour label.
data = []
for year_dir in year_dirs:
    for fname in os.listdir(year_dir):
        if not fname.endswith(('.jpg', '.jpeg', '.png')):
            continue
        fields = fname.split('$$')
        if len(fields) >= 4:
            data.append({
                "img_path": os.path.join(year_dir, fname),
                "color": fields[3].strip().lower(),
            })

df = pd.DataFrame(data)
color_counts = df['color'].value_counts()
print(color_counts)

# Keep only the seven most frequent labels and renumber the rows.
top_colors = color_counts.head(7).index.tolist()
df = df[df['color'].isin(top_colors)].reset_index(drop=True)
print(df['color'].value_counts())

df.to_csv("all_data.csv", index=False)

color
black          14317
grey            9474
white           9395
blue            8483
silver          7770
red             6095
unlisted        1516
brown            911
green            777
yellow           667
beige            600
orange           559
purple           362
bronze           329
gold             217
multicolour      196
pink              87
maroon            26
turquoise         26
magenta            9
burgundy           9
indigo             1
navy               1
Name: count, dtype: int64
color
black       14317
grey         9474
white        9395
blue         8483
silver       7770
red          6095
unlisted     1516
Name: count, dtype: int64


In [3]:
from sklearn.model_selection import train_test_split
import shutil

# Reload the filtered image list from the CSV written by the dataset-scan cell.
df = pd.read_csv("all_data.csv")

# Two-stage stratified split on the colour label: 30% is held out first, then that
# hold-out is halved into validation and test (70/15/15 overall).
# The fixed random_state makes both splits reproducible.
df_train, df_tmp = train_test_split(df, test_size=0.3, stratify=df['color'], random_state=42)
df_val, df_test = train_test_split(df_tmp, test_size=0.5, stratify=df_tmp['color'], random_state=42)

def copy_images(df, out_root, split):
    """Copy every image listed in `df` into `<out_root>/<split>/<color>/`.

    Args:
        df: frame with an 'img_path' column (source file) and a 'color'
            column (name of the destination sub-folder).
        out_root: root directory of the split tree.
        split: name of the split folder created under `out_root`.

    Files keep their base name; an existing file with the same name in the
    destination folder is overwritten.
    """
    split_root = os.path.join(out_root, split)
    for src, color in zip(df['img_path'], df['color']):
        target_dir = os.path.join(split_root, color)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(src, os.path.join(target_dir, os.path.basename(src)))

# Materialise the three splits on disk under data_split/<split>/<color>/.
for split_name, split_df in (("train", df_train), ("val", df_val), ("test", df_test)):
    copy_images(split_df, "data_split", split_name)

In [4]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score
from tqdm.auto import tqdm

In [5]:
IMG_SIZE = 224
BATCH_SIZE = 32

# Per-channel mean / std; these are the values torchvision documents for its
# ImageNet-pretrained weights.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: resize, random horizontal flip, tensor conversion, normalisation.
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Evaluation pipeline: identical but without the random flip.
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    normalize,
])

data_dir = 'data_split'

# One ImageFolder per split; validation and test share the evaluation transform.
train_ds, val_ds, test_ds = (
    datasets.ImageFolder(os.path.join(data_dir, split_name), transform=split_transform)
    for split_name, split_transform in (
        ('train', train_transform),
        ('val', val_transform),
        ('test', val_transform),
    )
)

# Only the training loader reshuffles its samples.
train_loader, val_loader, test_loader = (
    DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=reshuffle, num_workers=2)
    for dataset, reshuffle in ((train_ds, True), (val_ds, False), (test_ds, False))
)

num_classes = len(train_ds.classes)
print("Классы:", train_ds.classes)

Классы: ['black', 'blue', 'grey', 'red', 'silver', 'unlisted', 'white']


In [23]:
class MyCNN(nn.Module):
    """Small convolutional classifier trained from scratch.

    The feature extractor is three stages of Conv2d(3x3, padding=1) -> ReLU ->
    MaxPool2d(2) with 16, 32 and 64 output channels, so a square input of side
    `img_size` leaves the extractor as 64 maps of side `img_size // 8`. The
    head flattens them and applies Linear -> ReLU -> Dropout(0.3) -> Linear.

    Args:
        num_classes: number of output logits.
        img_size: side length of the square input images. When omitted, the
            notebook-level `IMG_SIZE` constant is used, which is what the
            original hard-coded behaviour relied on.
    """

    def __init__(self, num_classes, img_size=None):
        super().__init__()
        # Resolve the default lazily so the class no longer depends on the
        # global when an explicit size is given.
        if img_size is None:
            img_size = IMG_SIZE
        feature_side = img_size // 8  # three 2x poolings
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * feature_side * feature_side, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images `x`."""
        x = self.conv(x)
        x = self.fc(x)
        return x

# Prefer CUDA, then Apple's MPS backend, and fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device('cpu')

model0 = MyCNN(num_classes).to(device)


In [7]:
def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over `loader`.

    Every batch is moved to the notebook-level `device`, a gradient step is
    taken, and the arg-max predictions are recorded.

    Returns:
        (accuracy, macro_f1) computed over all samples seen in the pass.
    """
    model.train()
    y_true, y_pred = [], []
    n_correct = 0
    n_seen = 0
    for inputs, labels in tqdm(loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        predicted = logits.argmax(1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
        n_correct += (predicted == labels).sum().item()
        n_seen += labels.size(0)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return n_correct / n_seen, macro_f1

@torch.no_grad()
def evaluate(model, loader, criterion):
    """Score `model` on `loader` without tracking gradients.

    `criterion` is accepted for signature symmetry with `train_one_epoch`
    but is not used here.

    Returns:
        (accuracy, macro_f1) over all samples in `loader`.
    """
    model.eval()
    y_true, y_pred = [], []
    n_correct = 0
    n_seen = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        predicted = model(inputs).argmax(1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
        n_correct += (predicted == labels).sum().item()
        n_seen += labels.size(0)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return n_correct / n_seen, macro_f1


In [24]:
def fit(model, train_loader, val_loader, epochs=10, lr=1e-3):
    """Train `model` with Adam and cross-entropy, reporting metrics per epoch.

    Args:
        model: network to optimise in place.
        train_loader: batches used for the gradient updates.
        val_loader: batches used for the per-epoch validation metrics.
        epochs: number of passes over `train_loader`.
        lr: learning rate handed to Adam.

    After every epoch the train/validation accuracy and macro-F1 are printed;
    at the end the highest validation macro-F1 seen is printed. The model is
    left with the weights of the final epoch, not of the best one.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    best_f1 = 0
    for epoch in range(epochs):
        tr_acc, tr_f1 = train_one_epoch(model, train_loader, optimizer, criterion)
        val_acc, val_f1 = evaluate(model, val_loader, criterion)
        print(f"Эпоха {epoch+1} | train acc {tr_acc:.3f}, F1_macro {tr_f1:.3f} | val acc {val_acc:.3f}, F1_macro {val_f1:.3f}")
        # Track the running maximum; without this the summary below would
        # always report the initial 0.
        best_f1 = max(best_f1, val_f1)
    print(f"Лучшая F1_macro на валидации: {best_f1:.3f}")

# Train the from-scratch CNN for 10 epochs with fit's default learning rate.
fit(model0, train_loader, val_loader, epochs=10)
# Saved after training finishes, so the file holds the last-epoch weights.
torch.save(model0.state_dict(), 'best_simplecnn.pth')


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 1 | train acc 0.728, F1_macro 0.635 | val acc 0.790, F1_macro 0.686


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 2 | train acc 0.801, F1_macro 0.700 | val acc 0.823, F1_macro 0.718


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 3 | train acc 0.826, F1_macro 0.724 | val acc 0.835, F1_macro 0.731


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 4 | train acc 0.842, F1_macro 0.740 | val acc 0.843, F1_macro 0.738


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 5 | train acc 0.853, F1_macro 0.757 | val acc 0.843, F1_macro 0.741


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 6 | train acc 0.868, F1_macro 0.775 | val acc 0.851, F1_macro 0.753


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 7 | train acc 0.878, F1_macro 0.792 | val acc 0.855, F1_macro 0.755


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 8 | train acc 0.891, F1_macro 0.813 | val acc 0.849, F1_macro 0.750


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 9 | train acc 0.900, F1_macro 0.833 | val acc 0.855, F1_macro 0.762


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 10 | train acc 0.907, F1_macro 0.840 | val acc 0.856, F1_macro 0.761
Лучшая F1_macro на валидации: 0.762


In [10]:
# ResNet-18 initialised from the IMAGENET1K_V1 weights.
model1 = models.resnet18(weights='IMAGENET1K_V1')
# Swap the final fully connected layer for one with `num_classes` outputs.
model1.fc = nn.Linear(model1.fc.in_features, num_classes)
model1 = model1.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/tulenevvadim/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:01<00:00, 40.0MB/s]


In [11]:
# MobileNetV2 initialised from the IMAGENET1K_V1 weights.
model2 = models.mobilenet_v2(weights='IMAGENET1K_V1')
# classifier[1] is replaced by a Linear layer with `num_classes` outputs.
model2.classifier[1] = nn.Linear(model2.classifier[1].in_features, num_classes)
model2 = model2.to(device)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /Users/tulenevvadim/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 23.2MB/s]


In [12]:
# Fine-tune ResNet-18 for 7 epochs with lr=2e-4 (lower than fit's 1e-3 default).
fit(model1, train_loader, val_loader, epochs=7, lr=2e-4)
# Saved after training finishes, so the file holds the last-epoch weights.
torch.save(model1.state_dict(), 'best_resnet.pth')

  0%|          | 0/1248 [00:03<?, ?it/s]

Эпоха 1 | train acc 0.846, F1_macro 0.744 | val acc 0.871, F1_macro 0.766


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 2 | train acc 0.893, F1_macro 0.797 | val acc 0.903, F1_macro 0.823


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 3 | train acc 0.913, F1_macro 0.820 | val acc 0.888, F1_macro 0.792


  0%|          | 0/1248 [00:02<?, ?it/s]

IOStream.flush timed out


Эпоха 4 | train acc 0.924, F1_macro 0.841 | val acc 0.887, F1_macro 0.815


  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 5 | train acc 0.935, F1_macro 0.860 | val acc 0.903, F1_macro 0.822


  0%|          | 0/1248 [00:03<?, ?it/s]

Эпоха 6 | train acc 0.946, F1_macro 0.885 | val acc 0.903, F1_macro 0.816


  0%|          | 0/1248 [00:03<?, ?it/s]

Эпоха 7 | train acc 0.954, F1_macro 0.904 | val acc 0.909, F1_macro 0.830
Лучшая F1_macro на валидации: 0.830


In [13]:
# Fine-tune MobileNetV2 with the same settings as ResNet-18 (7 epochs, lr=2e-4).
# NOTE(review): the recorded run of this cell ended with KeyboardInterrupt during
# the second epoch, so the torch.save below was not reached in that run.
fit(model2, train_loader, val_loader, epochs=7, lr=2e-4)
torch.save(model2.state_dict(), 'best_mobilenet.pth')

  0%|          | 0/1248 [00:02<?, ?it/s]

Эпоха 1 | train acc 0.843, F1_macro 0.742 | val acc 0.870, F1_macro 0.771


  0%|          | 0/1248 [00:02<?, ?it/s]

KeyboardInterrupt: 

In [26]:
# Write the current ResNet-18 weights to disk again.
# NOTE(review): the state-dict dump recorded under this cell cannot come from this
# statement, so the cell was probably edited after it last ran. No visible cell
# successfully writes 'best_mobilenet.pth' — confirm where that file comes from.
torch.save(model1.state_dict(), 'best_resnet.pth')

OrderedDict([('features.0.0.weight',
              tensor([[[[ 1.3185e-02, -4.3213e-03,  1.4823e-02],
                        [ 3.2780e-02, -2.5385e-02,  6.8572e-03],
                        [ 1.0549e-02, -3.7347e-02, -1.4727e-02]],
              
                       [[ 7.9917e-03, -5.9146e-03,  1.5076e-02],
                        [ 1.9999e-02, -3.2863e-02, -2.0859e-03],
                        [ 1.1350e-02, -3.2956e-02, -7.8733e-03]],
              
                       [[-2.5234e-02, -2.0167e-02, -9.9620e-03],
                        [-1.1213e-02, -2.9266e-02, -1.5218e-02],
                        [-2.6531e-02, -3.3449e-02, -2.4215e-02]]],
              
              
                      [[[-5.9290e-02, -1.7451e-02,  2.4834e-02],
                        [ 1.3826e-01,  3.8424e-01,  4.6761e-02],
                        [-1.8215e-01, -2.7591e-01, -3.3435e-02]],
              
                       [[-6.4451e-02,  5.5549e-02,  3.7894e-03],
                        [ 2.7346e-01, 

In [29]:
@torch.no_grad()
def test_eval(model, loader, device):
    """Compute, print and return the macro-averaged F1 of `model` on `loader`.

    Args:
        model: classifier whose output is arg-maxed over dimension 1.
        loader: iterable yielding (inputs, targets) batches.
        device: device on which inference runs. Both the model and every
            batch are moved there, so the model remains on `device` after
            the call.

    Returns:
        The macro F1 score as computed by `f1_score(..., average='macro')`.
    """
    # Move the model as well as the data: previously only the batches were
    # moved, so passing a device other than the one the model lived on
    # failed with a device-mismatch error.
    model = model.to(device)
    model.eval()
    all_targets, all_preds = [], []
    for x, y in tqdm(loader):
        x, y = x.to(device), y.to(device)
        preds = model(x).argmax(1)
        all_targets.extend(y.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
    f1 = f1_score(all_targets, all_preds, average='macro')
    print(f"Test F1_macro: {f1:.3f}")
    return f1

# Reload each model's saved weights and score it on the held-out test split.
print('MySimpleCNN:')
model0.load_state_dict(torch.load('best_simplecnn.pth'))
test_eval(model0, test_loader, device)

# The two pretrained models are evaluated with the CPU passed as the device.
cpu_device = torch.device('cpu')

print('ResNet18:')
model1.load_state_dict(torch.load('best_resnet.pth'))
test_eval(model1, test_loader, cpu_device)

print('MobileNetV2:')
model2.load_state_dict(torch.load('best_mobilenet.pth'))
# Last expression of the cell: its return value is what the notebook displays.
test_eval(model2, test_loader, cpu_device)

MySimpleCNN:


  0%|          | 0/268 [00:02<?, ?it/s]

Test F1_macro: 0.762
ResNet18:


  0%|          | 0/268 [00:02<?, ?it/s]

Test F1_macro: 0.823
MobileNetV2:


  0%|          | 0/268 [00:02<?, ?it/s]

Test F1_macro: 0.794


0.7943191021030701

Рейтинг:
ResNet18 - 0.823
MobileNetV2 - 0.794
MySimpleCNN - 0.762


ResNet18 и MobileNetV2 ожидаемо лучше моей модели, т.к. они предобучены и уже умеют извлекать признаки из изображений.
MySimpleCNN пришлось обучать с нуля.
MobileNetV2 получилась хуже, чем ResNet18, т.к. её не удалось дообучить: обучение шло слишком долго и было прервано. Предполагаю, что при полном обучении обе модели показали бы примерно одинаковое качество.