# Чекпоинт 3: Бейзлайн 1 (Faster RCNN)

## 0. Установка зависимостей и скачивание датасета (Stanford Cars Dataset)

In [None]:
!pip install --quiet gdown pycocotools

In [2]:
import os
import shutil
from pycocotools.coco import COCO
from torchvision import models
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import torch
from sklearn.metrics import classification_report, confusion_matrix

In [17]:
from classification_utils import SimpleBinaryDataset, train_model

In [4]:
#Скачиваем StanfordCarsDataset (positives)

#1. Скачаем zip по ID из Drive
!gdown 1x88IjoAE6CWQp0UqEDPX7i3KvWgADqJf -O /content/stanford_cars_dataset_rcnn.zip

#2. Распакуем архив
!unzip -q /content/stanford_cars_dataset_rcnn.zip -d /content/

#3. И удалим сам zip, чтобы оптимизировать место
!rm /content/stanford_cars_dataset_rcnn.zip

Downloading...
From (original): https://drive.google.com/uc?id=1x88IjoAE6CWQp0UqEDPX7i3KvWgADqJf
From (redirected): https://drive.google.com/uc?id=1x88IjoAE6CWQp0UqEDPX7i3KvWgADqJf&confirm=t&uuid=d5afd5f1-7690-434e-a94a-d653486aa75b
To: /content/stanford_cars_dataset_rcnn.zip
100% 2.00G/2.00G [00:13<00:00, 148MB/s]


In [5]:
#Скачиваем COCO (negatives)

#1. Скачиваем изображения
!wget -q http://images.cocodataset.org/zips/val2017.zip -O /content/val2017.zip
!unzip -q /content/val2017.zip -d /content/coco_images
!rm /content/val2017.zip

#2. Скачиваем аннотации и фильтруем negative
!wget -q http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip -q annotations_trainval2017.zip -d /content/coco_ann
!rm annotations_trainval2017.zip

coco = COCO("/content/coco_ann/annotations/instances_val2017.json")
all_ids = set(coco.getImgIds())
car_ids = set(coco.getImgIds(catIds=coco.getCatIds(['car'])))
neg_ids = list(all_ids - car_ids)[:5000]  # первые 5k негативов

loading annotations into memory...
Done (t=0.63s)
creating index...
index created!


In [6]:
#Копируем все фото Stanford в positive/train
POS_DIR = "/content/data_binary/positive/train"
os.makedirs(POS_DIR, exist_ok=True)
for split in ["train","test"]:
    src = f"/content/stanford_cars_dataset_rcnn/{split}/images"
    for fn in os.listdir(src):
        shutil.copy(os.path.join(src, fn), os.path.join(POS_DIR, fn))

In [7]:
# Копируем из локальных файлов в negative/train
NEG_DIR = "/content/data_binary/negative/train"
os.makedirs(NEG_DIR, exist_ok=True)

for img_id in neg_ids:
    img_info = coco.loadImgs(img_id)[0]
    filename = img_info['file_name']
    src_path = os.path.join("/content/coco_images/val2017", filename)
    dst_path = os.path.join(NEG_DIR, filename)
    
    if os.path.isfile(src_path):
        shutil.copy(src_path, dst_path)

## 2. Обучение

In [8]:
tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

dataset = SimpleBinaryDataset(
    pos_root="/content/data_binary/positive/train",
    neg_root="/content/data_binary/negative/train",
    transform=tf
)

train_n = int(0.8 * len(dataset))
val_n = len(dataset) - train_n
train_ds, val_ds = random_split(dataset, [train_n, val_n])

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, 2)

# 8) Обучаем модель (5 эпох)
model = train_model(model, train_loader, val_loader, device, epochs=5, lr=1e-4)

# 9) Сохраняем веса
torch.save(model, "binary_car_detector.pt")

Epoch 1/5 [Train]:   0%|          | 0/316 [00:00<?, ?it/s]

Epoch 1/5 [Val]:   0%|          | 0/79 [00:00<?, ?it/s]

1/5  TrainLoss=0.0459  TrainAcc=98.70%  ValAcc=99.68%


Epoch 2/5 [Train]:   0%|          | 0/316 [00:00<?, ?it/s]

Epoch 2/5 [Val]:   0%|          | 0/79 [00:00<?, ?it/s]

2/5  TrainLoss=0.0050  TrainAcc=99.88%  ValAcc=99.72%


Epoch 3/5 [Train]:   0%|          | 0/316 [00:00<?, ?it/s]

Epoch 3/5 [Val]:   0%|          | 0/79 [00:00<?, ?it/s]

3/5  TrainLoss=0.0045  TrainAcc=99.85%  ValAcc=99.60%


Epoch 4/5 [Train]:   0%|          | 0/316 [00:00<?, ?it/s]

Epoch 4/5 [Val]:   0%|          | 0/79 [00:00<?, ?it/s]

4/5  TrainLoss=0.0045  TrainAcc=99.90%  ValAcc=99.84%


Epoch 5/5 [Train]:   0%|          | 0/316 [00:00<?, ?it/s]

Epoch 5/5 [Val]:   0%|          | 0/79 [00:00<?, ?it/s]

5/5  TrainLoss=0.0045  TrainAcc=99.91%  ValAcc=99.76%


In [20]:
model.eval()
preds, labels = [], []
with torch.no_grad():
    for imgs, lbls in val_loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        outs = model(imgs)
        _, p = outs.max(1)
        preds.extend(p.cpu().tolist())
        labels.extend(lbls.cpu().tolist())

print(classification_report(labels, preds, target_names=["no_car","car"]))
print(confusion_matrix(labels, preds))

              precision    recall  f1-score   support

      no_car       1.00      0.99      1.00       900
         car       1.00      1.00      1.00      1623

    accuracy                           1.00      2523
   macro avg       1.00      1.00      1.00      2523
weighted avg       1.00      1.00      1.00      2523

[[ 895    5]
 [   1 1622]]
