In [1]:
import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *

In [2]:
# Instantiate SSD300 with n_classes=10.
# NOTE(review): label_map.json, loaded further down in this notebook, has 21
# entries — confirm that n_classes=10 matches the label map actually used.
model = SSD300(n_classes=10)


Loaded base model.



In [None]:
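# Layers to single out when freezing: conv4_3, conv7, conv8_2, conv9_2, conv10_2, conv11_2 + pred convs
# NOTE(review): the cells below freeze every layer and then re-enable exactly these — confirm that is the intent.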

In [3]:
model

SSD300(
  (base): VGGBase(
    (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv4_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4_2): Conv2d(512, 512, kernel_size=(3, 3), 

In [None]:
# Inspection only: list every parameter of each top-level sub-module together
# with its trainable flag and shape.
# named_parameters() is used because a Parameter is a tensor, not a Module: it
# has no .children(), so print(param.children()) raised AttributeError on the
# very first parameter.
for child_name, child in model.named_children():
    for param_name, param in child.named_parameters():
        print(child_name + '.' + param_name, param.requires_grad, tuple(param.shape))

In [49]:
# Freeze pass: walk model -> top-level sub-modules -> their layers and switch
# gradient tracking off for each layer's parameters.  Parameters registered
# directly on `model`, or directly on a top-level sub-module, are not visited
# by this walk and keep whatever flag they already had.
for child in model.children():
    for layer in child.children():
        for param in layer.parameters():
            param.requires_grad = False

In [81]:
# Hand-picked layers: conv4_3, conv7, conv8_2, conv9_2, conv10_2, conv11_2
# (the prediction convolutions are handled in a separate cell).
layers = [
    model.base.conv4_3,
    model.base.conv7,
    model.aux_convs.conv8_2,
    model.aux_convs.conv9_2,
    model.aux_convs.conv10_2,
    model.aux_convs.conv11_2,
]

In [82]:
# Turn gradient tracking back on for every parameter of the hand-picked layers.
for selected_layer in layers:
    for param in selected_layer.parameters():
        param.requires_grad = True

In [91]:
model.pred_convs

PredictionConvolutions(
  (loc_conv4_3): Conv2d(512, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (loc_conv7): Conv2d(1024, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (loc_conv8_2): Conv2d(512, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (loc_conv9_2): Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (loc_conv10_2): Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (loc_conv11_2): Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv4_3): Conv2d(512, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv7): Conv2d(1024, 60, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv8_2): Conv2d(512, 60, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv9_2): Conv2d(256, 60, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv10_2): Conv2d(256, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cl_conv11_2): Conv2d(256, 40, kernel_size=(3, 3), str

In [93]:
# Make the parameters of every layer inside model.pred_convs trainable as well.
for pred_layer in model.pred_convs.children():
    for param in pred_layer.parameters():
        param.requires_grad = True

In [85]:
# Print each top-level sub-module of the model.
# Iterating the model object itself only works if its class defines __iter__
# (torch.nn.Module does not), so `for l in model` raises TypeError;
# children() is the supported way to walk the direct sub-modules.
for submodule in model.children():
    print(submodule)

VGGBase(
  (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv4_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_3)

In [21]:
# NOTE(review): `layer` is not defined in this cell; it is whatever a loop in
# another cell last bound it to, so this only works with leftover kernel state.
# parameters() returns a generator, so the value displayed is just the
# generator object — wrap it in list() to see the tensors themselves.
layer.parameters()

<generator object Module.parameters at 0x7f897359b120>

In [4]:
# Walk the named parameters in order, echoing each name, and stop right after
# showing the shape of 'base.conv1_1.weight'.
for param_name, param in model.named_parameters():
    print(param_name)
    if param_name != 'base.conv1_1.weight':
        continue
    print(param.shape)
    break

rescale_factors
base.conv1_1.weight
torch.Size([64, 3, 3, 3])


In [5]:
# Placeholder values for the parameter-group experiment in the next cell.
lr = 0.4
biases = [1, 2, 3]
not_biases = [4, 5, 6]

In [6]:
# Two groups: the first carries its own learning rate (twice `lr`), the second
# specifies none.  The result is bound to `_` because the next cell reads it.
_ = [
    dict(params=biases, lr=2 * lr),
    dict(params=not_biases),
]

In [7]:
_[0]['lr']

0.8

In [8]:
!cat create_data_lists.py

from utils import create_data_lists

#voc07_path = 'data/VOC2007'
voc07_path = 'data/FedeSet'
#voc12_path = 'data/VOC2012'
voc12_path = 'data/Damiset'

if __name__ == '__main__':
    create_data_lists(voc07_path= voc07_path,
                      voc12_path= voc12_path,
                      output_folder='./')


In [9]:
!ls

arial.ttf		 input			TRAIN_objects.json
checkpoints		 label_map (fede).json	train.py
create_data_lists.py	 label_map.json		Untitled.ipynb
data			 LICENSE		utils.py
datasets.py		 model.py		utils.pyc
detect_image.py		 outputs		VOCdevkit
detect.py		 __pycache__		VOCdevkit (2)
detect_vid.py		 QueHacer.odt		VOCdevkit (3)
eval.py			 README.md		VOCtest_06-Nov-2007.tar
fede_detect_image.ipynb  TEST_images.json	VOCtrainval_06-Nov-2007.tar
fede_pruebas.ipynb	 TEST_objects.json	VOCtrainval_11-May-2012.tar
img			 TRAIN_images.json


In [10]:
from datasets import PascalVOCDataset

In [32]:
# create_data_lists.py must be run beforehand, outside this notebook. It should
# write JSON files such as TRAIN_images.json, which is ultimately nothing more
# than a list of image paths.
import json

# The context manager closes the handle as soon as parsing finishes; the bare
# open() used before left the file open for the life of the kernel.
with open('label_map.json') as f:
    data = json.load(f)

In [33]:
type(data), len(data)

(dict, 21)

In [35]:
data

{'aeroplane': 1,
 'bicycle': 2,
 'bird': 3,
 'boat': 4,
 'bottle': 5,
 'bus': 6,
 'car': 7,
 'cat': 8,
 'chair': 9,
 'cow': 10,
 'diningtable': 11,
 'dog': 12,
 'horse': 13,
 'motorbike': 14,
 'person': 15,
 'pottedplant': 16,
 'sheep': 17,
 'sofa': 18,
 'train': 19,
 'tvmonitor': 20,
 'background': 0}

In [20]:
# This is an excerpt of train.py; create_data_lists.py has to be run first.
# (The dataset path inside create_data_lists.py also had to be changed.)

# Custom dataloaders
data_folder = './'
keep_difficult = True

# Author's note: this dataset constructor is the key piece. It takes one JSON
# with the image paths and another with the boxes, classes and difficulty
# flags, and builds the dataset object from them.
train_dataset = PascalVOCDataset(data_folder, split='train', keep_difficult=keep_difficult)

In [21]:
# train_dataset now yields tuples of this form:
train_dataset[0]
# which represent the image, the boxes and the categories (the fourth element is, I believe, a difficult-image flag)

(tensor([[[-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116],
          [-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116],
          [-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116],
          ...,
          [-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116],
          [-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116],
          [-0.0116, -0.0116, -0.0116,  ..., -0.0116, -0.0116, -0.0116]],
 
         [[-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
          [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
          [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
          ...,
          [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
          [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
          [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049]],
 
         [[-0.0092, -0.0092, -0.0092,  ..., -0.0092, -0.0092, -0.0092],
          [-0.0092, -0.0092,

In [None]:
workers = 4
batch_size = 8

# Note that the dataset's own collate function is handed to the loader here.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
    num_workers=workers,
    pin_memory=True,
)

In [None]:
# Pull exactly one batch for inspection.  next(iter(...)) states that directly,
# drops the unused loop index, and raises StopIteration on an empty loader
# instead of silently leaving images/boxes/labels undefined.
# The fourth element of the batch is discarded.
images, boxes, labels, _ = next(iter(train_loader))

In [None]:
# Author's note on the expected layout: [batch, channels, dim1, dim2]
images.shape

In [None]:
# boxes holds the ground truths
print(len(boxes))
boxes[0].shape

In [None]:
# labels holds the true class of each box
print(len(labels))
labels[0]

In [28]:
# Scratch check of string normalisation: trim surrounding whitespace and lower-case.
string = '   Hola Qué Tal??   '
string.strip().lower()

'hola qué tal??'