In [127]:
import torch
import torch.nn as nn


![YOLO v1 reference figure](/home/mskang/hyeokjong/object_detection/YOLO_v1/image/1.PNG)

In [128]:
# Backbone description consumed by Yolov1.make_block. Entry kinds:
#   tuple -> one Conv block, laid out as (kernel_size, out_channels, stride, padding);
#            the input channel count is tracked by make_block, not stored here
#   str   -> a 2x2 max-pool with stride 2
#   list  -> [conv_tuple, conv_tuple, repeat]: the two convs are applied in
#            sequence, `repeat` times
architecture_config = [  # tuple layout: (kernel_size, out_channels, stride, padding)
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],  # 4 is number of times to repeat
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],  # 2 is number of times to repeat
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]

In [129]:
class Conv(nn.Module):
    """Convolution block: Conv2d (no bias) -> BatchNorm2d -> LeakyReLU(0.1).

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        k: kernel size of the convolution.
        s: stride of the convolution.
        p: padding of the convolution.
    """

    def __init__(self, in_c, out_c, k, s, p):
        super().__init__()

        # The list order fixes the submodule names: conv.0, conv.1, conv.2.
        stages = [
            # bias is disabled because the following BatchNorm2d has its own shift term
            nn.Conv2d(in_c, out_c, k, s, p, bias=False),
            nn.BatchNorm2d(out_c),
            nn.LeakyReLU(negative_slope=0.1),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply convolution, batch normalisation and activation to ``x``."""
        activated = self.conv(x)
        return activated
        
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single dimension."""

    def forward(self, input):
        """Return a view of ``input`` with shape ``(input.size(0), -1)``."""
        leading = input.shape[0]
        return input.view(leading, -1)

In [130]:
# Sanity check: a 3x3, stride-2, padding-1 Conv block applied to a 448x448 batch.
test = Conv(in_c=3, out_c=64, k=3, s=2, p=1)
x = torch.randn(32, 3, 448, 448)
output = test(x)
print(output.shape)

torch.Size([32, 64, 224, 224])


In [131]:
# Show each config entry with its position and Python type; make_block
# dispatches on that type (tuple / str / list).
for index, entry in enumerate(architecture_config):
    print(index, entry, type(entry))

0 (7, 64, 2, 3) <class 'tuple'>
1 M <class 'str'>
2 (3, 192, 1, 1) <class 'tuple'>
3 M <class 'str'>
4 (1, 128, 1, 0) <class 'tuple'>
5 (3, 256, 1, 1) <class 'tuple'>
6 (1, 256, 1, 0) <class 'tuple'>
7 (3, 512, 1, 1) <class 'tuple'>
8 M <class 'str'>
9 [(1, 256, 1, 0), (3, 512, 1, 1), 4] <class 'list'>
10 (1, 512, 1, 0) <class 'tuple'>
11 (3, 1024, 1, 1) <class 'tuple'>
12 M <class 'str'>
13 [(1, 512, 1, 0), (3, 1024, 1, 1), 2] <class 'list'>
14 (3, 1024, 1, 1) <class 'tuple'>
15 (3, 1024, 2, 1) <class 'tuple'>
16 (3, 1024, 1, 1) <class 'tuple'>
17 (3, 1024, 1, 1) <class 'tuple'>


In [142]:
class Yolov1(nn.Module):
    """YOLOv1-style detector: config-driven conv backbone plus a two-layer FC head.

    The head flattens the backbone output and maps it to
    ``split_size * split_size * (num_classes + num_boxes * 5)`` values per sample.
    With the defaults this is ``7 * 7 * (20 + 2 * 5) = 1470``.

    Args:
        config: iterable describing the backbone. Each entry is one of
            * tuple ``(kernel_size, out_channels, stride, padding)`` -> one ``Conv`` block;
            * str (e.g. ``"M"``) -> ``nn.MaxPool2d`` with a 2x2 kernel and stride 2;
            * list ``[conv_tuple, conv_tuple, repeat]`` -> the two conv blocks
              applied in sequence, ``repeat`` times.
        split_size: side length S of the output grid. The backbone must
            produce an S x S feature map for the chosen input resolution
            (7 x 7 for 448 x 448 inputs with the stock configuration).
        num_boxes: number of boxes predicted per grid cell (5 values each).
        num_classes: number of class scores per grid cell.
        in_channels: number of channels of the input image.
        hidden_dim: width of the hidden fully connected layer.
        dropout: drop probability between the two linear layers
            (``0.0`` disables dropout).

    Raises:
        TypeError: if ``config`` contains an entry that is not a tuple, str or list.
    """

    def __init__(self, config, split_size=7, num_boxes=2, num_classes=20,
                 in_channels=3, hidden_dim=496, dropout=0.0):
        super().__init__()

        self.config = config
        self.convs = self.make_block(self.config, in_channels)

        # Derive the head's input width from the backbone instead of hard-coding 1024.
        feature_channels = self._last_conv_channels(self.convs, in_channels)
        cells = split_size * split_size

        # Module order is kept fixed so state_dict keys (fc.1.*, fc.4.*) stay stable.
        self.fc = nn.Sequential(
            Flatten(),
            nn.Linear(feature_channels * cells, hidden_dim),
            nn.Dropout(dropout),
            nn.LeakyReLU(0.1),
            nn.Linear(hidden_dim, cells * (num_classes + num_boxes * 5)),
        )

    @staticmethod
    def _last_conv_channels(backbone, default):
        """Return ``out_channels`` of the last ``nn.Conv2d`` in ``backbone``, or ``default`` if none."""
        channels = default
        for module in backbone.modules():
            if isinstance(module, nn.Conv2d):
                channels = module.out_channels
        return channels

    def make_block(self, config, in_channels=3):
        """Build the convolutional backbone described by ``config``.

        Args:
            config: iterable of tuple / str / list entries (see the class docstring).
            in_channels: channel count of the tensor fed to the first layer.

        Returns:
            nn.Sequential containing the layers in configuration order.

        Raises:
            TypeError: if an entry is not a tuple, str or list.
        """
        layers = []
        channels = in_channels

        def add_conv(spec):
            # spec is (kernel_size, out_channels, stride, padding)
            nonlocal channels
            layers.append(Conv(in_c=channels, out_c=spec[1], k=spec[0], s=spec[2], p=spec[3]))
            channels = spec[1]

        for entry in config:
            if isinstance(entry, tuple):
                add_conv(entry)
            elif isinstance(entry, str):
                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
            elif isinstance(entry, list):
                first, second, repeat = entry[0], entry[1], entry[2]
                for _ in range(repeat):
                    add_conv(first)
                    add_conv(second)
            else:
                # A silently skipped entry would yield a network that differs from the config.
                raise TypeError(
                    f"Unsupported config entry {entry!r} of type {type(entry).__name__}; "
                    "expected tuple, str or list"
                )

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone and the FC head; returns a ``(batch, S*S*(C+B*5))`` tensor."""
        x = self.convs(x)
        x = self.fc(x)
        return x



In [147]:
# Build the network from the layer configuration.
model = Yolov1(architecture_config)
# Bare expression: it is not the last statement of this cell, so nothing is displayed for it.
model

def test():
    """Smoke-test the network: run a dummy batch through it and print the output shape."""
    model = Yolov1(architecture_config)
    # torch.empty returns uninitialized memory (arbitrary values, possibly NaN/Inf);
    # use well-defined random input for the forward pass instead.
    x = torch.randn(2, 3, 448, 448)
    with torch.no_grad():  # only the shape is inspected, so no autograd graph is needed
        print(model(x).shape)

# Run the shape check defined above; it prints the output shape for a batch of 2.
test()


torch.Size([2, 1470])


In [149]:
# Disabled ONNX export: the code below is wrapped in a string literal, so it is
# never executed. NOTE(review): the export path is an absolute, machine-specific path.
'''
import torch.onnx
params = model.state_dict()
dummy_data = torch.empty(1, 3, 448, 448, dtype = torch.float32)
torch.onnx.export(model, dummy_data, "/home/mskang/hyeokjong/object_detection/YOLO_v1/output.onnx")
'''