
![IDAL](https://i.imgur.com/tIKXIG1.jpg)  

#**Máster en Inteligencia Artificial Avanzada y Aplicada:  IA^3**
---

In [2]:
# import libraries
import numpy as np
import torch
import torch.nn as nn

# Problemas sobre convoluciones: tamaños, padding y stride

Muy buena visualización sobre convoluciones, padding, stride y dilation en esta [dirección](https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md)


### Convoluciona una imagen de tamaño  1x256x256 para obtener un resultado de 1x252x84

In [3]:
# parameters
inChans  = 1 # B/N or RGB
imsize   = [256,256]
outChans = 1
krnSize  = 7 # should be an odd number
stride   = (1,3)
padding  = 1

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Expected size: [  1 252  84]
Empirical size: [252, 84]


Ten en cuenta los siguientes detalles:
+ El tamaño de la imagen (imsize) se sumnistra con una lista: [alto,ancho].
+ El tamaño de filtro/kernel debe ser un valor impar, y es el mismo de ancho que de alto.
+ El padding tambien se aplica igual al ancho que al alto.
+ El desplazamiento (stride) puede ser distinto en vertical (para la altura) y en horizontal (para la anchura). Se suministra como una tupla: (stride1, stride2).
+ El tamaño final se redondea hacia abajo.
+ NO aplicamos capas de pooling, solo UNA convolución.

# A resolver

### 1) Convolucionar una imagen de tamaño 3x64x64 para obtener un resultado de 10x28x28

In [12]:
# parameters
inChans  = 3
imsize   = [64, 64]
outChans = 10
krnSize  = 9
stride   = (2, 2)
padding  = 0

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print("Required size: [10, 28, 28]")
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Required size: [10, 28, 28]
Expected size: [10 28 28]
Empirical size: [10, 28, 28]


### 2) Convolucionar una imagen de tamaño 3x196x96 para obtener un resultado de 5x66x49

---



In [22]:
# parameters
inChans  = 3
imsize   = [196, 96]
outChans = 5
krnSize  = 1
stride   = (3, 2)
padding  = 1

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print("Required size: [5, 66, 49]")
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Required size: [5, 66, 49]
Expected size: [ 5 66 49]
Empirical size: [5, 66, 49]


### 3) Convolucionar una imagen de tamaño 1x32x32 para obtener un resultado de 6x28x28

In [15]:
# note: these dimensions are the input -> first hidden layer of the famous LeNet-5

# parameters
inChans  = 1 
imsize   = [32, 32]
outChans = 6
krnSize  = 5
stride   = (1, 1)
padding  = 0

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print("Required size: [6, 28, 28]")
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Required size: [6, 28, 28]
Expected size: [ 6 28 28]
Empirical size: [6, 28, 28]


### 4) Convolucionar una imagen de tamaño 3x227x227 para obtener un resultado de 96x55x55

In [20]:
# note: these dimensions are the input -> first hidden layer of the famous AlexNet

# parameters
inChans  = 3
imsize   = [227, 227]
outChans = 96
krnSize  = 9
stride   = (4, 4)
padding  = 0

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print("Required size: [96, 55, 55]")
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Required size: [96, 55, 55]
Expected size: [96 55 55]
Empirical size: [96, 55, 55]


### 5) Convolucionar una imagen de tamaño 3x224x224 para obtener un resultado de 64x224x224

In [23]:
# note: these dimensions are the input -> first hidden layer of the famous VGG-16

# parameters
inChans  = 3
imsize   = [224, 224]
outChans = 64
krnSize  = 3
stride   = (1,1)
padding  = 1

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print("Required size: [64, 224, 224]")
print(f'Expected size: {expectSize}')
print(f'Empirical size: {list(empSize)}')

Required size: [64, 224, 224]
Expected size: [ 64 224 224]
Empirical size: [64, 224, 224]


# Soluciones (No hacer trampas!)

In [None]:
# 1)
inChans  = 3
imsize   = [64,64]
outChans = 10
krnSize  = 9
stride   = (2,2)
padding  = 0

# 2)
inChans  = 3
imsize   = [196,96]
outChans = 5
krnSize  = 5
stride   = (3,2)
padding  = 3

# 3)
inChans  = 1
imsize   = [32,32]
outChans = 6
krnSize  = 5
stride   = (1,1)
padding  = 0

# 4)
inChans  = 3
imsize   = [227,227]
outChans = 96
krnSize  = 11
stride   = (4,4)
padding  = 1

# 5)
inChans  = 3
imsize   = [224,224]
outChans = 64
krnSize  = 3
stride   = (1,1)
padding  = 1

# Referencias


+ Doc oficial : https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d
+ Curso **A deep understanding of deep learning**: https://www.udemy.com/course/deeplearning_x/
+ Padding y stride: https://towardsdatascience.com/covolutional-neural-network-cb0883dd6529
+ Padding y stride; https://d2l.ai/chapter_convolutional-neural-networks/padding-and-strides.html