# Tarea 2 - Procesamiento distribuido y redes neuronales profundas
**Integrantes:** 

Camila Gómez Nazal -
Ignacio Zurita Tapia

In [2]:
import numpy as np
import torch
from IPython.display import HTML
from torch.utils.data import DataLoader, RandomSampler, Sampler
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import get_image_backend
from torchvision.datasets import DatasetFolder
from torchvision.transforms import (Compose, Lambda, RandomApply, RandomHorizontalFlip,
                                    RandomRotation, Resize, ToTensor)
# Single seed shared by every random number generator used in the notebook.
seed = 81818
# NumPy is seeded too: later cells draw indices with np.random, and seeding only
# torch would leave those draws different on every fresh kernel.
np.random.seed(seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(seed)

<torch._C.Generator at 0x206b19b7e10>

## Carga y transformación de datos


#### 1. Dataset Folder

In [3]:
def pil_loader(path):
    """Open the image file at ``path`` with PIL and return it converted to RGB.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The result of ``img.convert('RGB')`` for the opened image.
    """
    # Imported locally (the same way accimage_loader imports accimage) because
    # nothing else in the notebook brings ``Image`` into scope; without this the
    # function raises NameError on its first call.
    from PIL import Image

    # The file is opened explicitly so the handle is closed when the block exits.
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')
            
def accimage_loader(path):
    """Load the image at ``path`` with accimage, falling back to ``pil_loader``.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        An ``accimage.Image`` when decoding succeeds; otherwise whatever
        ``pil_loader(path)`` returns.
    """
    import accimage
    try:
        imagen = accimage.Image(path)
    except IOError:
        # Possibly a decoding problem: retry the same file through PIL.
        return pil_loader(path)
    return imagen

def loader(path):
    """Load the image at ``path`` with the backend torchvision currently reports.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The image returned by ``accimage_loader`` when the backend is
        ``'accimage'``, otherwise the one returned by ``pil_loader``.
    """
    # Guard clause: every backend other than accimage goes through PIL.
    if get_image_backend() != 'accimage':
        return pil_loader(path)
    return accimage_loader(path)

In [4]:
def _scale_by_max(x):
    """Divide the tensor ``x`` by its own maximum value."""
    return torch.div(x, torch.max(x))


def _random_brightness(x):
    """Multiply ``x`` element-wise by factors drawn uniformly from [1.2, 1.5).

    One independent factor is drawn per element of ``x``.
    """
    # empty_like follows the shape of ``x`` instead of assuming exactly three
    # dimensions; the factors stay float32 as in the original FloatTensor.
    factors = torch.empty_like(x, dtype=torch.float32).uniform_(1.2, 1.5)
    # torch.mul is the element-wise product; ``torch.mult`` does not exist and
    # raised AttributeError as soon as the transform was applied.
    return torch.mul(x, factors)


# Transform objects used when composing the dataset pipeline.
scale = Lambda(_scale_by_max)
brightness = Lambda(_random_brightness)

In [5]:
# Root folder of the dataset; it contains the 'train' and 'test' sub-folders.
root = 'ChestXRay2017/chest_xray'
# One-element tuple: without the trailing comma, ('.jpeg') is a plain string.
extensions = ('.jpeg',)
# The loader returns an image object, not a tensor, so the geometric ops run
# first and ToTensor converts the result before `scale` and `brightness`, which
# call torch functions and therefore need a tensor.
transform = Compose([Resize([224, 224]),
                     RandomHorizontalFlip(0.5),
                     RandomRotation([-20, 20]),
                     ToTensor(),
                     scale,
                     brightness])

# NOTE(review): the test set receives the same random augmentations as the
# training set; confirm that is intended for evaluation.
train = DatasetFolder(root + '/train', loader, extensions, transform)
test = DatasetFolder(root + '/test', loader, extensions, transform)

#### 2. Cantidad de muestras en cada clase en train y test:



In [6]:
# Render the class-distribution table stored in the HTML file next to the notebook.
HTML(filename='distribucion_datos.html')

Unnamed: 0,Normal,Neumonía
Train,1349,3884
Test,234,390


#### 3. Índices de entrenamiento, validación y prueba, y sampler de validación

In [7]:
# Class label of every test sample, in the order stored in `test.samples`
# (each entry is a (path, label) pair).
etiquetas_prueba = tuple(etiqueta for _, etiqueta in test.samples)
l1 = len(test.samples)
# replace=False makes this a permutation: every test index appears exactly once.
# The default (with replacement) repeats some samples and drops others.
ind_test = np.random.choice(l1, l1, replace=False)
ind_test.shape

(624,)

In [8]:
# Fixed NumPy seed so the shuffled order below is the same on every run.
np.random.seed(0)
l = len(train.samples)
# Draw all `l` training indices without replacement, i.e. a random permutation.
indices = np.random.choice(l, size=l, replace=False)
indices.shape

(5232,)

In [9]:
# The first 80 % of the shuffled indices go to training, the rest to validation.
ind_train, ind_val = np.split(indices, [int(0.8*l)])
print(ind_train.shape, ind_val.shape, sep='\n')

(4185,)
(1047,)


In [10]:
# (path, label) pairs of the samples selected for validation.
val_samples = [train.samples[idx] for idx in ind_val]
# Transpose the pairs and keep the second row: the labels, as a tuple.
etiquetas_val = tuple(zip(*val_samples))[1]
# Number of validation samples with label 0 and with label 1, one per line.
print(*(etiquetas_val.count(clase) for clase in (0, 1)), sep='\n')

263
784


In [11]:
class ReplicarMuestreoDePrueba(Sampler):
  """Sampler that repeats label-0 validation indices to match the test class ratio.

  The sampler yields every validation index once, followed by as many repeated
  label-0 indices as are needed for the validation ratio (label 0 / label 1) to
  reach the ratio observed in the test labels.

  Args:
    etiquetas_prueba: Labels (0 or 1) of the test samples.
    indices_val: Dataset indices that make up the validation split.
    etiquetas_val: Labels (0 or 1) of the validation samples, aligned with
      ``indices_val``.

  Raises:
    ValueError: If ``indices_val`` and ``etiquetas_val`` differ in length.
    ZeroDivisionError: On iteration, if the test labels contain no label 1.
  """

  def __init__(self, etiquetas_prueba, indices_val, etiquetas_val):
    # Private copies: iterating never touches the caller's sequences, and
    # ``.count`` is available whatever sequence type was passed in.
    self.etiquetas_prueba = list(etiquetas_prueba)
    self.indices_val      = list(indices_val)
    self.etiquetas_val    = list(etiquetas_val)
    if len(self.indices_val) != len(self.etiquetas_val):
      raise ValueError('indices_val and etiquetas_val must have the same length')

  def _indices_replicados(self):
    """Return the validation indices followed by the repeated label-0 indices."""
    neg_prueba = self.etiquetas_prueba.count(0)
    pos_prueba = self.etiquetas_prueba.count(1)
    neg_val = self.etiquetas_val.count(0)
    pos_val = self.etiquetas_val.count(1)
    # Number of label-0 samples needed so that neg/pos in validation matches the
    # test ratio; integer arithmetic avoids floating-point rounding.
    objetivo_neg = pos_val * neg_prueba // pos_prueba
    faltantes = objetivo_neg - neg_val
    # Label-0 indices in their original order. The instance's own labels are
    # used here, not a module-level variable.
    negativos = [ind for ind, etiq in zip(self.indices_val, self.etiquetas_val)
                 if etiq == 0]
    if faltantes <= 0 or not negativos:
      # Nothing to add, or nothing available to repeat.
      return list(self.indices_val)
    # Walk the label-0 indices in order, wrapping around if more repeats are
    # required than there are label-0 samples.
    extras = [negativos[k % len(negativos)] for k in range(faltantes)]
    return list(self.indices_val) + extras

  def __iter__(self):
    # A fresh list is built on every call, so repeated iteration (one per
    # epoch) always yields the same indices instead of growing stored state.
    return iter(self._indices_replicados())

  def __len__(self):
    # Defined so that DataLoader can report its number of batches.
    return len(self._indices_replicados())

# Sampler built from the test labels and the validation indices/labels.
c = ReplicarMuestreoDePrueba(etiquetas_prueba=etiquetas_prueba,
                             indices_val=list(ind_val),
                             etiquetas_val=etiquetas_val)
# One-shot iterator over the indices produced by the sampler.
it = iter(c)

#### 4. Samplers y DataLoaders

In [12]:
# Mini-batch size shared by the three loaders.
BATCH_SIZE = 16

train_sampler = SubsetRandomSampler(ind_train)
# `it` is a one-shot iterator: re-run the cell that creates it before re-running
# this one, otherwise the validation sampler ends up empty.
valid_sampler = SubsetRandomSampler(list(it))
# RandomSampler only uses the length of its argument and yields a permutation of
# range(len(...)), so it is given the dataset it actually indexes.
test_sampler = RandomSampler(test)

# The loaders wrap the datasets themselves, not `.samples` (a plain list of
# (path, label) pairs), so each batch is loaded from disk and transformed.
train_loader = DataLoader(train, batch_size=BATCH_SIZE, sampler=train_sampler)
valid_loader = DataLoader(train, batch_size=BATCH_SIZE, sampler=valid_sampler)
test_loader = DataLoader(test, batch_size=BATCH_SIZE, sampler=test_sampler)

# Number of batches per loader.
print(len(train_loader))
print(len(valid_loader))
print(len(test_loader))

262
79
39


## Redes convolucionales profundas

1. Convolución separable en profundidad (`DWSepConv2d`)

In [27]:
import torch.nn as nn
class DWSepConv2d(nn.Module):
    """Depthwise-separable 2D convolution: a per-channel conv followed by a 1x1 conv.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels produced by the pointwise convolution.
        kernel_size: Kernel size of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        bias: Whether the depthwise convolution has a bias term. The pointwise
            convolution keeps ``nn.Conv2d``'s default.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, bias=True):
        # nn.Module must be initialised before submodules are assigned;
        # without this call the assignments below raise AttributeError.
        super(DWSepConv2d, self).__init__()
        # groups=in_channels gives every input channel its own filter.
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=1,
                                   padding=padding, bias=bias, groups=in_channels)
        # The 1x1 convolution mixes channels and sets the output width.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the depthwise convolution and then the pointwise convolution to ``x``."""
        return self.pointwise(self.depthwise(x))
        

2. Red `VGG16DWSep`

In [28]:
class VGG16DWSep(nn.Module):
    """VGG-style classifier built mostly from depthwise-separable convolutions.

    The network takes a batch of 3-channel images and returns two scores per
    image. Four 2x2 max-poolings halve the spatial size each time; a global
    average pooling then reduces whatever spatial size remains to 1x1, so the
    flattened features always have the 512 values the first linear layer expects.
    """

    def __init__(self):
        super(VGG16DWSep, self).__init__()
        # NOTE(review): there are no activation functions between layers, so
        # consecutive convolutions and linear layers compose linearly; confirm
        # whether this is intended.
        self.layers = nn.Sequential(
            # Block 1: standard convolutions on the 3-channel input.
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # Block 2.
            DWSepConv2d(64, 128, kernel_size=3, padding=1),
            DWSepConv2d(128, 128, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # Block 3.
            DWSepConv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            DWSepConv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            DWSepConv2d(256, 256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # Block 4.
            DWSepConv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            DWSepConv2d(512, 512, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # Global average pooling: a 224x224 input reaches this point as a
            # 512x14x14 map, which would not fit Linear(512, ...) once
            # flattened. A map that is already 1x1 passes through unchanged.
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            # Classifier head.
            nn.Linear(512, 1024),
            nn.Dropout(p=0.7),
            nn.Linear(1024, 512),
            nn.Dropout(p=0.5),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the two class scores."""
        return self.layers(x)
         

3. VGG16 preentrenada

In [30]:
import torchvision.models as models
# Load torchvision's VGG16 with pretrained weights; the weights file is
# downloaded into the local torch cache when it is not already there.
vgg16 = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\camil/.cache\torch\checkpoints\vgg16-397923af.pth


HBox(children=(IntProgress(value=0, max=553433881), HTML(value='')))




In [32]:
# Exploratory: list the attributes of the loaded model (the output includes
# `features`, `avgpool` and `classifier`).
dir(vgg16)

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_name',
 '_initialize_weights',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_modules',
 '_named_members',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_version',
 'add_module',
 'apply',
 'avgpool',
 'bfloat16',
 'buffers',
 'children',
 'classifier',
 'cpu',
 'cuda',
 'double',
 'dump_patches',
 'eval',
 'extra_repr',
 'features',
 'fl

In [33]:
# Displays the repr of the bound `parameters` method (it is not called); that
# repr embeds the printed architecture of the model.
vgg16.parameters

<bound method Module.parameters of VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size

## Interpretabilidad