## MA_DL : Codierung 128bit (full)

In [1]:
!pip install facenet-pytorch # due to issues with the google cloud service



In [2]:
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.models as models
from PIL import Image

from facenet_pytorch import MTCNN, InceptionResnetV1

dtype = torch.cuda.FloatTensor

## Model

In [3]:
class MultiLabel(nn.Sequential):
    def __init__(self, input_dim=512, output_dim=128):
        super(MultiLabel, self).__init__()
        self.l1 = nn.Linear(input_dim, input_dim)
        self.l2 = nn.Linear(input_dim, input_dim)
        self.l5 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l5(x)
        return torch.sigmoid(x)

In [4]:
model = MultiLabel()
model.cuda()

MultiLabel(
  (l1): Linear(in_features=512, out_features=512, bias=True)
  (l2): Linear(in_features=512, out_features=512, bias=True)
  (l5): Linear(in_features=512, out_features=128, bias=True)
)

## Data

In [5]:
scaler = transforms.Resize((224, 224))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()
to_image = transforms.ToPILImage()
transform = transforms.Compose([
    scaler, 
    to_tensor,
#    normalize
])

In [6]:
train_ds = datasets.ImageFolder('../../data/large_ds/train/', transform=transform)
train_ds.idx_to_class = {i:c for c, i in train_ds.class_to_idx.items()}
train_dl = DataLoader(train_ds, batch_size=1, num_workers=4)

valid_ds = datasets.ImageFolder('../../data/large_ds/valid/', transform=transform)
valid_ds.idx_to_class = {i:c for c, i in valid_ds.class_to_idx.items()}
valid_dl = DataLoader(valid_ds, batch_size=1, num_workers=4)

## Gesichtserkennung

In [7]:
mtcnn = MTCNN(
    image_size=160,
    thresholds=[0.6, 0.7, 0.7],
#        factor=0.709,
#        prewhiten=True,
    keep_all=True,
    device=torch.device('cuda')
    )

## Embeddings 

In [8]:
def get_vectors(imgs):
    # 2. Create a PyTorch Variable with the transformed image
    t_img = Variable(imgs).type(dtype)  
    
    # 3. Create a vector of zeros that will hold our feature vector
    #    The 'avgpool' layer has an output size of 512
    my_embedding = torch.zeros(imgs.shape[0], 512)    

    # 4. Define a function that will copy the output of a layer
    def copy_data(m, i, o):
      my_embedding.copy_(o.data.squeeze())    
    
    # 5. Attach that function to our selected layer
    h = layer.register_forward_hook(copy_data)    
    # 6. Run the model on our transformed image
    resnet(t_img)    
    # 7. Detach our copy function from the layer
    h.remove()    
    # 8. Return the feature vector
    return my_embedding

In [9]:
# Alternativ resnet
#vggf_resnet = InceptionResnetV1(pretrained='vggface2').eval()
#img_embedding = vggf_resnet(img_cropped.unsqueeze(0))

In [10]:
# Load the pretrained model
resnet = models.resnet18(pretrained=True)
resnet.cuda()
# Use the model object to select the desired layer
layer = resnet._modules.get('avgpool')
_ = resnet.eval()

## Training

In [11]:
def calculate_mistakes(pred, target):
    #pred = pred[0]
#    print('calculating mistakes : ')
#    print('   pred: ' + str(pred.shape))
#    print('   target: ' + str(target.shape))
    
    mistakes = 0
    
    if len(pred) != len(target):
        raise Exception('sizes of both tensors must match')
        
    for x,y in zip(pred, target):
        if round(x.item()) != y.item():
            mistakes = mistakes + 1
        
    return mistakes

In [12]:
loss_func = nn.BCELoss() #MultiLabelSoftMarginLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-5) #weight_decay=1e-4 

In [13]:
import matplotlib.pyplot as plt

In [None]:
epochs = 400
for e in range(epochs):
    print("======================")
    print("Epoch : " + str(e))
    epoch_loss = 0
    epoch_mistakes = 0
    train_size = len(train_dl)
    
    # activate train mode
    model.train()

    for index, (data, target) in enumerate(train_dl):
        faces, prob = mtcnn(to_image(data[0]), return_prob=True)
 
 #       if faces is None:
 #           print('no face')
 #           continue
 #       else:
 #           print('face found')
                
        target_t = torch.cuda.FloatTensor([int(x) for x in train_ds.idx_to_class[target[0].item()]])

        emb = get_vectors(faces)    
        data_v   = Variable(emb[0], requires_grad=False).type(dtype)
        target_v = Variable(target_t, requires_grad=False).type(dtype)

        # forward
#        print('datav: ' + str(data_v.shape))
        pred = model.forward(data_v)
#        print('pred: ' + str(pred.shape))
        # zero grads
        optimizer.zero_grad()
        # calculate loss
        loss = loss_func(pred, target_v.float())
        epoch_loss = epoch_loss + loss
        # back prop
        loss.backward()
        optimizer.step()
        
        epoch_mistakes = epoch_mistakes + calculate_mistakes(pred, target_t)
        
    print("Train loss : " + str(epoch_loss.item()))
    print("Average errors : " + str(epoch_mistakes/train_size))
    
    # activate eval mode
    model.eval()
    
    valid_loss = 0
    valid_mistakes = 0
    valid_size = len(valid_dl)
    for index, (data, target) in enumerate(valid_dl):
        faces, prob = mtcnn(to_image(data[0]), return_prob=True)
        target_t = torch.FloatTensor([int(x) for x in valid_ds.idx_to_class[target[0].item()]])
        
        emb = get_vectors(faces) 
        data_v   = Variable(emb[0], requires_grad=False).type(dtype)
        target_v = Variable(target_t, requires_grad=False).type(dtype)

        pred = model.forward(data_v)
        loss = loss_func(pred, target_v.float())
        valid_loss = valid_loss + loss
        valid_mistakes = valid_mistakes + calculate_mistakes(pred, target_t)
        
        
    print("Valid loss : " + str(valid_loss.item()))
    print("Average valid errors : " + str(valid_mistakes/valid_size))

Epoch : 0
Train loss : 7185.5205078125
Average errors : 57.55289139633286
Valid loss : 1847.111572265625
Average valid errors : 63.174364896073904
Epoch : 1
Train loss : 7173.94677734375
Average errors : 57.33530794546309
Valid loss : 1846.30322265625
Average valid errors : 62.985373364126254
Epoch : 2
Train loss : 7152.490234375
Average errors : 56.943864598025385
Valid loss : 1849.8692626953125
Average valid errors : 62.985758275596616
Epoch : 3
Train loss : 7140.49951171875
Average errors : 56.745933239304186
Valid loss : 1853.8673095703125
Average valid errors : 62.92532717474981
Epoch : 4
Train loss : 7123.25732421875
Average errors : 56.5444287729196
Valid loss : 1856.456787109375
Average valid errors : 62.87182448036952
Epoch : 5
Train loss : 7110.06201171875
Average errors : 56.44438175834509
Valid loss : 1855.7462158203125
Average valid errors : 62.86374133949192
Epoch : 6
Train loss : 7097.13671875
Average errors : 56.32552891396333
Valid loss : 1854.773681640625
Average vali

Valid loss : 1890.4609375
Average valid errors : 61.72594303310239
Epoch : 49
Train loss : 6802.5263671875
Average errors : 52.7594734367654


In [None]:
torch.save(model.state_dict(), 'dl128b_v1_400.pt')