In [1]:
import numpy as np
np.random.seed(0)
import torch
torch.manual_seed(0)
import random
random.seed(0)
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.nn import Module
import cv2
import os
import torchvision
import time
import socket
import matplotlib.pyplot as plt


In [62]:
class HasFaceNN(Module):
    """Small CNN mapping a 1-channel image to 5 regression outputs.

    With a 128x128 input the convolutional stack (`enc1`) produces 16 feature
    maps of 29x29, which is the size the first fully connected layer of
    `enc2` expects.  The capture loop in this notebook reads output 0 as a
    face-presence score and outputs 1-4 as normalised face-box edges.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1x128x128 -> 16x29x29.
        self.enc1 = nn.Sequential(
            nn.Conv2d(1, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Fully connected regression head.
        self.enc2 = nn.Sequential(
            nn.Linear(16 * 29 * 29, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 5),
        )

    def encoder(self, image):
        """Run the conv stack, flatten per sample, and apply the dense head.

        Args:
            image: float tensor of shape (batch, 1, H, W).

        Returns:
            Tensor of shape (batch, 5).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first Linear layer (i.e. the input resolution is wrong).
        """
        features = self.enc1(image)
        # Flatten everything except the batch dimension.  A hard-coded
        # `view(-1, N)` would silently fold extra features into the batch
        # axis when the input resolution is wrong; this makes the Linear
        # layer raise a clear shape error instead.
        features = features.flatten(start_dim=1)
        return self.enc2(features)

    def forward(self, image):
        """Alias of `encoder` so the module can be called directly."""
        return self.encoder(image)

class LandmarkNN(Module):
    """CNN mapping a 3-channel image to 16 regression outputs.

    With a 256x256 input the convolutional stack (`enc1`) produces 48 feature
    maps of 27x27, which is the size the first fully connected layer of
    `enc2` expects.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3x256x256 -> 48x27x27.
        self.enc1 = nn.Sequential(
            nn.Conv2d(3, 24, 7, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(24, 48, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(48, 60, 3),
            nn.ReLU(),
            nn.Conv2d(60, 60, 3),
            nn.ReLU(),
            nn.Conv2d(60, 48, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Fully connected regression head.
        self.enc2 = nn.Sequential(
            nn.Linear(48 * 27 * 27, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 16),
        )

    def encoder(self, image):
        """Run the conv stack, flatten per sample, and apply the dense head.

        Args:
            image: float tensor of shape (batch, 3, H, W).

        Returns:
            Tensor of shape (batch, 16).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first Linear layer (i.e. the input resolution is wrong).
        """
        features = self.enc1(image)
        # Keep the batch axis fixed; a hard-coded `view(-1, N)` can silently
        # reinterpret surplus features as extra batch rows.
        features = features.flatten(start_dim=1)
        return self.enc2(features)

    def forward(self, image):
        """Alias of `encoder` so the module can be called directly."""
        return self.encoder(image)

# Landmark2Expression
class ExpressionNN(Module):
    """MLP mapping a 16-value landmark vector to 3 expression outputs.

    `enc1` lifts the input to 240 features, `enc2` is a 240->240 block that
    is applied twice (with shared weights) inside residual connections, and
    `enc3` projects to the 3 outputs.
    """

    def __init__(self):
        super().__init__()
        self.enc1 = nn.Sequential(
            nn.Linear(16, 60),
            nn.ReLU(),
            nn.Linear(60, 300),
            nn.ReLU(),
            nn.Linear(300, 240),
            nn.ReLU(),
        )
        # Residual block body; reused for both residual stages.
        self.enc2 = nn.Sequential(
            nn.Linear(240, 240),
            nn.ReLU(),
            nn.Linear(240, 240),
            nn.ReLU(),
        )
        self.enc3 = nn.Sequential(
            nn.Linear(240, 200),
            nn.Dropout(0.2),
            nn.Linear(200, 3),
        )

    def encoder(self, dataIn):
        """Apply the stem, two weight-shared residual stages, and the head.

        Args:
            dataIn: tensor of shape (batch, 16); cast to float32 here.

        Returns:
            Tensor of shape (batch, 3).
        """
        dataIn = dataIn.float()
        code1 = self.enc1(dataIn)
        # Out-of-place adds: `x += y` would overwrite the ReLU output that
        # autograd has saved for the backward pass and make `.backward()`
        # fail with an in-place modification error.
        code2 = self.enc2(code1) + code1
        code3 = self.enc2(code2) + code2
        return self.enc3(code3)

    def forward(self, dataIn):
        """Alias of `encoder` so the module can be called directly."""
        return self.encoder(dataIn)

# Load the pre-trained models that were saved as whole pickled modules.
#
# SECURITY NOTE: torch.load() unpickles arbitrary Python objects, so only
# load checkpoint files from a trusted source.  The class definitions above
# must be in scope for unpickling to succeed.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects whole-module pickles like these; pass
# weights_only=False there (trusted files only) - confirm against the
# installed version.
#
# No throw-away `Model()` instance is created first: torch.load returns a
# complete module, so constructing (and randomly initialising) one just to
# overwrite it only wastes time and memory.
# map_location="cpu" keeps the weights on the CPU, matching the CPU input
# tensors and `.numpy()` calls used by the capture loop, even if the
# checkpoint was saved from a GPU.
hasFaceModel = torch.load("./Models/model_hasface", map_location="cpu")
hasFaceModel.eval()

landmarkModel = torch.load("./Models/model_landmark", map_location="cpu")
landmarkModel.eval()

expressionModel = torch.load("./Models/model_expression", map_location="cpu")
# Last expression of the cell: switches to inference mode and displays the
# loaded architecture.
expressionModel.eval()

ExpressionNN(
  (enc1): Sequential(
    (0): Linear(in_features=24, out_features=60, bias=True)
    (1): ReLU()
    (2): Linear(in_features=60, out_features=300, bias=True)
    (3): ReLU()
    (4): Linear(in_features=300, out_features=120, bias=True)
    (5): ReLU()
  )
  (enc2): Sequential(
    (0): Linear(in_features=120, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=120, bias=True)
    (3): ReLU()
  )
  (enc3): Sequential(
    (0): Linear(in_features=120, out_features=64, bias=True)
    (1): Dropout(p=0.1, inplace=False)
    (2): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [72]:
class MouthNN(Module):
    """Small CNN mapping a 3-channel image to 2 regression outputs.

    With a 128x128 input the convolutional stack (`enc1`) produces 16 feature
    maps of 30x30, which is the size the first fully connected layer of
    `enc2` expects.  The capture loop in this notebook scales the two outputs
    by 256 and uses them as the (x, y) centre of the mouth crop.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3x128x128 -> 16x30x30.
        self.enc1 = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Fully connected regression head.
        self.enc2 = nn.Sequential(
            nn.Linear(16 * 30 * 30, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
        )

    def encoder(self, image):
        """Run the conv stack, flatten per sample, and apply the dense head.

        Args:
            image: float tensor of shape (batch, 3, H, W).

        Returns:
            Tensor of shape (batch, 2).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first Linear layer (i.e. the input resolution is wrong).
        """
        features = self.enc1(image)
        # Flatten per sample; a hard-coded `view(-1, N)` could silently turn
        # surplus features into extra batch rows for a wrong-sized input.
        features = features.flatten(start_dim=1)
        return self.enc2(features)

    def forward(self, image):
        """Alias of `encoder` so the module can be called directly."""
        return self.encoder(image)

# Mouth2Expression
class ExpressionNN2(Module):
    """CNN mapping a 3-channel mouth crop to 3 expression outputs.

    With a 64x64 input the convolutional stack (`enc1`) defined here produces
    16 feature maps of 13x13, which is the size the first fully connected
    layer of `enc2` expects.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3x64x64 -> 16x13x13.
        self.enc1 = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Fully connected regression head (dropout is inactive in eval mode).
        self.enc2 = nn.Sequential(
            nn.Linear(16 * 13 * 13, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 3),
        )

    def encoder(self, image):
        """Run the conv stack, flatten per sample, and apply the dense head.

        Args:
            image: float tensor of shape (batch, 3, H, W).

        Returns:
            Tensor of shape (batch, 3).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first Linear layer of `enc2`.
        """
        features = self.enc1(image)
        # Flatten per sample rather than to a hard-coded width, so the method
        # does not depend on the exact feature-map size of whichever
        # `enc1`/`enc2` layers this instance carries (an unpickled checkpoint
        # may differ from the definition above), and a genuine mismatch
        # surfaces as a Linear shape error instead of a changed batch size.
        features = features.flatten(start_dim=1)
        return self.enc2(features)

    def forward(self, image):
        """Alias of `encoder` so the module can be called directly."""
        return self.encoder(image)

# Load the pre-trained mouth models that were saved as whole pickled modules.
#
# SECURITY NOTE: torch.load() unpickles arbitrary Python objects, so only
# load checkpoint files from a trusted source.  The class definitions above
# must be in scope for unpickling to succeed.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects whole-module pickles like these; pass
# weights_only=False there (trusted files only) - confirm against the
# installed version.
#
# torch.load returns a complete module, so no placeholder instance is built
# first.  map_location="cpu" keeps the weights on the CPU, matching the CPU
# input tensors and `.numpy()` calls used by the capture loop.
mouthModel = torch.load("./Models/model_mouth", map_location="cpu")
mouthModel.eval()

expressionModel2 = torch.load("./Models/model_expression2", map_location="cpu")
# Last expression of the cell: switches to inference mode and displays the
# loaded architecture.
expressionModel2.eval()

ExpressionNN2(
  (enc1): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
  )
  (enc2): Sequential(
    (0): Linear(in_features=10816, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=256, out_features=3, bias=True)
  )
)

In [79]:
# Real-time pipeline: webcam frame -> face box -> mouth centre -> expression
# values, which are printed and sent to a TCP listener as 3 float32 values.
# Keys (with the "camera" window focused): 'r' starts inference, 'e' exits.

FRAME_PERIOD_S = 0.033                  # target loop period (~30 iterations/s)
FRAME_SCALE = 639                       # normalised face-box coord -> pixel index
                                        # NOTE(review): assumes 640x640 frames - confirm
FACE_SCORE_THRESHOLD = 0.5              # minimum HasFaceNN output[0] to accept a face
MOUTH_HALF_SIZE = 24                    # half side of the mouth crop, in 256x256 face pixels
TCP_TARGET = ('192.168.0.128', 8888)    # receiver of the expression values


def _predict(model, batch):
    """Run `model` on `batch` without autograd; return the first output row as a NumPy array."""
    with torch.no_grad():
        return model(batch).numpy()[0]


def _face_crop(frame, face):
    """Cut the square face region described by `face` out of `frame`.

    `face[1]`/`face[2]` and `face[3]`/`face[4]` are used as the normalised
    vertical and horizontal extents of the face box.  Returns None when the
    resulting slice is empty.
    """
    centy = (face[1] + face[2]) * 0.5
    centx = (face[3] + face[4]) * 0.5
    # Half side of the square, never smaller than 1% of the frame.
    r = max(centx - face[3], centy - face[1], 0.01)

    top = centy - r
    if top < 0.0:
        top = 0.01
    bottom = centy + r
    if bottom >= 1.0:
        bottom = 1.0
    left = centx - r
    if left < 0.0:
        left = 0.01
    right = centx + r
    if right >= 1.0:
        right = 1.0

    crop = frame[int(top * FRAME_SCALE):int(bottom * FRAME_SCALE),
                 int(left * FRAME_SCALE):int(right * FRAME_SCALE)]
    # An empty slice would make cv2.resize raise; let the caller skip the frame.
    return crop if crop.size else None


def _mouth_crop(face256, mouth_xy):
    """Cut a square around `mouth_xy` (pixel coords in `face256`) and resize it to 64x64.

    Returns None when the clamped box is empty or inverted (mouth centre
    predicted outside the face image).
    """
    centx, centy = mouth_xy[0], mouth_xy[1]
    top = max(0, int(centy - MOUTH_HALF_SIZE))
    bottom = min(255, int(centy + MOUTH_HALF_SIZE))
    left = max(0, int(centx - MOUTH_HALF_SIZE))
    right = min(255, int(centx + MOUTH_HALF_SIZE))
    if bottom <= top or right <= left:
        return None
    return cv2.resize(face256[top:bottom, left:right], (64, 64))


def _estimate_expression(frame):
    """Run the full model cascade on one BGR frame.

    Returns the 3-value output of `expressionModel2`, or None when no face is
    detected or a usable crop cannot be formed.
    """
    # Face detection on a 128x128 grayscale copy of the whole frame.
    gray128 = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (128, 128))
    res_hasFace = _predict(hasFaceModel, trans(gray128).unsqueeze(0))
    if not res_hasFace[0] > FACE_SCORE_THRESHOLD:
        return None

    face = _face_crop(frame, res_hasFace)
    if face is None:
        return None
    img256 = cv2.resize(face, (256, 256))
    face128 = trans(cv2.resize(face, (128, 128))).unsqueeze(0)

    # Mouth centre is predicted in normalised coords; scale to the 256x256 face image.
    res_landmark = _predict(mouthModel, face128) * 256
    mouth64 = _mouth_crop(img256, res_landmark)
    if mouth64 is None:
        return None

    # Mouth2Expression
    return _predict(expressionModel2, trans(mouth64).unsqueeze(0))


def _send_expression(payload):
    """Open a TCP connection to TCP_TARGET, send `payload` as raw bytes, and close it."""
    # The context manager closes the socket even if connect/sendall raises;
    # sendall guarantees the whole buffer is written.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_socket:
        tcp_socket.connect(TCP_TARGET)
        tcp_socket.sendall(payload.tobytes())


sendOutData = np.zeros(3, dtype=np.float32)

cam = cv2.VideoCapture(1)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 640)
flag_startCap = 0
resval = []
trans = torchvision.transforms.ToTensor()

try:
    while cam.isOpened():
        tmr_start = time.time()
        succ, img = cam.read()
        if not succ or img is None:
            # Camera stopped delivering frames; nothing left to show or process.
            break
        cv2.imshow("camera", img)

        if flag_startCap == 1:
            res_exp = _estimate_expression(img)
            if res_exp is not None:
                print(res_exp)
                sendOutData[:] = res_exp[:3]
                # send TCP message
                _send_expression(sendOutData)

        # Mask to the low byte so platform-specific modifier bits do not hide the key.
        keyPress = cv2.waitKey(1) & 0xFF
        if keyPress == ord('r'):
            flag_startCap = 1
        elif keyPress == ord('e'):
            flag_startCap = 0
            break

        # Sleep out the rest of the frame period instead of spinning the CPU.
        remaining = FRAME_PERIOD_S - (time.time() - tmr_start)
        if remaining > 0:
            time.sleep(remaining)
finally:
    # Always free the camera and close the preview window, even on errors.
    cam.release()
    cv2.destroyAllWindows()

[0.05299938 0.22541492 0.24092557]
[0.04094257 0.23925924 0.23777045]
[0.06877428 0.2147165  0.24708943]
[0.05229601 0.2317303  0.24557939]
[0.0565207  0.22558719 0.24352463]
[0.05735004 0.22364178 0.245406  ]
[0.0630591  0.21734962 0.24688737]
[0.0460503  0.23575148 0.23767762]
[0.00997894 0.27964216 0.228318  ]
[0.05949438 0.22301438 0.24039054]
[0.05440274 0.22639433 0.24471226]
[0.05240256 0.22923592 0.24137628]
[0.06677319 0.21607226 0.24340165]
[0.04941591 0.23149368 0.23926681]
[0.07948932 0.2114355  0.24854077]
[0.0771268  0.21382499 0.25099432]
[0.07089681 0.21674675 0.24771778]
[0.05584119 0.22583726 0.24264608]
[0.06983186 0.21779776 0.2445598 ]
[0.05939228 0.22234765 0.24600068]
[0.05136991 0.22734258 0.24112135]
[0.06814279 0.21524286 0.24989848]
[0.03184428 0.24855524 0.23613298]
[0.03037994 0.25043827 0.2375904 ]
[0.04551531 0.2343351  0.23771536]
[0.08043639 0.2081477  0.24925429]
[0.0427469  0.23643148 0.23813175]
[0.04978748 0.22906725 0.23883635]
[0.04278329 0.237777

[0.00850783 0.58546937 0.00724362]
[-0.03074276  0.6269969  -0.01113313]
[-0.07180065  0.61524814  0.03801818]
[-0.07461123  0.64459425  0.0198433 ]
[-0.07527468  0.6626537   0.00710468]
[-0.073918    0.66016346  0.00753699]
[-0.0687132   0.6628024  -0.00139232]
[-0.07417817  0.64550024  0.01863544]
[-0.07209757  0.6266226   0.0314988 ]
[-0.06862181  0.6521666   0.0058724 ]
