# Load packages and settings

In [1]:
import cv2
import sys
import dlib
import time
import socket
import struct
import numpy as np
import tensorflow as tf
from win32api import GetSystemMetrics
import win32gui

from threading import Thread, Lock
from config import get_config
import pickle
import math

In [2]:
# Read the runtime configuration; the second value returned by get_config() is not used here.
conf,_ = get_config()

# Pick the warping-model implementation named by conf.mod.
if conf.mod == 'flx_0413':
    import flx_0413 as model
elif conf.mod == 'deepwarp':
    import deepwarp as model
else:
    # Report the values this check really accepts (the previous text suggested 'flx',
    # which is rejected by the comparison above).
    sys.exit("Wrong Model selection %r: expected 'flx_0413' or 'deepwarp'" % (conf.mod,))

# system parameters
# Checkpoint root; the per-eye sub-directories 'L/' and 'R/' are appended when loading.
model_dir = '../'+conf.weight_set+'/warping_model/ckpt/'+conf.mod+'/'+ str(conf.ef_dim) + '/'
# Capture size requested from the camera, [width, height] in pixels.
size_video = [640,480]
# Pixel offset added to the "Remote" window's top-left corner to locate the gaze target.
coor_remote_head_center = [320,240]
# Frame counter for the once-per-second FPS report.
fps = 0
# Scale factor in the eye-depth estimate (depth = f * P_IDP / eye distance in pixels);
# presumably the inter-pupillary distance in the same unit as Ps -- TODO confirm.
P_IDP = 5
# Not referenced by the code visible in this notebook.
depth = -50
# for monitoring: latest values shown in the on-frame overlay
fig_alpha = [0,0]
fig_R_w=[320,240]
fig_eye_pos = [0,0,0]

# environment parameter
# Primary screen resolution in pixels, (width, height).
Rs = (GetSystemMetrics(0),GetSystemMetrics(1))

Namespace(P_IDP=5, agl_dim=2, channel=3, ef_dim=12, encoded_agl_dim=16, f=520, height=48, mod='flx', record_time=False, recver_port=5005, sender_port=5005, tar_ip='localhost', uid='local', weight_set='weights_20180413', width=64)


In [3]:
# Window size in pixels, [width, height], used when centring the window on the screen.
size_window = [659,528]
# Top-left corner that centres a window of that size on a screen of resolution Rs.
win_pos = [int(screen_dim/2)-int(window_dim/2)
           for screen_dim, window_dim in zip(Rs, size_window)]

# Load CNN models to GPU

In [4]:
def _load_eye_model(side):
    """Build the inference graph for one eye and restore its trained weights.

    Args:
        side: 'L' or 'R'; selects the checkpoint sub-directory model_dir + side + '/'.

    Returns:
        A tuple (graph, session, input_img, input_fp, input_ang, phase_train, img_pred)
        where the four inputs are the placeholders to feed and img_pred is the first
        value returned by model.inference.

    If no checkpoint is found a message is printed and the session is returned with
    its variables left unrestored.
    """
    graph = tf.Graph()
    with graph.as_default():
        # define placeholder for inputs to network
        with tf.name_scope('inputs'):
            # eye image: [None, conf.height, conf.width, conf.channel]
            input_img = tf.placeholder(tf.float32, [None, conf.height, conf.width, conf.channel], name="input_img")
            # anchor (feature-point) maps: [None, conf.height, conf.width, conf.ef_dim]
            input_fp = tf.placeholder(tf.float32, [None, conf.height, conf.width, conf.ef_dim], name="input_fp")
            # redirection angles: [None, conf.agl_dim]
            input_ang = tf.placeholder(tf.float32, [None, conf.agl_dim], name="input_ang")
            # a bool for batch_normalization
            phase_train = tf.placeholder(tf.bool, name='phase_train')

        # inference model; only the first returned tensor is kept.
        img_pred, _, _ = model.inference(input_img, input_fp, input_ang, phase_train, conf)

        # The session is bound to this graph so each eye's variables stay separate.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,log_device_placement=False), graph = graph)
        # load model
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(model_dir + side + '/')
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
    return graph, sess, input_img, input_fp, input_ang, phase_train, img_pred


print("Loading model of [L] eye to GPU")
(g, L_sess,
 LE_input_img, LE_input_fp, LE_input_ang, LE_phase_train,
 LE_img_pred) = _load_eye_model('L')

print("Loading model of [R] eye to GPU")
(g2, R_sess,
 RE_input_img, RE_input_fp, RE_input_ang, RE_phase_train,
 RE_img_pred) = _load_eye_model('R')

Loading model of [L] eye to GPU
INFO:tensorflow:Restoring parameters from ../weights_20180413/warping_model/ckpt/flx/12/L/mix_48x64_L5-15308
Loading model of [R] eye to GPU
INFO:tensorflow:Restoring parameters from ../weights_20180413/warping_model/ckpt/flx/12/R/mix_48x64_R2-10680


In [5]:
# class video_receiver:
#     def __init__(self, port):
#         self.close = False
#         self.port = port
#         self.video_recv = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
#         print('Socket created')
#         self.detector = dlib.get_frontal_face_detector()
#         self.predictor = dlib.shape_predictor("./lm_feat/shape_predictor_68_face_landmarks.dat") 
#         self.face_detect_size = [320,240]
#         self.coor_remote_head_center = [320,240]
#         self.x_ratio = size_video[0]/self.face_detect_size[0]
#         self.y_ratio = size_video[1]/self.face_detect_size[1]
        
#         self.video_recv_thread = Thread(target=self.start_recv, args=(self.port,))
#         self.video_recv_thread.start()
        
#     def face_detection(self,frame):
#         # face detection
#         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#         face_detect_gray = cv2.resize(gray,(self.face_detect_size[0],self.face_detect_size[1]))
#         detections = self.detector(face_detect_gray, 0)
#         for k,bx in enumerate(detections):
#             #(left,right,top,bottom)
#             self.coor_remote_head_center = [int((bx.left()+bx.right())*self.x_ratio/2),
#                                        int((bx.top()+bx.bottom())*self.y_ratio/2)]
#             break
# #         print(recv_time_fd)
# #         print(self.coor_remote_head_center)
#         remote_head_Center = self.coor_remote_head_center
        
#     def start_recv(self, port):
#         global remote_head_Center
#         self.video_recv.bind(('',port))
#         self.video_recv.listen(10)
#         print('Socket now listening')
#         conn,addr=self.video_recv.accept()

#         data = b""
#         payload_size = struct.calcsize(">L")
#         print("payload_size: {}".format(payload_size))
        
#         while True:
#             while len(data) < payload_size:
#                 data += conn.recv(90456)

#             packed_msg_size = data[:payload_size]
#             data = data[payload_size:]
#             msg_size = struct.unpack(">L", packed_msg_size)[0]
#             while len(data) < msg_size:
#                 data += conn.recv(90456)

#             frame_data = data[:msg_size]
#             data = data[msg_size:]

#             frame = pickle.loads(frame_data, fix_imports=True, encoding="bytes")           
            
#             frame = cv2.imdecode(frame, cv2.IMREAD_COLOR)
            
# #             self.video_recv_hd_thread = Thread(target=self.face_detection, args=(frame,))
# #             self.video_recv_hd_thread.start()

#             cv2.imshow('Remote',frame)
#             k = cv2.waitKey(1)
#             if (k == ord('o') or self.close):                
#                 self.close = True
#                 break
#             else:
#                 pass
#         cv2.destroyWindow('Remote')
            
#     def stop(self):
#         self.close = True
#         if self.video_recv_thread.is_alive():
#             self.video_recv_thread.join()

In [6]:
# # _ = video_receiver(port = conf.recver_port)

# vs_thread = Thread(target=video_receiver, args=(conf.recver_port,))
# vs_thread.start()
# # time.sleep(3)

In [7]:
# ## socket
# client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# client_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# client_socket.connect((conf.tar_ip, conf.sender_port))
# encode_param=[int(cv2.IMWRITE_JPEG_QUALITY),90]

# Flx-gaze 

In [8]:
class gaze_redirection_system:
    """Detects a face, estimates the required gaze shift and warps both eyes.

    The class relies on module-level state defined elsewhere in the notebook:
    the TensorFlow sessions/placeholders (L_sess, R_sess, LE_*, RE_*), the
    geometry settings (Rs, size_video, size_window, coor_remote_head_center,
    P_IDP) and the monitoring globals fig_alpha, fig_R_w and fig_eye_pos.
    """

    def __init__(self):
        """Load the dlib face detector / landmark predictor and set the geometry defaults."""
        #Landmark identifier. Set the filename to whatever you named the downloaded file
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor("./lm_feat/shape_predictor_68_face_landmarks.dat") 
        # Size [width, height] the grayscale frame is shrunk to before face detection.
        self.size_df = [320,240]
        self.size_I = [48,64]
        # initial value
        self.Rw = [0,0]
        self.Pe_z = -50
        #### Remember to re-measure the parameters below when switching to another computer!
        # presumably the camera focal length in pixels -- TODO confirm
        self.f = conf.f
        # presumably the physical screen size (width, height) -- TODO confirm unit
        self.Ps = (35,19.5)
        # presumably the camera position relative to the screen centre -- TODO confirm
        self.Pc = (0,-10,0)
        self.Pe = [self.Pc[0],self.Pc[1],self.Pe_z] # H,V,D
        
    def monitor_para(self,frame):
        """Draw the latest eye position, angles and target pixel in the frame's top-right corner.

        Reads the module globals fig_eye_pos, fig_alpha and fig_R_w, draws onto
        `frame` in place and returns the same frame.
        """
        # White filled box used as background for the three text lines.
        cv2.rectangle(frame,
                  (size_video[0]-150,0),(size_video[0],55),
                  (255,255,255),-1
                 )
        cv2.putText(frame,
                    'Eye:['+str(int(fig_eye_pos[0])) +','+str(int(fig_eye_pos[1]))+','+str(int(fig_eye_pos[2]))+']',
                    (size_video[0]-140,15), cv2.FONT_HERSHEY_SIMPLEX, 0.4,(0,0,255),1,cv2.LINE_AA)
        cv2.putText(frame,
                    'alpha:[V='+str(int(fig_alpha[0])) + ',H='+ str(int(fig_alpha[1]))+']',
                    (size_video[0]-140,30),cv2.FONT_HERSHEY_SIMPLEX,0.4,(0,0,255),1,cv2.LINE_AA)
        cv2.putText(frame,
                    'R_w:['+str(int(fig_R_w[0])) + ','+ str(int(fig_R_w[1]))+']',
                    (size_video[0]-140,45),cv2.FONT_HERSHEY_SIMPLEX,0.4,(0,0,255),1,cv2.LINE_AA)
        return frame
        
    def get_inputs(self, frame, shape, pos = "L", size_I = [48,64]):
        """Crop one eye from `frame` and build its landmark anchor maps.

        Args:
            frame: image the eye patch is sliced from (indexed [row, column]).
            shape: landmark result exposing part(i).x / part(i).y.
            pos: "L" uses landmarks 42-47, "R" uses landmarks 36-41.
            size_I: [height, width] the patch is resized to.

        Returns:
            (img_eye / 255, ach_map, eye_center, ori_size, LT_coor) where ach_map has
            shape (size_I[0], size_I[1], 12) holding per-landmark x/y offset maps,
            eye_center is [x, y], ori_size is the crop's [height, width] before
            resizing and LT_coor is the crop's top-left corner as [y, x].

        Raises:
            ValueError: if pos is neither "L" nor "R".
        """
        if pos == "R":
            lc = 36
            rc = 39
            FP_seq = [36,37,38,39,40,41]
        elif pos == "L":
            lc = 42
            rc = 45
            FP_seq = [45,44,43,42,47,46]
        else:
            # Previously this only printed and then failed on the unbound corner indices.
            raise ValueError("Error: Wrong Eye: pos must be 'L' or 'R', got %r" % (pos,))

        eye_cx = (shape.part(rc).x+shape.part(lc).x)*0.5
        eye_cy = (shape.part(rc).y+shape.part(lc).y)*0.5
        eye_center = [eye_cx, eye_cy]
        eye_len = np.absolute(shape.part(rc).x - shape.part(lc).x)
        # Half-width of the crop is 3/4 of the corner-to-corner distance; the box is
        # 1.5x that tall and sits 7/12 above / 5/12 below the eye centre.
        bx_d5w = eye_len*3/4
        bx_h = 1.5*bx_d5w
        sft_up = bx_h*7/12
        sft_low = bx_h*5/12
        # NOTE(review): no clamping to the frame borders is done here.
        img_eye = frame[int(eye_cy-sft_up):int(eye_cy+sft_low),int(eye_cx-bx_d5w):int(eye_cx+bx_d5w)]
        ori_size = [img_eye.shape[0],img_eye.shape[1]]
        LT_coor = [int(eye_cy-sft_up), int(eye_cx-bx_d5w)] # (y,x)    
        img_eye = cv2.resize(img_eye, (size_I[1],size_I[0]))
        # create anchor maps: for every landmark, the x and y offset of each pixel from it
        channels = []
        for d in FP_seq:
            # landmark position expressed in resized-patch coordinates
            resize_x = int((shape.part(d).x-LT_coor[1])*size_I[1]/ori_size[1])
            resize_y = int((shape.part(d).y-LT_coor[0])*size_I[0]/ori_size[0])
            # y
            ach_map_y = np.expand_dims(np.expand_dims(np.arange(0, size_I[0]) - resize_y, axis=1), axis=2)
            ach_map_y = np.tile(ach_map_y, [1,size_I[1],1])
            # x
            ach_map_x = np.expand_dims(np.expand_dims(np.arange(0, size_I[1]) - resize_x, axis=0), axis=2)
            ach_map_x = np.tile(ach_map_x, [size_I[0],1,1])
            channels.extend((ach_map_x, ach_map_y))
        ach_map = np.concatenate(channels, axis=2)

        return img_eye/255, ach_map, eye_center, ori_size, LT_coor
       
    def shifting_angles_estimator(self, R_le, R_re):
        """Estimate the [vertical, horizontal] gaze-shift angles in whole degrees.

        Args:
            R_le: [x, y] pixel centre of the left eye in the captured frame.
            R_re: [x, y] pixel centre of the right eye in the captured frame.

        Returns:
            [alpha_vertical, alpha_horizontal], each truncated to int.

        Side effects: updates self.Pe and the module globals fig_alpha, fig_R_w
        and fig_eye_pos (fig_eye_pos aliases self.Pe).
        """
        global fig_alpha
        global fig_R_w
        global fig_eye_pos
        
        # get P_w: screen position of the target inside the "Remote" window
        try:
            tar_win = win32gui.FindWindow(None, "Remote")
            Rw_lt = win32gui.GetWindowRect(tar_win)
        except Exception:
            # Window not available: assume it is centred on the screen, the same
            # placement formula used for the preview window.
            Rw_lt = [int(Rs[0]/2)-int(size_window[0]/2),int(Rs[1]/2)-int(size_window[1]/2)]
            print("Missing the window")
        R_w = (Rw_lt[0]+coor_remote_head_center[0], Rw_lt[1]+coor_remote_head_center[1])
        fig_R_w = R_w
        # Pixel offset from the screen centre scaled by Ps / Rs.
        Pw = (self.Ps[0]*(R_w[0]-Rs[0]/2)/Rs[0], self.Ps[1]*(R_w[1]-Rs[1]/2)/Rs[1], 0)
        
        # get Pe: depth from the pixel distance between the two eyes
        self.Pe[2] = -(self.f*P_IDP)/np.sqrt((R_le[0]-R_re[0])**2 + (R_le[1]-R_re[1])**2)
        # x-axis needs flip
        self.Pe[0] = -np.abs(self.Pe[2])*(R_le[0]+R_re[0]-size_video[0])/(2*self.f) + self.Pc[0]
        self.Pe[1] = np.abs(self.Pe[2])*(R_le[1]+R_re[1]-size_video[1])/(2*self.f) + self.Pc[1]
        fig_eye_pos = self.Pe

        # calculate alpha: angle eye->target plus angle eye->camera, per axis
        a_w2z_x = math.degrees(math.atan( (Pw[0]-self.Pe[0])/(Pw[2]-self.Pe[2])))
        a_w2z_y = math.degrees(math.atan( (Pw[1]-self.Pe[1])/(Pw[2]-self.Pe[2])))    
        
        a_z2c_x = math.degrees(math.atan( (self.Pe[0]-self.Pc[0])/(self.Pc[2]-self.Pe[2])))
        a_z2c_y = math.degrees(math.atan( (self.Pe[1]-self.Pc[1])/(self.Pc[2]-self.Pe[2])))
        # The horizontal term pairs the two x angles (it previously added a_z2c_y).
        alpha = [int(a_w2z_y + a_z2c_y),int(a_w2z_x + a_z2c_x)]
       
        fig_alpha = alpha
            
        return alpha
    
    def flx_gaze(self, frame, gray, detections, pixel_cut=[3,4], size_I = [48,64]):
        """Redirect the gaze of every detected face in `frame` and display the result.

        Args:
            frame: colour frame, modified in place.
            gray: grayscale version of `frame` at full resolution.
            detections: face boxes found on the size_df-sized image.
            pixel_cut: [rows, cols] margin trimmed from each warped patch before pasting.
            size_I: [height, width] of the network input patch.

        Returns:
            True. The annotated frame is shown in the "Remote" window.
        """
        # Detection ran on a shrunken image, so boxes are scaled back to frame size.
        x_ratio = size_video[0]/self.size_df[0]
        y_ratio = size_video[1]/self.size_df[1]
        for bx in detections:
            # Get facial landmarks
            target_bx = dlib.rectangle(left=int(bx.left()*x_ratio),right =int(bx.right()*x_ratio),
                                       top =int(bx.top()*y_ratio), bottom=int(bx.bottom()*y_ratio))
            shape = self.predictor(gray, target_bx)
            # get eye
            LE_img, LE_M_A, LE_center, size_le_ori, R_le_LT = self.get_inputs(frame, shape, pos="L", size_I=size_I)
            RE_img, RE_M_A, RE_center, size_re_ori, R_re_LT = self.get_inputs(frame, shape, pos="R", size_I=size_I)

            alpha_w2c = self.shifting_angles_estimator(LE_center,RE_center)
            
            # gaze redirection
            # left Eye
            LE_infer_img = L_sess.run(LE_img_pred, feed_dict= {
                                                            LE_input_img: np.expand_dims(LE_img, axis = 0),
                                                            LE_input_fp: np.expand_dims(LE_M_A, axis = 0),
                                                            LE_input_ang: np.expand_dims(alpha_w2c, axis = 0),
                                                            LE_phase_train: False
                                                         })
            LE_infer = cv2.resize(LE_infer_img.reshape(size_I[0],size_I[1],3), (size_le_ori[1], size_le_ori[0]))
            # right Eye
            RE_infer_img = R_sess.run(RE_img_pred, feed_dict= {
                                                            RE_input_img: np.expand_dims(RE_img, axis = 0),
                                                            RE_input_fp: np.expand_dims(RE_M_A, axis = 0),
                                                            RE_input_ang: np.expand_dims(alpha_w2c, axis = 0),
                                                            RE_phase_train: False
                                                         })
            RE_infer = cv2.resize(RE_infer_img.reshape(size_I[0],size_I[1],3), (size_re_ori[1], size_re_ori[0]))
            
            # replace eyes: paste each warped patch back, minus the pixel_cut border
            frame[(R_le_LT[0]+pixel_cut[0]):(R_le_LT[0]+size_le_ori[0]-pixel_cut[0]),
                  (R_le_LT[1]+pixel_cut[1]):(R_le_LT[1]+size_le_ori[1]-pixel_cut[1])] = LE_infer[pixel_cut[0]:(-1*pixel_cut[0]), pixel_cut[1]:-1*(pixel_cut[1])]*255
            frame[(R_re_LT[0]+pixel_cut[0]):(R_re_LT[0]+size_re_ori[0]-pixel_cut[0]),
                  (R_re_LT[1]+pixel_cut[1]):(R_re_LT[1]+size_re_ori[1]-pixel_cut[1])] = RE_infer[pixel_cut[0]:(-1*pixel_cut[0]), pixel_cut[1]:-1*(pixel_cut[1])]*255
               
        frame = self.monitor_para(frame)

#         result, imgencode = cv2.imencode('.jpg', frame, encode_param)
#         data = pickle.dumps(imgencode, 0)
#         size = len(data)
# #         print("{}: {}".format(img_counter, size))
#         client_socket.sendall(struct.pack(">L", size) + data)
        # NOTE(review): this runs on the worker thread started by redirect_gaze.
        cv2.imshow("Remote",frame)    
        return True
        
 
    def redirect_gaze(self, frame):
        """Detect faces in `frame` and start a background thread that warps and shows it.

        Returns True immediately; the thread is not joined.
        """
        # head detection on a shrunken grayscale copy
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        face_detect_gray = cv2.resize(gray,(self.size_df[0],self.size_df[1]))
        detections = self.detector(face_detect_gray, 0)
           
        rg_thread = Thread(target=self.flx_gaze, args=(frame, gray, detections))
        rg_thread.start()
        return True

In [9]:
# def main():
# Capture loop: shows the raw camera frame in the window named conf.uid and either the
# raw or the gaze-redirected frame in the "Remote" window.
# Keys: 'r' toggles redirection, 'q' quits.
redir = False

vs = cv2.VideoCapture(0)
vs.set(cv2.CAP_PROP_FRAME_WIDTH, size_video[0])
vs.set(cv2.CAP_PROP_FRAME_HEIGHT, size_video[1])
grs = gaze_redirection_system()
t = time.time()
cv2.namedWindow(conf.uid)
cv2.moveWindow(conf.uid, int(Rs[0]/2)-int(size_window[0]/2),int(Rs[1]/2)-int(size_window[1]/2))
try:
    while True:
        ret, recv_frame = vs.read()
        if ret and recv_frame is not None:
            cv2.imshow(conf.uid,recv_frame)
            # redirected gaze
            if redir:
                frame = recv_frame.copy()
                try:
                    tag = grs.redirect_gaze(frame)
                except Exception as err:
                    # Keep the loop alive, but report the failure instead of hiding it.
                    print('Gaze redirection failed: %s' % (err,))
            else:
#                 result, imgencode = cv2.imencode('.jpg', recv_frame, encode_param)
#                 data = pickle.dumps(imgencode, 0)
#                 size = len(data)
#                 client_socket.sendall(struct.pack(">L", size) + data)
                cv2.imshow("Remote",recv_frame)
            # Count the handled frame so the report below is meaningful.
            fps += 1

        if (time.time() - t) > 1:
            t = time.time()
            if conf.record_time:
                print('FPS %d' % fps)
            fps = 0

        # Poll the keyboard on every iteration, including after a failed read,
        # so 'q' can always end the loop.
        k = cv2.waitKey(10)
        if k == ord('q'):
            break
        elif k == ord('r'):
            redir = not redir
finally:
    # free
    # vr.stop()
    # client_socket.shutdown(socket.SHUT_RDWR)
    # client_socket.close()
    vs.release()
    cv2.destroyAllWindows()
#     return True

In [10]:
# Final cleanup: release the camera, close all OpenCV windows, then close both TF sessions.
# client_socket.shutdown(socket.SHUT_RDWR)
# client_socket.close()
vs.release()
cv2.destroyAllWindows()
# if __name__ == '__main__':
#     main()
for _eye_session in (L_sess, R_sess):
    _eye_session.close()