In [1]:
import ast
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

from eye_dataset_v1 import *
from eye_model_v1 import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np  

# generate eye boxes
def get_eye_box(face_bbox,face_landmarks,multiplier=3):
    '''
    Build a square bounding box around each eye from 5-point face landmarks.

    The landmarks are reshaped to (5, 2); row 0 is taken as the left-eye centre and
    row 1 as the right-eye centre. If you are not using RetinaFace-style landmarks
    (presumably ordered: left eye, right eye, nose, mouth corners -- confirm against
    your detector), reorder them so that the two eye centres come first.

    Args:
        face_bbox: face box (l, t, r, b). Kept for interface compatibility only;
            the eye boxes are derived from the landmarks alone.
        face_landmarks: 10 numbers (flat or nested) describing five (x, y) points.
        multiplier: each eye box has side = horizontal eye distance / multiplier.

    Returns:
        (left_eye_bbox, right_eye_bbox), each a list [l, t, r, b] of ints.
    '''
    eye_centres = np.array(face_landmarks).reshape(5,2)[:2]
    (lx,ly),(rx,ry) = eye_centres

    # abs() keeps the side length positive even when the two eye landmarks arrive in
    # reversed horizontal order; a signed width would yield inverted boxes (l > r).
    half_side = abs(rx-lx)/multiplier*0.5

    def _square_around(cx,cy):
        # Square box centred on (cx, cy); int() truncates towards zero.
        return [int(cx-half_side),int(cy-half_side),int(cx+half_side),int(cy+half_side)]

    return _square_around(lx,ly),_square_around(rx,ry)


def load_image(image_path,bbox,input_dim = 80,expand_ratio = 0):
    '''
    Read an image with cv2, crop it to ``bbox`` and resize the crop to a square.

    The image is converted from BGR to RGB. The box can be grown symmetrically by
    ``expand_ratio`` (a fraction of its width/height) and is clipped to the image
    before cropping.

    Args:
        image_path: path handed to ``cv2.imread``.
        bbox: integer crop box (l, t, r, b) in pixel coordinates.
        input_dim: side length of the returned square image.
        expand_ratio: total fractional growth of the box; half is added on each side.

    Returns:
        The resized RGB crop as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns None for ``image_path``.
        ValueError: if the box is empty after clipping to the image.
    '''
    bgr = cv2.imread(image_path)
    if bgr is None:
        # cv2.imread signals an unreadable path by returning None instead of raising.
        raise FileNotFoundError(f'could not read image: {image_path}')
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img_h, img_w = image.shape[:2]

    l, t, r, b = bbox
    pad_x = int(expand_ratio * (r - l) / 2)
    pad_y = int(expand_ratio * (b - t) / 2)

    # The upper bound stays at size - 1 (as before) so crops of in-frame boxes are
    # unchanged. The extra max(0, ...) stops a negative right/bottom edge from being
    # interpreted as a wrap-around negative slice index.
    l, r = max(0, l - pad_x), max(0, min(img_w - 1, r + pad_x))
    t, b = max(0, t - pad_y), max(0, min(img_h - 1, b + pad_y))
    if r <= l or b <= t:
        raise ValueError(
            f'empty crop for bbox {list(bbox)} on {image_path} '
            f'(image size {img_w}x{img_h})'
        )

    return cv2.resize(image[t:b, l:r], (input_dim, input_dim))

    
def apply_input_transforms(image): 
    '''
    Turn a cropped image array into a model-ready tensor.

    The image is passed through a one-step ``Compose`` pipeline that contains only
    ``ToTensor()`` (presumably the torchvision transform, which converts a numpy
    image to a torch.tensor and rescales it by /255 -- confirm against the
    star-imported module that provides it).
    '''
    to_tensor_pipeline = Compose([ToTensor()])
    return to_tensor_pipeline(image)

In [3]:
# inputs

# -- model checkpoint: architecture key plus the run folder / epoch to load --
model_key = 'mobilenetv3_small_050'
output_folder, epoch = 'v3.3.4.1', 987
model_path = f'/home/jovyan/data/aurora/ISO_L2_eye_mouth_blink/eye/train/result/{output_folder}/weights/epoch{epoch}.pt'
print(model_path)

# -- preprocessing --
# multiplier: divisor applied to the eye distance when sizing the eye boxes
multiplier = 1.5
# input_dim / expand_ratio are passed to load_image, in_channel to the model constructor
input_dim, in_channel, expand_ratio = 80, 3, 0

/home/jovyan/data/aurora/ISO_L2_eye_mouth_blink/eye/train/result/v3.3.4.1/weights/epoch987.pt


In [4]:
# Input: a CSV listing the test images together with their face box and face
# landmarks (e.g. landmarks obtained with the RetinaFace package).
# NOTE(review): the path points into the `mouth` data tree although this notebook
# scores the eye model -- confirm this is the intended test set.

csvPath = f'/home/jovyan/data/aurora/ISO_L2_eye_mouth_blink/mouth/data/v1.2/v1.2.2_test.csv' 

df = pd.read_csv(csvPath)
# Rows without landmarks cannot produce eye boxes, so they are dropped.
df = df[~df.rf_landmarks.isna()]
image_paths = df.image_path.tolist()

# The box / landmark columns hold Python literals stored as text.
# ast.literal_eval only parses literals, so a malformed or malicious CSV cell
# cannot execute code the way eval() would.
bboxes = [ast.literal_eval(x) for x in df.rf_bbox.tolist()]
landmarks = [ast.literal_eval(x) for x in df.rf_landmarks.tolist()]

# create eye boxes from the image landmarks
# each entry is a (left_eye_bbox, right_eye_bbox) pair
eye_bboxes = [get_eye_box(bbox,landmark,multiplier) for bbox,landmark in zip(bboxes,landmarks)]

In [5]:
# initialise model

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the network, move it to the chosen device, then restore the trained weights
# (map_location lets the checkpoint load on whichever device was selected).
model = Eye_Net(model_key = model_key, in_channel = in_channel)
model = model.to(device)
model.load_state_dict(torch.load(model_path, map_location = device))

# Switch to evaluation mode; the return value is bound to `_` so nothing is displayed.
_ = model.eval()
print('model loaded succesfully')

model loaded succesfully


In [6]:
def _eye_scores(image_path, eye_bbox):
    '''Crop one eye, run the model on it and return its softmax scores rounded to 5 decimals.'''
    crop = load_image(image_path, eye_bbox, input_dim, expand_ratio)
    # .to(device) follows the device chosen when the model was loaded, so the
    # notebook also runs on CPU-only machines (a bare .cuda() would fail there).
    batch = apply_input_transforms(crop).unsqueeze(0).to(device)
    logits = model(batch).cpu()
    probabilities = torch.softmax(logits, dim=1)[0].tolist()
    return [round(p, 5) for p in probabilities]


left_scores,right_scores = [],[]

# Inference only: no_grad() skips building the autograd graph.
with torch.no_grad():
    # zip() has no length, so pass total= to give tqdm a proper progress bar.
    for image_path,(left_bbox,right_bbox) in tqdm(zip(image_paths,eye_bboxes), total=len(image_paths)):
        left_scores.append(_eye_scores(image_path, left_bbox))
        right_scores.append(_eye_scores(image_path, right_bbox))

1299it [01:03, 20.53it/s]


In [7]:
# Show the four list lengths side by side; the scoring loop appends one entry per
# eye per image, so all of them should be equal.
tuple(len(seq) for seq in (image_paths, eye_bboxes, left_scores, right_scores))

(1299, 1299, 1299, 1299)

In [1]:
# Needs image_paths, eye_bboxes, left_scores and right_scores from the cells above;
# running this cell on a fresh kernel raises NameError.

# Class index -> label shown in the plot titles.
mapping = {
    0:'open',
    1:'close',
    2:'block'
}

# Preview at most the first 10 samples; min() avoids an IndexError on smaller sets.
num_preview = min(10, len(image_paths))

for i in range(num_preview):
    image_path = image_paths[i]
    eye_pair_scores = (left_scores[i], right_scores[i])

    # One figure per sample: left eye in the first panel, right eye in the second.
    fig, axes = plt.subplots(1, 2)
    for ax, eye_bbox, score in zip(axes, eye_bboxes[i], eye_pair_scores):
        eye_image = load_image(image_path, eye_bbox, input_dim, expand_ratio)
        argmax_prediction = mapping[int(np.argmax(score))]

        ax.imshow(eye_image)
        ax.axis('off')
        ax.set_title(f'P:{argmax_prediction}\n{score}')

    plt.show()
    # Release the figure so repeated previews do not accumulate open figures.
    plt.close(fig)

NameError: name 'image_paths' is not defined