In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from joblib import dump, load
import cv2
import time
import os

In [2]:
# RetinaFace face detector
detector_model = tf.saved_model.load("./tf_retinaface_mbv2")

# Turtleneck Detection Model Loading
MODEL_PATH = "./sk_random_forest/model_938.joblib"
RTN_model = load(MODEL_PATH)
CNN_model = tf.keras.models.load_model('./model/CK+YS+DH+JE.h5')



In [3]:
def one_face(frame, bbs, pointss):
    # process only one face (center ?)
    offsets = [
        (bbs[:, 0] + bbs[:, 2]) / 2 - frame.shape[1] / 2,
        (bbs[:, 1] + bbs[:, 3]) / 2 - frame.shape[0] / 2,
    ]
    offset_dist = np.sum(np.abs(offsets), 0)
    index = np.argmin(offset_dist)
    bb = bbs[index]
    points = pointss[:, index]
    return bb, points


def get_width_and_height(frame, bb, points):
    # draw rectangle and landmarks on face
    w = int(bb[2]) - int(bb[0])  # width
    h = int(bb[3]) - int(bb[1])  # height
    eye2box_ratio = (points[0] - bb[0]) / (bb[2] - points[1])
    list_size = [w, h, eye2box_ratio]
    return list_size


def find_roll(pts):
    return pts[6] - pts[5]


def find_yaw(pts):
    le2n = pts[2] - pts[0]
    re2n = pts[1] - pts[2]
    return le2n - re2n


def find_pitch(pts):
    eye_y = (pts[5] + pts[6]) / 2
    mou_y = (pts[8] + pts[9]) / 2
    e2n = eye_y - pts[7]
    n2m = pts[7] - mou_y
    return e2n / n2m


def face_detector(
    image,
    image_shape_max=640,
    score_min=None,
    pixel_min=None,
    pixel_max=None,
    Ain_min=None,
):
    """
    Performs face detection using retinaface method with speed boost and initial quality checks based on whole image size
    
    Parameters
    ----------
    image : uint8
        image for face detection.
    image_shape_max : int, optional
        maximum size (in pixels) of image. The default is None.
    score_min : float, optional
        minimum detection score (0 to 1). The default is None.
    pixel_min : int, optional
        mininmum face size based on heigth of bounding box. The default is None.
    pixel_max : int, optional
        maximum face size based on heigth of bounding box. The default is None.
    Ain_min : float, optional
        minimum area of face in bounding box. The default is None.
    Returns
    -------
    float array
        landmarks.
    float array
        bounding boxes.
    flaot array
        detection scores.
    float array
        face area in bounding box.
    """

    image_shape = image.shape[:2]

    # perform image resize for faster detection
    if image_shape_max:
        scale_factor = max([1, max(image_shape) / image_shape_max])
    else:
        scale_factor = 1

    if scale_factor > 1:
        scaled_image = cv2.resize(
            image, (0, 0), fx=1 / scale_factor, fy=1 / scale_factor
        )
        bbs_all, points_all = retinaface(scaled_image)
        bbs_all[:, :4] *= scale_factor
        points_all *= scale_factor
    else:
        bbs_all, points_all = retinaface(image)

    bbs = bbs_all.copy()
    points = points_all.copy()

    # check detection score
    if score_min:
        mask = np.array(bbs[:, 4] > score_min)
        bbs = bbs[mask]
        points = points[mask]
        if len(bbs) == 0:
            return [], [], [], []

    # check pixel height
    if pixel_min:
        pixel = bbs[:, 3] - bbs[:, 1]
        mask = np.array(pixel > pixel_min)
        bbs = bbs[mask]
        points = points[mask]
        if len(bbs) == 0:
            return [], [], [], []

    if pixel_max:
        pixel = bbs[:, 3] - bbs[:, 1]
        mask = np.array(pixel < pixel_max)
        bbs = bbs[mask]
        points = points[mask]
        if len(bbs) == 0:
            return [], [], [], []

    # check face area in bounding box
    Ains = []
    for bb in bbs:
        Win = min(image_shape[1], bb[2]) - max(0, bb[0])
        Hin = min(image_shape[0], bb[3]) - max(0, bb[1])
        Abb = (bb[2] - bb[0]) * (bb[3] - bb[1])
        Ains.append(Win * Hin / Abb * 100 if Abb != 0 else 0)
    Ains = np.array(Ains)

    if Ain_min:
        mask = np.array(Ains >= Ain_min)
        bbs = bbs[mask]
        points = points[mask]
        Ains = Ains[mask]
        if len(bbs) == 0:
            return [], [], [], []

    scores = bbs[:, -1]
    bbs = bbs[:, :4]

    return points, bbs, scores, Ains


def retinaface(image):

    height = image.shape[0]
    width = image.shape[1]

    image_pad, pad_params = pad_input_image(image)
    image_pad = tf.convert_to_tensor(image_pad[np.newaxis, ...])
    image_pad = tf.cast(image_pad, tf.float32)

    outputs = detector_model(image_pad).numpy()

    outputs = recover_pad_output(outputs, pad_params)
    Nfaces = len(outputs)

    bbs = np.zeros((Nfaces, 5))
    lms = np.zeros((Nfaces, 10))

    bbs[:, [0, 2]] = outputs[:, [0, 2]] * width
    bbs[:, [1, 3]] = outputs[:, [1, 3]] * height
    bbs[:, 4] = outputs[:, -1]

    lms[:, 0:5] = outputs[:, [4, 6, 8, 10, 12]] * width
    lms[:, 5:10] = outputs[:, [5, 7, 9, 11, 13]] * height

    return bbs, lms


def pad_input_image(img, max_steps=32):
    """pad image to suitable shape"""
    img_h, img_w, _ = img.shape

    img_pad_h = 0
    if img_h % max_steps > 0:
        img_pad_h = max_steps - img_h % max_steps

    img_pad_w = 0
    if img_w % max_steps > 0:
        img_pad_w = max_steps - img_w % max_steps

    padd_val = np.mean(img, axis=(0, 1)).astype(np.uint8)
    img = cv2.copyMakeBorder(
        img, 0, img_pad_h, 0, img_pad_w, cv2.BORDER_CONSTANT, value=padd_val.tolist()
    )
    pad_params = (img_h, img_w, img_pad_h, img_pad_w)

    return img, pad_params


def recover_pad_output(outputs, pad_params):
    """recover the padded output effect"""
    img_h, img_w, img_pad_h, img_pad_w = pad_params
    recover_xy = np.reshape(outputs[:, :14], [-1, 7, 2]) * [
        (img_pad_w + img_w) / img_w,
        (img_pad_h + img_h) / img_h,
    ]
    outputs[:, :14] = np.reshape(recover_xy, [-1, 14])

    return outputs


def discern_random_forest(model, Roll, Yaw, Pitch, width, height, eye2box):
    """
    df_temp = pd.DataFrame(
        {
            "Width": width,
            "Height": height,
            "Eye2Box": eye2box,
            "Roll": Roll,
            "Yaw": Yaw,
            "Pitch": Pitch,
        },
        index=[0],
    )
    predict_val = model.predict(df_temp.values)
    """
    input_values = [[width, height, eye2box, Roll, Yaw, Pitch]]
    tuple_predict_val = model.predict_proba(input_values)
    return tuple_predict_val


# ===========================================================================
def get_rf_prob(model, image):

    pointss_all, bbs_all, scores_all, _ = face_detector(image)

    bbs_all = np.insert(bbs_all, bbs_all.shape[1], scores_all, axis=1)
    pointss_all = np.transpose(pointss_all)

    bbs = bbs_all.copy()
    pointss = pointss_all.copy()

    if len(bbs_all) > 0:  # if at least one face is detected
        # process only one face (center ?)
        bb, points = one_face(image, bbs, pointss)
        list_size = get_width_and_height(image, bb, points)  # width and height
        Roll = find_roll(points)
        Yaw = find_yaw(points)
        Pitch = find_pitch(points)

        turtle_value = discern_random_forest(
            model, Roll, Yaw, Pitch, list_size[0], list_size[1], list_size[2]
        )

        # print(turtle_value)
        return turtle_value

    else:
        return np.NaN


In [17]:
img = cv2.imread('./CK+YS+DH+JE/Good/500.jpg')
display(get_rf_prob(RTN_model, img))
display(CNN_model.predict(np.reshape(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3))))

array([[0.4, 0.6]])

array([[1.0118421e-02, 9.8988158e-01],
       [7.7848217e-18, 1.0000000e+00],
       [9.9997842e-01, 2.1568190e-05],
       [1.0000000e+00, 4.6281533e-15]], dtype=float32)

In [4]:
X = []
y = []

In [5]:
path = './CK+DH/Bad/'
file_list = os.listdir(path)
for file in file_list:
    img = cv2.imread(path + file)
    X.append([
        get_rf_prob(RTN_model, img)[0, 1], 
        CNN_model.predict(np.reshape(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3)))[0,0]
    ])
    y.append(0)
    
    
path = './CK+DH/Good/'
file_list = os.listdir(path)
for file in file_list:
    img = cv2.imread(path + file)
    X.append([
        get_rf_prob(RTN_model, img)[0, 1], 
        CNN_model.predict(np.reshape(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3)))[0,0]
    ])
    y.append(1)

In [6]:
len(y)

940

In [7]:
import csv

In [8]:
f = open('StackingData.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(['RTN_prob', 'CNN_prob', 'label'])
for prob, label in zip(X,y):
    wr.writerow([prob[0], prob[1], label])
f.close()

In [10]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

df = pd.read_csv('./StackingData.csv')
X = np.array(df[['RTN_prob','CNN_prob']])
y = np.array(df[['label']])
clf = LogisticRegression(random_state=0).fit(X, y)

  return f(*args, **kwargs)


In [18]:
img = cv2.imread('./CK+DH/Good/1.jpg')
a = [get_rf_prob(RTN_model, img)[0,1],
    CNN_model.predict(np.reshape(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3)))[0,0]
]
clf.predict_proba([a])

In [24]:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
class_names = ['Bad', 'Good']

while True:
    ret, image = cap.read()
    a = [get_rf_prob(RTN_model, image)[0,1],
        CNN_model.predict(np.reshape(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3)))[0,0]
        ]
    pred = clf.predict_proba([a])
    cv2.putText(image, class_names[np.argmax(pred)], (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    cv2.putText(image, str(pred[0,np.argmax(pred)]), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    display(pred)
    cv2.imshow('asdf', image)
    key = cv2.waitKey(100)
    if key == 27:
        break
cv2.destroyAllWindows()
cap.release()

array([[0.27441059, 0.72558941]])

array([[0.26315056, 0.73684944]])

array([[0.27839632, 0.72160368]])

array([[0.25226064, 0.74773936]])

array([[0.24731876, 0.75268124]])

array([[0.23427087, 0.76572913]])

array([[0.17703229, 0.82296771]])

array([[0.12365215, 0.87634785]])

array([[0.16890781, 0.83109219]])

array([[0.26451539, 0.73548461]])

array([[0.98650028, 0.01349972]])

array([[0.98845648, 0.01154352]])

array([[0.98661417, 0.01338583]])

array([[0.98661417, 0.01338583]])

array([[0.98756898, 0.01243102]])

array([[0.98756898, 0.01243102]])

array([[0.99004779, 0.00995221]])

array([[0.99002976, 0.00997024]])

array([[0.43208883, 0.56791117]])

array([[0.61626301, 0.38373699]])

array([[0.7299061, 0.2700939]])

array([[0.74086794, 0.25913206]])

array([[0.98877945, 0.01122055]])

array([[0.97838312, 0.02161688]])

array([[0.98967145, 0.01032855]])

array([[0.98845648, 0.01154352]])

array([[0.98887645, 0.01112355]])

array([[0.98610999, 0.01389001]])

array([[0.98690146, 0.01309854]])

array([[0.7067016, 0.2932984]])

array([[0.01951175, 0.98048825]])

array([[0.08355648, 0.91644352]])

array([[0.06707844, 0.93292156]])

array([[0.02042968, 0.97957032]])

array([[0.01466547, 0.98533453]])

array([[0.00724846, 0.99275154]])

array([[0.01051017, 0.98948983]])

array([[0.00780855, 0.99219145]])

array([[0.00724855, 0.99275145]])

array([[0.00752373, 0.99247627]])

array([[0.00725015, 0.99274985]])

array([[0.00725002, 0.99274998]])

array([[0.00753026, 0.99246974]])

array([[0.04551813, 0.95448187]])

array([[0.17354093, 0.82645907]])

array([[0.41374664, 0.58625336]])

array([[0.58927711, 0.41072289]])

array([[0.82612799, 0.17387201]])

array([[0.80497373, 0.19502627]])

array([[0.45069542, 0.54930458]])

array([[0.65704488, 0.34295512]])

array([[0.43242799, 0.56757201]])

array([[0.48704772, 0.51295228]])

array([[0.49215858, 0.50784142]])

array([[0.53080728, 0.46919272]])

array([[0.4379208, 0.5620792]])

array([[0.42942296, 0.57057704]])

array([[0.56434258, 0.43565742]])

array([[0.4684596, 0.5315404]])

array([[0.48431372, 0.51568628]])

array([[0.64319524, 0.35680476]])

array([[0.6265591, 0.3734409]])

array([[0.62742612, 0.37257388]])

array([[0.8606228, 0.1393772]])

array([[0.76435291, 0.23564709]])

array([[0.85282503, 0.14717497]])

array([[0.54504693, 0.45495307]])

array([[0.94054044, 0.05945956]])

array([[0.82229806, 0.17770194]])

array([[0.65410278, 0.34589722]])

array([[0.62136652, 0.37863348]])

array([[0.54053317, 0.45946683]])

array([[0.50247948, 0.49752052]])

array([[0.54227106, 0.45772894]])

array([[0.83415198, 0.16584802]])

array([[0.62699112, 0.37300888]])

array([[0.87661286, 0.12338714]])

array([[0.92571396, 0.07428604]])

array([[0.83679648, 0.16320352]])

array([[0.8368523, 0.1631477]])

array([[0.97188013, 0.02811987]])

array([[0.98011033, 0.01988967]])

array([[0.91004486, 0.08995514]])

array([[0.46002263, 0.53997737]])

array([[0.52773637, 0.47226363]])

array([[0.67728795, 0.32271205]])

array([[0.81213731, 0.18786269]])

array([[0.59531881, 0.40468119]])

array([[0.78201207, 0.21798793]])

array([[0.94058059, 0.05941941]])

array([[0.78470885, 0.21529115]])

array([[0.6602492, 0.3397508]])

array([[0.92710093, 0.07289907]])

array([[0.80276902, 0.19723098]])

array([[0.82892497, 0.17107503]])

array([[0.93812345, 0.06187655]])

array([[0.75028275, 0.24971725]])

array([[0.8114506, 0.1885494]])

array([[0.93678793, 0.06321207]])

array([[0.78035799, 0.21964201]])

array([[0.63812241, 0.36187759]])

array([[0.58996437, 0.41003563]])

array([[0.45354264, 0.54645736]])

array([[0.66974006, 0.33025994]])

array([[0.74123803, 0.25876197]])

array([[0.6551852, 0.3448148]])

array([[0.30756427, 0.69243573]])

array([[0.11252451, 0.88747549]])

array([[0.09899578, 0.90100422]])

array([[0.09856385, 0.90143615]])

array([[0.08947352, 0.91052648]])

array([[0.09349898, 0.90650102]])

array([[0.1098885, 0.8901115]])

array([[0.0969793, 0.9030207]])

array([[0.10873246, 0.89126754]])

array([[0.10265099, 0.89734901]])

array([[0.11001555, 0.88998445]])

array([[0.10667609, 0.89332391]])

array([[0.10398944, 0.89601056]])

array([[0.0994272, 0.9005728]])

array([[0.10584168, 0.89415832]])

array([[0.12492498, 0.87507502]])

array([[0.12114708, 0.87885292]])

array([[0.12101068, 0.87898932]])

array([[0.15850755, 0.84149245]])

array([[0.12595489, 0.87404511]])

array([[0.09835004, 0.90164996]])

array([[0.16796363, 0.83203637]])

array([[0.12386241, 0.87613759]])

array([[0.1203917, 0.8796083]])

array([[0.11578417, 0.88421583]])

array([[0.09900429, 0.90099571]])

array([[0.09536102, 0.90463898]])

array([[0.1013198, 0.8986802]])

array([[0.10496182, 0.89503818]])

array([[0.093519, 0.906481]])

array([[0.25427396, 0.74572604]])

array([[0.16863929, 0.83136071]])

array([[0.1063307, 0.8936693]])

array([[0.10273028, 0.89726972]])

array([[0.10953701, 0.89046299]])

array([[0.10673251, 0.89326749]])

array([[0.14840105, 0.85159895]])

array([[0.24982675, 0.75017325]])

array([[0.14586795, 0.85413205]])

array([[0.1146018, 0.8853982]])

array([[0.09461145, 0.90538855]])

array([[0.02304535, 0.97695465]])

array([[0.01017379, 0.98982621]])

array([[0.02240705, 0.97759295]])

array([[0.04418751, 0.95581249]])

array([[0.02681085, 0.97318915]])

array([[0.02720288, 0.97279712]])

array([[0.0247977, 0.9752023]])

array([[0.00785503, 0.99214497]])

array([[0.01135963, 0.98864037]])

array([[0.01432955, 0.98567045]])

array([[0.03001673, 0.96998327]])

array([[0.04553076, 0.95446924]])

array([[0.10624604, 0.89375396]])

array([[0.02114938, 0.97885062]])

array([[0.01448924, 0.98551076]])

array([[0.00724847, 0.99275153]])

array([[0.00841118, 0.99158882]])

array([[0.14429648, 0.85570352]])

array([[0.96282408, 0.03717592]])

array([[0.88415154, 0.11584846]])

array([[0.97365756, 0.02634244]])

array([[0.34209927, 0.65790073]])

array([[0.1661234, 0.8338766]])

array([[0.72052898, 0.27947102]])

array([[0.90743877, 0.09256123]])

array([[0.97727424, 0.02272576]])

array([[0.95811451, 0.04188549]])

array([[0.58040491, 0.41959509]])

array([[0.61098891, 0.38901109]])

array([[0.88669838, 0.11330162]])

array([[0.87193766, 0.12806234]])

array([[0.49582087, 0.50417913]])

array([[0.34313676, 0.65686324]])

array([[0.23138989, 0.76861011]])

array([[0.28386856, 0.71613144]])

array([[0.21537674, 0.78462326]])

array([[0.23856098, 0.76143902]])

array([[0.32988377, 0.67011623]])

array([[0.2855837, 0.7144163]])

array([[0.55482384, 0.44517616]])

array([[0.16703001, 0.83296999]])

array([[0.16989191, 0.83010809]])

array([[0.28495202, 0.71504798]])

array([[0.3021718, 0.6978282]])

array([[0.37541718, 0.62458282]])

array([[0.2507494, 0.7492506]])

array([[0.13878654, 0.86121346]])

array([[0.27081155, 0.72918845]])

array([[0.27885403, 0.72114597]])

array([[0.23584033, 0.76415967]])

array([[0.19987611, 0.80012389]])

array([[0.23795006, 0.76204994]])

array([[0.23355265, 0.76644735]])

array([[0.25267912, 0.74732088]])

array([[0.94391593, 0.05608407]])

array([[0.57396187, 0.42603813]])

array([[0.98829568, 0.01170432]])

array([[0.98073624, 0.01926376]])

array([[0.97913967, 0.02086033]])

array([[0.98235887, 0.01764113]])

array([[0.97539514, 0.02460486]])

array([[0.98979339, 0.01020661]])

array([[0.91890812, 0.08109188]])

array([[0.78021601, 0.21978399]])

array([[0.41379258, 0.58620742]])

array([[0.85750656, 0.14249344]])

array([[0.51189854, 0.48810146]])

array([[0.30215951, 0.69784049]])

array([[0.47597723, 0.52402277]])

array([[0.57307301, 0.42692699]])

array([[0.1985038, 0.8014962]])

array([[0.95157033, 0.04842967]])

array([[0.97439141, 0.02560859]])

array([[0.98394178, 0.01605822]])

array([[0.86020832, 0.13979168]])

array([[0.79057848, 0.20942152]])

array([[0.72499113, 0.27500887]])

array([[0.95620846, 0.04379154]])

array([[0.9541221, 0.0458779]])

array([[0.98261133, 0.01738867]])

array([[0.96662261, 0.03337739]])

array([[0.98896694, 0.01103306]])

array([[0.98488253, 0.01511747]])

array([[0.98563848, 0.01436152]])

array([[0.98070751, 0.01929249]])

array([[0.96733194, 0.03266806]])

array([[0.98032117, 0.01967883]])

array([[0.98975692, 0.01024308]])

array([[0.98293426, 0.01706574]])

array([[0.96928235, 0.03071765]])

array([[0.98893095, 0.01106905]])

array([[0.97709617, 0.02290383]])

array([[0.89495158, 0.10504842]])

array([[0.98863545, 0.01136455]])

array([[0.97578157, 0.02421843]])

array([[0.99065122, 0.00934878]])

array([[0.98756366, 0.01243634]])

array([[0.96782768, 0.03217232]])

array([[0.96286989, 0.03713011]])

array([[0.95761295, 0.04238705]])

array([[0.98886826, 0.01113174]])

array([[0.97210591, 0.02789409]])

array([[0.98447491, 0.01552509]])

array([[0.9740518, 0.0259482]])

array([[0.97308735, 0.02691265]])

array([[0.97916914, 0.02083086]])

array([[0.97916914, 0.02083086]])

array([[0.97839068, 0.02160932]])

array([[0.97588077, 0.02411923]])

array([[0.9813434, 0.0186566]])

array([[0.97916911, 0.02083089]])

array([[0.73151035, 0.26848965]])

array([[0.93340606, 0.06659394]])

array([[0.95691545, 0.04308455]])

array([[0.98200155, 0.01799845]])

array([[0.97672863, 0.02327137]])

array([[0.96400251, 0.03599749]])

array([[0.96523876, 0.03476124]])

array([[0.98756392, 0.01243608]])

array([[0.98133964, 0.01866036]])

array([[0.98801858, 0.01198142]])

In [None]:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
class_names = ['Bad', 'Good']

while True:
    ret, image = cap.read()
    r = get_rf_prob(RTN_model, image)[0,1]
    c = CNN_model.predict(np.reshape(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), (-1, 240, 320, 3)))[0,0]
    x = 0.2*r + 0.8*c
    pred = np.array([[x, 1 - x]])
    cv2.putText(image, class_names[np.argmax(pred)], (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    cv2.putText(image, str(pred[0,np.argmax(pred)]), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    cv2.imshow('asdf', image)
    key = cv2.waitKey(100)
    if key == 27:
        break
cv2.destroyAllWindows()
cap.release()