In [123]:
import numpy as np
import pickle
import re
import urllib, json
import requests
import cv2
import glob
import time
import csv
from tqdm import tqdm
from scipy.spatial import distance
from sklearn.model_selection import train_test_split

In [124]:
file1 = open("ent-landmarks_new.txt", "r") 
for line in file1:
    if line.startswith("img_name"):
        landmark_name_list = (line.split("\t"))[1:-1]

In [125]:
distances = {}

def clean_up_values(string):
    split = re.split('[, }]', string)
    x = float(split[4])
    y = float(split[1])
    return (x,y)

file1 = open("ent-landmarks_new.txt", "r")

feature_matrix = []
image_name_list = []

for line in file1:
    if line.startswith("img_name"):
        pass
    else:
        feature_vector = None
        line_split = line.split("\t")
        image_name = line_split[0]
        distances[image_name] = {}
        
        for indx, land in enumerate(landmark_name_list):
            distances[image_name][land] = clean_up_values(line_split[indx+1])
#         print(distances)
#         break
        
        dict_ = distances[image_name]
        
        ## 1) FACE LENGTH:-
        right_brow = dict_["right_eyebrow_left_corner"]
        left_brow = dict_["left_eyebrow_left_corner"]
        dict_["crest_center"] = (0.5*(right_brow[0]+left_brow[0]), 0.5*(right_brow[1]+left_brow[1]))
        dict_["face_length"] = distance.euclidean(dict_["mouth_lower_lip_bottom"], 
                                         dict_["crest_center"])
        feature_vector = [dict_["face_length"]]
        feature_vector_name = ["face_length"]
        
        ## 2) FACE WIDTH EYE LEVEL:-
        dict_["face_width_eye_level"] = distance.euclidean(dict_["contour_left1"], dict_["contour_right1"])
        feature_vector.append(dict_["face_width_eye_level"])
        feature_vector_name.append("face_width_eye_level")
        
        ## 3) FACE WIDTH MOUTH LEVEL:-  
        dict_["face_width_mouth_level"] = distance.euclidean(dict_["contour_left5"], dict_["contour_right5"])
        feature_vector.append(dict_["face_width_mouth_level"])
        feature_vector_name.append("face_width_mouth_level")
        
        ## 4) distance between pupils
        dict_["distance_between_pupils"] = distance.euclidean(dict_["left_eye_pupil"], dict_["right_eye_pupil"])
        feature_vector.append(dict_["distance_between_pupils"])
        feature_vector_name.append("distance_between_pupils")
        
        ## 5) ratio between 2 and 3
        dict_["ratio_face_width_eye_level_face_width_mouth_level"] = 1.0*dict_["face_width_eye_level"]/dict_["face_width_mouth_level"]
        feature_vector.append(dict_["ratio_face_width_eye_level_face_width_mouth_level"])
        feature_vector_name.append("ratio_face_width_eye_level_face_width_mouth_level")
        
        ## 6) ratio between 1 and 2
        dict_["ratio_face_length_face_width_eye_level"] = 1.0*dict_["face_length"]/dict_["face_width_eye_level"]
        feature_vector.append(dict_["ratio_face_length_face_width_eye_level"])
        feature_vector_name.append("ratio_face_length_face_width_eye_level")
        
        ## 7) ratio between 1 and 3 
        dict_["ratio_face_length_face_width_mouth_level"] = 1.0*dict_["face_length"]/dict_["face_width_mouth_level"]
        feature_vector.append(dict_["ratio_face_length_face_width_mouth_level"])
        feature_vector_name.append("ratio_face_length_face_width_mouth_level")
        
        ## 8) ratio between 4 and 2 
        dict_["ratio_distance_between_pupils_face_width_eye_level"]  = 1.0*dict_["distance_between_pupils"]/dict_["face_width_eye_level"]
        feature_vector.append(dict_["ratio_distance_between_pupils_face_width_eye_level"])
        feature_vector_name.append("ratio_distance_between_pupils_face_width_eye_level")
        
        ## 9) right eyebrow thickness (above pupil):-  
        dict_["right_eyebrow_thickness"] = distance.euclidean(dict_["left_eyebrow_upper_middle"], 
                                                                dict_["left_eyebrow_lower_middle"])
        feature_vector.append(dict_["right_eyebrow_thickness"])
        feature_vector_name.append("right_eyebrow_thickness")
        
        ## 10) left eyebrow thickness (above pupil):-  
        dict_["left_eyebrow_thickness"] = distance.euclidean(dict_["right_eyebrow_upper_middle"], 
                                                                dict_["right_eyebrow_lower_middle"])
        feature_vector.append(dict_["left_eyebrow_thickness"])
        feature_vector_name.append("left_eyebrow_thickness")
        
        ## 11) right eyebrow arch – height difference between highest point and inner edge
        dict_["right_eyebrow_arch"] = max(dict_["right_eyebrow_right_corner"][1], 
                                          dict_["right_eyebrow_upper_right_quarter"][1],
                                          dict_["right_eyebrow_upper_middle"][1],
                                          dict_["right_eyebrow_upper_left_quarter"][1],
                                         dict_["right_eyebrow_left_corner"][1])
        feature_vector.append(dict_["right_eyebrow_arch"])
        feature_vector_name.append("right_eyebrow_arch")
        
        ## 12) left eyebrow arch – height difference between highest point and inner edge
        dict_["left_eyebrow_arch"] = max(dict_["left_eyebrow_right_corner"][1], 
                                          dict_["left_eyebrow_upper_right_quarter"][1],
                                          dict_["left_eyebrow_upper_middle"][1],
                                          dict_["left_eyebrow_upper_left_quarter"][1],
                                         dict_["left_eyebrow_left_corner"][1])
        feature_vector.append(dict_["left_eyebrow_arch"])
        feature_vector_name.append("left_eyebrow_arch")
        
        ## 15) right eye width
        dict_["right_eye_height"] = distance.euclidean(dict_["right_eye_top"], dict_["right_eye_bottom"])
        feature_vector.append(dict_["right_eye_height"])
        feature_vector_name.append("right_eye_height")
        
        ## 16) left eye width
        dict_["left_eye_height"] = distance.euclidean(dict_["left_eye_top"], dict_["left_eye_bottom"])
        feature_vector.append(dict_["left_eye_height"])
        feature_vector_name.append("left_eye_height")
        
        ## 15) right eye width
        dict_["right_eye_width"] = distance.euclidean(dict_["right_eye_right_corner"], 
                                                      dict_["right_eye_left_corner"])
        feature_vector.append(dict_["right_eye_width"])
        feature_vector_name.append("right_eye_width")
        
        ## 16) left eye width
        dict_["left_eye_width"] = distance.euclidean(dict_["left_eye_left_corner"], 
                                                     dict_["left_eye_right_corner"])
        feature_vector.append(dict_["left_eye_width"])
        feature_vector_name.append("left_eye_width")
        
        ## 17) right eye size
        dict_["right_eye_size"] = dict_["right_eye_height"]*dict_["right_eye_width"]
        feature_vector.append(dict_["right_eye_size"])
        feature_vector_name.append("right_eye_size")
        
        ## 18) left eye size
        dict_["left_eye_size"] = dict_["left_eye_height"]*dict_["left_eye_width"]
        feature_vector.append(dict_["left_eye_size"])
        feature_vector_name.append("left_eye_size")
        
        ## 20) nose width at nostrils
        dict_["nose_width_at_nostrils"] = distance.euclidean(dict_["nose_left"], dict_["nose_right"])
        feature_vector.append(dict_["nose_width_at_nostrils"])
        feature_vector_name.append("nose_width_at_nostrils")
        
        ## 21) nose length
        dict_["nose_top_center"] = (0.5*(dict_["nose_contour_left1"][0]+dict_["nose_contour_right1"][0]), 
                        0.5*(dict_["nose_contour_left1"][1]+dict_["nose_contour_right1"][1]))
        dict_["nose_length"] = distance.euclidean(dict_["nose_contour_lower_middle"],
                                                  dict_["nose_top_center"])
        feature_vector.append(dict_["nose_length"])
        feature_vector_name.append("nose_length")
        
        ## 22) nose size
        dict_["nose_size"] = dict_["nose_length"]*dict_["nose_width_at_nostrils"]
        feature_vector.append(dict_["nose_size"])
        feature_vector_name.append("nose_size")
        
        ## 23) cheekbone width (2-3)
        dict_["cheekbone_width"] = dict_["face_width_eye_level"] - dict_["face_width_mouth_level"]
        feature_vector.append(dict_["cheekbone_width"])
        feature_vector_name.append("cheekbone_width")
        
        ## 24) ratio_cheekbone_width_face_width_eye_level
        dict_["ratio_cheekbone_width_face_width_eye_level"] = 1.0*dict_["cheekbone_width"]/dict_["face_width_eye_level"]
        feature_vector.append(dict_["ratio_cheekbone_width_face_width_eye_level"])
        feature_vector_name.append("ratio_cheekbone_width_face_width_eye_level")
        
        ## 25) thickness of middle of top lip
        dict_["thickness_top_lip_middle"] = distance.euclidean(dict_["mouth_upper_lip_bottom"], 
                                                               dict_["mouth_upper_lip_top"])
        feature_vector.append(dict_["thickness_top_lip_middle"])
        feature_vector_name.append("thickness_top_lip_middle")
        
        ## 26) thickness of right side of top lip
        dict_["thickness_top_lip_right"] = distance.euclidean(dict_["mouth_upper_lip_right_contour3"], 
                                                               dict_["mouth_upper_lip_right_contour2"])
        feature_vector.append(dict_["thickness_top_lip_right"])
        feature_vector_name.append("thickness_top_lip_right")
        
        ## 27) thickness of left side of top lip
        dict_["thickness_top_lip_left"] = distance.euclidean(dict_["mouth_upper_lip_left_contour3"], 
                                                               dict_["mouth_upper_lip_left_contour2"])
        feature_vector.append(dict_["thickness_top_lip_left"])
        feature_vector_name.append("thickness_top_lip_left")
        
        ## 28) average thickness of top lip
        dict_["average_thickness_top_lip"] = np.mean([dict_["thickness_top_lip_middle"],
                                                     dict_["thickness_top_lip_right"],
                                                     dict_["thickness_top_lip_left"]])
        feature_vector.append(dict_["average_thickness_top_lip"])
        feature_vector_name.append("average_thickness_top_lip")
        
        ## 29) thickness of lower lip
        dict_["thickness_of_lower_lip"] = distance.euclidean(dict_["mouth_lower_lip_top"], 
                                                          dict_["mouth_lower_lip_bottom"])
        feature_vector.append(dict_["thickness_of_lower_lip"])
        feature_vector_name.append("thickness_of_lower_lip")
        
        ## 30) thickness of both lips 
        dict_["thickness_of_both_lips"] = distance.euclidean(dict_["mouth_upper_lip_top"], 
                                                          dict_["mouth_lower_lip_bottom"])
        feature_vector.append(dict_["thickness_of_both_lips"])
        feature_vector_name.append("thickness_of_both_lips")
        
        ## 31) length of lips
        dict_["length_of_lips"] = distance.euclidean(dict_["mouth_left_corner"], 
                                                          dict_["mouth_right_corner"])
        feature_vector.append(dict_["length_of_lips"])
        feature_vector_name.append("length_of_lips")
        
        ## 32) chin_length
        dict_["chin_length"] = distance.euclidean(dict_["contour_chin"], 
                                                          dict_["mouth_lower_lip_bottom"])
        feature_vector.append(dict_["chin_length"])
        feature_vector_name.append("chin_length")
        
        ## 33) right jaw length – from bottom of face 
        dict_["right_jaw_length"] = distance.euclidean(dict_["contour_chin"], 
                                                          dict_["contour_right8"])
        feature_vector.append(dict_["right_jaw_length"])
        feature_vector_name.append("right_jaw_length")
        
        ## 34) right jaw length – from bottom of face 
        dict_["left_jaw_length"] = distance.euclidean(dict_["contour_chin"], 
                                                          dict_["contour_left8"])
        feature_vector.append(dict_["left_jaw_length"])
        feature_vector_name.append("left_jaw_length")
        
        ## 36) ratio of (distance from nostrils to eyebrow top) to (distance from face bottome to nostrils)
        dict_["ratio_of_distance_from_nostrils_to_eyebrow_top_to_distance_from_face_bottome_to_nostrils"]\
        = 1.0*(distance.euclidean(dict_["crest_center"], dict_["nose_tip"]))/\
        (distance.euclidean(dict_["contour_chin"], dict_["nose_tip"]))
        
        feature_vector.append(dict_["ratio_of_distance_from_nostrils_to_eyebrow_top_to_distance_from_face_bottome_to_nostrils"])
        feature_vector_name.append("ratio_of_distance_from_nostrils_to_eyebrow_top_to_distance_from_face_bottome_to_nostrils")
        
    feature_matrix.append(feature_vector)
    image_name_list.append(image_name)
    

In [126]:
pickle.dump(distances, open('distances.pickle', 'wb'))
distances = pickle.load(open('distances.pickle', 'rb'))

In [127]:
feature_matrix = np.array(feature_matrix)
image_name_list = np.array(image_name_list)

file1 = open("merged_api_impression.csv", "r")
merged_api = {}
for line in file1:
    
    if line.startswith("img_name"):
        pass
    else:
        image_ID = line.split(',')[-8]
        total_binary = line.split(',')[-5]
        merged_api[str(image_ID) + "_cb.jpeg"] = total_binary

file1.close()

In [128]:
print(feature_matrix.shape)
print(image_name_list.shape)

(8920, 34)
(8920,)


In [129]:
X = []
Y = []
image_names_used = []

for indx, image_name in enumerate(image_name_list):
    try:
        Y.append(int(merged_api[image_name]))
        X.append(feature_matrix[indx,:])
        image_names_used.append(image_name)
    except:
        pass

X = np.array(X)
Y = np.array(Y)

In [130]:
from collections import Counter
from imblearn.over_sampling import SMOTE
print('Original dataset shape %s' % Counter(Y))
sm = SMOTE(random_state=42)
# X = np.array(X)  #.reshape(-1, 1) 
X, Y = sm.fit_resample(X, Y)
print('Resampled dataset shape %s' % Counter(Y))

Original dataset shape Counter({0: 3423, 1: 2117})
Resampled dataset shape Counter({0: 3423, 1: 3423})


In [131]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.35, random_state=42, stratify=Y)

In [132]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [133]:
X_train.shape

(4449, 34)

In [134]:
Counter(y_train)

Counter({0: 2224, 1: 2225})

In [135]:
c=0
for img_filename in tqdm(glob.iglob('/Users/tanvipriya/Documents/quarter_materials/3) Fall 2018/impression_personality/VC_old_data/e/*/*.jpeg')):
    c+=1
c 

9806it [00:00, 85094.20it/s]


9806

## Logistic Regression:-

In [136]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(np.array(X_train), np.array(y_train))
model.score(X_train, y_train)

print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)

predicted= model.predict(X_test)

('Coefficient: \n', array([[-4.38181523e-02, -1.58006203e-02, -1.74446109e-02,
         6.37872698e-03, -2.12412037e-01,  1.85463088e-01,
         1.13505306e-01, -3.75839512e-01,  3.18105757e-02,
         4.51289873e-02,  6.91440273e-03, -8.05174200e-03,
        -2.71092706e-02, -1.58861041e-02,  1.02484643e-02,
        -3.25001110e-02,  2.10698011e-03,  1.69992534e-04,
        -1.39104872e-02,  2.52474127e-02, -8.32077911e-04,
         1.64399097e-03,  7.12889141e-03, -1.76971867e-01,
         5.49031402e-02,  1.59097359e-02, -3.53863302e-02,
         7.50889239e-02, -1.99939698e-03,  1.71884572e-02,
        -4.33438981e-02,  1.24141241e-01,  1.54834849e-01,
         4.30957316e-02]]))
('Intercept: \n', array([-0.14874075]))


In [137]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5348352106800167

## Decision Tree Classifier:-

In [138]:
from sklearn import tree

model = tree.DecisionTreeClassifier(criterion='gini') 
model.fit(X_train, y_train)
model.score(X_train, y_train)
predicted= model.predict(X_test)

In [139]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.58906967042136

## SVM (Support Vector Machine):-

In [140]:
from sklearn import svm
model = svm.SVC()
model.fit(X_train, y_train)
model.score(X_train, y_train)
predicted= model.predict(X_test)

In [141]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.6645807259073843

## kNN (k- Nearest Neighbors):-

In [142]:
from sklearn.neighbors import KNeighborsClassifier
KNeighborsClassifier(n_neighbors=6)
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [143]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.6645807259073843

## Random Forest Classifier:-

In [144]:
from sklearn.ensemble import RandomForestClassifier
model= RandomForestClassifier()
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [145]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.6299541093032958

## Gradient Boosting Classifier:-

In [146]:
from sklearn.ensemble import GradientBoostingClassifier
model= GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [147]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.6191072173550272

## xgboost:-

In [148]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

model = XGBClassifier()

model.fit(X_train, y_train)

#Make predictions for test data
y_pred = model.predict(X_test)

In [149]:
(1.0*(y_pred==y_test).sum()/len(y_test))

0.6332916145181476