In [28]:
import numpy as np
import pickle
import re
import urllib, json
import requests
import cv2
import glob
import time
import csv
from tqdm import tqdm
from scipy.spatial import distance
from sklearn.model_selection import train_test_split

In [29]:
distances = {}

def clean_up_values(string):
    split = re.split('[, }]', string)
    x = float(split[4])
    y = float(split[1])
    return (x,y)

file1 = open("ent-landmarks.txt", "r") 
for line in file1:
    if line.startswith("image_name"):
        pass
    else:
        line_split = line.split("\t")
        image_name = line_split[0]
        distances[image_name] = {}

        distances[image_name]["contour_left2"] = clean_up_values(line_split[1])
        distances[image_name]["contour_right2"] = clean_up_values(line_split[2])
        distances[image_name]["right_eyebrow_left_corner"] = clean_up_values(line_split[3])
        distances[image_name]["left_eyebrow_left_corner"] = clean_up_values(line_split[4])
        distances[image_name]["contour_chin"] = clean_up_values(line_split[5])
        distances[image_name]["mouth_lower_lip_bottom"] = clean_up_values(line_split[6])
        distances[image_name]["mouth_lower_lip_top"] = clean_up_values(line_split[7])
        
        width = distance.euclidean(distances[image_name]["contour_left2"], 
                                   distances[image_name]["contour_right2"])
        distances[image_name]["width"] = width
        right_brow = distances[image_name]["right_eyebrow_left_corner"]
        left_brow = distances[image_name]["left_eyebrow_left_corner"]
        
        crest_center = (0.5*(right_brow[0]+left_brow[0]), 0.5*(right_brow[1]+left_brow[1]))
        distances[image_name]["crest_center"] = crest_center
        length = distance.euclidean(distances[image_name]["mouth_lower_lip_bottom"], 
                                   distances[image_name]["crest_center"])
        distances[image_name]["length"] = length
        distances[image_name]["Width_to_Height_Ratio"] = 1.0*width/length

In [30]:
pickle.dump(distances, open('distances.pickle', 'wb'))

In [31]:
file1 = open("merged_api_impression.csv", "r")
merged_api = {}
for line in file1:
    
    if line.startswith("img_name"):
        pass
    else:
        image_ID = line.split(',')[-8]
        total_binary = line.split(',')[-5]
        merged_api[str(image_ID) + "_cb.jpeg"] = total_binary

file1.close()

In [32]:
distances = pickle.load(open('distances.pickle', 'rb'))

In [33]:
X = []
image_names = []
Y = []
for image in distances:
    try:
        Y.append(int(merged_api[image]))
        X.append(distances[image]['Width_to_Height_Ratio'])
        image_names.append(image)
    except:
        pass
len(image_names)

1925

In [34]:
from collections import Counter
from imblearn.over_sampling import SMOTE
print('Original dataset shape %s' % Counter(Y))
sm = SMOTE(random_state=42)
X = np.array(X).reshape(-1, 1) 
X, Y = sm.fit_resample(X, Y)
print('Resampled dataset shape %s' % Counter(Y))

Original dataset shape Counter({0: 1176, 1: 749})
Resampled dataset shape Counter({0: 1176, 1: 1176})


In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

In [36]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [37]:
Counter(y_test)

Counter({0: 353, 1: 353})

## Logistic Regression:-

In [38]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(np.array(X_train), np.array(y_train))
model.score(X_train, y_train)

print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)

predicted= model.predict(X_test)

('Coefficient: \n', array([[0.09072267]]))
('Intercept: \n', array([-0.13498751]))


In [39]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5212464589235127

## Decision Tree Classifier:-

In [40]:
from sklearn import tree

model = tree.DecisionTreeClassifier(criterion='gini') 
model.fit(X_train, y_train)
model.score(X_train, y_train)
predicted= model.predict(X_test)

In [41]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5169971671388102

## SVM (Support Vector Machine):-

In [42]:
from sklearn import svm
model = svm.SVC()
model.fit(X_train, y_train)
model.score(X_train, y_train)
predicted= model.predict(X_test)

In [43]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5297450424929179

## kNN (k- Nearest Neighbors):-

In [44]:
from sklearn.neighbors import KNeighborsClassifier
KNeighborsClassifier(n_neighbors=6)
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [45]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5297450424929179

## Random Forest Classifier:-

In [46]:
from sklearn.ensemble import RandomForestClassifier
model= RandomForestClassifier()
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [47]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5184135977337111

## Gradient Boosting Classifier:-

In [48]:
from sklearn.ensemble import GradientBoostingClassifier
model= GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
model.fit(X_train, y_train)
predicted= model.predict(X_test)

In [49]:
(1.0*(predicted==y_test).sum()/len(y_test))

0.5014164305949008

## xgboost:-

In [50]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

model = XGBClassifier()

model.fit(X_train, y_train)

#Make predictions for test data
y_pred = model.predict(X_test)

In [51]:
(1.0*(y_pred==y_test).sum()/len(y_test))

0.5212464589235127