In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from src.models.HandcraftedModel import HandcraftedModel
from src.DataManager import DataManager

from src.models.Model import IMAGE_INPUT_SIZE
from src.config import IMDB_CROPPED_PATH, IMBD_CROPPED_METADATA_FILENAME

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

from keras import Sequential
from keras.layers import Dense

In [49]:
N_SAMPLE = .001

# Read the data
data_manager = DataManager(IMDB_CROPPED_PATH, IMBD_CROPPED_METADATA_FILENAME, IMAGE_INPUT_SIZE,
                           n_subset=N_SAMPLE, normalize_images=False, normalize_age=True)
data = data_manager.get_dataset()

# Split into train, validation, test
train, validation, test = data_manager.split_dataset(data)

# Read images
print('Read training images ...')
X_train, y_train = data_manager.get_X(train), data_manager.get_y(train)
print('Read validation images ...')
X_val, y_val = data_manager.get_X(validation), data_manager.get_y(validation)
print('Read test images ...')
X_test, y_test = data_manager.get_X(test), data_manager.get_y(test)

n_sift = 150
color_hist_bins = 128
lbp_n_points = 24
lbp_radius = 3
handcrafted_model = HandcraftedModel(n_sift, color_hist_bins, lbp_n_points, lbp_radius)

# EXTRACTIONS
df_train = handcrafted_model.extract_dataset_features(X_train, y_train)
df_val = handcrafted_model.extract_dataset_features(X_val, y_val)
df_test = handcrafted_model.extract_dataset_features(X_test, y_test)

# CLEANING FROM NANS
data_manager.delete_nan_columns(df_train, df_val, df_test)
data_manager.delete_nan_label_rows(df_train)
data_manager.delete_nan_label_rows(df_val)
data_manager.delete_nan_label_rows(df_test)

Read training images ...


100%|██████████| 273/273 [00:00<00:00, 340.08it/s]


Read validation images ...


100%|██████████| 49/49 [00:00<00:00, 420.34it/s]


Read test images ...


100%|██████████| 138/138 [00:00<00:00, 359.74it/s]


Extracting dataset features ...


100%|██████████| 273/273 [02:32<00:00,  1.79it/s]


Extracting dataset features ...


100%|██████████| 49/49 [00:25<00:00,  1.92it/s]


Extracting dataset features ...


100%|██████████| 138/138 [01:19<00:00,  1.73it/s]


Deleted a maximum of 133 columns
Deleted 5 rows
Deleted 1 rows
Deleted 5 rows


In [50]:
# srotolo
df_train_new = pd.DataFrame()
for _, row in df_train.iterrows():
    d = {}
    i = 0
    for col in range(26):
        d[i] = row[col]
        i = i + 1
    for col in range(26, len(df_train.columns) - 2):
        for el in row[col]:
            d[i] = el
            i = i + 1
    df_train_new = df_train_new.append(d, ignore_index=True)

df_val_new = pd.DataFrame()
for _, row in df_val.iterrows():
    d = {}
    i = 0
    for col in range(26):
        d[i] = row[col]
        i = i + 1
    for col in range(26, len(df_val.columns) - 2):
        for el in row[col]:
            d[i] = el
            i = i + 1
    df_val_new = df_val_new.append(d, ignore_index=True)

df_test_new = pd.DataFrame()
for _, row in df_test.iterrows():
    d = {}
    i = 0
    for col in range(26):
        d[i] = row[col]
        i = i + 1
    for col in range(26, len(df_test.columns) - 2):
        for el in row[col]:
            d[i] = el
            i = i + 1
    df_test_new = df_test_new.append(d, ignore_index=True)

In [51]:
print(df_train_new.shape)
print(df_val_new.shape)
print(df_test_new.shape)

(268, 2330)
(48, 2330)
(133, 2330)


In [52]:
clf = SVC()
clf.fit(df_train_new, df_train["gender"])

SVC()

In [53]:
preds = clf.predict(df_test_new)
acc_test = accuracy_score(df_test["gender"], preds)
conf_mat_test = confusion_matrix(df_test["gender"], preds)
print(acc_test)
print(conf_mat_test)

0.6165413533834586
[[21 37]
 [14 61]]


In [54]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=3,algorithm='brute')
classifier.fit(df_train_new, df_train["gender"])
preds = classifier.predict(df_test_new)
acc_test = accuracy_score(df_test["gender"], preds)
conf_mat_test = confusion_matrix(df_test["gender"], preds)
print(acc_test)
print(conf_mat_test)

0.6090225563909775
[[25 33]
 [19 56]]


In [9]:
model = KNeighborsClassifier(7)
model.fit(df_train.drop(["gender", "age"], axis=1), df_train["gender"])

prediction = model.predict(df_test.drop(["gender", "age"], axis=1))

acc_test = accuracy_score(df_test["gender"], prediction)
conf_mat_test = confusion_matrix(df_test["gender"], prediction)
print(acc_test)
print(conf_mat_test)
#https://github.com/laz08/Image-gender-classification/tree/master/src/classifiers

0.5
[[0 6]
 [1 7]]
