# Gender Classification

I recommend you to install the `opencv` through `pip` because there is well known issues with the `conda` distribution - especially for Linux.

In [445]:
#!conda remove --yes opencv
!pip install opencv-python



In [595]:
import os
import re
from shutil import copyfile
import random
import time

import pandas as pd
import numpy as np
import cv2

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score

In [687]:
NUMBER_OF_SAMPLES = 2900
RESIZE_SHAPE = (40, 40) # (64, 64)
FEMALE_CLASS = 0
MALE_CLASS = 1

In [688]:
female_names_data = pd.read_csv('./faces_data/original_data/female_names.txt', header=None, sep='\n')
male_names_data = pd.read_csv('./faces_data/original_data/male_names.txt', header=None, sep='\n')


print(len(female_names_data))
print(len(male_names_data))

female_names = female_names_data.get_values()[0:, 0]
male_names = male_names_data.get_values()[0:, 0]

female_names = random.sample(set(female_names), NUMBER_OF_SAMPLES)
male_names = random.sample(set(male_names), NUMBER_OF_SAMPLES)

print(len(female_names))
print(len(male_names))

2966
10268
2900
2900


In [689]:
source_image_dir = "./faces_data/original_data/lfw_funneled/"
directory_female = "./faces_data/new_data/female/"
directory_male = "./faces_data/new_data/male/"
if not os.path.exists(directory_female):
    os.makedirs(directory_female, exist_ok=True)
if not os.path.exists(directory_male):
    os.makedirs(directory_male, exist_ok=True)

In [690]:
def separate_data(face_names_data, source, destination):
    for image_name in face_names_data:
        name = re.match("([A-Za-z_-]+)_(\d+.jpg)", image_name)
        if name is None:
            print("Name: {}".format(image_name))
        name = name.group(1)    
        path = source_image_dir + name + "/" + image_name
#         if os.path.exists:
#             print(path)
        #print(path)
        copyfile(path, destination + image_name)

In [691]:
separate_data(female_names, source_image_dir, directory_female)

In [692]:
separate_data(male_names, source_image_dir, directory_male)

In [693]:
# image = cv2.imread('./faces_data/Amelia_Vega_0004.jpg')
# gray = cv2.cvtColor(image,  cv2.COLOR_BGR2GRAY)
# resized = cv2.resize(gray, RESIZE_SHAPE)
# row = np.append(resized.ravel(), FEMALE_CLASS)
# if base_row is None:
#     base_row = row
#     continue
    
#cv2.imshow("im", resized)

#cv2.imwrite("./faces_data/gray.jpg", resized)

# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [694]:
# gender_faces_data = np.empty(shape=(0, 50*50+1)) #64*64+1
# gender_feces_data

In [695]:
image_names = os.listdir("./faces_data/new_data/female")
print(len(image_names))

2900


In [696]:
image_names = os.listdir("./faces_data/new_data/male")
print(len(image_names))

2900


In [697]:
gender_faces_data = np.empty(shape=(0,RESIZE_SHAPE[0]*RESIZE_SHAPE[1]+1)) #64*64+1
face_cascade = cv2.CascadeClassifier('./faces_data/face.xml')

In [698]:
def extract_features(image_path, label):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    row = None
    if faces is not ():
        face = sorted(faces, key=lambda x: (x[2] * x[3]), reverse=True)[0]

        x, y, width, height = face
        face_gray = gray[y: y + height, x: x + width]

        resized = cv2.resize(face_gray, dsize=RESIZE_SHAPE, interpolation=cv2.INTER_CUBIC)
    #     cv2.imwrite('face.jpg', resized)

        row = np.append(resized.ravel(), label)
    #     print(resized.shape)
    #     print(row.shape)
    #     print(resized)
    return row

In [699]:
# extract_features('./faces_data/original_data/lfw_funneled/Aaron_Sorkin/Aaron_Sorkin_0002.jpg', 1)


In [700]:
def append_data(prefix_path, label, gender_faces_data):
    image_names = os.listdir(prefix_path)
    for image_path in image_names:
        # print(image_path)
        curr_row = extract_features(prefix_path + image_path, label)
        if curr_row is not None:
            gender_faces_data = np.append(gender_faces_data, [curr_row], axis=0)
        
    return gender_faces_data    

In [701]:
# print(gender_faces_data)
gender_faces_data = append_data('./faces_data/new_data/female/', FEMALE_CLASS, gender_faces_data)
gender_faces_data.shape

Alicia_Keys_0001.jpg
Martha_Stewart_0002.jpg
Carol_Moseley_Braun_0002.jpg
Gloria_Macapagal_Arroyo_0007.jpg
Muffet_McGraw_0001.jpg
Christine_Baumgartner_0005.jpg
Chandrika_Kumaratunga_0001.jpg
Serena_Williams_0047.jpg
Katie_Harman_0002.jpg
Celine_Dion_0008.jpg
Michelle_Lecky_0001.jpg
Bettina_Rheims_0001.jpg
Laura_Linney_0004.jpg
Anne_ONeil_0001.jpg
Diana_Munz_0002.jpg
Daisy_Fuentes_0004.jpg
Eva_Dimas_0002.jpg
Martha_Martinez_Flores_0001.jpg
Anna_Kournikova_0003.jpg
Shannyn_Sossamon_0001.jpg
Maureen_Fanning_0001.jpg
Michelle_Yeoh_0004.jpg
Laura_Ziskin_0001.jpg
Elena_Bovina_0001.jpg
Betsy_Coffin_0001.jpg
Caroline_Dhavernas_0001.jpg
Melanie_Griffith_0002.jpg
Lindsay_Lohan_0001.jpg
Norah_Jones_0015.jpg
Natalie_Coughlin_0004.jpg
Lynne_Cheney_0002.jpg
Pamela_Anderson_0003.jpg
Mara_Georges_0001.jpg
Princess_Diana_0001.jpg
Chanda_Rubin_0001.jpg
Noel_Niell_0001.jpg
Wu_Yi_0002.jpg
Bonnie_Fuller_0001.jpg
Sheila_Taormina_0001.jpg
Shania_Twain_0001.jpg
Anastasia_Myskina_0003.jpg
Gloria_Macapagal_Arr

Kathleen_Kennedy_Townsend_0004.jpg
Blythe_Hartley_0001.jpg
Nikki_Cascone_0001.jpg
Amanda_Bynes_0001.jpg
Ana_Guevara_0004.jpg
Kristin_Davis_0003.jpg
Tammy_Lynn_Michaels_0002.jpg
Kalpana_Chawla_0005.jpg
Julie_Gerberding_0003.jpg
Gwyneth_Paltrow_0004.jpg
Alisha_Richman_0001.jpg
Susan_Sarandon_0006.jpg
Inga_Hall_0001.jpg
Gunilla_Backman_0001.jpg
Jane_Kaczmarek_0001.jpg
Columba_Bush_0001.jpg
Calista_Flockhart_0001.jpg
Raghad_Saddam_Hussein_0002.jpg
Helen_Clark_0004.jpg
Debbie_Reynolds_0004.jpg
Nora_Ephron_0001.jpg
Nicole_Kidman_0013.jpg
Lydia_Shum_0001.jpg
Renee_Zellweger_0006.jpg
Angela_Bassett_0004.jpg
Winona_Ryder_0020.jpg
Princess_Hisako_0001.jpg
Lindsay_Davenport_0019.jpg
Nida_Blanca_0001.jpg
Amelie_Mauresmo_0016.jpg
Diana_Krall_0006.jpg
Clare_Short_0004.jpg
Rosa_Haywa_de_Condori_0001.jpg
Misty_Dawn_Clymer_0001.jpg
Isabella_Rossellini_0002.jpg
Gloria_Gaynor_0001.jpg
Dolly_Parton_0002.jpg
Susan_Sarandon_0001.jpg
Martina_McBride_0004.jpg
Cynthia_Rowley_0001.jpg
Monica_Serra_0001.jpg
Mery

Diana_Silvius_0001.jpg
Lois_Smart_0001.jpg
Serena_Williams_0017.jpg
Pat_Rochester_0001.jpg
Paula_Prentiss_0001.jpg
Charlene_Barshefsky_0001.jpg
Lima_Azimi_0001.jpg
Danielle_Spencer_0001.jpg
Goldie_Hawn_0001.jpg
Linda_Ham_0001.jpg
Emmanuelle_Beart_0001.jpg
Alison_Lohman_0002.jpg
Ms_Dynamite_0001.jpg
Lisa_Marie_Presley_0002.jpg
Patricia_Medina_0001.jpg
Laura_Bush_0006.jpg
Mireya_Moscoso_0002.jpg
Marina_Anissina_0002.jpg
Susan_Sarandon_0003.jpg
Julie_Gerberding_0001.jpg
Gloria_Macapagal_Arroyo_0027.jpg
Jelena_Dokic_0007.jpg
Ana_Claudia_Talancon_0001.jpg
Olivia_Newton-John_0002.jpg
Jennifer_Lopez_0005.jpg
Coretta_Scott_King_0003.jpg
Condoleezza_Rice_0010.jpg
Cathy_Freeman_0001.jpg
Valerie_Harper_0002.jpg
Princess_Masako_0001.jpg
Gloria_Macapagal_Arroyo_0020.jpg
Corinne_Coman_0002.jpg
Bridgette_Wilson-Sampras_0001.jpg
Julianne_Moore_0013.jpg
Emma_Nicholson_0001.jpg
Ana_Paula_Gerard_0001.jpg
Jean_Brumley_0001.jpg
Marisol_Martinez_Sambran_0001.jpg
Halle_Berry_0014.jpg
Carla_Del_Ponte_0001.jpg

Laura_Morante_0001.jpg
Kristanna_Loken_0002.jpg
Angelina_Jolie_0018.jpg
Salma_Hayek_0001.jpg
Liza_Minnelli_0007.jpg
Lindsay_Davenport_0009.jpg
Shannon_OBrien_0002.jpg
Naomi_Watts_0012.jpg
Sophia_Loren_0006.jpg
Natasha_McElhone_0003.jpg
Paula_Radcliffe_0005.jpg
Angelina_Jolie_0014.jpg
Jennifer_Aniston_0013.jpg
Intisar_Ajouri_0003.jpg
Rebecca_Romijn-Stamos_0001.jpg
Jane_Fonda_0002.jpg
Rose_Marie_0001.jpg
Monique_Gagnon-Tremblay_0001.jpg
Molly_Sims_0001.jpg
Julianne_Moore_0018.jpg
Sally_Field_0001.jpg
Holly_Hunter_0001.jpg
Rachel_Hunter_0004.jpg
Cameron_Diaz_0003.jpg
Yana_Klochkova_0001.jpg
Eunice_Barber_0002.jpg
Reina_Hayes_0001.jpg
Rebekah_Chantay_Revels_0001.jpg
Amporn_Falise_0001.jpg
Valentina_Tereshkova_0001.jpg
Alison_Krauss_0001.jpg
Linda_Mason_0001.jpg
Venus_Williams_0008.jpg
Sheryl_Crow_0002.jpg
Kathryn_Bigelow_0002.jpg
Kristen_Breitweiser_0002.jpg
Aicha_El_Ouafi_0001.jpg
Cathy_Freeman_0002.jpg
Hayden_Panettiere_0001.jpg
Jennifer_Aniston_0017.jpg
Hana_Sadiq_0001.jpg
Shanna_Zolman

Venus_Williams_0015.jpg
Jennifer_Garner_0011.jpg
Celine_Dion_0002.jpg
Cassandra_Heise_0001.jpg
Mira_Sorvino_0001.jpg
Jennifer_Capriati_0010.jpg
Faye_Dunaway_0002.jpg
Isabelle_Huppert_0001.jpg
Edwina_Currie_0001.jpg
Liza_Minnelli_0002.jpg
Jennifer_Aniston_0006.jpg
Elena_Bovina_0003.jpg
Natalie_Cole_0001.jpg
Sharon_Davis_0001.jpg
Catherine_Bell_0001.jpg
Serena_Williams_0041.jpg
Lisa_Girman_0001.jpg
Eve_Ensler_0001.jpg
Mary_Descenza_0001.jpg
Nancy_Pelosi_0005.jpg
Maria_Bello_0001.jpg
Hillary_Clinton_0010.jpg
Petria_Thomas_0002.jpg
Toni_Braxton_0003.jpg
Anneli_Jaatteenmaki_0002.jpg
Julie_Gerberding_0004.jpg
Jerry_Hall_0001.jpg
Cate_Blanchett_0004.jpg
Madeleine_Albright_0003.jpg
Sila_Calderon_0004.jpg
Alicia_Silverstone_0002.jpg
Angelina_Jolie_0011.jpg
Katie_Harman_0001.jpg
Hannah_Stockbauer_0001.jpg
Christine_Gregoire_0001.jpg
Eileen_Coparropa_0001.jpg
Kate_Winslet_0004.jpg
Martha_Beatriz_Roque_0002.jpg
Meg_Wakeman_0001.jpg
Mariana_Pollack_0003.jpg
Tatiana_Kennedy_Schlossberg_0001.jpg
Just

Jelena_Dokic_0004.jpg
Lorraine_Bracco_0001.jpg
Carolyn_Dawn_Johnson_0003.jpg
Condoleezza_Rice_0005.jpg
Arsinee_Khanjian_0001.jpg
Kate_Burton_0001.jpg
Cole_Chapman_0001.jpg
Cecilia_Bolocco_0002.jpg
Princess_Anne_0002.jpg
Linda_Amicangioli_0001.jpg
Ashley_Postell_0001.jpg
Dalia_Rabin-Pelosoff_0001.jpg
Carolina_Kluft_0001.jpg
Yang_Pao-yu_0001.jpg
Winona_Ryder_0019.jpg
Mayumi_Moriyama_0001.jpg
Juljia_Vysotskij_0001.jpg
Victoria_Beckham_0001.jpg
Michelle_Branch_0001.jpg
Emmanuelle_Beart_0002.jpg
Desiree_McKenzie_0001.jpg
Dawna_LoPiccolo_0001.jpg
Georgina_Bardach_0001.jpg
Gong_Li_0001.jpg
Jennifer_Keller_0001.jpg
Mary_Steenburgen_0003.jpg
Natalie_Maines_0003.jpg
Justine_Pasek_0003.jpg
Alicia_Witt_0001.jpg
Nan_Wang_0002.jpg
Anna_Faris_0001.jpg
Mary_Elizabeth_Mastrantonio_0001.jpg
Susie_Castillo_0001.jpg
Agbani_Darego_0001.jpg
Laurie_Pirtle_0001.jpg
Jane_Kaczmarek_0002.jpg
Nastia_Liukin_0001.jpg
Amelia_Vega_0002.jpg
Jennifer_Capriati_0006.jpg
Ashanti_0004.jpg
Meghann_Shaughnessy_0001.jpg
Barba

Gianna_Angelopoulos-Daskalaki_0003.jpg
Jennifer_Lopez_0013.jpg
Venus_Williams_0004.jpg
Sarah_Price_0001.jpg
Winona_Ryder_0001.jpg
Isabel_Orellana_0001.jpg
Catherine_Zeta-Jones_0004.jpg
Elizabeth_Hurley_0005.jpg
Laura_Bush_0003.jpg
Amy_Yasbeck_0001.jpg
Michelle_Branch_0002.jpg
Dolma_Tsering_0002.jpg
Debbie_Allen_0001.jpg
Candie_Kung_0002.jpg
Elizabeth_Hill_0001.jpg
Kareena_Kapoor_0001.jpg
Gloria_Macapagal_Arroyo_0012.jpg
Candie_Kung_0001.jpg
Salma_Hayek_0011.jpg
Jennifer_Capriati_0022.jpg
LeAnn_Rimes_0002.jpg
Claire_Danes_0001.jpg
Megawati_Sukarnoputri_0002.jpg
Patricia_Clarkson_0001.jpg
Carla_Gugino_0001.jpg
Winona_Ryder_0018.jpg
Queen_Elizabeth_II_0004.jpg
Carin_Koch_0001.jpg
Marisa_Tomei_0001.jpg
Catherine_Deneuve_0005.jpg
Gloria_Trevi_0001.jpg
Terry_Lynn_Barton_0001.jpg
Angelina_Jolie_0007.jpg
Nikki_Reed_0001.jpg
Katherine_Harris_0004.jpg
Mariana_Pollack_0002.jpg
Ruth_Pearce_0001.jpg
Katja_Riemann_0001.jpg
Yoko_Ono_0002.jpg
Jane_Walker_Wood_0001.jpg
Ellen_Saracini_0001.jpg
Portia_de

Queen_Elizabeth_II_0005.jpg
Christine_Todd_Whitman_0005.jpg
Emily_Stevens_0001.jpg
Alina_Kabaeva_0001.jpg
Angie_Martinez_0001.jpg
Faye_Alibocus_0001.jpg
Robin_Wright_Penn_0001.jpg
Talisa_Bratt_0001.jpg
Mary_Matalin_0001.jpg
Tamara_Mowry_0001.jpg
Lynn_Abraham_0001.jpg
Carolina_Moraes_0001.jpg
Mariah_Carey_0003.jpg
Cathy_Chisholm_0001.jpg
Megan_Mullally_0002.jpg
Irina_Yatchenko_0001.jpg
Brooke_Gordon_0001.jpg
Meryl_Streep_0008.jpg
Lynn_Redgrave_0003.jpg
Janet_Napolitano_0003.jpg
Fann_Wong_0001.jpg
Kirsten_Dunst_0001.jpg
Ann_Godbehere_0001.jpg
Sarah_Weddington_0001.jpg
Calista_Flockhart_0005.jpg
Jennifer_Capriati_0021.jpg
Mary_Carey_0001.jpg
Winona_Ryder_0022.jpg
Norah_Jones_0008.jpg
Tatiana_Shchegoleva_0001.jpg
Princess_Aiko_0002.jpg
Heather_Mills_0003.jpg
Halle_Berry_0008.jpg
Helene_Eksterowicz_0001.jpg
Jennifer_Lopez_0015.jpg
Ana_Palacio_0008.jpg
Lindsay_Davenport_0015.jpg
Bernadette_Peters_0001.jpg
Daisy_Fuentes_0003.jpg
Zelma_Novelo_0001.jpg
Carly_Gullickson_0001.jpg
Silvia_Farina_El

Judy_Spreckels_0001.jpg
Calista_Flockhart_0006.jpg
Oxana_Fedorova_0001.jpg
Monique_Garbrecht-Enfeldt_0001.jpg
Alexis_Bledel_0001.jpg
Jennifer_Aniston_0001.jpg
Fiona_Milne_0001.jpg
Keira_Knightley_0001.jpg
Sandra_Bullock_0003.jpg
Gina_Torres_0001.jpg
Edie_Falco_0001.jpg
Katharine_Hepburn_0001.jpg
Debra_Brown_0001.jpg
Ludivine_Sagnier_0004.jpg
Pamela_Anderson_0001.jpg
Paula_Radcliffe_0001.jpg
Charlotte_Rampling_0002.jpg
Cameron_Diaz_0005.jpg
Natalie_Cole_0003.jpg
Nancy_Smith_0001.jpg
Julianne_Moore_0003.jpg
Suzanne_Mubarak_0001.jpg
Nadia_Petrova_0001.jpg
Halle_Berry_0016.jpg
Jennifer_Capriati_0018.jpg
Julia_Tymoshenko_0001.jpg
Pauley_Perrette_0001.jpg
Mary_Frances_Seiter_0001.jpg
Alanis_Morissette_0001.jpg
Joy_Lee_Sadler_0001.jpg
Sally_Field_0003.jpg
Cherie_Blair_0004.jpg
Jane_Riley_0001.jpg
Suzanne_Torrance_0001.jpg
Zakia_Hakki_0001.jpg
Sarah_Hughes_0006.jpg
Liv_Tyler_0001.jpg
Goldie_Hawn_0007.jpg
Flor_Montulo_0001.jpg
Naomi_Watts_0006.jpg
Jelena_Dokic_0002.jpg
Cecilia_Bolocco_0003.jpg


(2739, 1601)

In [702]:
gender_faces_data = append_data('./faces_data/new_data/male/', MALE_CLASS, gender_faces_data)
gender_faces_data.shape

Romano_Prodi_0005.jpg
George_W_Bush_0440.jpg
Jason_Kidd_0006.jpg
Gene_Keady_0001.jpg
Spike_Helmick_0001.jpg
Kevin_Tarrant_0001.jpg
John_Anderson_0001.jpg
Douglas_Paal_0001.jpg
Steve_Mariucci_0003.jpg
Tang_Jiaxuan_0004.jpg
John_Edwards_0006.jpg
Carlos_Moya_0012.jpg
Colin_Powell_0217.jpg
Albaro_Recoba_0001.jpg
Matt_Herden_0001.jpg
Saddam_Hussein_0017.jpg
Sebastien_Grosjean_0003.jpg
Jose_Luis_Chilavert_0001.jpg
Ricky_Ray_0001.jpg
Silvio_Berlusconi_0001.jpg
Hamid_Karzai_0010.jpg
Antonio_Palocci_0001.jpg
Vincent_Brooks_0004.jpg
Ruben_Sierra_0001.jpg
John_Edwards_0004.jpg
Izzat_Ibrahim_0001.jpg
Michael_Bloomberg_0004.jpg
Donald_Rumsfeld_0069.jpg
Jimmy_Kimmel_0002.jpg
Vince_Gill_0001.jpg
Amram_Mitzna_0001.jpg
Mikhail_Gorbachev_0001.jpg
Richard_Hellfant_0001.jpg
Kurt_Busch_0002.jpg
Pharrell_Williams_0001.jpg
Tom_Daschle_0007.jpg
Rudolph_Giuliani_0019.jpg
Mauricio_Pochetino_0001.jpg
Pedro_Velasquez_0001.jpg
Alfredo_Pena_0001.jpg
Fidel_Castro_0005.jpg
Yao_Ming_0003.jpg
Pascal_Quignard_0003.jpg
D

Martin_Brodeur_0002.jpg
James_Traficant_0003.jpg
Ronald_Reagan_0002.jpg
Sean_OKeefe_0003.jpg
Enrique_Bolanos_0002.jpg
Kamal_Kharrazi_0002.jpg
Jon_Gruden_0004.jpg
Nick_Turner_0001.jpg
Saddam_Hussein_0020.jpg
Thaksin_Shinawatra_0006.jpg
Bill_Readdy_0001.jpg
Rene_Antonio_Leon_Rodriguez_0001.jpg
Donald_Rumsfeld_0094.jpg
Fernando_Henrique_Cardoso_0001.jpg
Colin_Powell_0019.jpg
Andrew_Bunner_0001.jpg
Kurt_Russell_0001.jpg
Doug_Moe_0001.jpg
Jeffrey_Immelt_0002.jpg
Hank_Aaron_0001.jpg
Colin_Powell_0147.jpg
Gary_Forsee_0001.jpg
John_Paul_II_0011.jpg
Hu_Jintao_0010.jpg
Minnie_Mendoza_0001.jpg
Ariel_Sharon_0035.jpg
Vincent_Brooks_0001.jpg
Robert_Fico_0001.jpg
Michael_Chiklis_0005.jpg
Michael_Olowokandi_0001.jpg
Thabo_Mbeki_0005.jpg
Tiger_Woods_0001.jpg
Guillermo_Canas_0004.jpg
Mahmoud_Abbas_0010.jpg
George_W_Bush_0259.jpg
Alan_Greenspan_0002.jpg
Hans_Blix_0009.jpg
Tiger_Woods_0002.jpg
Roman_Polanski_0001.jpg
Christopher_Reeve_0004.jpg
Irwan_Fadzi_Idris_0001.jpg
Hugo_Chavez_0016.jpg
Rick_Lu_0001.j

Benjamin_Netanyahu_0002.jpg
George_W_Bush_0519.jpg
Ahmad_Masood_0002.jpg
George_W_Bush_0509.jpg
Mike_Holmgren_0002.jpg
Georgi_Parvanov_0002.jpg
Roman_Polanski_0004.jpg
Kieran_Prendergast_0002.jpg
Martin_Burnham_0001.jpg
Tang_Jiaxuan_0001.jpg
Larry_Lucchino_0001.jpg
Sean_Patrick_OMalley_0002.jpg
John_Warner_0001.jpg
Tung_Chee-hwa_0008.jpg
Sebastian_Saja_0002.jpg
Yasser_Arafat_0006.jpg
Ruben_Wolkowyski_0001.jpg
Vincent_Brooks_0002.jpg
Kelsey_Grammer_0001.jpg
Scott_Rolen_0001.jpg
John_Marburger_0001.jpg
Earl_Fritts_0001.jpg
Saddam_Hussein_0013.jpg
Bijan_Darvish_0003.jpg
Tom_Daschle_0016.jpg
Mikulas_Dzurinda_0001.jpg
Aleksander_Kwasniewski_0002.jpg
Zhong_Nanshan_0001.jpg
Dan_Wheldon_0001.jpg
George_P_Bush_0001.jpg
John_Stockton_0003.jpg
Donald_Rumsfeld_0111.jpg
Omar_Sharif_0002.jpg
David_Beckham_0022.jpg
Hans_Blix_0027.jpg
Ira_Einhorn_0001.jpg
Jose_Manuel_Durao_Barroso_0006.jpg
John_Ashcroft_0009.jpg
Prince_Rainier_III_0001.jpg
Vicente_Fox_0027.jpg
Ramon_Santana_0001.jpg
David_Caruso_0003.

Octavio_Lara_0001.jpg
Muhammad_Ali_0010.jpg
George_W_Bush_0146.jpg
Rafael_Ramirez_0002.jpg
Tim_Curry_0002.jpg
Scott_Blum_0001.jpg
Kim_Ryong-sung_0003.jpg
Miguel_Hakim_0001.jpg
David_Ho_0001.jpg
John_Ashcroft_0040.jpg
Georgi_Parvanov_0001.jpg
Colin_Powell_0208.jpg
Tom_Hanks_0003.jpg
Albert_Costa_0002.jpg
Wolfgang_Schneiderhan_0001.jpg
Ted_Nolan_0001.jpg
James_Comey_0001.jpg
John_Sununu_0001.jpg
Tim_Henman_0019.jpg
Thomas_Franklin_0001.jpg
Chan_Choi_0001.jpg
Bill_Frist_0003.jpg
Vladimir_Putin_0025.jpg
James_Blake_0014.jpg
Willie_Wilson_0001.jpg
George_W_Bush_0087.jpg
Bobby_Kielty_0001.jpg
David_Collenette_0001.jpg
Jacques_Chirac_0008.jpg
Shimon_Peres_0002.jpg
Kofi_Annan_0024.jpg
Liu_Ye_0001.jpg
Khader_Rashid_Rahim_0001.jpg
Micheal_Jourdain_Jr_0001.jpg
Arnold_Schwarzenegger_0035.jpg
Geoff_Hoon_0004.jpg
Jay_Garner_0002.jpg
Maurice_Papon_0001.jpg
Gerry_Adams_0004.jpg
Harry_Schmidt_0002.jpg
Colin_Powell_0123.jpg
Tom_Ridge_0030.jpg
George_W_Bush_0462.jpg
Adrien_Brody_0011.jpg
Tom_Ridge_0002.j

John_Ashcroft_0046.jpg
Naoto_Kan_0001.jpg
Gordon_Brown_0005.jpg
Joe_Dicaro_0001.jpg
Gerhard_Schroeder_0045.jpg
John_Ashcroft_0042.jpg
Carlos_Ruckauf_0001.jpg
Paul_Patton_0001.jpg
Darcy_Regier_0001.jpg
Kevin_Harvick_0001.jpg
Paul_Burrell_0001.jpg
Gerhard_Schroeder_0070.jpg
Jose_Maria_Aznar_0019.jpg
Xavier_Malisse_0003.jpg
Lucio_Gutierrez_0010.jpg
Robert_Duvall_0003.jpg
Atal_Bihari_Vajpayee_0012.jpg
Kurt_Warner_0003.jpg
Alvaro_Noboa_0002.jpg
George_W_Bush_0323.jpg
Tim_Robbins_0004.jpg
Joe_Mantello_0002.jpg
John_Ashcroft_0015.jpg
Mahdi_Al_Bassam_0001.jpg
Sebastian_Saja_0003.jpg
Alvaro_Uribe_0002.jpg
Dwayne_Johnson_0001.jpg
Curtis_Rodriguez_0001.jpg
Ariel_Sharon_0060.jpg
Dale_Earnhardt_0001.jpg
Azmi_Bishara_0001.jpg
Carlos_Quintanilla_Schmidt_0001.jpg
Steven_Hatfill_0002.jpg
Chen_Kaige_0001.jpg
Dan_Quayle_0001.jpg
Gerhard_Schroeder_0058.jpg
Michel_Therrien_0001.jpg
Antonio_Banderas_0001.jpg
Laurent_Gbagbo_0002.jpg
Brian_Pavlich_0001.jpg
Bruce_Arena_0001.jpg
Michael_Peat_0001.jpg
George_W_B

Tim_Henman_0012.jpg
George_W_Bush_0156.jpg
Zinedine_Zidane_0004.jpg
James_Kopp_0003.jpg
Joschka_Fischer_0018.jpg
Vitali_Klitschko_0003.jpg
Arnold_Schwarzenegger_0017.jpg
James_McPherson_0001.jpg
Tony_Blair_0069.jpg
Tim_Allen_0001.jpg
Sean_OKeefe_0005.jpg
Scott_Peterson_0002.jpg
Frank_Solich_0004.jpg
Rod_Jong-il_0001.jpg
Jon_Constance_0001.jpg
Vladimir_Putin_0019.jpg
Tim_Henman_0017.jpg
Hugh_Grant_0004.jpg
Paul_Bremer_0020.jpg
James_Kirtley_0001.jpg
Al_Gore_0001.jpg
Matthew_McConaughey_0001.jpg
George_Clooney_0001.jpg
Kamal_Kharrazi_0003.jpg
Lew_Rywin_0001.jpg
Alex_Gonzalez_0001.jpg
Paul_Greengrass_0001.jpg
Colin_Powell_0212.jpg
Larry_Tanenbaum_0001.jpg
Gary_Barnett_0001.jpg
Recep_Tayyip_Erdogan_0009.jpg
Elvis_Presley_0001.jpg
Recep_Tayyip_Erdogan_0016.jpg
Carlos_Ortega_0003.jpg
George_W_Bush_0091.jpg
Marc_Grossman_0001.jpg
Rick_Stansbury_0003.jpg
Dion_Glover_0001.jpg
Martin_Verkerk_0002.jpg
John_Jumper_0001.jpg
George_HW_Bush_0006.jpg
Tony_Blair_0078.jpg
Duane_Lee_Chapman_0002.jpg
Susa

Andrei_Mikhnevich_0002.jpg
John_Ashcroft_0027.jpg
Kamel_Morjane_0001.jpg
Paul_Burrell_0006.jpg
Tony_Blair_0104.jpg
Vladimir_Putin_0044.jpg
Steve_Park_0001.jpg
Benjamin_Netanyahu_0003.jpg
James_Kelly_0009.jpg
Tom_Hanks_0007.jpg
Tim_Duncan_0001.jpg
Fujio_Cho_0003.jpg
Lawrence_Vito_0001.jpg
Jorge_Batlle_0003.jpg
Jamie_Martin_0001.jpg
Mike_Slive_0001.jpg
Bulent_Ecevit_0003.jpg
Joe_Torre_0003.jpg
John_Ashcroft_0025.jpg
Andre_Agassi_0031.jpg
Zhu_Rongji_0002.jpg
Silvio_Berlusconi_0016.jpg
Kofi_Annan_0027.jpg
Vladimir_Putin_0022.jpg
Art_Howe_0002.jpg
Leonard_Schrank_0001.jpg
Elvis_Costello_0001.jpg
John_Mabry_0001.jpg
LeBron_James_0005.jpg
Rick_Santorum_0003.jpg
Rodney_Rempt_0001.jpg
Junichiro_Koizumi_0052.jpg
Jeremy_Greenstock_0013.jpg
John_Ashcroft_0006.jpg
Michael_Caine_0001.jpg
Jim_Tressel_0004.jpg
Tony_Blair_0049.jpg
Lleyton_Hewitt_0037.jpg
Johnny_Hallyday_0001.jpg
Arminio_Fraga_0005.jpg
George_W_Bush_0316.jpg
Luis_Berrondo_0001.jpg
Charlie_Sheen_0001.jpg
Tony_Blair_0085.jpg
Mikulas_Dzuri

Chen_Liang_Yu_0001.jpg
George_Roy_Hill_0001.jpg
Salman_Khan_0001.jpg
Erika_Reyes_0001.jpg
Paul_Krueger_0001.jpg
Roy_Williams_0002.jpg
Guillermo_Coria_0024.jpg
Colin_Powell_0230.jpg
Tang_Jiaxuan_0007.jpg
Charlie_Zaa_0002.jpg
George_W_Bush_0451.jpg
Michael_Jackson_0009.jpg
Tom_Ridge_0026.jpg
Kevin_Costner_0006.jpg
Chris_Whitney_0001.jpg
Paul_Bremer_0010.jpg
Robert_Zoellick_0007.jpg
Greg_Ostertag_0002.jpg
Joe_Cocker_0001.jpg
Ricardo_Lagos_0004.jpg
Hugo_Chavez_0006.jpg
Alexander_Losyukov_0003.jpg
Colin_Powell_0090.jpg
Guillermo_Coria_0017.jpg
John_Ashcroft_0013.jpg
Jose_Lopez_Beltran_0001.jpg
Donald_Rumsfeld_0082.jpg
John_Bolton_0016.jpg
Patrick_Bourrat_0001.jpg
Colin_Powell_0066.jpg
Ken_Balk_0001.jpg
George_Clooney_0009.jpg
Kimi_Raikkonen_0002.jpg
Alvaro_Uribe_0025.jpg
Oscar_Bolanos_0001.jpg
Daniel_Radcliffe_0003.jpg
Bud_Selig_0002.jpg
Donald_Rumsfeld_0025.jpg
Choi_Sung-hong_0003.jpg
John_Kerry_0014.jpg
Don_Siegelman_0002.jpg
John_Howard_0008.jpg
Pervez_Musharraf_0012.jpg
Daryl_Sabara_000

Ralf_Schumacher_0006.jpg
Nelson_Mandela_0002.jpg
Colin_Powell_0117.jpg
Yasser_Arafat_0001.jpg
Thomas_Bjorn_0002.jpg
George_W_Bush_0301.jpg
Gustavo_Noboa_0001.jpg
Humberto_Coelho_0001.jpg
Guillermo_Coria_0027.jpg
Tim_Allen_0002.jpg
John_Paul_II_0003.jpg
Colin_Powell_0140.jpg
Mark_Hurlbert_0001.jpg
Donald_Rumsfeld_0112.jpg
Brian_Cashman_0001.jpg
Gus_Frerotte_0001.jpg
Bill_Simon_0009.jpg
Mike_OConnell_0001.jpg
Rainer_Schuettler_0005.jpg
Win_Aung_0004.jpg
Daniel_Day-Lewis_0001.jpg
Serge_Melac_0001.jpg
Bixente_LIzarazu_0001.jpg
Bruce_Weber_0002.jpg
Gerhard_Schroeder_0026.jpg
George_W_Bush_0201.jpg
George_W_Bush_0255.jpg
George_W_Bush_0501.jpg
Lance_Armstrong_0011.jpg
Daniel_Coats_0001.jpg
Colin_Powell_0194.jpg
Greg_Owen_0002.jpg
Gerhard_Schroeder_0104.jpg
Oscar_DLeon_0001.jpg
Ralph_Sampson_0001.jpg
George_Lopez_0001.jpg
Roger_Federer_0014.jpg
George_W_Bush_0161.jpg
Luis_Ernesto_Derbez_Bautista_0005.jpg
Keanu_Reeves_0012.jpg
Jacques_Chirac_0026.jpg
Hu_Jintao_0006.jpg
John_Wayne_0001.jpg
Luis

(5491, 1601)

In [703]:
np.random.shuffle(gender_faces_data)
gender_faces_data

array([[120.,  77.,  14., ..., 152., 155.,   0.],
       [ 81.,  95., 111., ..., 122., 112.,   0.],
       [ 81.,  88., 134., ..., 166., 176.,   1.],
       ...,
       [213.,  55.,   9., ..., 159., 183.,   1.],
       [ 11.,  15.,  15., ...,  36.,  37.,   1.],
       [ 31.,  24.,  16., ...,  38.,  33.,   0.]])

In [704]:
np.savetxt("./genders_data_40x40_pca_200_large.csv", gender_faces_data.astype(np.int), delimiter=",")

## Train Linear regression Model

In [705]:
gender_faces_data = pd.read_csv('./genders_data_40x40_pca_200_large.csv', sep=',', header=None)
# 
#gender_faces_data = gender_faces_data.as_matrix()
#np.random.shuffle(gender_faces_data)
print(gender_faces_data.shape)
gender_faces_data.head()

(5491, 1601)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1591,1592,1593,1594,1595,1596,1597,1598,1599,1600
0,120.0,77.0,14.0,9.0,18.0,18.0,20.0,15.0,16.0,14.0,...,208.0,206.0,203.0,204.0,195.0,160.0,151.0,152.0,155.0,0.0
1,81.0,95.0,111.0,100.0,83.0,87.0,83.0,92.0,112.0,127.0,...,160.0,159.0,151.0,140.0,134.0,132.0,130.0,122.0,112.0,0.0
2,81.0,88.0,134.0,163.0,167.0,175.0,178.0,188.0,193.0,193.0,...,75.0,100.0,124.0,133.0,146.0,152.0,156.0,166.0,176.0,1.0
3,74.0,81.0,74.0,75.0,98.0,107.0,107.0,88.0,77.0,71.0,...,123.0,113.0,104.0,108.0,111.0,107.0,147.0,220.0,215.0,1.0
4,44.0,46.0,51.0,104.0,177.0,205.0,211.0,213.0,214.0,216.0,...,182.0,218.0,206.0,122.0,130.0,109.0,71.0,52.0,109.0,1.0


In [706]:
features = gender_faces_data.as_matrix()[:, :RESIZE_SHAPE[0]*RESIZE_SHAPE[1]] #features = gender_faces_data[:, :50*50] # features = gender_faces_data.as_matrix()[:, :50*50]
print(features.shape)

(5491, 1600)


## Feature selection

In [707]:
pca = PCA(n_components=200)
pca.fit(features)
features = pca.transform(features)
features.shape

(5491, 200)

## Data normalization

In [708]:
scaler = StandardScaler()
scaler.fit(features)
features = scaler.transform(features)
features

array([[-0.98519802,  1.36311158,  0.79118083, ...,  1.78776283,
         0.34266424, -0.20190846],
       [ 0.09913983,  0.79997289,  0.33421812, ...,  0.11455361,
        -0.17808632, -0.16380186],
       [ 0.89907142,  1.31589897,  1.84274095, ..., -1.54525248,
         1.00147035,  0.85960081],
       ...,
       [ 0.05248495,  0.19896359,  0.4475282 , ...,  0.04793814,
         0.02155197, -0.49456956],
       [-1.84933662,  0.19453059, -1.08459934, ..., -1.0744683 ,
        -0.54407425, -1.14529174],
       [-2.59479381,  0.62558343, -0.07449493, ...,  0.2336995 ,
         0.15685063, -1.63571563]])

In [709]:
labels = gender_faces_data.as_matrix()[:, RESIZE_SHAPE[0]*RESIZE_SHAPE[1]:].ravel() # labels = gender_faces_data.as_matrix()[:, 50*50:].ravel()
print(labels.shape)
print(labels)

(5491,)
[0. 0. 1. ... 1. 1. 0.]


In [710]:
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, train_size=0.7, test_size=0.3, random_state=42)

## Logistic Regression Modele

### Basic logistic regression

In [711]:
logistic_regression = LogisticRegression()

In [712]:
logistic_regression.fit(features_train, labels_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [713]:
logistic_regression.score(features_test, labels_test)

0.8391990291262136

In [714]:
logistic_regression.score(features_train, labels_train)

0.868592245641426

### Tune the Logistic Regression Params

In [1271]:
kfold = StratifiedKFold(n_splits=8) # train with 8
params = {'C': [0.0001, 0.001, 0.01, 1, 10], 'penalty': ['l2', 'l1']}

In [1272]:
grid_search_logregr = GridSearchCV(
    logistic_regression, param_grid=params, cv=kfold, return_train_score=True)
grid_search_logregr.fit(features_train, labels_train)

GridSearchCV(cv=StratifiedKFold(n_splits=8, random_state=None, shuffle=False),
       error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 1, 10], 'penalty': ['l2', 'l1']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [1273]:
grid_search_logregr.best_params_

{'C': 0.01, 'penalty': 'l2'}

In [1270]:
grid_search_logregr.best_score_

0.8397085610200364

In [1274]:
f1_score(labels_test, grid_search_logregr.predict(features_test))

0.8405797101449275

In [1275]:
labels_train[:100]
#labels_train

array([0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
       0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0.,
       0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1.])

In [1276]:
labels_train[:100]

array([0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
       0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0.,
       0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1.])

In [1277]:
grid_search_logregr.cv_results_

{'mean_fit_time': array([0.02688548, 0.0152567 , 0.0292415 , 0.01538435, 0.04621741,
        0.02450159, 0.06012151, 0.071769  , 0.06082225, 0.07387519]),
 'mean_score_time': array([0.00056687, 0.00048113, 0.00048733, 0.00048396, 0.00050572,
        0.00065139, 0.0004912 , 0.00057942, 0.00049159, 0.00057784]),
 'mean_test_score': array([0.83346344, 0.49596669, 0.83554515, 0.49596669, 0.83970856,
        0.7704918 , 0.83684621, 0.83762685, 0.83684621, 0.83684621]),
 'mean_train_score': array([0.86234677, 0.49596669, 0.86584123, 0.49596669, 0.86836897,
        0.77896716, 0.86922387, 0.86888938, 0.86940974, 0.86922389]),
 'param_C': masked_array(data=[0.0001, 0.0001, 0.001, 0.001, 0.01, 0.01, 1, 1, 10, 10],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_penalty': masked_array(data=['l2', 'l1', 'l2', 'l1', 'l2', 'l1', 'l2', 'l1', 'l2',
                    'l1'],
    

## Train SVM Model

In [724]:
linear_svm = LinearSVC()
linear_svm.fit(features_train, labels_train)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [725]:
print("Train score: {}".format(linear_svm.score(features_train, labels_train)))
print("Test score: {}".format(linear_svm.score(features_test, labels_test)))
print("Diff score: {}".format(
    linear_svm.score(features_train, labels_train) - linear_svm.score(features_test, labels_test)))

Train score: 0.8665105386416861
Test score: 0.8379854368932039
Diff score: 0.028525101748482218


The difference between the train score and test score is relatively high and the train score is very high. That means high variance and/or imbalanced data between the training set and the test set. So we need to perform k-Fold coross validation with stratification and grid search to to evaluate the variance and the best parameters for this algorithm with the concrete amount of training data.

### Tune the Linear SVM Model

In [730]:
kfold_linear_cv = StratifiedKFold(n_splits=8) # train with 8
params_linear_cv = {'C': [0.0001, 0.001, 0.01], 'penalty': ['l2', 'l2']} # 0.01 is with smaller variance
grid_search_linear_svm = GridSearchCV(
    linear_svm, param_grid=params_linear_cv, cv=kfold_linear_cv, return_train_score=True)

start = time.time()
grid_search_linear_svm.fit(features_train, labels_train)
end = time.time()

In [731]:
print("GridSearchCV took {%.2f} seconds for %d candidate parameter settings."
      % (end - start, len(grid_search_linear_svm.cv_results_['params'])))

GridSearchCV took {4.06} seconds for 6 candidate parameter settings.


In [732]:
grid_search_linear_svm.best_score_

0.8397085610200364

In [733]:
grid_search_linear_svm.best_params_

{'C': 0.001, 'penalty': 'l2'}

In [734]:
# grid_search_linear_svm.grid_scores_
# grid_search_linear_svm

In [736]:
grid_search_linear_svm.cv_results_

{'mean_fit_time': array([0.02907357, 0.02978426, 0.04011017, 0.03980702, 0.17223376,
        0.17438263]),
 'mean_score_time': array([0.00062865, 0.00061372, 0.00057131, 0.00058264, 0.00057799,
        0.00057575]),
 'mean_test_score': array([0.83450429, 0.83450429, 0.83970856, 0.83970856, 0.83554515,
        0.83554515]),
 'mean_train_score': array([0.86565528, 0.86565528, 0.867514  , 0.867514  , 0.86803445,
        0.86803445]),
 'param_C': masked_array(data=[0.0001, 0.0001, 0.001, 0.001, 0.01, 0.01],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_penalty': masked_array(data=['l2', 'l2', 'l2', 'l2', 'l2', 'l2'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 0.0001, 'penalty': 'l2'},
  {'C': 0.0001, 'penalty': 'l2'},
  {'C': 0.001, 'penalty': 'l2'},
  {'C': 0.001, 'penalty': 'l2'},
  {'C': 0.01, 'penalty': 'l2'},
  {'C': 0.0

### Gaussian SVM Model

In [737]:
svm = SVC(C=0.0001, gamma=2)
svm.fit(features_train, labels_train)
svm.score(features_test, labels_test)

0.4945388349514563

## Random Forest Clasifier

In [750]:
forest = RandomForestClassifier(n_estimators=1500, max_depth=2)
forest.fit(features_train, labels_train)
print("score: {}".format(forest.score(features_test, labels_test)))
print("f1_score: {}".format(f1_score(labels_test, forest.predict(features_test))))

score: 0.7402912621359223
f1_score: 0.7559863169897376


In [751]:
print("train score: {}".format(forest.score(features_train, labels_train)))

train score: 0.7876658860265418


## Adaptive Boost

In [764]:
basic_forest = DecisionTreeClassifier(max_depth=1)
ada_boost = AdaBoostClassifier(basic_forest, n_estimators=100)
ada_boost.fit(features_train, labels_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=100, random_state=None)

In [765]:
ada_boost.score(features_test, labels_test)

0.7651699029126213

In [766]:
ada_boost.score(features_train, labels_train)

0.8568826437678897

## Demo

In [1278]:
import cv2
import numpy as np
import time

# data: https://github.com/opencv/opencv/tree/master/data/haarcascades
# faces, see: https://github.com/opencv/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml
# eyes, see: https://github.com/opencv/opencv/blob/master/data/haarcascades/haarcascade_eye.xml
face_cascade = cv2.CascadeClassifier('./faces_data/face.xml')
my_image = None

size = 50

cap = cv2.VideoCapture(0)
is_running = True
while True:
    _, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # looks for faces within the image
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    w = 10 #30
    h = 10 #50
    for (x, y, width, height) in faces:
        #print("x: {}, y: {}, width: {}, height: {}".format(x, y, width, height))
        # draws a rectangle around the detected face
        cv2.rectangle(frame, (x - w, y - h), (x + width + w, y + height + h), (255, 0, 0), 5)
        face_gray = gray[(y - h): y + height + h, (x - w): x + width + w]

        im_height = face_gray.shape[0]
        im_width = face_gray.shape[1]
        if cv2.waitKey(1) == ord('q'):
            #cv2.imwrite("aaa.jpg", face_gray)
            resized = cv2.resize(face_gray, dsize=RESIZE_SHAPE, interpolation=cv2.INTER_CUBIC)
            my_image = resized
            #cv2.imwrite("bbbq.jpg", resized)
            time.sleep(2)
            #print((np.ravel(face_gray)))
            is_running = False
            break

    cv2.imshow('frame', frame)
    #print(frame.shape)

    if not is_running:
        break

cap.release()
cv2.destroyAllWindows()


In [1279]:
my_image = cv2.imread('./faces_data/fem_n2.JPG', cv2.IMREAD_GRAYSCALE)
my_image = cv2.resize(my_image, dsize=RESIZE_SHAPE)
my_image.shape

# my_image = cv2.imread('./faces_data/gray_3.jpeg', cv2.IMREAD_GRAYSCALE)
# img.shape
# img

(40, 40)

In [1280]:
#cv2.imwrite('./fem_n3.jpg', my_image)
img = my_image.ravel()
print(img[:100])
print(img.shape)



[ 80 113 106 212 129 102 110 106 133  92  81 109  77 118  85 132 181 143
 253 232 114 112 186 173  92 172 202 121 103 201  96 113 129 104 125  90
 112 128 153  89  86  60  88  93 143 123 212 168 115 116 104 109 125 135
 125 121 149 199  74 130 143 229 111 120 140 162 155 192 103 178 114 162
  48  81  57  93  93  87 111 109  62  91  41  93 106  77 124 116 132 106
 178 111  62 125  89 121 159 147 159 250]
(1600,)


In [1306]:
#cv2.imwrite('./test_img.jpg', my_image)
row = extract_features('./faces_data/fem_5.jpeg', FEMALE_CLASS)
img = row[:RESIZE_SHAPE[0]*RESIZE_SHAPE[1]]
img

array([ 36,  29,  28, ...,  70,  44, 146])

In [1307]:
# img = cv2.imread('./faces_data/male_d1.jpg', 0)
# img = row[:RESIZE_SHAPE[0]*RESIZE_SHAPE[1]]
# img = np.ravel(img)
# img

In [1308]:
img = pca.transform([img])
img.shape

(1, 200)

In [1309]:
img = scaler.transform(img)

In [1310]:

result_1 = grid_search_logregr.best_estimator_.predict_proba(img)
result_1

array([[0.92625755, 0.07374245]])

In [1311]:
result_2 = ada_boost.predict_proba(img)
result_2

array([[0.50870257, 0.49129743]])

In [1312]:
result_3 = forest.predict_proba(img)
result_3

array([[0.52505932, 0.47494068]])

In [1313]:
result_3 = linear_svm.predict(img)
result_3

array([0.])

In [1314]:
result_4 = grid_search_linear_svm.predict(img)
result_4

array([0.])

In [1315]:
#X_transformed

In [1316]:
logistic_regression.predict_proba(img)

array([[0.98442426, 0.01557574]])