# 利用特徵值與標記，做SVM分類監督學習

### 匯入特徵值與標記

In [1]:
import os, pickle
# Directory holding the pickled LFW embedding artifacts. The path is built
# with os.path.join so the separator is right on every OS and no backslash
# sequences (which Python parses as string escapes) appear in the literals.
DATA_PATH = os.path.join('face_recognition', 'image_data')

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that were produced by a trusted pipeline.

# Face embedding feature vectors.
with open(os.path.join(DATA_PATH, 'lfw_emb_features.pkl'), 'rb') as emb_features_file:
    emb_features = pickle.load(emb_features_file)

# Label associated with each face embedding.
with open(os.path.join(DATA_PATH, 'lfw_emb_labels.pkl'), 'rb') as emb_labels_file:
    emb_labels = pickle.load(emb_labels_file)

# Dictionary mapping each label to the corresponding person's name.
with open(os.path.join(DATA_PATH, 'lfw_emb_labels_dict.pkl'), 'rb') as emb_labels_dict_file:
    emb_labels_dict = pickle.load(emb_labels_dict_file)

In [4]:
# Sanity check of the loaded data, one item per line: the shape of the
# feature array, the number of labels, and the label-to-name dictionary.
print(emb_features.shape, len(emb_labels), emb_labels_dict, sep='\n')

(9299, 128)
9299
{0: 'Abdullah_Gul', 1: 'Adrien_Brody', 2: 'Ahmed_Chalabi', 3: 'Ai_Sugiyama', 4: 'Al_Gore', 5: 'Al_Sharpton', 6: 'Alan_Greenspan', 7: 'Alastair_Campbell', 8: 'Albert_Costa', 9: 'Alejandro_Toledo', 10: 'Ali_Naimi', 11: 'Allyson_Felix', 12: 'Alvaro_Uribe', 13: 'Amelia_Vega', 14: 'Amelie_Mauresmo', 15: 'Ana_Guevara', 16: 'Ana_Palacio', 17: 'Andre_Agassi', 18: 'Andy_Roddick', 19: 'Angela_Bassett', 20: 'Angela_Merkel', 21: 'Angelina_Jolie', 22: 'Ann_Veneman', 23: 'Anna_Kournikova', 24: 'Antonio_Banderas', 25: 'Antonio_Palocci', 26: 'Ari_Fleischer', 27: 'Ariel_Sharon', 28: 'Arminio_Fraga', 29: 'Arnold_Schwarzenegger', 30: 'Arnoldo_Aleman', 31: 'Ashanti', 32: 'Atal_Bihari_Vajpayee', 33: 'Ben_Affleck', 34: 'Benazir_Bhutto', 35: 'Benjamin_Netanyahu', 36: 'Bernard_Law', 37: 'Bertie_Ahern', 38: 'Bill_Clinton', 39: 'Bill_Frist', 40: 'Bill_Gates', 41: 'Bill_Graham', 42: 'Bill_McBride', 43: 'Bill_Simon', 44: 'Billy_Crystal', 45: 'Binyamin_Ben-Eliezer', 46: 'Bob_Graham', 47: 'Bob_Hope

### 將資料區分成train_data和test_data

In [5]:
# 準備相關變數
# Containers for the two splits.
X_train, y_train = [], []
X_test, y_test = [], []

# Labels whose first sample has already been routed to the test split.
processed = set()

# Split rule: the first sample seen for each label becomes a test sample;
# every later sample carrying that label becomes a training sample.
for emb_feature, emb_label in zip(emb_features, emb_labels):
    if emb_label not in processed:
        processed.add(emb_label)
        X_test.append(emb_feature)
        y_test.append(emb_label)
        continue
    X_train.append(emb_feature)
    y_train.append(emb_label)

# Report the size of each split.
print('X_train: {}, y_train: {}'.format(len(X_train), len(y_train)))
print('X_test: {}, y_test: {}'.format(len(X_test), len(y_test)))

X_train: 8759, y_train: 8759
X_test: 540, y_test: 540


### 用LinearSVC來做分類訓練

In [7]:
from sklearn.svm import LinearSVC
# import winsound
# Build and train the linear SVM classifier on the training embeddings.
# fit() returns the estimator itself, so construction and training are chained.
print('Training classifier')
linearsvc_classifier = LinearSVC(C=2, multi_class='ovr').fit(X_train, y_train)

# Check the accuracy on the held-out samples and print it.
score = linearsvc_classifier.score(X_test, y_test)
print("Validation result: ", score)
# winsound.MessageBeep()

Training classifier
Validation result:  0.9740740740740741


### 將完整model儲存使用

In [14]:
# 產生一個人臉的人名列表，以便辨識後來使用
# Build the list of display names, ordered by label key, so that a predicted
# label can be translated into a person's name after recognition.
#
# Any raw name listed in member_list is stored as 'Unknown'; every other name
# is stored with underscores replaced by spaces. member_list is empty here,
# so no name is masked.
# NOTE(review): confirm whether listed members are meant to be masked or kept;
# swap the two outcomes below if the opposite is intended.
member_list = []
class_names = [
    'Unknown' if emb_labels_dict[key] in member_list
    else emb_labels_dict[key].replace('_', ' ')
    for key in sorted(emb_labels_dict)
]
print(class_names)

# Save the classifier together with its class names to the file system.
# The path is built with os.path.join so that no backslash sequences (which
# Python parses as string escapes) appear in the literal and the separator
# is right on every OS.
model_path = os.path.join(
    'face_recognition', 'model', 'linearsvm', 'lfw_svm_classifier.pkl')
with open(model_path, 'wb') as outfile:
    # class_names may be swapped for custom key/value pairs per merchant.
    pickle.dump((linearsvc_classifier, class_names), outfile)

print('Saved classifier model to file lfw_svm_classifier.pkl')

['Abdullah Gul', 'Adrien Brody', 'Ahmed Chalabi', 'Ai Sugiyama', 'Al Gore', 'Al Sharpton', 'Alan Greenspan', 'Alastair Campbell', 'Albert Costa', 'Alejandro Toledo', 'Ali Naimi', 'Allyson Felix', 'Alvaro Uribe', 'Amelia Vega', 'Amelie Mauresmo', 'Ana Guevara', 'Ana Palacio', 'Andre Agassi', 'Andy Roddick', 'Angela Bassett', 'Angela Merkel', 'Angelina Jolie', 'Ann Veneman', 'Anna Kournikova', 'Antonio Banderas', 'Antonio Palocci', 'Ari Fleischer', 'Ariel Sharon', 'Arminio Fraga', 'Arnold Schwarzenegger', 'Arnoldo Aleman', 'Ashanti', 'Atal Bihari Vajpayee', 'Ben Affleck', 'Benazir Bhutto', 'Benjamin Netanyahu', 'Bernard Law', 'Bertie Ahern', 'Bill Clinton', 'Bill Frist', 'Bill Gates', 'Bill Graham', 'Bill McBride', 'Bill Simon', 'Billy Crystal', 'Binyamin Ben-Eliezer', 'Bob Graham', 'Bob Hope', 'Bob Stoops', 'Boris Becker', 'Brad Johnson', 'Britney Spears', 'Bulent Ecevit', 'Calista Flockhart', 'Cameron Diaz', 'Carla Del Ponte', 'Carlos Menem', 'Carlos Moya', 'Carmen Electra', 'Carrie-An