# Building the Classifier

## 1. For a single region

1.1 Read saved feature matrix and corresponding labels

In [3]:
import pickle
from os.path import join

region = 'borde_rural'
pickle_path = join('..', '..', 'pickles')
with open(join(pickle_path, 'resnet50_feature_matrix_' + region + '.pkl'), 'rb') as f:
    resnet50_feature_matrix = pickle.load(f)
with open(join(pickle_path, 'labels_' + region + '.pkl'), 'rb') as f:
    labels = pickle.load(f)

1.2 Split data into train and validation set

In [4]:
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import unique_labels

In [None]:
features_train, features_test, labels_train, labels_test = train_test_split(resnet50_feature_matrix,
                                                                            labels,
                                                                            test_size=0.33,
                                                                            random_state=43)

1.3. Train a classifier on the training set and save it to disk

In [11]:
clf = svm.SVC(gamma='scale', probability=True, C=100, decision_function_shape='ovr')
clf.fit(features_train, labels_train)

with open(join(pickle_path, 'classifier.pkl' ), 'wb') as f:
    pickle.dump(clf, f)

1.4. Predict labels on the validation set according to the classifier

In [12]:
predicted_labels = clf.predict(features_test)

1.5. Calculate and plot (to do) the confusion matrix

In [13]:
cm = confusion_matrix(labels_test, predicted_labels)
print(cm)

[[681  75   1   0   0]
 [121 361   2  10   0]
 [  8  12  61   6   0]
 [  6  52  10  29   0]
 [  1   1   0   0   0]]


In [15]:
pred_probas = clf.predict_proba(features_test)

for i in range(len(predicted_labels)):
    print(pred_probas[i], labels_test[i])

[2.08885912e-01 7.89880724e-01 7.74264701e-05 6.23981860e-04
 5.31955225e-04] 0
[1.08218083e-01 8.86040970e-01 1.30118517e-04 4.15729137e-03
 1.45353738e-03] 1
[1.00702050e-01 8.70998548e-01 9.15605206e-04 2.72057601e-02
 1.78037037e-04] 1
[9.51833340e-01 4.66231019e-02 1.17158046e-04 5.30691157e-04
 8.95708845e-04] 0
[0.09360227 0.2310036  0.39233451 0.27256401 0.01049562] 0
[9.91296492e-01 8.03717082e-03 9.44983656e-06 6.70078700e-05
 5.89879029e-04] 0
[3.34683954e-01 6.17078929e-01 1.54529678e-02 3.24102492e-02
 3.73899914e-04] 1
[2.08937898e-01 7.83408759e-01 2.23600799e-03 5.25362484e-03
 1.63709524e-04] 1
[0.59911786 0.02289155 0.36711035 0.00958371 0.00129653] 1
[0.01766116 0.88672907 0.01217492 0.08149595 0.0019389 ] 1
[0.29068116 0.42402848 0.05214114 0.22865312 0.00449609] 3
[9.71221525e-01 1.05103207e-02 1.74913152e-03 1.58043263e-02
 7.14696842e-04] 0
[8.87642331e-01 1.06720361e-01 2.92056725e-03 2.59783646e-03
 1.18903722e-04] 0
[9.13558033e-01 8.53572086e-02 2.76048754e-0

 4.47149478e-04] 0
[9.96673867e-01 2.10643995e-03 3.93416367e-05 1.56511712e-04
 1.02383927e-03] 0
[8.26235155e-01 1.70897125e-01 8.00797605e-04 8.75150549e-04
 1.19177129e-03] 0
[0.06091954 0.78301483 0.00624092 0.14851636 0.00130834] 1
[5.68919030e-02 8.74019498e-01 8.17459469e-03 6.06078269e-02
 3.06177345e-04] 1
[0.93413285 0.04962944 0.00100683 0.00140775 0.01382313] 0
[7.70576099e-01 2.25749058e-01 3.31745347e-04 1.99313403e-03
 1.34996355e-03] 0
[0.9099777  0.06967552 0.01768083 0.00165156 0.00101438] 0
[9.95758049e-01 3.63491697e-03 2.52149794e-05 1.83626492e-05
 5.63456595e-04] 0
[0.18815285 0.1886653  0.38207463 0.22941459 0.01169262] 2
[9.82785065e-01 8.02355754e-03 5.03474967e-04 6.66781555e-04
 8.02112141e-03] 0
[3.82818205e-01 6.11684238e-01 1.07223308e-03 4.29333397e-03
 1.31989731e-04] 1
[0.00596228 0.15076215 0.08507413 0.7560054  0.00219603] 2
[0.71269662 0.27376961 0.00229392 0.00854565 0.0026942 ] 1
[0.22392417 0.7466972  0.00756133 0.02007301 0.00174429] 1
[0.83115

 3.56512503e-04] 1
[2.36600267e-02 1.93940988e-02 8.17400773e-01 1.38936759e-01
 6.08341831e-04] 3
[0.77149986 0.18584466 0.00210484 0.01048322 0.03006742] 0
[9.90579976e-01 8.56206335e-03 1.81727810e-05 7.72407436e-05
 7.62546700e-04] 0
[9.88412702e-01 7.48590066e-03 2.42969564e-03 1.51210989e-03
 1.59592066e-04] 0
[9.60990080e-01 3.53478614e-02 3.01758489e-04 5.50144062e-04
 2.81015634e-03] 0
[9.37597326e-01 6.12947558e-02 1.69001784e-04 2.73568970e-04
 6.65347776e-04] 0
[9.77329239e-01 1.88229900e-02 6.75191061e-05 2.90082240e-03
 8.79429857e-04] 0
[0.37846815 0.34198275 0.20217514 0.0691078  0.00826616] 0
[3.15616408e-02 9.54936041e-01 3.27621817e-04 1.28947045e-02
 2.79992336e-04] 1
[9.70354412e-01 2.88703504e-02 1.35259242e-04 5.46383476e-04
 9.35947273e-05] 0
[9.77815634e-01 2.04321389e-02 1.49767386e-03 6.66453277e-05
 1.87908293e-04] 0
[5.14321489e-01 4.76194156e-01 3.19308770e-03 6.09924066e-03
 1.92026424e-04] 0
[1.20467317e-01 8.58905091e-01 2.94784279e-04 2.00253665e-02
 3

[8.84267225e-01 1.11230872e-01 1.57445586e-03 2.15631188e-03
 7.71134634e-04] 0
[9.48634125e-01 4.84648448e-02 3.65872198e-04 1.03811385e-03
 1.49704405e-03] 0
[1.47664679e-02 9.62817500e-01 3.12815577e-04 2.20143531e-02
 8.88630032e-05] 1
[9.92189677e-01 5.37721617e-03 1.85620158e-04 1.04456062e-04
 2.14303108e-03] 0
[0.75080294 0.1425379  0.06015677 0.03442518 0.0120772 ] 0
[0.11463023 0.82686679 0.0025989  0.05405874 0.00184533] 1
[0.53649926 0.36129502 0.01794699 0.08055569 0.00370304] 1
[9.59418582e-01 2.97245067e-02 5.75586443e-04 7.64002845e-03
 2.64129641e-03] 0
[0.85742974 0.12173648 0.00090331 0.00845062 0.01147985] 1
[9.71806735e-01 2.27540190e-02 1.48048453e-04 2.51602843e-05
 5.26603736e-03] 0
[9.58589995e-04 6.39374591e-04 9.91539515e-01 3.88576147e-03
 2.97675873e-03] 2
[0.52806084 0.4244158  0.00633838 0.03877831 0.00240666] 0
[8.93910736e-01 1.04656596e-01 5.03929996e-05 1.06890498e-03
 3.13370046e-04] 0
[1.65608822e-02 1.29533306e-01 7.03539110e-04 8.51801093e-01
 1.4

[0.91700332 0.029551   0.03406059 0.01248394 0.00690114] 0
[7.89473213e-01 2.00026956e-01 7.62958452e-04 8.58933744e-03
 1.14753437e-03] 0
[7.75234779e-01 2.23823486e-01 1.55749182e-04 6.43917632e-04
 1.42068250e-04] 0
[5.46922005e-02 8.57019168e-01 1.11268276e-03 8.70778063e-02
 9.81427880e-05] 1
[9.71151396e-01 2.53442811e-02 2.33928150e-04 2.58004312e-04
 3.01239033e-03] 0
[9.96870921e-01 2.71260502e-03 1.81524361e-05 6.12110520e-05
 3.37110636e-04] 0
[9.20942534e-01 7.71661588e-02 2.99071015e-04 6.13267209e-04
 9.78968557e-04] 1
[0.53462507 0.44967655 0.00256775 0.00916703 0.0039636 ] 0
[9.72589203e-01 2.15437250e-02 1.34472279e-03 4.47529350e-03
 4.70552991e-05] 1
[0.11944022 0.47794607 0.21644161 0.18144178 0.00473032] 1
[0.77051487 0.21129199 0.01035607 0.00425161 0.00358547] 0
[0.10079875 0.60822095 0.02790135 0.26115646 0.00192249] 1
[7.36082071e-01 2.58994045e-01 8.08482800e-04 3.77407339e-03
 3.41328366e-04] 1
[0.32234847 0.15382842 0.12291375 0.38846935 0.01244001] 3
[0.118

6. Load all feature matrices into one and train the classifier

In [5]:
regions = ['borde_rural', 'borde_soacha', 'mixco_1_and_ebenezer', 'mixco_3']

In [6]:
global_feature_list = []
label_list = []
import numpy

for region in regions:
    with open(join(pickle_path, 'resnet50_feature_matrix_' + region + '.pkl'), 'rb') as f:
        resnet50_feature_matrix = pickle.load(f)
        global_feature_list.append(resnet50_feature_matrix)
    with open(join(pickle_path, 'labels_' + region + '.pkl'), 'rb') as f:
        labels = pickle.load(f)
        label_list.append(labels)
        
global_feature_matrix = numpy.concatenate(global_feature_list, axis=0)
gloabal_labels = numpy.concatenate(label_list, axis=0)

In [7]:
global_feature_matrix.shape

(13849, 2048)

In [8]:
gloabal_labels.shape

(13849,)

In [9]:
clf = svm.SVC(gamma='scale', probability=True, C=100, decision_function_shape='ovr')
clf.fit(global_feature_matrix, gloabal_labels)

with open(join(pickle_path, 'classifier_global.pkl' ), 'wb') as f:
    pickle.dump(clf, f)