In [1]:
import graphlab
from graphlab import SFrame
from __future__ import division
import numpy

A newer version of GraphLab Create (v1.9) is available! Your current version is v1.8.5.

You can use pip to upgrade the graphlab-create package. For more information see https://dato.com/products/create/upgrade.


### Load the SFrames

In [3]:
NorthEast_sframe = graphlab.load_sframe('data/normalized/northEast_sframe_new/');
South_sframe = graphlab.load_sframe('data/normalized/South_sframe_new/');
North_sframe = graphlab.load_sframe('data/normalized/North_sframe_new/');

print("NorthEast: "+str(len(NorthEast_sframe)));
print("South: "+str(len(South_sframe)));
print("North: "+str(len(North_sframe)));
print(str(len(NorthEast_sframe)+len(South_sframe)+len(North_sframe)) + " images in total");

NorthEast: 188
South: 110
North: 48
346 images in total


In [3]:
data_sframe = NorthEast_sframe.append(South_sframe)
data_sframe = data_sframe.append(North_sframe)

In [4]:
NorthEast_train, NorthEast_test = NorthEast_sframe.random_split(0.8, seed=0)
print(len(NorthEast_train));
print(len(NorthEast_test));

159
29


In [5]:
South_train, South_test = South_sframe.random_split(0.8, seed=0)
print(len(South_train));
print(len(South_test));

95
15


In [6]:
North_train, North_test = North_sframe.random_split(0.8, seed=0)
print(len(North_train));
print(len(North_test));

41
7


In [7]:
train_sframe = NorthEast_train.append(South_train)
train_sframe = train_sframe.append(North_train)

test_sframe = NorthEast_test.append(South_test)
test_sframe = test_sframe.append(North_test)

### Features array

In [8]:
features = ['chin',
 'chin_mouth',
 'eye',
 'eye_inner',
 'eye_outer',
 'eyebrow_inner',
 'eyebrow_outer',
 'mouth',
 'nose',
 'nose_mouth']

# NorthEast Classifier

## SVM

In [9]:
svm_NorthEast = graphlab.svm_classifier.create(train_sframe, features=features, target='NorthEast', validation_set=None, max_iterations=30)

### Accuracy of this model against the NorthEast test set

In [55]:
svm_NorthEast.evaluate(NorthEast_test)

{'accuracy': 1.0, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 1
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        1        |   29  |
 +--------------+-----------------+-------+
 [1 rows x 3 columns], 'f1_score': 1.0, 'precision': 1.0, 'recall': 1.0}

### Accuracy of this model against the South and North

In [11]:
print("Accuracy against South test set: "+str(svm_NorthEast.evaluate(South_test)['accuracy']))
print("Accuracy against North test set: "+str(svm_NorthEast.evaluate(North_test)['accuracy']))

Accuracy against South test set: 0.666666666667
Accuracy against North test set: 0.714285714286


## Logistic Classifier

In [12]:
logistic_NorthEast = graphlab.logistic_classifier.create(train_sframe, features=features, target='NorthEast', validation_set=None,
                                                      l1_penalty=0, l2_penalty=0)

### Accuracy of this model against the NorthEast test set

In [49]:
logistic_NorthEast.evaluate(NorthEast_test)['accuracy']

0.9655172413793104

### Accuracy of this model against the South and North test set

In [14]:
print("Accuracy against South test set: "+str(logistic_NorthEast.evaluate(South_test)['accuracy']))
print("Accuracy against North test set: "+str(logistic_NorthEast.evaluate(North_test)['accuracy']))

Accuracy against South test set: 0.866666666667
Accuracy against North test set: 0.857142857143


This is a better model than the one using SVM.

## Neural Net Classifier

In [38]:
neural_NorthEast = graphlab.neuralnet_classifier.create(train_sframe, features=features, target='NorthEast', validation_set=None)

Using network:

### network layers ###
layer[0]: FullConnectionLayer
  init_sigma = 0.01
  init_random = gaussian
  init_bias = 0
  num_hidden_units = 10
layer[1]: SigmoidLayer
layer[2]: FullConnectionLayer
  init_sigma = 0.01
  init_random = gaussian
  init_bias = 0
  num_hidden_units = 2
layer[3]: SoftmaxLayer
### end network layers ###

### network parameters ###
learning_rate = 0.001
momentum = 0.9
### end network parameters ###



### Accuracy of this model against the NorthEast test set

In [52]:
neural_NorthEast.evaluate(NorthEast_test)['accuracy']

1.0

### Accuracy of this model against the South and North test set

In [40]:
print("Accuracy against South test set: "+str(neural_NorthEast.evaluate(South_test)['accuracy']))
print("Accuracy against North test set: "+str(neural_NorthEast.evaluate(North_test)['accuracy']))

Accuracy against South test set: 0.0
Accuracy against North test set: 0.0


This model simply predicts all the examples as NorthEast. Neural nets require tens of thousands of data to train them to a good degree of accuracy.

# North Classifier

## SVM Classifier

In [24]:
svm_North = graphlab.svm_classifier.create(train_sframe, features=features, target='North', validation_set=None, max_iterations=30)

### Accuracy of this model against the North test set

In [25]:
svm_North.evaluate(North_test)['accuracy']

1.0

### Accuracy of this model against the NorthEast and South test set

In [26]:
print("Accuracy against NorthEast test set: "+str(svm_North.evaluate(NorthEast_test)['accuracy']))
print("Accuracy against South test set: "+str(svm_North.evaluate(South_test)['accuracy']))

Accuracy against NorthEast test set: 0.0
Accuracy against South test set: 0.0


As we can see, the SVM Classifier on trained on recognizing North features is not good as it wrongly predicts all the examples in the South and NorthEast test set as Norths. This can be attributed to having only around 38 training samples to train the SVM classifier. Let us now try logistic regression on the North test set

## Logistic Classifier

In [27]:
logistic_North = graphlab.logistic_classifier.create(train_sframe, features=features, target='North', validation_set=None,
                                                       l1_penalty=0, l2_penalty=0)

### Accuracy of this model against the North test set

In [28]:
logistic_North.evaluate(North_test)['accuracy']

0.5714285714285714

### Accuracy of this model against the South and NorthEast test set

In [29]:
print("Accuracy against NorthEast test set: "+str(logistic_North.evaluate(NorthEast_test)['accuracy']))
print("Accuracy against South test set: "+str(logistic_North.evaluate(South_test)['accuracy']))

Accuracy against NorthEast test set: 1.0
Accuracy against South test set: 0.866666666667


As we can see, because of the lack of data, this model does not have a good success rate in predicting Norths. But it does a good job in predicting **non**-Norths

## Neural Net Classifier

In [30]:
neural_North = graphlab.neuralnet_classifier.create(train_sframe, features=features, target='North', validation_set=None)

Using network:

### network layers ###
layer[0]: FullConnectionLayer
  init_sigma = 0.01
  init_random = gaussian
  init_bias = 0
  num_hidden_units = 10
layer[1]: SigmoidLayer
layer[2]: FullConnectionLayer
  init_sigma = 0.01
  init_random = gaussian
  init_bias = 0
  num_hidden_units = 2
layer[3]: SoftmaxLayer
### end network layers ###

### network parameters ###
learning_rate = 0.001
momentum = 0.9
### end network parameters ###



### Accuracy of this model against the North test set

In [46]:
neural_North.evaluate(North_test)

{'accuracy': 0.0, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 1
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |   7   |
 +--------------+-----------------+-------+
 [1 rows x 3 columns]}

### Accuracy of this model against the South and NorthEast test set

In [45]:
print("Accuracy against NorthEast test set: "+str(neural_North.evaluate(NorthEast_test)['accuracy']))
print("Accuracy against South test set: "+str(neural_North.evaluate(South_test)['accuracy']))

Accuracy against NorthEast test set: 1.0
Accuracy against South test set: 1.0


# South Classifier

## SVM Classifier

In [56]:
svm_South = graphlab.svm_classifier.create(train_sframe, features=features, target='South', validation_set=None, max_iterations=30)

### Accuracy of this model agains the South test set

In [60]:
svm_South.evaluate(NorthEast_test)

{'accuracy': 0.0, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 1
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |   95  |
 +--------------+-----------------+-------+
 [1 rows x 3 columns], 'f1_score': 0.0, 'precision': None, 'recall': 0.0}

### Accuracy of this model against the North and NorthEast test sets

In [62]:
print("Accuracy against North test set: "+str(svm_South.evaluate(North_test)['accuracy']))
print("Accuracy against NorthEast test set: "+str(svm_South.evaluate(NorthEast_test)['accuracy']))

Accuracy against North test set: 1.0
Accuracy against NorthEast test set: 1.0


## Logistic Classifier

In [9]:
logistic_South = graphlab.logistic_classifier.create(train_sframe, features=features, target='South', validation_set=None,
                                                    l1_penalty=0, l2_penalty=0)

### Accuracy of this model against the South test set

In [10]:
logistic_South.evaluate(South_test)

{'accuracy': 0.6666666666666666, 'auc': 0.0, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 2
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        1        |   10  |
 |      1       |        0        |   5   |
 +--------------+-----------------+-------+
 [2 rows x 3 columns], 'f1_score': 0.8, 'log_loss': 0.5524594737085454, 'precision': 1.0, 'recall': 0.6666666666666666, 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+-----+-----+----+---+
 | threshold | fpr | tpr | p  | n |
 +-----------+-----+-----+----+---+
 |    0.0    | nan | 1.0 | 15 | 0 |
 |   1e-05   | nan | 1.0 | 15 | 0 |
 |   2e-05   | nan | 1.0 | 15 | 0 |
 |   3e-05   | nan | 1.0 | 15 | 0 |
 |   4e-05   | nan | 1.0 | 15 | 0 |
 |   5e-05   | nan | 1.0 | 15 | 0 |
 |   6e-05   | nan | 1.0 | 15 | 0 |
 |  

### Accuracy of this model against the NorthEast and North test sets

In [11]:
print("Accuracy against North test set: "+str(logistic_South.evaluate(North_test)['accuracy']))
print("Accuracy against NorthEast test set: "+str(logistic_South.evaluate(NorthEast_test)['accuracy']))

Accuracy against North test set: 0.571428571429
Accuracy against NorthEast test set: 1.0
