In [None]:
import util, perceptron, nb, samples
from sklearn.svm import SVC
import numpy as np

In [None]:
# Image dimensions for the two supported datasets, used as loop bounds by the
# feature extractors and passed to samples.loadDataFile.
DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT = 28, 28
FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT = 60, 70

In [None]:
def basicFeatureExtractorDigit(datum):
  """
  Build one binary feature per pixel of a digit image.

  For every x in range(DIGIT_DATUM_WIDTH) and y in range(DIGIT_DATUM_HEIGHT)
  the feature keyed by (x, y) is 1 when datum.getPixel(x, y) > 0 and 0
  otherwise (the original note describes 0 as white and 1 as gray/black).

  Returns the populated util.Counter.
  """
  # Kept from the original implementation; the returned value is not used below.
  _unused_pixels = datum.getPixels()

  pixel_flags = util.Counter()
  for col in range(DIGIT_DATUM_WIDTH):
    for row in range(DIGIT_DATUM_HEIGHT):
      pixel_flags[(col, row)] = 1 if datum.getPixel(col, row) > 0 else 0
  return pixel_flags

def fourSquareFeatureExtractorDigit(datum):
  """
  Returns one binary feature per non-overlapping 4x4 tile of a digit image.

  Tiles start at every x in range(0, DIGIT_DATUM_WIDTH, 4) and
  y in range(0, DIGIT_DATUM_HEIGHT, 4). The feature keyed by the tile's
  starting (x, y) is 1 if datum.getPixel(x + k, y + l) > 0 for any
  k, l in range(4), and 0 otherwise.

  Note: a tile reads up to 3 pixels past its start, so this relies on the
  digit dimensions being multiples of 4 (both are 28 in this notebook).
  """
  features = util.Counter()
  for x in range(0, DIGIT_DATUM_WIDTH, 4):
    for y in range(0, DIGIT_DATUM_HEIGHT, 4):
      # any() short-circuits on the first positive pixel, visiting offsets in
      # the same k-then-l order as an explicit nested scan would.
      tile_is_set = any(datum.getPixel(x + k, y + l) > 0
                        for k in range(4) for l in range(4))
      features[(x, y)] = 1 if tile_is_set else 0
  return features

def basicFeatureExtractorFace(datum):
  """
  Build one binary feature per pixel of a face image.

  For every x in range(FACE_DATUM_WIDTH) and y in range(FACE_DATUM_HEIGHT)
  the feature keyed by (x, y) is 1 when datum.getPixel(x, y) > 0 and 0
  otherwise (the original note describes 1 as an edge and 0 as no edge).

  Returns the populated util.Counter.
  """
  # Kept from the original implementation; the returned value is not used below.
  _unused_pixels = datum.getPixels()

  pixel_flags = util.Counter()
  for col in range(FACE_DATUM_WIDTH):
    for row in range(FACE_DATUM_HEIGHT):
      pixel_flags[(col, row)] = 1 if datum.getPixel(col, row) > 0 else 0
  return pixel_flags

def fiveSquareFeatureExtractorFace(datum):
  """
  Returns one binary feature per non-overlapping 5x5 tile of a face image.

  Tiles start at every x in range(0, FACE_DATUM_WIDTH, 5) and
  y in range(0, FACE_DATUM_HEIGHT, 5). The feature keyed by the tile's
  starting (x, y) is 1 if datum.getPixel(x + k, y + l) > 0 for any
  k, l in range(5), and 0 otherwise.

  Note: a tile reads up to 4 pixels past its start, so this relies on the
  face dimensions being multiples of 5 (60 and 70 in this notebook).
  """
  features = util.Counter()
  for x in range(0, FACE_DATUM_WIDTH, 5):
    for y in range(0, FACE_DATUM_HEIGHT, 5):
      # any() short-circuits on the first positive pixel, visiting offsets in
      # the same k-then-l order as an explicit nested scan would.
      tile_is_set = any(datum.getPixel(x + k, y + l) > 0
                        for k in range(5) for l in range(5))
      features[(x, y)] = 1 if tile_is_set else 0
  return features

In [None]:
# Experiment settings read by the cells below.
ITERATIONS = 10  # not referenced by any other cell visible in this notebook
DATASET = 'digits'  # 'digits' or 'faces'
TRAIN_PERCENT, TEST_PERCENT = 100, 100  # percentage of each split's samples to load
CLASSIFIER = 'NaiveBayes'  # 'Perceptron' or 'NaiveBayes'
UseNewFeatureExtractor = True  # True -> NewFeatures table, False -> features table


In [None]:
# Classifier classes selectable through CLASSIFIER.
classifiers = dict(
    Perceptron=perceptron.Perceptron,
    NaiveBayes=nb.NaiveBayesClassifier,
)

# Per-pixel feature extractors, keyed by DATASET.
features = dict(
    digits=basicFeatureExtractorDigit,
    faces=basicFeatureExtractorFace,
)

# Tile-based feature extractors, keyed by DATASET.
NewFeatures = dict(
    digits=fourSquareFeatureExtractorDigit,
    faces=fiveSquareFeatureExtractorFace,
)

In [None]:
# Resolve dataset-dependent settings in one place. Anything other than
# 'digits' takes the faces values here.
if DATASET == 'digits':
    DATUM_WIDTH, DATUM_HEIGHT = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    legalLabels = list(range(10))
    numTraining, numTest = 5000, 1000
else:
    DATUM_WIDTH, DATUM_HEIGHT = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
    legalLabels = list(range(2))
    numTraining, numTest = 451, 150

# Scale the sample counts by the configured percentages (truncating to int).
numTraining = int(numTraining * TRAIN_PERCENT / 100)
numTest = int(numTest * TEST_PERCENT / 100)

# Pick the extractor table first, then the extractor for the dataset.
getFeatures = (NewFeatures if UseNewFeatureExtractor else features)[DATASET]

In [None]:
# Display the image width resolved for the selected DATASET.
DATUM_WIDTH

In [None]:
# Load raw images and labels for the selected dataset. Every split is read
# with the same image dimensions; validation and test both load numTest items.
if(DATASET=="faces"):
    rawTrainingData = samples.loadDataFile("data/facedata/facedatatrain", numTraining,DATUM_WIDTH,DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", numTraining)
    # Validation must come from its own split, as in the digits branch below;
    # previously these two lines re-read the training files, so validation was
    # just the first numTest training samples.
    # NOTE(review): assumes facedatavalidation / facedatavalidationlabels sit
    # next to the train/test files in data/facedata -- confirm.
    rawValidationData = samples.loadDataFile("data/facedata/facedatavalidation", numTest,DATUM_WIDTH,DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", numTest)
    rawTestData = samples.loadDataFile("data/facedata/facedatatest", numTest,DATUM_WIDTH,DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", numTest)
else:
    rawTrainingData = samples.loadDataFile("data/digitdata/trainingimages", numTraining,DATUM_WIDTH,DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("data/digitdata/validationimages", numTest,DATUM_WIDTH,DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("data/digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("data/digitdata/testimages", numTest,DATUM_WIDTH,DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)

In [None]:
# Run the selected feature extractor over every raw image of each split.
trainingData = [getFeatures(datum) for datum in rawTrainingData]
validationData = [getFeatures(datum) for datum in rawValidationData]
testData = [getFeatures(datum) for datum in rawTestData]

In [None]:
# Instantiate the classifier class selected by CLASSIFIER, passing it the
# list of legal labels for the current dataset.
classifier = classifiers[CLASSIFIER](legalLabels)

In [None]:
# Train (the validation split is handed to train() as well), classify the
# test split, and report how many predictions match the test labels.
classifier.train(trainingData, trainingLabels, validationData, validationLabels)
guesses = classifier.classify(testData)
correct = sum(1 for i in range(len(testLabels)) if guesses[i] == testLabels[i])
print(
    str(correct),
    ("correct out of " + str(len(testLabels)) + " (%.1f%%).")
    % (100.0 * correct / len(testLabels)),
)

In [None]:
def modifyFeatures(datapoint, width=None, height=None):
    """
    Convert an (x, y)-keyed feature mapping into a dense 2-D NumPy array.

    The feature extractors in this notebook key features by ``(x, y)`` with
    ``x`` ranging over the image width and ``y`` over the image height. The
    returned array has one row per ``y`` and one column per ``x``, so
    ``result[y][x] == datapoint[(x, y)]``.

    Args:
        datapoint: mapping indexed by ``(x, y)`` tuples.
        width: number of columns; defaults to the global ``DATUM_WIDTH``.
        height: number of rows; defaults to the global ``DATUM_HEIGHT``.

    Returns:
        ``np.ndarray`` of shape ``(height, width)``.
    """
    if width is None:
        width = DATUM_WIDTH
    if height is None:
        height = DATUM_HEIGHT
    # Index as (x, y) = (column, row). Indexing as (row, column) only lines up
    # with the extractors' keys when the image is square; for non-square images
    # it reads keys outside the image and skips part of it.
    return np.array([[datapoint[(x, y)] for x in range(width)]
                     for y in range(height)])

In [None]:
# Stack the per-sample 2-D grids into one 3-D array, then flatten each grid
# into a single row so the result is (samples, features).
newTrainingData = np.array([modifyFeatures(sample) for sample in trainingData])
nsamples, nx, ny = newTrainingData.shape
d2_train_dataset = newTrainingData.reshape(nsamples, nx * ny)

In [None]:
# Same stacking and flattening as for the training split, applied to the
# test features. Note that nsamples, nx and ny are overwritten here.
newTestData = np.array([modifyFeatures(sample) for sample in testData])
nsamples, nx, ny = newTestData.shape
d2_test_dataset = newTestData.reshape(nsamples, nx * ny)

In [None]:
# Display the shape of the flattened training matrix.
d2_train_dataset.shape

In [None]:
# Fit an SVC constructed with no arguments (library defaults) on the
# flattened training features and the training labels.
model = SVC().fit(d2_train_dataset, trainingLabels)

In [None]:
# Display the flattened feature row of the test sample at index 1.
d2_test_dataset[1]

In [None]:
# Predict a label for every flattened test row and keep the result as a plain
# list. This overwrites the guesses produced by the earlier classifier.
guesses = list(model.predict(d2_test_dataset))

In [None]:
# Display how many predictions were produced.
len(guesses)

In [None]:
# Count the predictions that match the test labels and report the accuracy.
correct = sum(1 for i in range(len(testLabels)) if guesses[i] == testLabels[i])
print(
    str(correct),
    ("correct out of " + str(len(testLabels)) + " (%.1f%%).")
    % (100.0 * correct / len(testLabels)),
)