# [CW2](https://www.cs.bris.ac.uk/Teaching/Resources/COMS21202/cw2/cw2.html)
## TODO
- Import train data
- Find features
- Feature selection
- Build classifier
- Show decision boundaries
- Create test data
- Classify test data

## Potential features
- Edges
- Corners
- Black pixel count
- Texture detection using histograms
- Number of shapes (find joined shapes), e.g. 2 shapes is likely an "i"
- Width-to-height ratio of the cropped image

**In the Fourier domain**:
- Rectangle at the corner: differentiates "t" and "s"
- Calculate the total magnitude in an arc around the centre at a particular radius

## Reading list
- [A Fourier Descriptor Based Character Recognition Engine](http://cs.boisestate.edu/~tim/papers/spie-jared.pdf) - includes list of features
- [Character Recognition Using Fourier Descriptors](http://cs.boisestate.edu/~amit/mscs/samples/Jared_Hopkins_Project_Report.pdf) - Very long but good detail + experiments

In [38]:
import re
import glob
import scipy.misc
import numpy as np
import matplotlib.pyplot as plt
from scipy import fftpack
import numpy.fft


%matplotlib inline
# notebook
# pylab is used below to reach matplotlib's rcParams.
import matplotlib.pylab as pylab
# Set the default figure size (width, height) used by subsequent plots.
pylab.rcParams['figure.figsize'] = (64.0, 48.0)


In [34]:
# Read our images

# Read image and convert to binary
def load_img(filename):
    """Load an image file and return it as a binary integer array.

    The file is read with ``flatten=True`` and ``mode='L'``. Pixels whose
    value is exactly 0 map to 1; every other pixel maps to 0.

    NOTE(review): ``scipy.misc.imread`` appears to exist only in old SciPy
    releases -- confirm the pinned SciPy version before upgrading.
    """
    greyscale = scipy.misc.imread(filename, flatten=True, mode='L')
    # Only zero-valued pixels are kept as 1; 255 and every other grey level
    # end up as 0.
    ink_mask = greyscale == 0
    return ink_mask.astype(int)

# Show a binary image
def show_img(img):
    """Display *img* on a fresh figure using the 'Greys' colour map."""
    axes = plt.subplots()[1]
    axes.imshow(img, cmap='Greys')
    plt.show()
    
# Calc magnitude spectrum
def calc_mag_spec(img):
    """Return the centred log-magnitude spectrum of a 2-D image.

    Computes the 2-D FFT of *img*, shifts the zero-frequency term to the
    centre of the array and returns ``20 * ln(|F|)`` element-wise.

    Bins whose magnitude is exactly zero (for example every bin of an
    all-zero image) are clamped to the smallest positive normal float of the
    magnitude's dtype, so the result is always finite instead of containing
    ``-inf``. Non-zero bins are returned unchanged.
    """
    magnitude = np.abs(np.fft.fftshift(np.fft.fft2(img)))
    # Use the dtype's own `tiny` so the floor does not underflow to zero if
    # the FFT result is single precision.
    floor = np.finfo(magnitude.dtype).tiny
    return 20 * np.log(np.maximum(magnitude, floor))

In [80]:
class Character():
    """A single character image together with its magnitude spectrum.

    Attributes set by ``__init__``:
        filename:    the path passed in.
        img:         result of ``load_img(filename)``.
        spec:        result of ``calc_mag_spec(img)``.
        label:       the given label, upper-cased when it is a string.
        filename_id: the given id, upper-cased when it is a string.
    """

    def __init__(self, filename, label=None, filename_id=None):
        self.filename = filename
        self.img = load_img(filename)
        self.spec = calc_mag_spec(self.img)
        # Dataset lookups upper-case the key before comparing, so both
        # identifiers are stored upper-cased; otherwise a lowercase id could
        # never be found.
        self.label = self._upper_if_str(label)
        self.filename_id = self._upper_if_str(filename_id)

    @staticmethod
    def _upper_if_str(value):
        """Return ``value.upper()`` for strings, any other value unchanged."""
        return value.upper() if isinstance(value, str) else value

    def show(self):
        """Display the binary image."""
        show_img(self.img)

    def show_spec(self):
        """Display the magnitude spectrum."""
        show_img(self.spec)

    def show_with_spec(self):
        """Display the image and its magnitude spectrum side by side."""
        fig = plt.figure()
        for position, data in enumerate((self.img, self.spec), start=1):
            fig.add_subplot(1, 2, position).imshow(data, cmap='Greys')

        plt.tight_layout()
        plt.show()

    def __repr__(self):
        return str(self.label)
        
class Dataset():
    """A list-like collection of characters with label and id lookups.

    Items are expected to expose ``label``, ``filename_id``, ``img`` and
    ``spec`` attributes (as ``Character`` does).
    """

    def __init__(self, dataset):
        # The list is stored by reference, not copied.
        self.dataset = dataset

    def label_is(self, label):
        """Return a new Dataset of the items whose label equals *label*.

        *label* is upper-cased before comparison.
        """
        wanted = label.upper()
        return Dataset([c for c in self.dataset if c.label == wanted])

    def show(self):
        """Plot every image in one column, titled with its filename_id."""
        fig = plt.figure()
        for i, c in enumerate(self.dataset, start=1):
            ax = fig.add_subplot(len(self.dataset), 1, i)
            ax.imshow(c.img, cmap='Greys')
            ax.axis('off')
            ax.set_title(c.filename_id)

        plt.tight_layout()
        plt.show()

    def show_with_spec(self):
        """Plot every image next to its magnitude spectrum, one row each."""
        fig = plt.figure()
        rows = len(self.dataset)
        for i, c in enumerate(self.dataset):
            ax = fig.add_subplot(rows, 2, i*2+1)
            ax.imshow(c.img, cmap='Greys')
            ax.axis('off')
            ax.set_title(c.filename_id)

            ax = fig.add_subplot(rows, 2, i*2+2)
            ax.imshow(c.spec, cmap='Greys')
            ax.axis('off')
            ax.set_title('%s magnitude spectrum' % c.filename_id)

        plt.tight_layout()
        plt.subplots_adjust(right=0.3)
        plt.show()

    def append(self, character):
        """Append *character* and return this Dataset so calls can chain."""
        self.dataset.append(character)
        return self

    def __iter__(self):
        return iter(self.dataset)

    def __len__(self):
        return len(self.dataset)

    def __contains__(self, x):
        return x in self.dataset

    def __setitem__(self, key, value):
        self.dataset[key] = value

    def __getitem__(self, key):
        """Look up items by position, slice, label or filename id.

        * ``int``   -> the item at that index.
        * ``slice`` -> a new Dataset over the sliced list.
        * ``str`` of length 1 -> a new Dataset of items with that label.
        * any other ``str`` -> the first item whose filename_id matches,
          ignoring case, or ``None`` when there is no match.

        Raises:
            TypeError: for any other key type.
        """
        if isinstance(key, slice):
            return Dataset(self.dataset[key])
        if isinstance(key, int):
            return self.dataset[key]
        if isinstance(key, str):
            if len(key) == 1:
                return self.label_is(key)
            wanted = key.upper()
            for c in self.dataset:
                file_id = c.filename_id
                # filename_id may be None for files whose name did not parse.
                if isinstance(file_id, str) and file_id.upper() == wanted:
                    return c
            return None
        # Wrap in a tuple so tuple keys are formatted rather than unpacked.
        raise TypeError('unknown key %s' % (key,))

    def __delitem__(self, key):
        del self.dataset[key]

    def __repr__(self):
        # str() keeps this working when a label is None.
        return "".join(str(c.label) for c in self.dataset)
    
    

In [None]:
# Load training dataset
def load_data(path):
    """Load every file matching the glob pattern *path* into a Dataset.

    A file whose base name looks like ``<letter><digits>.GIF`` (case
    insensitive) gets the upper-cased letter as its label and
    ``<LETTER><digits>`` as its filename id. Any other file is still loaded,
    with both set to ``None``.

    Files are processed in sorted path order so the resulting Dataset order
    is reproducible between runs.
    """
    # Accept '/', '\\' or no directory at all before the name, and require
    # the extension to end the path.
    name_pattern = re.compile(r"(?:^|[\\/])([a-z])([0-9]+)\.GIF$",
                              re.IGNORECASE)
    dataset = Dataset([])
    # glob makes no ordering guarantee, hence the explicit sort.
    for filename in sorted(glob.glob(path)):
        label = None
        filename_id = None
        m = name_pattern.search(filename)
        if m:
            # Upper-case before building the id so that id lookups, which
            # upper-case the key, can find it.
            label = m.group(1).upper()
            filename_id = "%s%s" % (label, m.group(2))
        dataset.append(Character(filename, label=label, filename_id=filename_id))
    return dataset

# Load the training set, then plot one sample each of S, T and V next to its
# magnitude spectrum.
train_dataset = load_data('characters_data/*.GIF')
for _sample_id in ('S1', 'T1', 'V1'):
    train_dataset[_sample_id].show_with_spec()
