In [30]:
import os 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.image as Image
import cv2
import time
from sklearn.decomposition import PCA
from skimage.measure import block_reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score


# Data import & Pre-processing --------------------------------------------------------------------

In [71]:
# Gets paths for images 
def get_catalog(file):
    species = []
    pics = []
    for root, subdir, files in os.walk(file):
        species.append(subdir)
        pics.append(files)
    species = species[0]
    
    catalog = {}
    for i in range(len(species)):
        catalog[species[i]] = pics[i+1]
    return catalog

In [72]:
def get_pics_colors(catalog, path, species_index, pic_index):
    count = 1
    new_catalog = {}
    for specie in catalog:
        blues, greens, reds = [], [], []
        for i in range(pic_index):
            colors = []
            blue, green, red = cv2.split(cv2.cvtColor(cv2.imread(path + specie + '/' + catalog[specie][i]), cv2.COLOR_BGR2RGB))
            res1 = cv2.resize(blue, dsize=(98, 64), interpolation=cv2.INTER_NEAREST)
            res2 = cv2.resize(green, dsize=(98, 64), interpolation=cv2.INTER_NEAREST)
            res3 = cv2.resize(red, dsize=(98, 64), interpolation=cv2.INTER_NEAREST)
            
            blues.append(res1.flatten())
            greens.append(res2.flatten())
            reds.append(res3.flatten())
            
        new_catalog[specie] = np.array([blues,greens,reds])
        if count == species_index:
            break
        count += 1
        
    return new_catalog
    

In [73]:
#Gets dictionary of pictures in grayscalse from paths
def get_pics_gray(catalog,path, species_index,pic_index):
    count = 1
    new_catalog = {}
    for specie in catalog:
        images =[]
        for i in range(pic_index):
            image = cv2.imread(path + specie + '/' + catalog[specie][i],cv2.IMREAD_GRAYSCALE)
            res = cv2.resize(image, dsize=(98, 64), interpolation=cv2.INTER_NEAREST)
            images.append(res.flatten())
        new_catalog[specie] = np.array(images)
        if count == species_index:
            break
        count += 1
        
    return new_catalog
    

In [74]:
# gets dictionary of pictures in rgb from paths
def get_pics(catalog,path, species_index,pic_index):
    count = 1
    new_catalog = {}
    for specie in catalog:
        images =[]
        for i in range(pic_index):
            image = cv2.imread(path + specie + '/' + catalog[specie][i])
            res = np.array(cv2.resize(image, dsize=(112, 112), interpolation=cv2.INTER_NEAREST))
            images.append(np.reshape(res,(12544,3)))
        new_catalog[specie] = images
        if count == species_index:
            break
        count += 1
        
    return new_catalog

In [75]:
# Plots sample pictures
def plot_sample(catalog, n_species,n_pics,flag):
    count = 1
    for specie in catalog:
        for i in range(n_pics):
            if flag:
                picture = plt.imshow(np.reshape(catalog[specie][i],(64,98)))
            else:
                picture = plt.imshow(np.reshape(catalog[specie][i],(112,112,3)))
            plt.show()
        if count == n_species:
            break
        count += 1
        

In [76]:
# Returns dictionary: species as keys, mxn array with pictures as rows and pixels as columns. 
def import_pics(path, nspec,npic):
    bird_catalog = get_catalog(path)
    #pics = get_pics_gray(bird_catalog,path, nspec, npic)
    pics = get_pics_colors(bird_catalog,path, nspec, npic)
    return pics

In [92]:
# Main function for data preprocessing ----------------------------------------------------------------------------------
seed = 2
np.random.RandomState(seed)


#Declare variables 
train_file, valid_file, test_file = 'data/birds/train/', 'data/birds/valid/', 'data/birds/test/'
num_species_train, num_pics_train = 10, 120
num_species_val, num_pics_val = 3, 5
num_species_test, num_pics_test = 10, 5

#get pictures
train_pictures = import_pics(train_file, num_species_train, num_pics_train)
valid_pictures = import_pics(valid_file, num_species_val, num_pics_val)
test_pictures = import_pics(test_file, num_species_test, num_pics_test)

#plot samples of imported images
#plot_sample(test_pictures,2,2,1)


# Dimensionality Reduction -------------------------------------------------------------------------

In [93]:
def eigendecomposition(images):
    species_values = {}
    for specie, values in images.items():
        color_values = []
        for color in values:
            mean = np.mean(color,axis=0)
            x = (color-mean)/ 255
            U, S, V = np.linalg.svd(x.T)
            color_values.append([U,S,mean])    
        species_values[specie] = color_values
    print("Done eigendecomposing.")
    return species_values

In [94]:
def get_eigenspecies(eigen,n_comp):
    species_eigenvectors = {}
    for spec, vals in  eigen.items():
        color_values = []
        for color in vals:
            U = color[0]
            mean = color[2]
            eigenvectors =  U[:,:n_comp]
            color_values.append((eigenvectors,mean))
        species_eigenvectors[spec] = color_values
    print("Done getting eigenspecies.")
    return species_eigenvectors

In [95]:
def combine_pics(pictures):
    test_images = []
    for spec,test_im in pictures.items():
        if len(test_images) == 0:
            test_images = test_im
        else:
            test_images = np.hstack([test_images,test_im])
    print("Done combining pics.")
    return test_images
    

In [96]:
def get_residuals(eigenspecies,test):
    residuals_matrix = []
    test = test / 255
    matrix_residual = []
    for i in range(3):
        residual_color = []
        for image in test[i]:
            per_species_residual = []
            for specie, metrics in eigenspecies.items():
                pre_image = image - (metrics[i][1]/255)
                residual = np.linalg.norm(pre_image - (metrics[i][0] @ metrics[i][0].T @ pre_image))**2
                per_species_residual.append(residual)
            residual_color.append(per_species_residual)
        matrix_residual.append(residual_color) 
    print("Done getting residuals.")
    return np.array(matrix_residual)

In [97]:
def get_classes(n_test_species,n_test_pics):
    temp_a = np.arange(n_test_species)
    temp_b = np.repeat(temp_a,n_test_pics)
    return np.array(temp_b +1)

In [None]:
start = time.time()
eigenvectors = eigendecomposition(train_pictures)
rgb_eigenspecies = get_eigenspecies(eigenvectors, 48)
combined_test_pics = combine_pics(test_pictures)
residuals = get_residuals(rgb_eigenspecies, combined_test_pics)
end = time.time()
print(end-start)

Done eigendecomposing.
Done getting eigenspecies.
Done combining pics.


In [91]:
l2_residuals = np.sqrt(np.sum(residuals**2,axis=0))
classification = l2_residuals.argmin(axis=1) + 1
true_classes = get_classes(num_species_test, num_pics_test)
print(accuracy_score(true_classes,classification))

0.6


In [78]:
new_residuals = []
for n in residuals:
    weights = (n / np.sum(n,axis=1)[:,None])
    new_residuals.append(weights)
new_residuals = np.array(new_residuals)

minimums = np.argmin(new_residuals,axis=0)
for i in range(minimums.shape[0]):
    for j in range(minimums.shape[1]):
        new_residuals[minimums[i][j],i,j] = 0
new_res = 1- new_residuals

final = []
for n in range(3):
    new = residuals[n] * new_res[n]
    final.append(new)

avg_residuals = np.sum(residuals,axis=0) / 3
classification = avg_residuals.argmin(axis=1) + 1
true_classes = get_classes(num_species_test, num_pics_test)
print(accuracy_score(true_classes,classification))

0.52


In [None]:
def eval_model(eigenspec,test_im,shape, n_test_species, n_test_pics):
    score = []
    principal_comp = []
    counter = np.arange(1,shape[0], 100)
    for i in counter:
        eigenspecies = get_eigenspecies(eigenspec, i)
        residuals = get_residuals(eigenspecies,test_im)
        classification = residuals.argmin(axis=1) + 1
        true_classes = get_classes(n_test_species,n_test_pics)
        score.append(accuracy_score(true_classes,classification))
        principal_comp.append(i)
        print(i," iteration complete.")
    return score, principal_comp
    

# Density Estimation -----------------------------------------------------------------------------------

# Traditional Classification --------------------------------------------------------------------------

# Deep Learning -----------------------------------------------------------------------------------------

In [None]:
for specie in pictures:
    for n in pictures[specie]:
        print(n.shape)