In [15]:
import numpy as np
from numpy import genfromtxt

from scipy import *
from scipy.linalg import norm, pinv, det
from scipy.optimize import minimize
import csv
import cvxopt as opt
from cvxopt import matrix, solvers

import matplotlib.pyplot as plt

from PIL import Image
import matplotlib.image as mping

import pandas as pd
from __future__ import division


In [16]:
#Read the data
Xtr = genfromtxt('Xtr.csv', delimiter = ',')
d = Xtr.shape[1]

# Drop the last parsed column of the training matrix.
# NOTE(review): presumably an empty column produced by a trailing delimiter
# in Xtr.csv - confirm against the file.
Xtr = np.delete(Xtr, (d-1), axis = 1)

Ytr = pd.read_csv('Ytr.csv')
Ytr = np.array(Ytr)
# Drop the first column so that only the remaining column(s) are kept.
# NOTE(review): presumably the first column is an Id and the second the label - confirm.
Ytr = np.delete(Ytr, (0), axis = 1)

# Number of classes. Train_Model and FindLabels iterate over range(NbLabel)
# and compare those integers with Ytr, so labels are assumed to be
# 0 .. NbLabel-1. Defined here so the notebook runs from a fresh kernel.
NbLabel = int(Ytr.max()) + 1

#Xtr is a 5000 * 3072 dimensional matrix

#Ytr is a 5000*1 dimensional matrix



In [17]:
#This function crops the image and detects the shape of the target object

def Crop(imagecol, side=32, margin=0.05):
    """Keep, row by row, the pixel span that stands out from the mean intensity.

    The flat image is reshaped to (3, side, side) and a grey-level image is
    built as 0.21*ch0 + 0.72*ch1 + 0.07*ch2. A pixel is "selected" when its
    grey level is above ``mean + margin*(max - mean)`` or below
    ``mean - margin*(mean - min)``. For every row with at least two selected
    pixels, the columns from the first to the last selected pixel (inclusive)
    are kept for the three channels; rows with fewer selected pixels are dropped.

    Parameters
    ----------
    imagecol : array-like with 3*side*side values (channel-major).
    side : int, image height and width in pixels (default 32).
    margin : float, fraction of the mean-to-extreme distance used as the
        selection threshold (default 0.05).

    Returns
    -------
    numpy.ndarray of shape (3, nbKept): the kept pixels of each channel,
    concatenated row after row. ``nbKept`` is 0 when no row qualifies.
    """
    imagecol = np.reshape(imagecol, (3, side, side))
    image_bw = 0.21*imagecol[0, :, :] + 0.72*imagecol[1, :, :] + 0.07*imagecol[2, :, :]

    # np.mean instead of the bare `mean` that only existed through
    # `from scipy import *` on old SciPy versions.
    m = np.mean(image_bw)
    threspos = m + (image_bw.max() - m) * margin
    thresneg = m - (m - image_bw.min()) * margin

    # Collect the kept slices per channel and concatenate once at the end
    # instead of re-allocating the arrays for every row.
    kept = ([], [], [])
    for line in range(side):
        row = image_bw[line, :]
        select_indices = np.flatnonzero((row > threspos) | (row < thresneg))
        if len(select_indices) > 1:
            firstIndex = select_indices[0]
            secondIndex = select_indices[-1] + 1
            for channel in range(3):
                kept[channel].append(imagecol[channel, line, firstIndex:secondIndex])

    if not kept[0]:
        # No row qualified: return an empty crop rather than failing.
        return np.zeros((3, 0))
    return np.array([np.concatenate(parts) for parts in kept], dtype=float)

In [18]:
#The block to display images

def displayImage(image):
    """Draw `image` with matplotlib's 'Greys' colormap, then show the figure."""
    plt.imshow(image, cmap='Greys')
    plt.show()

In [19]:
#Preprocessing images
#Code to transform images into nbBins*nbBins*nbBins dimensional histograms


def Compute_Histogram(Im, nbBins, X, ranges=None):
    """Normalised joint histogram of the three channels of one image.

    Parameters
    ----------
    Im : array of shape (3, nbPixels); rows 0, 1, 2 are the three channels.
    nbBins : int, number of equal-width bins per channel.
    X : 2-D array with one flattened image per row (channel-major, i.e. each
        row reshapes to (3, nbPixels)). Its per-channel minimum and maximum
        define the histogram range. Ignored when `ranges` is given.
    ranges : optional sequence of three (min, max) pairs, one per channel.
        Pass it to avoid rescanning `X` on every call.

    Returns
    -------
    numpy.ndarray of length nbBins**3: the bin counts divided by the number
    of counted pixels (flattened in C order, channel 0 varying slowest).
    Pixels outside the range are not counted; if no pixel is counted the
    histogram is all zeros.
    """
    Im = np.asarray(Im, dtype=float)

    if ranges is None:
        # One row per image, split into its three channels.
        Xtilde = np.reshape(X, (np.shape(X)[0], 3, -1))
        ranges = [(np.min(Xtilde[:, c, :]), np.max(Xtilde[:, c, :])) for c in range(3)]

    # histogramdd expects one sample per row: (nbPixels, 3).
    pixels = np.column_stack((Im[0, :], Im[1, :], Im[2, :]))

    # nbBins bins per channel; the last bin includes the upper edge, so
    # pixels equal to the maximum are counted.
    h, _ = np.histogramdd(pixels, bins=nbBins, range=ranges)

    nbpixel = h.sum()
    if nbpixel > 0:
        h = h / nbpixel
    return np.reshape(h, nbBins**3)


In [20]:
def Quadratic_Kernel(x,y):
    """Degree-2 polynomial kernel: (np.dot(x, y) + 1) squared."""
    shifted_inner = np.dot(x, y) + 1
    return shifted_inner ** 2

    
    
def KernelComputation(x):
    """Gram matrix of the rows of `x` under Quadratic_Kernel.

    Returns a square array with one row and one column per row of `x`;
    entry (i, j) is Quadratic_Kernel(x[i], x[j]).
    """
    nbSamples = x.shape[0]
    gram = np.zeros((nbSamples, nbSamples))
    for row in range(nbSamples):
        # Evaluate the upper triangle (diagonal included) and mirror it.
        for column in range(row, nbSamples):
            entry = Quadratic_Kernel(x[row,:], x[column,:])
            gram[column, row] = entry
            gram[row, column] = entry
    return gram

In [21]:
#I modified the Quadratic_Kernel a bit to test the efficiency of a polynomial kernel.
#Clearly we should use a polynomial kernel with a degree greater than or equal to 2.
#Return x (images) and y corresponding to label1 and label2
def Fill_x_y_label(X, label1,label2, K):
    """Extract the two-class training subset for the pair (label1, label2).

    Rows are selected by comparing the module-level `Ytr` with each label;
    the label1 rows come first, followed by the label2 rows.

    Returns
    -------
    x : the selected rows of `X`.
    y : +1 for each label1 row, -1 for each label2 row.
    KTrain : `K` restricted to the selected rows and columns, same order.
    """
    rowsFirst = np.where(Ytr==label1)[0]
    rowsSecond = np.where(Ytr==label2)[0]
    rowsBoth = np.concatenate([rowsFirst, rowsSecond])

    x = np.concatenate([X[rowsFirst,:], X[rowsSecond,:]])

    # Select the rows first, then the columns, of the full kernel matrix.
    KTrain = K[rowsBoth,:][:,rowsBoth]

    y = np.concatenate([np.ones(rowsFirst.shape[0]), -1*np.ones(rowsSecond.shape[0])])
    return x, y, KTrain

In [22]:
#Optimization model
def Training_model(y, Kernel, Lambda):
    """Solve the two-class SVM quadratic program with cvxopt.

    With cvxopt's convention (minimise 1/2 x'Px + q'x subject to Gx <= h,
    Ax = b) the problem built here is:

        minimise    alpha' Kernel alpha - 2 y' alpha
        subject to  0 <= y_i * alpha_i <= 1 / (2 * Lambda * n)
                    sum_i alpha_i = 0

    Parameters
    ----------
    y : 1-D array of n labels (+1 / -1).
    Kernel : (n, n) kernel matrix of the training points.
    Lambda : regularisation parameter.

    Returns
    -------
    numpy.ndarray of shape (n, 1) holding the solver's `x`. The solver
    status is printed; it is not checked.
    """
    n = y.shape[0]
    upper = 1/(2*float(Lambda*n))

    # tc='d' forces double matrices, which is what solvers.qp works with.
    P = matrix(2*Kernel, tc='d')
    q = matrix(-2*y, tc='d')
    # Rows 0..n-1:  -y_i*alpha_i <= 0 ; rows n..2n-1:  y_i*alpha_i <= upper
    G = matrix(np.concatenate(((-1)*np.diag(y),np.diag(y))), tc='d')
    h = matrix(np.concatenate([np.zeros(y.shape), upper*np.ones(y.shape)]), tc='d')
    A = matrix(np.ones(n),(1,n))
    b = matrix(0.0)
    sol = solvers.qp(P,q,G,h,A,b)
    # Call form of print: valid on both Python 2 and Python 3.
    print(sol['status'])
    return np.array(sol['x'])


#Compute the decision value used to obtain the new label
def SVM_function(alpha, xtrain ,x_querry):
    """Return sum_i alpha[i] * Quadratic_Kernel(x_querry, xtrain[i, :]).

    The sum runs over every entry of `alpha`; no bias term is added here.
    """
    value = 0
    for i, weight in enumerate(alpha):
        value += weight*Quadratic_Kernel(x_querry, xtrain[i,:])
    return value


In [23]:
#Block to perform the optimization with the Quadratic_Kernel

#Just a test for the one vs one classifier

def Train_Model(Histogram, Lambda):
    """Train one SVM per pair of labels (label1 < label2).

    Parameters
    ----------
    Histogram : 2-D array, one feature vector per training sample.
    Lambda : regularisation parameter forwarded to Training_model.

    Returns
    -------
    AlphaClassifiers : array (NbLabel, NbLabel, nbSamples). For each pair,
        the first entries hold the pair's alpha vector in the row order
        returned by Fill_x_y_label; the remaining entries stay zero.
    b : array (NbLabel, NbLabel) of bias terms, 0 where no sample lies
        strictly inside the box constraint.
    K : full kernel matrix of `Histogram`.

    Reads the module-level `NbLabel`.
    """
    AlphaClassifiers = np.zeros((NbLabel,NbLabel,Histogram.shape[0]))
    b = np.zeros((NbLabel,NbLabel))
    K = KernelComputation(Histogram)
    for label1 in range(NbLabel):
        for label2 in range(label1 + 1, NbLabel):
            xTrain, yTrain, KTrain = Fill_x_y_label(Histogram, label1,label2,K)
            # Training_model returns an (n, 1) column; work with a flat vector.
            alpha = np.ravel(Training_model(yTrain, KTrain, Lambda))
            nbPair = alpha.shape[0]

            # Samples strictly inside the box 0 < y_i*alpha_i < 1/(2*Lambda*n).
            # NOTE(review): strict comparisons without tolerance; a numerical
            # solver rarely returns values exactly on the bounds - consider
            # adding a tolerance.
            signed = alpha*yTrain
            onMargin = (signed > 0) & (signed < 1.0/(2*Lambda*nbPair))
            count = np.count_nonzero(onMargin)
            if count > 0:
                # sum_j alpha_j K(x_i, x_j) is row i of KTrain times alpha, so
                # the kernel values already computed are reused.
                residual = yTrain[onMargin] - np.dot(KTrain[onMargin,:], alpha)
                b[label1,label2] = residual.sum()/count

            # A pair only uses the samples of its two classes, so alpha is
            # shorter than the slot: fill the leading entries only.
            AlphaClassifiers[label1, label2, :nbPair] = alpha
    return AlphaClassifiers, b, K

In [24]:
def FindLabels(Histogram, Image,threshold, K, AlphaClassifiers,b):
    """Predict a label for one query by one-vs-one voting with run-off rounds.

    Parameters
    ----------
    Histogram : training feature matrix (one row per training sample).
    Image : feature vector of the query, in the same feature space as the
        rows of `Histogram`.
    threshold : fraction of the possible wins a class needs to stay a candidate.
    K : full training kernel matrix (forwarded to Fill_x_y_label).
    AlphaClassifiers : array (NbLabel, NbLabel, nbSamples); for a pair, only
        the first `xTrain.shape[0]` entries are used as its alpha vector.
    b : array (NbLabel, NbLabel) of bias terms.

    Returns
    -------
    The selected label index. If no class reaches the threshold, the class
    with the most votes in the first round is returned.

    Reads the module-level `NbLabel`.
    """
    # Pairwise decision values are computed once here and reused by the
    # run-off rounds below.
    decision = np.zeros((NbLabel,NbLabel))
    delta = np.zeros((NbLabel,1))
    for label1 in range(NbLabel):
        for label2 in range(label1 + 1, NbLabel):
            xTrain, yTrain, KTrain = Fill_x_y_label(Histogram, label1,label2, K)
            # Keep only the coefficients that match the pair's training rows.
            alpha = AlphaClassifiers[label1,label2,:xTrain.shape[0]]
            value = SVM_function(alpha, xTrain, Image) + b[label1,label2]
            decision[label1,label2] = value
            if value > 0:
                delta[label1,0] += 1
            if value < 0:
                delta[label2,0] += 1

    firstlabel = np.argmax(delta[:,0])
    bestClass = np.argwhere(delta[:,0] >= threshold * (NbLabel-1))[:,0]

    # Run-off: vote again among the remaining candidates only.
    while bestClass.shape[0] > 1:
        delta = np.zeros((NbLabel,1))
        for i in bestClass:
            for j in bestClass:
                if(i < j):
                    # decision[i, j] belongs to the pair (i, j) itself.
                    if decision[i,j] > 0:
                        delta[i,0] += 1
                    else:
                        delta[j,0] += 1

        survivors = np.argwhere(delta[:,0] >= threshold * (bestClass.shape[0]-1))[:,0]
        if survivors.shape[0] >= bestClass.shape[0]:
            # The candidate set did not shrink, so another round would give
            # the same result forever: settle on the most voted survivor.
            return survivors[np.argmax(delta[survivors,0])]
        bestClass = survivors

    if(bestClass.shape[0] == 0):
        labelFound = firstlabel
    else:
        labelFound = bestClass[0]
    return labelFound
            

In [25]:
# Load the test matrix and drop its last parsed column, as done for Xtr.
Xte = genfromtxt('Xte.csv', delimiter=',')
d = Xte.shape[1]
Xte = np.delete(Xte, d - 1, axis=1)

In [27]:
def Classify(InputX, InputY, InputTest, nbBins, Lambda, threshold):
    """Train on `InputX`, predict `InputTest` and write 'test.csv'.

    Parameters
    ----------
    InputX : 2-D array, one flattened 3-channel training image per row.
    InputY : training labels. Not read here: Fill_x_y_label uses the
        module-level `Ytr`, so `InputX` must be row-aligned with it.
    InputTest : 2-D array, one flattened 3-channel test image per row.
    nbBins : number of histogram bins per channel.
    Lambda : SVM regularisation parameter.
    threshold : voting threshold forwarded to FindLabels.

    Writes 'test.csv' with header Id,Prediction and one row per test image
    (Id starts at 1).
    """
    import sys

    n = InputX.shape[0]
    InputHistogram = np.zeros((n,nbBins**3))
    for i in range(n):
        Im = np.reshape(InputX[i,:],(3,-1))
        InputHistogram[i,:] = Compute_Histogram(Im, nbBins,InputX)
    AlphaClassifiers, b, K = Train_Model(InputHistogram, Lambda)

    # Size the test arrays from the test set and allocate the predictions
    # before they are filled.
    nbTest = InputTest.shape[0]
    TestHistogram = np.zeros((nbTest,nbBins**3))
    Yte = np.zeros(nbTest)
    for i in range(nbTest):
        Im = np.reshape(InputTest[i,:],(3,-1))
        # The training matrix is passed as the range reference so that test
        # features use the same binning as the training features.
        TestHistogram[i,:] = Compute_Histogram(Im, nbBins,InputX)
        # The classifiers were trained on histograms, so the query must be
        # the histogram of the test image, not its raw pixels.
        Yte[i] = FindLabels(InputHistogram, TestHistogram[i,:], threshold, K, AlphaClassifiers,b)

    # csv.writer needs a binary file on Python 2 and a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] >= 3:
        f = open('test.csv', 'w', newline='')
    else:
        f = open('test.csv', 'wb')
    with f:
        writer = csv.writer(f)
        writer.writerow(['Id','Prediction'])
        for i, val in enumerate(Yte):
            writer.writerow([i+1,int(val)])
    
    

In [None]:
# Full run: 16 bins per channel, regularisation 0.00001, voting threshold 0.7.
Classify(Xtr, Ytr, Xte, nbBins=16, Lambda=0.00001, threshold=0.7)