In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from numpy import genfromtxt
import pywt
from datetime import time, datetime, timedelta
import random
import itertools
import io
import json
import cvxopt

from classification import *
from kernel import *

In [3]:
# Load the raw CSV matrices (paths are relative to the notebook directory).
# Column index 3072 is dropped from both feature matrices
# (presumably an empty trailing column produced by a trailing comma -- TODO confirm).
Xtr = np.delete(np.genfromtxt('../data/Xtr.csv', delimiter=','), 3072, axis=1)

# Keep column 1 of the label file and skip its first row
# (presumably an Id column and a header row -- TODO confirm against the CSV).
Ytr = np.genfromtxt('../data/Ytr.csv', delimiter=',')[1:, 1]

Xte = np.delete(np.genfromtxt('../data/Xte.csv', delimiter=','), 3072, axis=1)

# Number of labelled training samples.
N = Ytr.shape[0]

## Test new SVM

In [4]:
# --- Experiment configuration ------------------------------------------------
# Alternative settings tried previously (kept for reference):
#features_functions = [fourier_modulus_1D_kernel, fourier_modulus_2D_kernel, fourier_phase_2D_kernel, wavelet_transform]
#lambdas = [10, 1, 0.1, 0.01]
#features_functions = [fourier_modulus_2D_kernel]
features_functions = [scattering_with_haar_wavelets]
lambdas = [1.0, 0.1, 0.01, 0.001]
# True : fit on the whole training set and predict Xte (no score is computed).
# False: repeated random hold-out validation on the training set.
submission = False

# (feature_function, lambda) -> one entry per validation round, each being the
# number of correctly classified hold-out samples in that round.
results_cross_val = {}

for feature_function in features_functions:
    # Feature extraction does not depend on lambda or on the round, so it is
    # done once per feature function.
    Xtr_t = feature_function(Xtr)
    if submission:
        Xte_t = feature_function(Xte)

    n_total = len(Ytr)          # rows of Xtr_t and entries of Ytr share these indices
    n_holdout = n_total // 5    # 20% hold-out (1000 samples when n_total == 5000)
    # A submission fit uses the same inputs every time, so one round is enough.
    n_rounds = 1 if submission else 5

    for _lambda in lambdas:
        results_cross_val[(feature_function, _lambda)] = []

        for round_idx in range(n_rounds):
            if submission:
                # `.T` puts samples on columns, the layout passed to the SVM helpers.
                features = Xtr_t.T
                labels = Ytr
                X = Xte_t.T
            else:
                mask_test = random.sample(range(0, n_total), n_holdout)
                held_out = set(mask_test)  # O(1) membership instead of scanning a list
                mask_train = [idx for idx in range(n_total) if idx not in held_out]

                Xtr_train = Xtr_t[mask_train, :].T
                Xtr_test = Xtr_t[mask_test, :].T
                Ytr_train = Ytr[mask_train]
                Ytr_test = Ytr[mask_test]

                features = Xtr_train
                labels = Ytr_train
                X = Xtr_test

            # Time fitting + prediction together.
            t1 = datetime.now()
            alphas, bias = one_versus_all_SVM(features, labels, _lambda=_lambda)
            print('model fitted')
            prediction = predict_SVM(alphas, bias, features, X)
            t2 = datetime.now()
            print(t2 - t1)

            if not submission:
                well_classified = sum(
                    1 for predicted, expected in zip(prediction, Ytr_test)
                    if predicted == expected
                )
                print('lambda =  %s , good classification rate =  %s'
                      % (_lambda, float(well_classified) / len(Ytr_test)))
                # Record the score in validation mode. The original appended it in
                # the submission branch, where `well_classified` does not exist.
                results_cross_val[(feature_function, _lambda)].append(well_classified)

     pcost       dcost       gap    pres   dres
 0: -4.0358e+02 -4.0693e+01  3e+04  2e+02  2e-14
 1: -3.6936e+01 -1.4022e+01  2e+03  9e+00  2e-14
 2: -3.8952e+00 -1.0685e+01  1e+02  6e-01  3e-15
 3: -1.1007e+00 -8.2677e+00  1e+01  3e-02  3e-15
 4: -9.3241e-01 -1.9981e+00  1e+00  1e-03  6e-15
 5: -9.2997e-01 -1.0623e+00  1e-01  2e-04  2e-15
 6: -9.3152e-01 -1.0560e+00  1e-01  1e-04  2e-15
 7: -9.3546e-01 -1.0013e+00  7e-02  4e-05  2e-15
 8: -9.3827e-01 -9.7236e-01  3e-02  2e-05  2e-15
 9: -9.3980e-01 -9.5891e-01  2e-02  7e-06  2e-15
10: -9.4091e-01 -9.4968e-01  9e-03  3e-06  2e-15
11: -9.4142e-01 -9.4634e-01  5e-03  1e-06  2e-15
12: -9.4173e-01 -9.4439e-01  3e-03  4e-07  2e-15
13: -9.4188e-01 -9.4363e-01  2e-03  2e-07  2e-15
14: -9.4204e-01 -9.4286e-01  8e-04  9e-08  2e-15
15: -9.4214e-01 -9.4246e-01  3e-04  1e-08  2e-15
16: -9.4219e-01 -9.4230e-01  1e-04  1e-10  2e-15
17: -9.4221e-01 -9.4225e-01  4e-05  1e-17  2e-15
18: -9.4222e-01 -9.4223e-01  1e-05  3e-17  2e-15
19: -9.4223e-01 -9.42

In [None]:
# Write the latest `prediction` vector as a two-column CSV: a 1-based `Id`
# index and a `Prediction` column.
# NOTE(review): the output file name is hard-coded; confirm it matches the
# feature function and lambda that actually produced `prediction`.
DF = pd.DataFrame(data=pd.Series(prediction), columns=['Prediction'])
DF.index = DF.index + 1  # shift the default 0-based index so Ids start at 1
DF.to_csv(
    '../data/'+'fourier_modulus_2D_0_1.csv',
    sep=',',
    index=True,
    index_label='Id',
)

## Toy data for tests

In [None]:
# Synthetic 3-class problem: three 2-D Gaussian blobs sharing one covariance.
# NOTE(review): no random seed is set, so the drawn points differ on every run.
n_samples = 150  # points drawn per class
cov = [[3, 0], [0, 3]]

# Each X_k is transposed to shape (2, n_samples): coordinates on rows, points on columns.
mean_1 = [0, -3]
X_1 = np.random.multivariate_normal(mean_1, cov, n_samples).T
mean_2 = [3, 3]
X_2 = np.random.multivariate_normal(mean_2, cov, n_samples).T
mean_3 = [-3, 3]
X_3 = np.random.multivariate_normal(mean_3, cov, n_samples).T
X = np.concatenate((X_1, X_2, X_3), axis=1)

# Labels 0.0 / 1.0 / 2.0, in the same order in which the blobs were stacked.
y = np.repeat([0.0, 1.0, 2.0], n_samples)

# NOTE: this reuses (and therefore overwrites) the names Xtr_t / Ytr that the
# real-data cells also assign.
Xtr_t = X.T
Ytr = y

# Every 5th sample goes to the test split. The total is derived from n_samples
# so that changing n_samples keeps the split consistent (it was hard-coded to 450).
n_total = 3 * n_samples
mask_test = list(range(0, n_total, 5))
_test_idx = set(mask_test)
mask_train = [i for i in range(n_total) if i not in _test_idx]

Xtr_train = Xtr_t[mask_train, :].T
Xtr_test = Xtr_t[mask_test, :].T
Ytr_train = Ytr[mask_train]
Ytr_test = Ytr[mask_test]