In [1]:
# Imports
import numpy as np
import scipy.io as sio
from sklearn import decomposition
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.pipeline import FeatureUnion
from sklearn import cross_validation
import autosklearn
from sklearn.feature_selection import SelectKBest, f_classif, SelectPercentile
# Read the MATLAB .mat archives that hold the training and test sets.
# The paths are relative to the notebook's working directory.
TRAIN_MAT_PATH = 'Train.mat'
TEST_MAT_PATH = 'Test.mat'
trainData = sio.loadmat(TRAIN_MAT_PATH)
testData = sio.loadmat(TEST_MAT_PATH)

In [2]:
# Unpack the training arrays from the loaded .mat dictionary.
# dict.get is used, so a missing key yields None rather than raising.
Xtrain, Ytrain, eventsTrain, subjectsTrain = [
    trainData.get(field)
    for field in ('Xtrain', 'Ytrain', 'eventsTrain', 'subjectsTrain')
]
# x, y, z are stored next to the training data; they are not used in
# the cells shown here.
x, y, z = [trainData.get(axis) for axis in ('x', 'y', 'z')]

# Unpack the test arrays; the test feature matrix is converted to a
# float32 numpy array in the same step.
testX = np.array(testData.get('Xtest'), np.float32)
eventsTest = testData.get('eventsTest')
subjectsTest = testData.get('subjectsTest')

In [3]:
# Per-subject index tables: subject id -> list of row indices.
# Both tables get one (initially empty) list for every id from 1 up to
# the largest id found in subjectsTrain; the lists are filled later.
subjectIds = xrange(1, max(subjectsTrain)+1)
trainDict = dict((sid, []) for sid in subjectIds)
testDict = dict((sid, []) for sid in subjectIds)

In [4]:
# Record, for every subject, which rows of the data belong to them.
# Training rows first
for row, subject in enumerate(subjectsTrain):
    trainDict[int(subject)].append(row)

# Then the test rows (testDict only has keys for ids seen in training)
for row, subject in enumerate(subjectsTest):
    testDict[int(subject)].append(row)

In [5]:
# Set up the feature transforms
comp = 453  # number of PCA components

# Class weights handed to every per-subject SVC (keys are the class
# labels accepted by class_weight); all classes are weighted equally.
cw = {0: 1, 1: 1, 3: 1}

# Three feature extractors whose outputs are concatenated column-wise:
#   - PCA projection onto `comp` components
#   - the 180 best univariate features (SelectKBest default scoring)
#   - the top 10 percent of features by ANOVA F-score
pca = decomposition.PCA(n_components=comp)
selection = SelectKBest(k=180)
class_stuff = SelectPercentile(f_classif, percentile=10)
combined_features = FeatureUnion([("pca", pca),
                                  ("univ_select", selection),
                                  ("class_stuff", class_stuff)])

# Standardise the training data and fit the feature union on the
# STANDARDISED matrix. The test data is standardised before it is pushed
# through combined_features, so the union must be fitted on data with
# the same scaling; fitting it on the raw Xtrain would leave train and
# test features on different scales.
scaler = preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True)
Xscale = scaler.fit_transform(Xtrain)
Xpca = combined_features.fit(Xscale, np.ravel(Ytrain)).transform(Xscale)

In [6]:
# One independent, identically configured RBF-kernel SVC per subject id
# (1 .. largest id in subjectsTrain), keyed by subject id.
svcSettings = dict(
    C=100,
    kernel='rbf',
    gamma=0.0001,
    degree=3,
    coef0=0.0,
    class_weight=cw,
    probability=True,
    shrinking=True,
    tol=0.0001,
    cache_size=200,
    max_iter=-1,
    random_state=None,
    verbose=False,
)
classDict = dict(
    (subject, SVC(**svcSettings))
    for subject in xrange(1, max(subjectsTrain)+1)
)

In [7]:
# Train one classifier per subject on that subject's rows only.
# The rows are gathered with fancy indexing and the whole block is
# stacked `repeats` times (block after block), so every training sample
# appears `repeats` times in the data handed to fit().
repeats = 5
for subject in sorted(classDict):
    rows = trainDict[subject]
    # A subject id with no training rows has nothing to fit on; fitting
    # on an empty array would raise, so leave that classifier untouched.
    if len(rows) == 0:
        continue
    X = np.tile(Xpca[rows, :], (repeats, 1))
    Y = np.tile(Ytrain[rows, :], (repeats, 1))
    # SVC expects a 1-D label vector
    classDict[subject].fit(X, np.ravel(Y))

In [8]:
# Scale and feature select the test data
# The scaler is fitted on the TRAINING data and only applied to the test
# data, so test features are standardised with the training mean/std
# instead of statistics estimated from the test set itself.
testScaler = preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True).fit(Xtrain)
testX = testScaler.transform(testX)
testNew = combined_features.transform(testX)
# One prediction slot per test row (rather than a hard-coded row count)
testY = np.zeros(testX.shape[0])

In [9]:
# Predict every subject's test rows with that subject's own classifier
# and write the labels back into testY at the original row positions.
# testDict holds a (possibly empty) row list for every known subject id.
for subject in sorted(testDict):
    rows = testDict[subject]
    # Nothing to predict for subjects that have no test rows
    if len(rows) == 0:
        continue
    # Fancy indexing pulls the subject's rows in one step; predict()
    # returns labels in the same order, so they map straight onto `rows`.
    testY[rows] = classDict[subject].predict(testNew[rows, :])

In [None]:
# Show the predicted label for every test row
print(testY)

In [None]:
# Turn the hard label predictions into one-hot "probability" rows:
#   label 0        -> [1, 0, 0]
#   label 1        -> [0, 1, 0]
#   anything else  -> [0, 0, 1]
# The number of rows follows testY instead of a hard-coded count.
numTest = len(testY)
column = np.where(testY == 0, 0, np.where(testY == 1, 1, 2))
new = np.zeros((numTest, 3))
new[np.arange(numTest), column] = 1.0

In [None]:
# Write the one-hot prediction matrix to disk as comma-separated values
np.savetxt(fname='prediction.csv', X=new, delimiter=',')