In [1]:
# Imports
import numpy as np
import scipy.io as sio
from sklearn import decomposition
from sklearn import preprocessing
from sklearn.pipeline import FeatureUnion
from sklearn.feature_selection import SelectKBest, f_classif, SelectPercentile
from sklearn import linear_model
from sklearn import cross_validation
from sklearn.cross_decomposition import PLSRegression
from sklearn import tree

In [2]:
'''
Load the .mat inputs and extract the arrays used by the later cells
'''
# Each file is read with scipy.io.loadmat (which returns a dict-like of MATLAB
# variables) and the variable of interest is pulled out right away.
# dict.get is used, so a variable missing from a file yields None, not a KeyError.

# Event data
events_1000 = sio.loadmat('Data/events_1000.mat')
events = events_1000.get('events')

# Column-index files and the provided data matrix
missIdx = sio.loadmat('Data/missIdx.mat')
missidx = missIdx.get('missIdx')
provideData_1000 = sio.loadmat('Data/provideData_1000.mat')
provideData = provideData_1000.get('provideData')
provideIdx = sio.loadmat('Data/provideIdx.mat')
provideidx = provideIdx.get('provideIdx')

# Training matrices (X and Y are stored in the same file)
trainData = sio.loadmat('Data/Train.mat')
Xtrain = trainData.get('Xtrain')
Yt = trainData.get('Ytrain')

# Test matrices
testData = sio.loadmat('Data/Test.mat')
Xtest = testData.get('Xtest')
# NOTE: yTest is the raw loadmat result here; the lasso prediction cell further
# down rebinds the same name to a prediction array.
yTest = sio.loadmat('Data/Ytest.mat')
Yt2 = yTest.get('Ytest')

In [3]:
'''
Get full training data
'''
# Stack the train and test matrices row-wise so every available sample is used.
# NOTE: this cell rebinds Xtrain, so it must run exactly once after the loading
# cell; re-running it on its own would stack the already-reduced Xtrain.
x = np.vstack((Xtrain, Xtest))
y = np.vstack((Yt, Yt2))

# provideidx / missidx hold 1-based column numbers of x (hence the "- 1" below).
first_provided = provideidx[0, 0]
first_missing = missidx[0, 0]
# Sets give O(1) membership tests instead of scanning the index array per column.
provided_cols = set(np.ravel(provideidx).tolist())
missing_cols = set(np.ravel(missidx).tolist())

# Collect the column numbers first and slice once at the end, rather than
# growing the matrices with np.vstack on every iteration (quadratic copying).
# Column order is: the first listed index, then the remaining ones ascending.
x_cols = [first_provided]
y_cols = [first_missing]
for col in range(1, np.shape(x)[1] + 1):
    if col in provided_cols and col != first_provided:
        x_cols.append(col)
    elif col in missing_cols and col != first_missing:
        y_cols.append(col)

# Result is (n_samples, n_selected_columns); no transpose needed.
Xtrain = x[:, np.asarray(x_cols, dtype=int) - 1]
Ytrain = x[:, np.asarray(y_cols, dtype=int) - 1]

print("Xtrain:")
print(np.shape(Xtrain))
print("Ytrain:")
print(np.shape(Ytrain))

Xtrain:
(1502, 3172)
Ytrain:
(1502, 2731)


In [4]:
'''
Configure the feature-reduction steps: PCA plus univariate feature selection
'''
# Tunable sizes
comp = 800          # number of PCA components
k_ = 850            # number of features kept by SelectKBest
percentile_ = 10    # percentile of features kept by SelectPercentile


def f_regression(X, Y):
    """Score features with sklearn's f_regression, always passing center=False.

    Used as the score_func of the selectors below.
    """
    from sklearn.feature_selection import f_regression as sk_f_regression
    return sk_f_regression(X, Y, center=False)


# Individual reducers
pca = decomposition.PCA(n_components=comp)
selection = SelectKBest(score_func=f_regression, k=k_)
class_stuff = SelectPercentile(score_func=f_regression, percentile=percentile_)

# Only PCA and the k-best selector are combined here. class_stuff is built but
# is not part of the union; append ("class_stuff", class_stuff) to include it.
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

In [5]:
'''
Standardize Xtrain, then fit the combined reducers on it
'''
# Zero-mean / unit-variance scaling of every column of Xtrain
Xscale = preprocessing.StandardScaler(
    copy=True, with_mean=True, with_std=True
).fit(Xtrain).transform(Xtrain)

# Fit the feature union on the scaled data; the flattened y is the target
# handed to the supervised selector inside the union.
Xpca = combined_features.fit_transform(Xscale, y.ravel())
print(np.shape(Xpca))

(1502, 1650)


In [None]:
'''
Set up the Lasso regression model
'''
# Hyper-parameters. Only alpha_ is handed to the estimator; max_iter_ is defined
# here but never passed to Lasso, so the estimator's default iteration limit applies.
alpha_, max_iter_ = 1.25e3, -1

lasso = linear_model.Lasso(alpha=alpha_)

In [None]:
'''
Fit the lasso model, then report its training score and its 10-fold CV score
'''
lasso.fit(Xpca, Ytrain)

# score() is evaluated on the same data the model was just fitted on
print(lasso.score(Xpca, Ytrain))

# Mean over the 10 folds of the "mean_squared_error" scorer
lasso_cv = cross_validation.cross_val_score(
    lasso, Xpca, Ytrain, cv=10, scoring="mean_squared_error"
)
print(np.mean(lasso_cv))

In [None]:
'''
Predict the held-out data with the lasso model
'''
# combined_features was fitted on the standardized training matrix, so the
# held-out data has to go through the same standardization before it is
# projected; feeding it in raw puts it on a different scale than the model saw.
# The scaler is fitted on Xtrain here so this cell does not depend on any
# scaler object from another cell (the fit is deterministic).
# NOTE(review): assumes provideData holds the same raw, unscaled columns as
# Xtrain -- confirm against how provideData_1000.mat was produced.
test_scaler = preprocessing.StandardScaler(
    copy=True, with_mean=True, with_std=True
).fit(Xtrain)
testPCA = combined_features.transform(test_scaler.transform(provideData))
print(np.shape(testPCA))

# Predict (this rebinds yTest, which previously held the loaded Ytest.mat contents)
yTest = lasso.predict(testPCA)
print(np.shape(yTest))

In [None]:
'''
Set up the Ridge regression model
'''
# Hyper-parameters. Only alpha_ is handed to the estimator; max_iter_ and tol_
# are defined here but never passed to Ridge, so its defaults apply for both.
alpha_, max_iter_, tol_ = 1.25e3, -1, 1e-6

# Build the ridge regression estimator
ridge = linear_model.Ridge(alpha=alpha_)

In [None]:
'''
Fit the ridge model, then report its training score and its 10-fold CV score
'''
print(np.shape(Xpca))

ridge.fit(Xpca, Ytrain)

# score() is evaluated on the same data the model was just fitted on
print("Score:")
print(ridge.score(Xpca, Ytrain))

# Mean over the 10 folds of the "mean_squared_error" scorer
print("CV:")
ridge_cv = cross_validation.cross_val_score(
    ridge, Xpca, Ytrain, cv=10, scoring="mean_squared_error"
)
print(np.mean(ridge_cv))

In [None]:
'''
Predict the held-out data with the ridge model
'''
# combined_features was fitted on the standardized training matrix, so the
# held-out data has to go through the same standardization before it is
# projected; feeding it in raw puts it on a different scale than the model saw.
# The scaler is fitted on Xtrain here so this cell does not depend on any
# scaler object from another cell (the fit is deterministic).
# NOTE(review): assumes provideData holds the same raw, unscaled columns as
# Xtrain -- confirm against how provideData_1000.mat was produced.
test_scaler = preprocessing.StandardScaler(
    copy=True, with_mean=True, with_std=True
).fit(Xtrain)
testPCA = combined_features.transform(test_scaler.transform(provideData))
print(np.shape(testPCA))

# Predict
yTest2 = ridge.predict(testPCA)
print(np.shape(yTest2))

In [None]:
# Average the lasso (yTest) and ridge (yTest2) predictions element-wise.
#
# The computation is vectorized and works for whatever shape the predictions
# have, instead of looping over a hard-coded 1000 x 2731 grid in Python.
#
# yFinal is bound to the same array as yTest (no copy is made), so after this
# cell both names refer to the averaged values; later cells that read either
# name see the average. Because the update is in place, run this cell only
# once per pair of predictions.
if np.shape(yTest) != np.shape(yTest2):
    raise ValueError(
        "lasso and ridge predictions differ in shape: %s vs %s"
        % (np.shape(yTest), np.shape(yTest2))
    )

yFinal = yTest
np.add(yTest, yTest2, out=yFinal)
yFinal /= 2.0

print(np.shape(yFinal))

In [None]:
'''
Write the averaged predictions to prediction.csv
'''
# Export yFinal, the averaged lasso/ridge prediction, by name so the file
# content does not depend on which other variable happens to reference it.
# Values are comma-separated with four decimal places.
np.savetxt('prediction.csv', yFinal, delimiter=",", fmt='%1.4f')