In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import scipy
from scipy.stats import pearsonr
from scipy import signal as sig
from scipy.io import loadmat


In [2]:
# Load the leaderboard and raw training .mat files from the parent directory.
# Forward slashes are used on purpose: a backslash-separated relative path is
# only resolved on Windows, whereas '/' is accepted on Windows, Linux and macOS.
leaderboard_data = loadmat('../leaderboard_data.mat')
raw_training_data = loadmat('../raw_training_data.mat')

In [50]:
# Glove data for training, one array per subject.
# Each subject's array is read from element [subject][0] of 'train_dg'.
_train_dg_cells = raw_training_data['train_dg']
train_dg_s1, train_dg_s2, train_dg_s3 = (
    _train_dg_cells[subject][0] for subject in range(3)
)

In [64]:
# Keep every 50th glove sample of each subject, then drop the last kept sample.
# The same reduction is applied to all three subjects.
train_dg_s1_downsample, train_dg_s2_downsample, train_dg_s3_downsample = (
    glove[::50][:-1] for glove in (train_dg_s1, train_dg_s2, train_dg_s3)
)

In [5]:
# print(train_dg_s1_downsample.shape)
# print(feats_s1_train.shape)
# plt.scatter(feats_s1_train[:,[0:]], train_dg_s1_downsample)

In [65]:
# Load the precomputed training feature matrix of each subject from disk.
# The context managers guarantee that every file handle is closed even when
# np.load raises (missing/corrupt file); the file_sN names stay bound to the
# (closed) handles exactly as before.
with open("all_feats_s1_train_fx.npy", "rb") as file_s1:
    feats_s1_train = np.load(file_s1)

with open("all_feats_s2_train_fx.npy", "rb") as file_s2:
    feats_s2_train = np.load(file_s2)

with open("all_feats_s3_train_fx.npy", "rb") as file_s3:
    feats_s3_train = np.load(file_s3)

In [66]:
# Draw 4500 distinct row indices for training; every other row is held out
# for validation. selection_mask holds 1 for training rows and 0 otherwise.
#
# A dedicated, seeded generator is used so that the split -- and therefore
# every score computed from it -- is identical after a kernel restart.
# NOTE(review): rows are sampled at random rather than as one contiguous
# block; if neighbouring feature rows are built from overlapping time windows
# the validation scores will be optimistic -- confirm against the feature
# extraction code.
split_rng = np.random.RandomState(0)
train_inds = split_rng.choice(np.arange(0, len(feats_s1_train)), 4500, replace=False)
selection_mask = np.zeros(len(feats_s1_train))
selection_mask[train_inds] = 1

In [67]:
# Split each subject's feature matrix into training and validation rows using
# the shared selection mask, printing both shapes after each split.
_train_rows = selection_mask.astype(bool)
_valid_rows = ~_train_rows

feats_s1_train_split, feats_s1_valid_split = (
    feats_s1_train[_train_rows],
    feats_s1_train[_valid_rows],
)
print(feats_s1_train_split.shape)
print(feats_s1_valid_split.shape)

feats_s2_train_split, feats_s2_valid_split = (
    feats_s2_train[_train_rows],
    feats_s2_train[_valid_rows],
)
print(feats_s2_train_split.shape)
print(feats_s2_valid_split.shape)

feats_s3_train_split, feats_s3_valid_split = (
    feats_s3_train[_train_rows],
    feats_s3_train[_valid_rows],
)
print(feats_s3_train_split.shape)
print(feats_s3_valid_split.shape)

(4500, 434)
(1499, 434)
(4500, 336)
(1499, 336)
(4500, 448)
(1499, 448)


In [68]:
# Split the downsampled glove targets with the same mask that split the
# features, so feature rows and target rows stay aligned.
_train_rows = selection_mask.astype(bool)
_valid_rows = ~_train_rows

dg_s1_train_split, dg_s1_valid_split = (
    train_dg_s1_downsample[_train_rows],
    train_dg_s1_downsample[_valid_rows],
)

dg_s2_train_split, dg_s2_valid_split = (
    train_dg_s2_downsample[_train_rows],
    train_dg_s2_downsample[_valid_rows],
)

dg_s3_train_split, dg_s3_valid_split = (
    train_dg_s3_downsample[_train_rows],
    train_dg_s3_downsample[_valid_rows],
)


In [34]:
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor

In [35]:
# Fit a tanh MLP with hidden layers of 100, 200 and 100 units on the subject 1
# training split. random_state pins the weight initialisation and the batch
# shuffling so the fit (and the scores derived from it) can be reproduced.
# NOTE(review): the features are passed in unscaled; MLPs are sensitive to
# feature scaling, so standardising the inputs may be worth trying -- confirm.
reg = MLPRegressor(
    hidden_layer_sizes=[100, 200, 100],
    activation='tanh',
    alpha=1e-5,
    random_state=0,
).fit(feats_s1_train_split, dg_s1_train_split)


In [36]:
# Predict the subject 1 validation targets with the fitted MLP.
pred = reg.predict(X=feats_s1_valid_split)

In [37]:
# Pearson correlation coefficient between prediction and ground truth,
# computed separately for each of the first five output columns.
subj1_corr = [
    pearsonr(pred.T[column], dg_s1_valid_split.T[column])[0]
    for column in range(5)
]

print(subj1_corr)

[0.1105848999542174, 0.16378143852378532, 0.1902031888729781, 0.16138842421814187, 0.16834463288518295]


In [38]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Fit a 1000-tree random forest on the subject 1 training split.
# random_state makes the bootstrap and feature sampling repeatable between
# runs; n_jobs=-1 builds the trees on all available cores, which shortens the
# fit without altering the resulting model for a fixed random_state.
rfr_reg = RandomForestRegressor(
    n_estimators=1000,
    random_state=0,
    n_jobs=-1,
).fit(feats_s1_train_split, dg_s1_train_split)


In [41]:

# Predict the subject 1 validation targets with the random forest.
pred = rfr_reg.predict(X=feats_s1_valid_split)

# Pearson correlation coefficient between prediction and ground truth,
# computed separately for each of the first five output columns.
subj1_corr = [
    pearsonr(pred.T[column], dg_s1_valid_split.T[column])[0]
    for column in range(5)
]

print(subj1_corr)

[0.3628948124755439, 0.445706219652532, 0.3260761581038143, 0.37348195648853644, 0.3085596314587819]


In [43]:
# Persist the fitted subject 1 random forest to disk.
# The context manager closes (and thereby flushes) the file deterministically
# instead of leaving an open handle for the garbage collector.
model_fname = 'subject1_rfr_1000.model'
with open(model_fname, 'wb') as model_file:
    pickle.dump(rfr_reg, model_file)

In [42]:
# Pull the leaderboard ECoG array stored at element [0][0] and show its shape.
_leaderboard_ecog_cells = leaderboard_data['leaderboard_ecog']
leaderboard_data_s1 = _leaderboard_ecog_cells[0][0]
print(leaderboard_data_s1.shape)

(147500, 62)


In [44]:
# Reload the saved subject 1 model and re-score it on the validation split to
# check that the round trip through pickle preserves the predictions.
# The context manager makes sure the file handle is closed after loading.
# NOTE: pickle.load executes arbitrary code from the file; only load model
# files that were produced by this notebook or another trusted source.
with open(model_fname, 'rb') as model_file:
    rfr_reg_loaded = pickle.load(model_file)
pred = rfr_reg_loaded.predict(feats_s1_valid_split)

# Per-column Pearson correlation between prediction and ground truth.
subj1_corr = []
for i in range(5):
    finger_pred = pred.transpose()[i]
    finger_truth = dg_s1_valid_split.transpose()[i]
    subj1_corr.append(pearsonr(finger_pred, finger_truth)[0])

print(subj1_corr)

[0.3628948124755439, 0.445706219652532, 0.3260761581038143, 0.37348195648853644, 0.3085596314587819]


In [53]:
# SUBJECT 2
# Fit a 1000-tree random forest on the subject 2 training split.
# random_state makes the bootstrap and feature sampling repeatable between
# runs; n_jobs=-1 builds the trees on all available cores, which shortens the
# fit without altering the resulting model for a fixed random_state.
rfr_reg_s2 = RandomForestRegressor(
    n_estimators=1000,
    random_state=0,
    n_jobs=-1,
).fit(feats_s2_train_split, dg_s2_train_split)


In [57]:
# Predict the subject 2 validation targets with the random forest.
pred2 = rfr_reg_s2.predict(X=feats_s2_valid_split)

# Pearson correlation coefficient between prediction and ground truth,
# computed separately for each of the first five output columns.
subj2_corr = [
    pearsonr(pred2.T[column], dg_s2_valid_split.T[column])[0]
    for column in range(5)
]

print(subj2_corr)

[0.5013905240357175, 0.3952314970935013, 0.44173697171067366, 0.47536595670923487, 0.41999721305827437]


In [59]:
# Persist the fitted subject 2 random forest to disk.
# The context manager closes (and thereby flushes) the file deterministically
# instead of leaving an open handle for the garbage collector.
model_fname_s2 = 'subject2_rfr_1000.model'
with open(model_fname_s2, 'wb') as model_file_s2:
    pickle.dump(rfr_reg_s2, model_file_s2)

In [69]:
# Fit a 1000-tree random forest on the subject 3 training split.
# random_state makes the bootstrap and feature sampling repeatable between
# runs; n_jobs=-1 builds the trees on all available cores, which shortens the
# fit without altering the resulting model for a fixed random_state.
rfr_reg_s3 = RandomForestRegressor(
    n_estimators=1000,
    random_state=0,
    n_jobs=-1,
).fit(feats_s3_train_split, dg_s3_train_split)


In [70]:
# Predict the subject 3 validation targets with the random forest.
pred3 = rfr_reg_s3.predict(X=feats_s3_valid_split)

# Pearson correlation coefficient between prediction and ground truth,
# computed separately for each of the first five output columns.
subj3_corr = [
    pearsonr(pred3.T[column], dg_s3_valid_split.T[column])[0]
    for column in range(5)
]

print(subj3_corr)

[0.5606863346069813, 0.4588151028936611, 0.43433989473331025, 0.552828754443724, 0.529255182645821]


In [85]:
# Score subject 3 against the full-rate (non-downsampled) glove signal.
# NOTE: feats_s3_train includes the rows the forest was fitted on, so the
# correlations printed here are optimistic compared with the held-out
# validation scores.
pred3_all = rfr_reg_s3.predict(feats_s3_train)

# Hold every predicted row for 50 consecutive samples. np.repeat does this in
# one vectorised call instead of appending rows one at a time in Python.
pred3_all_long = np.repeat(pred3_all, 50, axis=0)

# The upsampled prediction is shorter than the glove signal; extend it by
# repeating the last predicted row until both have the same number of rows.
# The pad length is derived from the actual mismatch rather than hard-coded.
n_missing = len(train_dg_s3) - len(pred3_all_long)
if n_missing > 0:
    tail = np.repeat(pred3_all[-1:], n_missing, axis=0)
    pred3_all_long = np.concatenate([pred3_all_long, tail], axis=0)

subj3_corr = []
print(pred3_all_long.shape)
print(train_dg_s3)
for i in range(5):
    finger_pred = pred3_all_long.transpose()[i]
    finger_truth = train_dg_s3.transpose()[i]
    # Shapes are printed so a length mismatch is visible before pearsonr runs.
    print(finger_pred.shape)
    print(finger_truth.shape)
    subj3_corr.append(pearsonr(finger_pred, finger_truth)[0])

print(subj3_corr)

(300000, 5)
[[-0.00501347  0.68820906  2.57802963  0.48578787  0.02674103]
 [-0.00501347  0.68820906  2.57802963  0.48578787  0.02674103]
 [-0.00501347  0.68820906  2.57802963  0.48578787  0.02674103]
 ...
 [-0.24686241 -0.94949675 -0.25215101 -0.45340061 -0.56300163]
 [-0.24658966 -0.94949961 -0.25224686 -0.45382118 -0.56321907]
 [-0.24631977 -0.94950056 -0.25234175 -0.45423698 -0.56343555]]
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
(300000,)
[0.8978521447564057, 0.8819424136601375, 0.884203991853101, 0.8921962863843796, 0.8916418227539928]


In [71]:
# Persist the fitted subject 3 random forest to disk.
# The context manager closes (and thereby flushes) the file deterministically
# instead of leaving an open handle for the garbage collector.
model_fname_s3 = 'subject3_rfr_1000.model'
with open(model_fname_s3, 'wb') as model_file_s3:
    pickle.dump(rfr_reg_s3, model_file_s3)