In [1]:
import numpy as np
from matplotlib import pyplot as plt
import random
plt.style.use('classic')
%matplotlib inline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


In [2]:
# Print the installed scikit-learn version.
import sklearn
print(sklearn.__version__)

1.1.1


In [None]:
# Show the NumPy version. NumPy is imported under the alias ``np`` in this
# notebook, and the attribute name carries trailing underscores.
np.__version__

In [3]:
# Seed both global random generators (NumPy's and the standard library's)
# with the same value.
seed = 1
np.random.seed(seed)
random.seed(seed)

In [4]:
def plot_cross_temporal_decoding(data_array, title='Cross Temporal Decoding', xylines=()):
    """
        Draw a cross-temporal decoding matrix as a colour-coded image.

        A new 6x6 inch figure (dpi 60) is created and left as the current
        pyplot figure, so the caller can follow up with e.g. ``plt.savefig``.

        data_array: 2-D array of scores; rows are labelled as train windows
                    and columns as test windows. The colour scale is fixed
                    to the range [0, 1].
        title:      text placed above the image.
        xylines:    iterable of positions; for each one a white vertical and
                    a white horizontal line is drawn at that coordinate.
                    ``None`` is treated as "no lines".

        Returns nothing.
    """
    # An immutable default avoids sharing one list object between calls.
    if xylines is None:
        xylines = ()

    fig, ax = plt.subplots(figsize=(6, 6), dpi=60)
    image = ax.imshow(data_array, origin='lower', vmin=0, vmax=1)
    for xyline in xylines:
        ax.axvline(x=xyline, color='white')
        ax.axhline(y=xyline, color='white')
    ax.set_title(title)
    fig.colorbar(image, ax=ax, shrink=0.75)
    ax.set_xlabel("Test set time window")
    ax.set_ylabel("Train set time window")
    fig.tight_layout()

In [5]:
def split_train_test(dataset, dataset_labels, train_size=None, test_size=None, features=None):
    """
        Randomly split a dataset into disjoint train and test subsets,
        optionally keeping only a given set of features.

        The trial order (axis 0) is shuffled with NumPy's global random
        generator. The first ``train_size`` shuffled trials become the train
        set and the last ``test_size`` shuffled trials become the test set.

        dataset:        array indexed as (trials, time, features).
        dataset_labels: NumPy array with one label per trial, aligned with
                        axis 0 of ``dataset``.
        train_size:     number of train trials; defaults to int(trials * 2/3).
        test_size:      number of test trials; defaults to int(trials * 1/3).
        features:       indices along axis 2 to keep; defaults to all.

        Returns a dict with the keys 'train', 'test', 'train_labels' and
        'test_labels'.

        Raises ValueError if a size is negative or if
        ``train_size + test_size`` exceeds the number of trials (the two
        subsets would then share trials).
    """
    n_trials = dataset.shape[0]
    if features is None:
        features = np.arange(dataset.shape[2])
    if train_size is None:
        train_size = int(n_trials*2/3)
    if test_size is None:
        test_size = int(n_trials*1/3)

    if train_size < 0 or test_size < 0:
        raise ValueError("train_size and test_size must be non-negative, got "
                         + str(train_size) + " and " + str(test_size))
    if train_size + test_size > n_trials:
        raise ValueError("train_size + test_size = " + str(train_size + test_size)
                         + " exceeds the " + str(n_trials)
                         + " available trials; train and test sets would overlap")

    trial_indices = np.arange(n_trials)
    np.random.shuffle(trial_indices)

    train_indices = trial_indices[:train_size]
    # Use an explicit start index: ``[-test_size:]`` would select *every*
    # trial when test_size is 0, because -0 == 0.
    test_indices = trial_indices[n_trials - test_size:]

    # Pick the trials first and the features second, so only the requested
    # trials are copied rather than the whole shuffled dataset.
    split_dataset = {}
    split_dataset['train'] = dataset[train_indices][:, :, features]
    split_dataset['test'] = dataset[test_indices][:, :, features]
    split_dataset['train_labels'] = dataset_labels[train_indices]
    split_dataset['test_labels'] = dataset_labels[test_indices]

    return split_dataset

In [6]:
def compute_performance_LDA(train_set, train_labels, test_set, test_labels, solver_method=None):
    """
        Train a LinearDiscriminantAnalysis classifier and evaluate it.

        The classifier is fitted on (train_set, train_labels); the value
        returned is the classifier's ``score`` on (test_set, test_labels).
        When ``solver_method`` is None the 'svd' solver is used.
    """
    chosen_solver = 'svd' if solver_method is None else solver_method
    lda = LinearDiscriminantAnalysis(solver=chosen_solver)
    return lda.fit(train_set, train_labels).score(test_set, test_labels)

In [7]:
time_bin = 5  # number of consecutive time steps averaged into one window
bin_step = 1  # stride between the start indices of consecutive windows
T = 75 # The trial length.
# Start index of every window [start, start + time_bin). The last start is
# T - time_bin so that every window holds exactly ``time_bin`` samples;
# starting later would silently average over a truncated window.
perf_matrix_range = np.arange(0, T - time_bin + 1, bin_step)
perf_matrix_length = len(perf_matrix_range)

# just some plot parameters
# Marker positions at 1/15, 3/15, 7/15, 9/15 and 13/15 of the trial length.
# Integer arithmetic avoids float round-off before truncation.
time_points = [(T * fifteenth) // 15 for fifteenth in (1, 3, 7, 9, 13)]
# Index of each marker inside perf_matrix_range (IndexError if a marker is
# not one of the window start positions).
xy = [int(np.where(perf_matrix_range == time_point)[0][0]) for time_point in time_points]


In [None]:
# Cross-temporal decoding sweep: for every combination of feature count and
# train-set size, average the LDA performance matrix over the 3 data versions,
# plot it and save the figure to a PNG file.
for solver in ['svd']:
    for random_picks in [50,100,150,200,250,300,350]:
        for train_trials in [50,100,150,200,250,300,350]:
            # set the size of the test trials
            test_trials = 50
            print(f'Random Features {random_picks}\nTrained Trials {train_trials}\nTested Trials {test_trials}')
            # list with the LDA performance matrices of all data versions
            all_performances_matrices_lda = []
            for version in range(3):
                print("Data version", version)
                # matrix that keeps the LDA scores of every train time window (rows)
                # against every test time window (columns)
                performance_matrix_lda = np.zeros((perf_matrix_length, perf_matrix_length))
                # Load datasets and their respective labels: shape(trials X time X features)
                data = np.load(f'trial_data_{version}.npy')
                data_labels = np.load(f'trial_labels_{version}.npy')
                # pick random features
                random_features = np.random.choice(data.shape[2], random_picks, replace=False)
                # create dataset with train and test sets from random features
                dataset = split_train_test(data, data_labels, features=random_features,
                                           train_size=train_trials, test_size=test_trials)

                print("Train set shape:", dataset['train'].shape)
                print("Test set shape:", dataset['test'].shape)

                # The time-averaged test windows do not depend on the train window,
                # so compute them once instead of once per train window.
                test_windows = [np.mean(dataset['test'][:, k:k+time_bin, :], axis=1)
                                for k in perf_matrix_range]

                # iterate over train time windows
                for a, j in enumerate(perf_matrix_range):
                    train_set = np.mean(dataset['train'][:, j:j+time_bin, :], axis=1)
                    # The fitted model depends only on the train window: fit it once
                    # and reuse it for every test window instead of refitting an
                    # identical model for each (train, test) pair.
                    clf = LinearDiscriminantAnalysis(solver=solver)
                    clf.fit(train_set, dataset['train_labels'])
                    for b, test_set in enumerate(test_windows):
                        # score of the model trained on window a, tested on window b
                        performance_matrix_lda[a, b] = clf.score(test_set, dataset['test_labels'])
                # add this version's scores to the list with all the matrices
                all_performances_matrices_lda.append(performance_matrix_lda)
            # plot an image of the mean performance over all 3 data versions
            plot_cross_temporal_decoding(np.mean(all_performances_matrices_lda, axis=0),
                                         f'{random_picks} Random Picked Features\n{train_trials} Trained Trials\n{test_trials} Tested Trials\n',
                                         xylines=xy)
            plt.savefig(f'{solver}_train-size{train_trials}_random-features{random_picks}_test-size{test_trials}.png',
                        bbox_inches='tight')
            # Display the figure now and release it, so the 49 figures of the sweep
            # do not all stay open until the cell finishes.
            plt.show()
            plt.close('all')

Random Features 50
Trained Trials 50
Tested Trials 50
Data version 0
Train set shape: (50, 75, 50)
Test set shape: (50, 75, 50)
Data version 1
Train set shape: (50, 75, 50)
Test set shape: (50, 75, 50)
Data version 2
Train set shape: (50, 75, 50)
Test set shape: (50, 75, 50)
Random Features 50
Trained Trials 100
Tested Trials 50
Data version 0
Train set shape: (100, 75, 50)
Test set shape: (50, 75, 50)
Data version 1
Train set shape: (100, 75, 50)
Test set shape: (50, 75, 50)
Data version 2
Train set shape: (100, 75, 50)
Test set shape: (50, 75, 50)
Random Features 50
Trained Trials 150
Tested Trials 50
Data version 0
Train set shape: (150, 75, 50)
Test set shape: (50, 75, 50)
Data version 1
Train set shape: (150, 75, 50)
Test set shape: (50, 75, 50)
Data version 2
Train set shape: (150, 75, 50)
Test set shape: (50, 75, 50)
Random Features 50
Trained Trials 200
Tested Trials 50
Data version 0
Train set shape: (200, 75, 50)
Test set shape: (50, 75, 50)
Data version 1
Train set shape: (2

In [47]:
# Shape of the most recently loaded data array.
data.shape

(400, 75, 400)

In [48]:
# Shape of the most recently loaded label array.
data_labels.shape

(400,)