In [1]:
# Pandas is used for data manipulation
import pandas as pd
import numpy as np

# Read in data (semicolon-separated, '.' as decimal mark)
z_data = pd.read_csv('esem_act_deact_zscore_groups.csv', sep=';', decimal='.')

# Remove irrelevant columns: every column whose name contains '_deact' or 'aggr'.
# A new frame is assigned instead of dropping in place, so the cell can be re-run safely.
z_data = z_data.drop(columns=list(z_data.filter(regex='_deact|aggr')))

# === drop all columns that are not necessary for learning =====================================
z_data = z_data.drop(columns=['scan', 'trial', 'snippet', 'response'])

#=== now create the different groups and compute the groupwise mean ============================
# One row per (proband, task) pair. groupby sorts by the group keys by default,
# so all rows of one proband end up next to each other.
grouped = z_data.groupby(['proband', 'task'])
# .mean() gives the same result as aggregate(np.mean) but avoids the FutureWarning
# that recent pandas versions emit when a NumPy callable is passed to aggregate().
groupedAgg = grouped.mean()
#=== end aggregation ===========================================================================

# Saving feature names for later use
feature_list = list(groupedAgg.columns)
# Convert to numpy arrays: feature matrix and the task label of every row
features = np.array(groupedAgg)
labels = np.array(groupedAgg.index.get_level_values(level='task'))


# num of different labels, i.e. rows per participant after the aggregation
# NOTE(review): the original note named only "comprehension and rest" -- confirm the third label.
numLabels = 3
sizeTrainSet = 13 # num of participants used for training
sizeTestSet = 4 # num participants used for testing
numParticipants = sizeTrainSet + sizeTestSet

# The split below is purely positional, so it only separates participants if the
# aggregated table has exactly numLabels rows for each of the numParticipants probands.
assert len(features) == numLabels * numParticipants, (
    'expected %d aggregated rows, found %d' % (numLabels * numParticipants, len(features))
)

# First numLabels*sizeTrainSet rows (indices 0..38) are used for training,
# the following numLabels*sizeTestSet rows (indices 39..50) for testing.
splitRow = numLabels * sizeTrainSet
lastRow = numLabels * numParticipants

training_features = np.array(features[:splitRow])
testing_features = np.array(features[splitRow:lastRow])

training_target = np.array(labels[:splitRow])
testing_target = np.array(labels[splitRow:lastRow])

In [2]:
# Number of aggregated rows expected overall: one row per label for every participant.
numParticipants * numLabels

51

In [3]:
# Show the shape of each split array: train/test features first, then train/test labels.
for split_array in (training_features, testing_features, training_target, testing_target):
    print(split_array.shape)

(39, 1606)
(12, 1606)
(39,)
(12,)


In [4]:
import os

from tpot import TPOTClassifier

# Base name (without extension) of the pipeline script that TPOT exports.
fileNameTPot = 'AllROIs/tpot_mnist_pipeline_CoarseAverageParticipantSplit_AllROIs'

# Create the output folder up front, so export() cannot fail on a missing
# directory after the long optimisation has already finished.
os.makedirs(os.path.dirname(fileNameTPot), exist_ok=True)

# A fixed random_state makes the evolutionary search repeatable across notebook runs.
# NOTE(review): TPOT's internal cross-validation is not told which rows belong to the
# same participant, so its CV score may be optimistic -- confirm whether that matters here.
tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, n_jobs=20,
                      random_state=42)
tpot.fit(training_features, training_target)
# Accuracy of the best pipeline on the held-out participants.
print(tpot.score(testing_features, testing_target))
# Write the best pipeline as a stand-alone Python script.
tpot.export(fileNameTPot + '.py')

Optimization Progress:  33%|███▎      | 40/120 [05:14<1:46:29, 79.87s/pipeline] 

Generation 1 - Current best internal CV score: 1.0


Optimization Progress:  51%|█████     | 61/120 [10:15<2:23:50, 146.27s/pipeline]

Generation 2 - Current best internal CV score: 1.0


Optimization Progress:  68%|██████▊   | 81/120 [10:55<54:05, 83.21s/pipeline]   

Generation 3 - Current best internal CV score: 1.0


Optimization Progress:  84%|████████▍ | 101/120 [11:49<11:23, 36.00s/pipeline]

Generation 4 - Current best internal CV score: 1.0


                                                                              

Generation 5 - Current best internal CV score: 1.0

Best pipeline: LinearSVC(input_matrix, C=10.0, dual=False, loss=squared_hinge, penalty=l1, tol=0.01)
0.9166666666666666




True

In [5]:
# Load the pipeline script that TPOT exported.
with open(fileNameTPot + '.py') as f:
    content = f.readlines()

# Keep only the lines that do not touch TPOT's placeholder data set ('tpot_data')
# and do not re-create the train/test split, so the script can later run on the
# arrays that already exist in this notebook.
cleanedContent = [
    line for line in content
    if 'tpot_data' not in line and 'training_target, testing_target' not in line
]

fileForLearning = fileNameTPot + '_cleaned.py'
with open(fileForLearning, 'w') as filehandle:
    # readlines() keeps each line's own terminator, so the lines are written as they
    # are; appending another '\n' would insert a blank line after every line.
    filehandle.writelines(cleanedContent)

In [6]:
import pdb
# === 2 to 7 =============================================
# Fold definition: rows 3..14 of the aggregated table are held out for testing,
# rows 0..2 and 15 onwards are used for training.
# NOTE(review): the heading label does not obviously match these row bounds -- confirm what it refers to.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 2 < row < 15]
indecesTrain = [row for row in row_ids if not 2 < row < 15]

# Pick the selected rows from the feature matrix and the label vector.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [7]:
# Report how many rows went into the training fold and how many into the test fold.
for fold_rows in (indecesTrain, indecesTest):
    print(len(fold_rows))

39
12


In [8]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works on
# the current training/testing arrays; the next cell reads `results` afterwards.
%run -i $fileForLearning

In [9]:
# Best accuracy seen so far across the folds; reset before the first fold is scored.
finalAccuracy = 0

# Count the test rows whose predicted label equals the true label.
num_matches = sum(1 for truth, predicted in zip(testing_target, results) if truth == predicted)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of correct predictions in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the larger of the stored value and this fold's accuracy.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [10]:
# === 4 to 9 =============================================
# Fold definition: rows 9..20 of the aggregated table are held out for testing,
# rows 0..8 and 21 onwards are used for training.
# NOTE(review): the heading label does not obviously match these row bounds -- confirm what it refers to.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 8 < row < 21]
indecesTrain = [row for row in row_ids if not 8 < row < 21]

# Pick the selected rows from the feature matrix and the label vector.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [11]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works on
# the current training/testing arrays; the next cell reads `results` afterwards.
%run -i $fileForLearning

In [12]:
# Count the test rows whose predicted label equals the true label.
num_matches = sum(1 for truth, predicted in zip(testing_target, results) if truth == predicted)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of correct predictions in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# finalAccuracy holds the best fold accuracy seen so far; raise it if this fold did better.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [13]:
# === 6 to 11 =============================================
# Fold definition: rows 15..26 of the aggregated table are held out for testing,
# rows 0..14 and 27 onwards are used for training.
# NOTE(review): the heading label does not obviously match these row bounds -- confirm what it refers to.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 14 < row < 27]
indecesTrain = [row for row in row_ids if not 14 < row < 27]

# Pick the selected rows from the feature matrix and the label vector.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [14]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works on
# the current training/testing arrays; the next cell reads `results` afterwards.
%run -i $fileForLearning

In [15]:
# Count the test rows whose predicted label equals the true label.
num_matches = sum(1 for truth, predicted in zip(testing_target, results) if truth == predicted)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of correct predictions in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# finalAccuracy holds the best fold accuracy seen so far; raise it if this fold did better.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [16]:
# === 8 to 13 =============================================
# Fold definition: rows 21..32 of the aggregated table are held out for testing,
# rows 0..20 and 33 onwards are used for training.
# NOTE(review): the heading label does not obviously match these row bounds -- confirm what it refers to.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 20 < row < 33]
indecesTrain = [row for row in row_ids if not 20 < row < 33]

# Pick the selected rows from the feature matrix and the label vector.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [17]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works on
# the current training/testing arrays; the next cell reads `results` afterwards.
%run -i $fileForLearning

In [18]:
# Count the test rows whose predicted label equals the true label.
num_matches = sum(1 for truth, predicted in zip(testing_target, results) if truth == predicted)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of correct predictions in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# finalAccuracy holds the best fold accuracy seen so far; raise it if this fold did better.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [19]:
# === 10 to 15 =============================================
# Fold definition: rows 27..38 of the aggregated table are held out for testing,
# rows 0..26 and 39 onwards are used for training.
# NOTE(review): the heading label does not obviously match these row bounds -- confirm what it refers to.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 26 < row < 39]
indecesTrain = [row for row in row_ids if not 26 < row < 39]

# Pick the selected rows from the feature matrix and the label vector.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [20]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works on
# the current training/testing arrays; the next cell reads `results` afterwards.
%run -i $fileForLearning

In [21]:
# Count the test rows whose predicted label equals the true label.
num_matches = sum(1 for truth, predicted in zip(testing_target, results) if truth == predicted)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of correct predictions in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# finalAccuracy holds the best fold accuracy seen so far; raise it if this fold did better.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [22]:
# Append this notebook's result to listOfAccuracies.txt as two lines:
# the run label first, then the best fold accuracy.
log_entry = ['Coarse_AllROIs', str(finalAccuracy)]
with open('listOfAccuracies.txt', 'a+') as log_file:
    log_file.write(''.join(entry + '\n' for entry in log_entry))
