In [1]:
# Pandas is used for data manipulation
import pandas as pd
import numpy as np

# Read in data (';'-separated file, '.' as the decimal mark)
z_data = pd.read_csv('fse17_act_deact_zscore_groups.csv', sep=';', decimal='.')

# Remove irrelevant columns in one non-mutating step:
#   * every column whose name matches '_deact' or 'aggr'
#   * the bookkeeping columns that are not necessary for learning
unused_columns = list(z_data.filter(regex='_deact|aggr')) + ['scan', 'trial', 'snippet', 'response']
z_data = z_data.drop(columns=unused_columns)

#=== now create the different groups and compute the groupwise mean ============================
# One row per (proband, task) pair. groupby sorts its keys by default, so the rows
# are ordered by proband first and by task second.
grouped = z_data.groupby(['proband', 'task'])
# .mean() replaces aggregate(np.mean): passing the NumPy callable is deprecated in
# recent pandas releases and computes the same group-wise mean.
groupedAgg = grouped.mean()
#=== end aggregation ===========================================================================

# Saving feature names for later use
feature_list = list(groupedAgg.columns)
# Convert to numpy arrays: one feature row and one task label per (proband, task) pair
features = np.array(groupedAgg)
labels = np.array(groupedAgg.index.get_level_values(level='task'))


numLabels = 2 # num of different labels (comprehension and rest)
sizeTrainSet = 11 # num of participants used for training
sizeTestSet = 3 # num participants used for testing
numParticipants = sizeTrainSet + sizeTestSet

# Positional split: the first numLabels*sizeTrainSet rows (indices 0..21) are used for
# training, the following rows up to numLabels*numParticipants (indices 22..27) for testing.
# Rows beyond numLabels*numParticipants, if the data ever contains any, are ignored.
numTrainRows = numLabels * sizeTrainSet
numRows = numLabels * numParticipants

training_features = np.array(features[:numTrainRows])
testing_features = np.array(features[numTrainRows:numRows])

training_target = np.array(labels[:numTrainRows])
testing_target = np.array(labels[numTrainRows:numRows])

  interactivity=interactivity, compiler=compiler, result=result)


In [32]:
# Show the shapes of the four train/test arrays, one per line
# (order: training features, testing features, training labels, testing labels).
print(
    training_features.shape,
    testing_features.shape,
    training_target.shape,
    testing_target.shape,
    sep='\n',
)

(22, 390)
(6, 390)
(22,)
(6,)


In [2]:
# Base path (without extension) of the pipeline script exported by TPOT; the cells
# below append '.py' (raw export) and '_cleaned.py' (post-processed copy) to it.
fileNameTPot = 'Act/tpot_mnist_pipeline_CoarseAverageParticipantSplit_Act'

In [13]:
import os

from tpot import TPOTClassifier

# Make sure the export directory exists before starting the (long) search, so the
# fitted pipeline is not lost to a missing folder when it is exported at the end.
exportDir = os.path.dirname(fileNameTPot)
if exportDir:
    os.makedirs(exportDir, exist_ok=True)

# The search is stochastic; a fixed seed makes a fresh run of the notebook
# reproduce the same pipeline search.
tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, n_jobs=20,
                      random_state=42)
tpot.fit(training_features, training_target)
# Score of the best pipeline on the held-out rows of the positional split
print(tpot.score(testing_features, testing_target))
# Write the best pipeline as a standalone Python script
tpot.export(fileNameTPot + '.py')

                                                                                                                       

Generation 1 - Current best internal CV score: 1.0


                                                                                                                       

Generation 2 - Current best internal CV score: 1.0


                                                                                                                       

Generation 3 - Current best internal CV score: 1.0


                                                                                                                       

Generation 4 - Current best internal CV score: 1.0


                                                                                                                       

Generation 5 - Current best internal CV score: 1.0


                                                                                                                       


Best pipeline: GaussianNB(input_matrix)
1.0


True

In [3]:
# Post-process the script exported by TPOT: drop every line that mentions
# 'tpot_data' or 'training_target, testing_target', and keep all other lines.
# The cleaned copy is later executed with `%run -i`, i.e. inside this notebook's namespace.
with open(fileNameTPot + '.py') as f:
    content = f.readlines()

cleanedContent = [
    line for line in content
    if 'tpot_data' not in line and 'training_target, testing_target' not in line
]

fileForLearning = fileNameTPot + '_cleaned.py'
with open(fileForLearning, 'w') as filehandle:
    # Lines returned by readlines() still carry their trailing newline, so they are
    # written back unchanged; appending another '\n' would double-space the script.
    filehandle.writelines(cleanedContent)

In [4]:
# === fold labelled "2 to 7" ==============================
# A row is held out for testing when its index modulo numParticipants is 3, 4 or 5;
# every other row is used for training.
# NOTE(review): the label "2 to 7" does not match the residues actually selected (3..5).
# NOTE(review): the modulus is numParticipants, but the rows appear to be one per
# (proband, task) pair, so the held-out window may not align with whole participants
# -- confirm the intended fold layout.
indecesTrain, indecesTest = [], []
for i in range(len(labels)):
    # Negation of the train condition (residue <= 2 or residue >= 6)
    heldOut = 2 < i % numParticipants < 6
    (indecesTest if heldOut else indecesTrain).append(i)

# Select the rows of each subset from the feature matrix and the label vector
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [5]:
# Number of training rows, then number of testing rows, one per line
print(len(indecesTrain), len(indecesTest), sep='\n')

22
6


In [16]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works
# on the current training_*/testing_* arrays; the next cell expects it to define `results`.
%run -i $fileForLearning

In [17]:
# Running maximum of the per-fold accuracies; this first evaluation cell resets it.
finalAccuracy = 0

# Count the test rows whose predicted label equals the true label.
# `results` is expected to have been set by the preceding %run of the cleaned pipeline.
num_matches = sum(1 for a, b in zip(testing_target, results) if a == b)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Accuracy of this fold in percent
accuracy = num_matches/testing_target.size*100
print('Accuracy: ',accuracy)

# Keep the best accuracy seen so far (maximum over folds, not the mean)
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 6 (of 6 )
Accuary:  100.0


In [18]:
# === fold labelled "4 to 9" ==============================
# A row is held out for testing when its index modulo numParticipants is 5, 6 or 7;
# every other row is used for training.
# NOTE(review): the label "4 to 9" does not match the residues actually selected (5..7).
# NOTE(review): the modulus is numParticipants, but the rows appear to be one per
# (proband, task) pair, so the held-out window may not align with whole participants
# -- confirm the intended fold layout.
indecesTrain, indecesTest = [], []
for i in range(len(labels)):
    # Negation of the train condition (residue <= 4 or residue >= 8)
    heldOut = 4 < i % numParticipants < 8
    (indecesTest if heldOut else indecesTrain).append(i)

# Select the rows of each subset from the feature matrix and the label vector
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [19]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works
# on the current training_*/testing_* arrays; the next cell expects it to define `results`.
%run -i $fileForLearning

In [20]:
# Count the test rows whose predicted label equals the true label.
# `results` is expected to have been set by the preceding %run of the cleaned pipeline.
num_matches = sum(1 for a, b in zip(testing_target, results) if a == b)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Accuracy of this fold in percent
accuracy = num_matches/testing_target.size*100
print('Accuracy: ',accuracy)

# Keep the best accuracy seen so far (maximum over folds, not the mean);
# finalAccuracy must already exist from the first evaluation cell.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 6 (of 6 )
Accuary:  100.0


In [21]:
# === fold labelled "6 to 11" =============================
# A row is held out for testing when its index modulo numParticipants is 7, 8 or 9;
# every other row is used for training.
# NOTE(review): the label "6 to 11" does not match the residues actually selected (7..9).
# NOTE(review): the modulus is numParticipants, but the rows appear to be one per
# (proband, task) pair, so the held-out window may not align with whole participants
# -- confirm the intended fold layout.
indecesTrain, indecesTest = [], []
for i in range(len(labels)):
    # Negation of the train condition (residue <= 6 or residue >= 10)
    heldOut = 6 < i % numParticipants < 10
    (indecesTest if heldOut else indecesTrain).append(i)

# Select the rows of each subset from the feature matrix and the label vector
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [22]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works
# on the current training_*/testing_* arrays; the next cell expects it to define `results`.
%run -i $fileForLearning

In [23]:
# Count the test rows whose predicted label equals the true label.
# `results` is expected to have been set by the preceding %run of the cleaned pipeline.
num_matches = sum(1 for a, b in zip(testing_target, results) if a == b)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Accuracy of this fold in percent
accuracy = num_matches/testing_target.size*100
print('Accuracy: ',accuracy)

# Keep the best accuracy seen so far (maximum over folds, not the mean);
# finalAccuracy must already exist from the first evaluation cell.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 6 (of 6 )
Accuary:  100.0


In [24]:
# === fold labelled "8 to 13" =============================
# A row is held out for testing when its index modulo numParticipants is 9, 10 or 11;
# every other row is used for training.
# NOTE(review): the label "8 to 13" does not match the residues actually selected (9..11).
# NOTE(review): the modulus is numParticipants, but the rows appear to be one per
# (proband, task) pair, so the held-out window may not align with whole participants
# -- confirm the intended fold layout.
indecesTrain, indecesTest = [], []
for i in range(len(labels)):
    # Negation of the train condition (residue <= 8 or residue >= 12)
    heldOut = 8 < i % numParticipants < 12
    (indecesTest if heldOut else indecesTrain).append(i)

# Select the rows of each subset from the feature matrix and the label vector
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [25]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works
# on the current training_*/testing_* arrays; the next cell expects it to define `results`.
%run -i $fileForLearning

In [26]:
# Count the test rows whose predicted label equals the true label.
# `results` is expected to have been set by the preceding %run of the cleaned pipeline.
num_matches = sum(1 for a, b in zip(testing_target, results) if a == b)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Accuracy of this fold in percent
accuracy = num_matches/testing_target.size*100
print('Accuracy: ',accuracy)

# Keep the best accuracy seen so far (maximum over folds, not the mean);
# finalAccuracy must already exist from the first evaluation cell.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 6 (of 6 )
Accuary:  100.0


In [27]:
# === fold labelled "10 to 15" ============================
# A row is held out for testing when its index modulo numParticipants is 10, 11 or 12;
# every other row is used for training.
# NOTE(review): the label "10 to 15" does not match the residues actually selected (10..12).
# NOTE(review): the modulus is numParticipants, but the rows appear to be one per
# (proband, task) pair, so the held-out window may not align with whole participants
# -- confirm the intended fold layout.
indecesTrain, indecesTest = [], []
for i in range(len(labels)):
    # Negation of the train condition (residue <= 9 or residue >= 13)
    heldOut = 9 < i % numParticipants < 13
    (indecesTest if heldOut else indecesTrain).append(i)

# Select the rows of each subset from the feature matrix and the label vector
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [28]:
# Execute the cleaned pipeline script inside this notebook's namespace (-i), so it works
# on the current training_*/testing_* arrays; the next cell expects it to define `results`.
%run -i $fileForLearning

In [29]:
# Count the test rows whose predicted label equals the true label.
# `results` is expected to have been set by the preceding %run of the cleaned pipeline.
num_matches = sum(1 for a, b in zip(testing_target, results) if a == b)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Accuracy of this fold in percent
accuracy = num_matches/testing_target.size*100
print('Accuracy: ',accuracy)

# Keep the best accuracy seen so far (maximum over folds, not the mean);
# finalAccuracy must already exist from the first evaluation cell.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 6 (of 6 )
Accuary:  100.0


In [30]:
# Append this run's result to the shared accuracy log.
# '%' binds tighter than '+', so the label gets its own line and the accuracy value
# follows on the next line.
# NOTE(review): if a single "label value" line was intended, the format needs changing
# -- confirm against whatever reads listOfAccuracies.txt.
with open('listOfAccuracies.txt','a+') as f:
    labelLine = '%s\n' % 'Coarse_Act'
    valueLine = str(finalAccuracy) + '\n'
    f.write(labelLine + valueLine)
