In [1]:
# Pandas is used for data manipulation
import pandas as pd
import numpy as np

# Load the ROI table (semicolon-separated, '.' as the decimal mark).
z_data = pd.read_csv('data_task_groups_roi_deact.csv', sep=';', decimal='.')

# Encode the task letters as integer class labels (alphabetical: C -> 0, R -> 1, S -> 2).
z_data['task'] = z_data['task'].replace({'C': 0, 'R': 1, 'S': 2})

# Discard every column whose name matches '_deact'.
z_data = z_data.drop(columns=list(z_data.filter(regex='_deact')))

# Drop the per-row bookkeeping columns so that only the grouping keys and the
# feature columns remain before averaging.
# NOTE(review): the original note described these four columns as non-numeric - confirm.
z_data = z_data.drop(columns=['scan', 'trial', 'snippet', 'response'])

# One row per (proband, task) holding the mean of every remaining column.
# `.mean()` replaces `.aggregate(np.mean)`, which recent pandas versions deprecate
# (passing a NumPy callable to aggregate emits a FutureWarning).
grouped = z_data.groupby(['proband', 'task'])
groupedAgg = grouped.mean()
labels = groupedAgg.index.get_level_values(level='task')

# Keep the feature names for later use, then switch to plain NumPy arrays.
feature_list = list(groupedAgg.columns)
features = np.array(groupedAgg)
labels = np.array(labels)

# Fixed hold-out split by row position: rows 0..35 train, rows 36..47 test.
# groupby sorts by its keys, so the rows of one proband are contiguous.
# NOTE(review): assumes 48 aggregated rows with three task rows per proband, so
# that the cut at row 36 falls on a proband boundary - confirm against the data.
training_features = np.array(features[0:36])
testing_features = np.array(features[36:48])

training_target = np.array(labels[0:36])
testing_target = np.array(labels[36:48])

In [8]:
# Path prefix (no extension) for the files this notebook writes:
# '<prefix>.py' receives the TPOT export and '<prefix>_cleaned.py' the filtered copy.
fileNameTPot = 'ROIs/tpot_mnist_pipeline_CoarseAverageParticipantSplit_Act'

In [2]:
from tpot import TPOTClassifier

# Search for a classification pipeline with TPOT: 5 generations of 20 candidate
# pipelines each, evaluated with up to 20 parallel jobs.
# random_state pins the stochastic search so that a fresh Restart & Run All
# selects the same pipeline instead of a different one on every run.
tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    verbosity=2,
    n_jobs=20,
    random_state=42,
)
tpot.fit(training_features, training_target)

# Score of the selected pipeline on the held-out rows.
print(tpot.score(testing_features, testing_target))

# Write the selected pipeline out as a standalone Python script.
tpot.export(fileNameTPot + '.py')

Optimization Progress:  33%|███▎      | 40/120 [00:11<02:28,  1.86s/pipeline]

Generation 1 - Current best internal CV score: 0.788888888888889


Optimization Progress:  50%|█████     | 60/120 [00:14<01:14,  1.24s/pipeline]

Generation 2 - Current best internal CV score: 0.788888888888889


Optimization Progress:  67%|██████▋   | 80/120 [00:17<00:34,  1.17pipeline/s]

Generation 3 - Current best internal CV score: 0.8555555555555555


Optimization Progress:  83%|████████▎ | 100/120 [00:26<00:24,  1.22s/pipeline]

Generation 4 - Current best internal CV score: 0.8555555555555555


                                                                              

Generation 5 - Current best internal CV score: 0.8555555555555555

Best pipeline: LogisticRegression(input_matrix, C=5.0, dual=False, penalty=l1)
0.9166666666666666




True

In [9]:
# Read the pipeline script that TPOT exported.
with open(fileNameTPot + '.py') as f:
    content = f.readlines()

# Keep every line except those that mention `tpot_data` or that unpack
# `training_target, testing_target`.
# NOTE(review): presumably these are the export template's own data loading and
# train/test split, removed so the script runs on the notebook's arrays when
# executed with `%run -i` - confirm against the exported file.
cleanedContent = [
    line
    for line in content
    if 'tpot_data' not in line and 'training_target, testing_target' not in line
]

fileForLearning = fileNameTPot + '_cleaned.py'
with open(fileForLearning, 'w') as filehandle:
    # readlines() keeps each line's trailing newline, so the lines are written
    # back verbatim; appending another '\n' would double-space the script.
    filehandle.writelines(cleanedContent)

In [10]:
# Execute the cleaned pipeline script inside the notebook's own namespace (-i).
# The next cell reads `results`, which is presumably assigned by that script
# (its contents are not shown in this notebook).
%run -i $fileForLearning

  return f(*args, **kwds)


In [11]:
# Highest accuracy seen over all evaluated splits; starts at zero.
finalAccuracy = 0

# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 8 (of 12 )
Accuary:  66.66666666666666


In [12]:

# Hold out aggregated rows 3..14 for testing and train on all remaining rows.
# NOTE(review): the author labelled this split "2 to 7"; what those numbers
# count is not evident from the row indices - confirm.
indecesTrain = []
indecesTest = []
for i in range(len(labels)):
    if 2 < i < 15:
        indecesTest.append(i)
    else:
        indecesTrain.append(i)

# Select the matching feature rows and labels for each side of the split.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [13]:
# Show which row indices went to training (first line) and testing (second line).
print(indecesTrain, indecesTest, sep='\n')

[0, 1, 2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


In [14]:
# Re-run the cleaned pipeline script in the notebook's namespace (-i) after the
# train/test arrays were re-assigned; the next cell reads `results`, presumably
# set by that script.
%run -i $fileForLearning

In [15]:
# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 8 (of 12 )
Accuary:  66.66666666666666


In [16]:
# Hold out aggregated rows 9..20 for testing and train on all remaining rows.
# NOTE(review): the author labelled this split "4 to 9"; what those numbers
# count is not evident from the row indices - confirm.
indecesTrain = []
indecesTest = []
for i in range(len(labels)):
    if 8 < i < 21:
        indecesTest.append(i)
    else:
        indecesTrain.append(i)

# Select the matching feature rows and labels for each side of the split.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [17]:
# Re-run the cleaned pipeline script in the notebook's namespace (-i) after the
# train/test arrays were re-assigned; the next cell reads `results`, presumably
# set by that script.
%run -i $fileForLearning

In [18]:
# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 8 (of 12 )
Accuary:  66.66666666666666


In [19]:
# Hold out aggregated rows 15..26 for testing and train on all remaining rows.
# NOTE(review): the author labelled this split "6 to 11"; what those numbers
# count is not evident from the row indices - confirm.
indecesTrain = []
indecesTest = []
for i in range(len(labels)):
    if 14 < i < 27:
        indecesTest.append(i)
    else:
        indecesTrain.append(i)

# Select the matching feature rows and labels for each side of the split.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [20]:
# Re-run the cleaned pipeline script in the notebook's namespace (-i) after the
# train/test arrays were re-assigned; the next cell reads `results`, presumably
# set by that script.
%run -i $fileForLearning

In [21]:
# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 10 (of 12 )
Accuary:  83.33333333333334


In [22]:
# Hold out aggregated rows 21..32 for testing and train on all remaining rows.
# NOTE(review): the author labelled this split "8 to 13"; what those numbers
# count is not evident from the row indices - confirm.
indecesTrain = []
indecesTest = []
for i in range(len(labels)):
    if 20 < i < 33:
        indecesTest.append(i)
    else:
        indecesTrain.append(i)

# Select the matching feature rows and labels for each side of the split.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [23]:
# Re-run the cleaned pipeline script in the notebook's namespace (-i) after the
# train/test arrays were re-assigned; the next cell reads `results`, presumably
# set by that script.
%run -i $fileForLearning

In [24]:
# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 9 (of 12 )
Accuary:  75.0


In [25]:
# Hold out aggregated rows 27..38 for testing and train on all remaining rows.
# NOTE(review): the author labelled this split "10 to 15"; what those numbers
# count is not evident from the row indices - confirm.
indecesTrain = []
indecesTest = []
for i in range(len(labels)):
    if 26 < i < 39:
        indecesTest.append(i)
    else:
        indecesTrain.append(i)

# Select the matching feature rows and labels for each side of the split.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [26]:
# Re-run the cleaned pipeline script in the notebook's namespace (-i) after the
# train/test arrays were re-assigned; the next cell reads `results`, presumably
# set by that script.
%run -i $fileForLearning

In [27]:
# Count the positions at which the prediction equals the true label.
# `results` is expected to come from the script executed in the previous cell.
num_matches = 0
for a, b in zip(testing_target, results):
    num_matches += 1 if a == b else 0
print('Number of matches:', num_matches, '(of', testing_target.size, ')')

# Share of correct predictions in percent (divide first, then scale).
accuracy = num_matches / testing_target.size * 100
print('Accuary: ', accuracy)

# Remember the best split so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 10 (of 12 )
Accuary:  83.33333333333334


In [25]:
# Append this run's result to 'listOfAccuracies.txt'.
# '%' binds tighter than '+', so the tag is formatted on its own line and the
# best split accuracy follows on the next line.
with open('listOfAccuracies.txt', 'a+') as f:
    f.write(''.join([('%s\n' % 'Coarse_Act'), str(finalAccuracy), '\n']))
