In [1]:
# Pandas is used for data manipulation
import pandas as pd
import numpy as np

# Load the semicolon-separated input CSV.
z_data = pd.read_csv('esem_act_deact_zscore_groups.csv',sep=';',decimal='.')

# Discard every column whose name contains '_act' or 'aggr', together with the
# scan/trial/snippet/response columns (described by the original author as
# non-numeric). Only the grouping keys and the columns to be averaged remain.
# A new frame is built instead of mutating the loaded one with inplace=True.
pattern_columns = list(z_data.filter(regex='_act|aggr'))
z_data = z_data.drop(columns=pattern_columns + ['scan', 'trial', 'snippet', 'response'])

# Average every remaining column per (proband, task) pair. groupby sorts by key
# by default, so the aggregated rows are ordered by proband, then task.
grouped = z_data.groupby(['proband', 'task'])
# The string 'mean' selects pandas' own groupby mean. Passing np.mean resolves
# to the same operation but is deprecated in recent pandas (FutureWarning).
groupedAgg = grouped.aggregate('mean')

# The 'task' level of the group index is used as the classification target.
labels = np.array(groupedAgg.index.get_level_values(level='task'))

# Saving feature names for later use
feature_list = list(groupedAgg.columns)
# One row per (proband, task) group, one column per averaged feature.
features = np.array(groupedAgg)

# Initial positional split: rows 0..35 are used for training, rows 36..47 for testing.
# np.array(...) copies each slice so the split arrays do not alias `features`/`labels`.
training_features = np.array(features[0:36])
testing_features = np.array(features[36:48])

training_target = np.array(labels[0:36])
testing_target = np.array(labels[36:48])

In [2]:
# Base path (no extension) for the pipeline script exported by TPOT; later cells
# append '.py' for the exported file and '_cleaned.py' for the filtered copy.
fileNameTPot = 'Deact/tpot_mnist_pipeline_CoarseAverageParticipantSplit_Deact'

In [3]:
from tpot import TPOTClassifier

# Search for a classification pipeline on the current training split.
# random_state pins TPOT's seed so that re-running the notebook on the same data
# selects (and exports) the same pipeline; without it every run may differ.
tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    verbosity=2,
    n_jobs=20,
    random_state=42,
)
tpot.fit(training_features, training_target)

# Score of the selected pipeline on the held-out rows of the current split.
print(tpot.score(testing_features, testing_target))

# Write the selected pipeline out as a standalone Python script.
tpot.export(fileNameTPot + '.py')

Optimization Progress:  33%|███▎      | 10/30 [00:01<00:13,  1.46pipeline/s]

Generation 1 - Current best internal CV score: 0.9111111111111111


Optimization Progress:  50%|█████     | 15/30 [00:02<00:07,  1.88pipeline/s]

Generation 2 - Current best internal CV score: 0.9111111111111111


Optimization Progress:  67%|██████▋   | 20/30 [00:03<00:03,  3.28pipeline/s]

Generation 3 - Current best internal CV score: 0.9111111111111111


Optimization Progress:  83%|████████▎ | 25/30 [00:04<00:01,  2.79pipeline/s]

Generation 4 - Current best internal CV score: 0.9333333333333332


                                                                            

Generation 5 - Current best internal CV score: 0.9333333333333332

Best pipeline: LogisticRegression(MinMaxScaler(input_matrix), C=1.0, dual=False, penalty=l2)
1.0




True

In [4]:
# Read the pipeline script that TPOT exported.
with open(fileNameTPot + '.py') as f:
    content = f.readlines()

# Keep every line except those that mention 'tpot_data' or that contain
# 'training_target, testing_target'. Presumably these are the exported template's
# own data-loading and train/test-split statements, removed so the script uses the
# arrays already defined in this notebook instead — TODO confirm against the export.
cleanedContent = [
    line
    for line in content
    if 'tpot_data' not in line and 'training_target, testing_target' not in line
]

fileForLearning = fileNameTPot + '_cleaned.py'
with open(fileForLearning, 'w') as filehandle:
    for line in cleanedContent:
        # readlines() keeps each line's trailing newline, so write the line as-is;
        # appending another '\n' would insert a blank line after every statement.
        filehandle.write(line)

In [5]:
# Execute the cleaned pipeline script in this notebook's namespace (-i), so it can
# read the current training/testing arrays. The next cell reads `results`, which is
# presumably assigned by that script — TODO confirm.
%run -i $fileForLearning

In [6]:
# Running maximum of the per-split accuracies; starts at zero before the first split.
finalAccuracy = 0

# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [7]:

# Split labelled "2 to 7" (presumably a participant range — TODO confirm):
# rows 3..14 are held out for testing, every other row is used for training.
# NOTE(review): these held-out rows lie inside rows 0..35, which the TPOT search
# cell was fitted on, so the pipeline choice has already seen them — confirm this
# is acceptable for the reported accuracy.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 2 < row < 15]
indecesTrain = [row for row in row_ids if not (2 < row < 15)]

# Select the training rows, then the testing rows, from features and labels alike.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [8]:
# Re-run the cleaned pipeline script in this notebook's namespace (-i) against the
# split defined in the previous cell; `results` is presumably refreshed by it.
%run -i $fileForLearning

In [9]:
# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed across the splits evaluated so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [10]:
# Split labelled "4 to 9" (presumably a participant range — TODO confirm):
# rows 9..20 are held out for testing, every other row is used for training.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 8 < row < 21]
indecesTrain = [row for row in row_ids if not (8 < row < 21)]

# Select the training rows, then the testing rows, from features and labels alike.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)

In [11]:
# Re-run the cleaned pipeline script in this notebook's namespace (-i) against the
# split defined in the previous cell; `results` is presumably refreshed by it.
%run -i $fileForLearning

In [12]:
# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed across the splits evaluated so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 12 (of 12 )
Accuary:  100.0


In [13]:
# Split labelled "6 to 11" (presumably a participant range — TODO confirm):
# rows 15..26 are held out for testing, every other row is used for training.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 14 < row < 27]
indecesTrain = [row for row in row_ids if not (14 < row < 27)]

# Select the training rows, then the testing rows, from features and labels alike.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [14]:
# Re-run the cleaned pipeline script in this notebook's namespace (-i) against the
# split defined in the previous cell; `results` is presumably refreshed by it.
%run -i $fileForLearning

In [15]:
# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed across the splits evaluated so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 10 (of 12 )
Accuary:  83.33333333333334


In [16]:
# Split labelled "8 to 13" (presumably a participant range — TODO confirm):
# rows 21..32 are held out for testing, every other row is used for training.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 20 < row < 33]
indecesTrain = [row for row in row_ids if not (20 < row < 33)]

# Select the training rows, then the testing rows, from features and labels alike.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [17]:
# Re-run the cleaned pipeline script in this notebook's namespace (-i) against the
# split defined in the previous cell; `results` is presumably refreshed by it.
%run -i $fileForLearning

In [18]:
# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed across the splits evaluated so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 9 (of 12 )
Accuary:  75.0


In [19]:
# Split labelled "10 to 15" (presumably a participant range — TODO confirm):
# rows 27..38 are held out for testing, every other row is used for training.
row_ids = range(len(labels))
indecesTest = [row for row in row_ids if 26 < row < 39]
indecesTrain = [row for row in row_ids if not (26 < row < 39)]

# Select the training rows, then the testing rows, from features and labels alike.
training_features = np.take(features, indecesTrain, axis=0)
training_target = np.take(labels, indecesTrain, axis=0)

testing_features = np.take(features, indecesTest, axis=0)
testing_target = np.take(labels, indecesTest, axis=0)


In [20]:
# Re-run the cleaned pipeline script in this notebook's namespace (-i) against the
# split defined in the previous cell; `results` is presumably refreshed by it.
%run -i $fileForLearning

In [21]:
# Count positions where the predicted label equals the held-out label.
# zip() pairs the two sequences positionally and stops at the shorter one.
num_matches = sum(
    1 for expected, predicted in zip(testing_target, results) if expected == predicted
)
print('Number of matches:',num_matches,'(of',testing_target.size,')')

# Share of held-out rows predicted correctly, in percent.
accuracy = num_matches/testing_target.size*100
print('Accuary: ',accuracy)

# Keep the highest accuracy observed across the splits evaluated so far.
finalAccuracy = max(finalAccuracy, accuracy)

Number of matches: 11 (of 12 )
Accuary:  91.66666666666666


In [22]:
# Append this configuration's result to the shared accuracy log.
# finalAccuracy is the highest per-split accuracy recorded above, not an average.
# '%' binds tighter than '+', so the label is formatted on its own line and the
# value follows on the next line.
# NOTE(review): confirm the two-line layout is the intended log format.
label_line = '%s\n' % 'Coarse_Deact'
value_line = str(finalAccuracy) + '\n'
with open('listOfAccuracies.txt','a+') as f:
    f.write(label_line + value_line)
