In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

In [2]:
# Read the four tables written by the previous step (step_04).
# The paths are listed in the same order as the names they are unpacked into.
_step_04_sources = (
    'step_04/clinical_attributes.csv',
    'step_04/z_score.csv',
    'step_04/mutation.csv',
    'step_04/response.csv',
)
clinical_attributes, z_score, mutation, response = map(pd.read_csv, _step_04_sources)

In [3]:
# Target vector: the 'overall_survival' column of the response table as a
# NumPy array.
y = response['overall_survival'].to_numpy()

# (label, feature-matrix) pairs, one per data modality, each converted from
# its DataFrame to a NumPy array.
datasets = [
    (label, frame.to_numpy())
    for label, frame in (
        ("clinical", clinical_attributes),
        ("z_score", z_score),
        ("mutation", mutation),
    )
]

# TPOT (Tree-based Pipeline Optimization Tool)

In [6]:
from tpot import TPOTClassifier



In [10]:
# Run configuration shared by the cells below:
#   seed        - random state passed to the train/test split
#   generations - number of generations given to each TPOT search
seed, generations = 42, 50

In [11]:
# Split all three feature tables and the response in ONE call.
#
# Previously each table was split in its own train_test_split call and the
# y_train / y_test from the clinical split were reused for the other two.
# That only keeps rows aligned as long as every table has exactly the same
# number of rows. Passing every table to a single call makes the shared
# partition explicit: the same row positions are selected from each input,
# and scikit-learn raises a ValueError if the inputs differ in length
# instead of silently pairing features with the wrong labels.
#
# Outputs keep their original names: for each input, a (train, test) pair in
# the order the inputs are passed. 20% of rows are held out for testing.
(
    X_clinical_train, X_clinical_test,
    X_mutation_train, X_mutation_test,
    X_z_score_train, X_z_score_test,
    y_train, y_test,
) = train_test_split(
    clinical_attributes,
    mutation,
    z_score,
    response,
    test_size=0.2,
    random_state=seed,
)

In [12]:
# TPOT search on the clinical features only: fit on the training split,
# print the score on the held-out split, then export the best pipeline found
# as a standalone Python script.
#
# random_state uses the notebook-wide `seed` (the same value given to
# train_test_split) rather than a second hard-coded 42, so changing `seed`
# in the config cell changes the split and the search together.
tpot = TPOTClassifier(
    generations=generations,
    population_size=20,
    verbosity=2,
    random_state=seed,
)
# y_train / y_test are slices of the `response` DataFrame; np.squeeze drops
# length-1 axes before they are handed to TPOT.
# NOTE(review): presumably `response` has a single column, so this yields a
# 1-D target - confirm against step_04/response.csv.
tpot.fit(X_clinical_train, np.squeeze(y_train))
print(tpot.score(X_clinical_test, np.squeeze(y_test)))
tpot.export('step_07/tpot_clinical_pipeline.py')

                                                                              
Generation 1 - Current best internal CV score: 0.6825396825396826
                                                                              
Generation 2 - Current best internal CV score: 0.6899470899470899
                                                                              
Generation 3 - Current best internal CV score: 0.6899470899470899
                                                                               
Generation 4 - Current best internal CV score: 0.692063492063492
                                                                               
Generation 5 - Current best internal CV score: 0.6962962962962963
                                                                                 
Generation 6 - Current best internal CV score: 0.6962962962962963
                                                                               
Generation 7 - Current best internal CV score:

In [13]:
# TPOT search on the mutation features only: fit on the training split,
# print the score on the held-out split, then export the best pipeline found
# as a standalone Python script.
#
# random_state uses the notebook-wide `seed` (the same value given to
# train_test_split) rather than a second hard-coded 42, so changing `seed`
# in the config cell changes the split and the search together.
tpot = TPOTClassifier(
    generations=generations,
    population_size=20,
    verbosity=2,
    random_state=seed,
)
# y_train / y_test are slices of the `response` DataFrame; np.squeeze drops
# length-1 axes before they are handed to TPOT.
# NOTE(review): presumably `response` has a single column, so this yields a
# 1-D target - confirm against step_04/response.csv.
tpot.fit(X_mutation_train, np.squeeze(y_train))
print(tpot.score(X_mutation_test, np.squeeze(y_test)))
tpot.export('step_07/tpot_mutation_pipeline.py')

                                                                                 
Generation 1 - Current best internal CV score: 0.5523809523809523
                                                                                
Generation 2 - Current best internal CV score: 0.5523809523809523
                                                                              
Generation 3 - Current best internal CV score: 0.5523809523809524
                                                                               
Generation 4 - Current best internal CV score: 0.5523809523809524
                                                                                 
Generation 5 - Current best internal CV score: 0.5597883597883597
                                                                               
Generation 6 - Current best internal CV score: 0.5597883597883597
                                                                               
Generation 7 - Current best internal CV 

In [14]:
# TPOT search on the z-score features only: fit on the training split,
# print the score on the held-out split, then export the best pipeline found
# as a standalone Python script.
#
# random_state uses the notebook-wide `seed` (the same value given to
# train_test_split) rather than a second hard-coded 42, so changing `seed`
# in the config cell changes the split and the search together.
tpot = TPOTClassifier(
    generations=generations,
    population_size=20,
    verbosity=2,
    random_state=seed,
)
# y_train / y_test are slices of the `response` DataFrame; np.squeeze drops
# length-1 axes before they are handed to TPOT.
# NOTE(review): presumably `response` has a single column, so this yields a
# 1-D target - confirm against step_04/response.csv.
tpot.fit(X_z_score_train, np.squeeze(y_train))
print(tpot.score(X_z_score_test, np.squeeze(y_test)))
tpot.export('step_07/tpot_z_score_pipeline.py')

                                                                                
Generation 1 - Current best internal CV score: 0.6338624338624339
                                                                                
Generation 2 - Current best internal CV score: 0.6338624338624339
                                                                                
Generation 3 - Current best internal CV score: 0.6338624338624339
                                                                                 
Generation 4 - Current best internal CV score: 0.6380952380952382
                                                                                 
Generation 5 - Current best internal CV score: 0.6380952380952382
                                                                                 
Generation 6 - Current best internal CV score: 0.6380952380952382
                                                                                 
Generation 7 - Current best inter