# Python Review Project


In [247]:
import os

# Report where the kernel currently is, move into the analysis data folder
# (relative to the current directory), then report the new location.
print(f"Current working directory: {os.getcwd()}")

os.chdir('../analysis_data')

print(f"New working directory: {os.getcwd()}")

Current working directory: /mnt/c/Users/Anton/Documents/QBIO490/qbio_490_antonwagner/analysis_data
New working directory: /mnt/c/Users/Anton/Documents/QBIO490/qbio_490_antonwagner/analysis_data


In [248]:
# Set up CPTAC: import the package, fetch the kidney dataset and load it.

import cptac

# View datasets
# NOTE(review): this call is not the last expression in the cell, so any value
# it returns is not displayed; only what the function itself prints is shown.
cptac.list_datasets()

# Download the clear cell renal cell carcinoma (kidney) data set
cptac.download(dataset="Ccrcc")
# Dataset object used by the following cells to pull the individual tables
ccrcc = cptac.Ccrcc()

# Lists the dataframes contained in the dataset with their dimensions
# (see the cell output)
ccrcc.list_data()


Below are the dataframes contained in this dataset and their dimensions:

clinical
	194 rows
	171 columns
CNV
	110 rows
	19285 columns
followup
	352 rows
	27 columns
medical_history
	370 rows
	4 columns
methylation
	107 rows
	15885 columns
phosphoproteomics
	194 rows
	81550 columns
phosphoproteomics_gene
	194 rows
	6127 columns
proteomics
	194 rows
	11710 columns
somatic_mutation
	8350 rows
	3 columns
transcriptomics
	185 rows
	19275 columns


In [249]:
import numpy as np # support for arrays and vectorized operations
import pandas as pd # support for DataFrames and Series
import matplotlib.pyplot as plt # main graphing library
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier # default number of neighbors looked at is 5
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

import warnings
warnings.filterwarnings('ignore')


In [250]:
# get protein data

# Pull the proteomics table out of the CPTAC dataset object.
protein_data = ccrcc.get_proteomics()

# Keep only the first level of the column labels.
# NOTE(review): if the original labels have more than one level, this can leave
# several columns sharing the same name - confirm before selecting by name.
protein_data.columns = protein_data.columns.get_level_values(0)

# Report the table dimensions as (rows, columns).
print(protein_data.shape)

(194, 11710)


In [251]:
# get RNA data

# Transcriptomics table from the CPTAC dataset object
# (rows are Patient_IDs, columns are gene names - see the cell output)
rna_data = ccrcc.get_transcriptomics()
# Dimensions as (rows, columns)
print(rna_data.shape)
# Last expression of the cell: rendered as the (truncated) table preview
rna_data


(185, 19275)


Name,A1BG,A1CF,A2M,A2ML1,A3GALT2,A4GALT,A4GNT,AAAS,AACS,AADAC,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00004,0.995336,16.677828,353.263362,0.046634,0.031027,17.196885,0.896109,15.831130,2.938550,0.019029,...,2.622184,5.415676,5.293887,3.731920,12.541358,2.677696,15.595194,11.401808,9.976986,23.334614
C3L-00010,0.679400,16.682712,359.078446,0.077350,0.068617,13.560508,1.743989,16.690257,3.154143,0.000000,...,2.873604,9.209695,3.669353,2.560578,13.570779,4.097483,15.449647,11.550727,9.432121,25.724814
C3L-00011,0.354549,0.245606,222.075350,0.060736,0.273536,1.321499,0.172369,18.757568,6.942752,0.000000,...,7.998655,28.780560,2.801800,2.503315,10.209840,0.178842,11.670596,11.342045,6.763858,32.090615
C3L-00026,2.543775,16.347532,228.282343,0.085684,0.152020,7.868391,1.448911,17.648610,6.175010,0.031078,...,2.754936,12.639323,5.262024,2.796869,10.718552,0.800663,15.887414,11.788588,8.169953,24.752283
C3L-00079,4.355205,4.858958,275.090167,0.106359,0.000000,6.863003,2.338081,15.480282,4.584445,0.000000,...,7.497914,14.400917,2.907591,2.417113,10.127549,3.442177,12.807428,17.494840,9.733803,24.528238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-01646.N,1.287655,11.076241,228.037113,0.106505,0.066692,8.195971,0.982361,18.086400,10.225658,0.040903,...,2.338694,3.274051,3.500041,3.223773,15.663483,3.219438,17.530849,6.471017,11.860328,25.489666
C3N-01648.N,1.435986,9.772280,291.393930,0.172457,0.214180,8.955806,1.838108,14.597847,6.598100,0.056297,...,1.983803,1.486905,3.865995,3.197000,15.581449,5.774722,17.131855,10.133624,15.256983,22.669748
C3N-01649.N,1.082318,8.378616,249.779349,0.100673,0.160751,7.419323,2.290458,17.666594,11.108717,0.000000,...,2.007450,2.590684,3.805016,2.943559,17.274857,5.856484,16.392044,6.775269,12.881048,25.672959
C3N-01651.N,0.770924,15.539566,273.743429,0.208978,0.185383,12.206999,2.377283,13.946563,6.920178,0.022739,...,1.861395,2.904921,3.089317,2.529058,14.839389,5.801733,18.372506,9.149521,13.918938,24.578885


In [252]:
# get clinical data

# Clinical table from the CPTAC dataset object
# (rows are Patient_IDs, columns are clinical attributes - see the cell output)
clinical_data = ccrcc.get_clinical()
# Dimensions as (rows, columns)
print(clinical_data.shape)
# Last expression of the cell: rendered as the (truncated) table preview.
# NOTE(review): the preview shows patient-level clinical details; check that
# this output is appropriate to share before publishing the notebook.
clinical_data


(194, 171)


Name,Sample_Tumor_Normal,tumor/normal,gender,age,height_in_cm,height_in_inch,weight_in_kg,weight_in_lb,BMI,race,...,histologic_type_of_normal_tissue,slide_is_free_of_tumor,consistent_with_local_pathology_report,findings_not_consistent_with_local_pathology_report,weight_in_mg,minutes_clamp_1_to_collection,minutes_clamp_2_to_collection,minutes_collection_to_frozen,consistent_with_diagnostic_report,patient_medications
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00004,Tumor,TN,Male,72,170.0,67.0,66.0,145.0,22.80,White,...,,,Yes,,484.0,,,21,Yes,"rivaroxaban,tylenol,Aspirin,glycolax,colace,zocor"
C3L-00010,Tumor,TN,Male,30,177.0,70.0,107.0,236.0,34.15,White,...,,,Yes,,575.0,,,17,Yes,"Rivaroxaban,Esomeprazole ,Tramadol"
C3L-00011,Tumor,TN,Female,63,180.0,71.0,89.0,196.0,27.47,White,...,,,Yes,,272.0,,,18,Yes,"Multi Vitamin,Levothyroxine Sodium,Ibandronate..."
C3L-00026,Tumor,TN,Female,65,163.0,64.0,75.0,165.0,28.23,White,...,,,Yes,,212.0,13.0,,13,Yes,"Levothyroxine Sodium,Cyproheptadine HCL,Citrac..."
C3L-00079,Tumor,TN,Male,49,175.0,69.0,116.0,256.0,37.88,White,...,,,Yes,,675.0,,,28,Yes,"ibuprofen,Norco,miralax"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-01646.N,Normal,,,,,,,,,,...,"renal cortex, medulla",Yes,Yes,,256.0,,,20,,
C3N-01648.N,Normal,,,,,,,,,,...,renal cortex,Yes,Yes,,288.0,,,8,,
C3N-01649.N,Normal,,,,,,,,,,...,"renal medulla, pelvis",Yes,Yes,,301.0,,,29,,
C3N-01651.N,Normal,,,,,,,,,,...,"renal cortex, medulla",Yes,Yes,,280.0,,,22,,


In [253]:
# Find the top 5 DE RNA
#
# "DE" here is the absolute difference between the Stage I and Stage III
# group means of log2 expression; no statistical test is applied.

# mask out patient IDs who do not have rna_data (rows follow rna_data's order)
clinical_data_with_rna = clinical_data.loc[rna_data.index, :]

# create boolean row masks (NumPy arrays) for the two clinical feature groups
stageI_mask_with_rna = (clinical_data_with_rna["tumor_stage_pathological"] == "Stage I").to_numpy()
stageIII_mask_with_rna = (clinical_data_with_rna["tumor_stage_pathological"] == "Stage III").to_numpy()

# fail with a clear message instead of a division by zero on an empty group
if not stageI_mask_with_rna.any() or not stageIII_mask_with_rna.any():
    raise ValueError("Need at least one Stage I and one Stage III patient with RNA data")

# clinical rows of each group
stageI_clinical_with_rna = clinical_data_with_rna[stageI_mask_with_rna]
stageIII_clinical_with_rna = clinical_data_with_rna[stageIII_mask_with_rna]

# log2-transform each group's expression; zero values become -inf here
stageI_rna = np.log2(rna_data[stageI_mask_with_rna])
stageIII_rna = np.log2(rna_data[stageIII_mask_with_rna])

# drop every gene that has a NaN or +/-inf value within the group
stageI_rna = stageI_rna.replace([np.inf, -np.inf], np.nan).dropna(axis=1)
stageIII_rna = stageIII_rna.replace([np.inf, -np.inf], np.nan).dropna(axis=1)

# genes that survived the filtering in both groups, in their original column order
shared_rna = stageI_rna.columns[stageI_rna.columns.isin(stageIII_rna.columns)]

# absolute difference of the per-gene group means (vectorized, one value per gene)
rna_DE = (stageI_rna[shared_rna].mean() - stageIII_rna[shared_rna].mean()).abs()

# only strictly positive differences can rank; ties keep the earlier gene
top5_rna = rna_DE[rna_DE > 0].nlargest(5)

# results, largest difference first (these names are used by later cells)
top5_rna_list = top5_rna.index.tolist()
rna_top5_DE = top5_rna.tolist()

# display results
print('Top 5 DE RNAs: ', top5_rna_list)
print('DE values: ', rna_top5_DE)

Top 5 DE RNAs:  ['AJAP1', 'DPEP1', 'GALNT5', 'IL20RB', 'RYR2']
DE values:  [1.7385942919844544, 1.5409691011644857, 1.5137845013254805, 1.4021781083904463, 1.395319936401435]


In [254]:
# Find top 5 DE proteins
#
# "DE" here is the absolute difference between the Stage I and Stage III
# group means of the protein values; no statistical test is applied.

# create boolean row masks (NumPy arrays) for the two clinical feature groups
# NOTE(review): the masks are built from clinical_data and applied by position
# to protein_data, which assumes both tables list the same samples in the same
# order - confirm.
stageI_mask = (clinical_data["tumor_stage_pathological"] == "Stage I").to_numpy()
stageIII_mask = (clinical_data["tumor_stage_pathological"] == "Stage III").to_numpy()

# fail with a clear message instead of a division by zero on an empty group
if not stageI_mask.any() or not stageIII_mask.any():
    raise ValueError("Need at least one Stage I and one Stage III patient with protein data")

# clinical rows of each group
stageI_clinical = clinical_data[stageI_mask]
stageIII_clinical = clinical_data[stageIII_mask]

# protein rows of each group
stageI_protein = protein_data[stageI_mask]
stageIII_protein = protein_data[stageIII_mask]

# turn +/-inf into NaN, then drop every protein with a missing value in the group.
# NOTE(review): the previous version indexed the frames with 2-D NumPy boolean
# arrays; DataFrame[...] does not treat such a key as a cell-wise mask (that is
# DataFrame.where / DataFrame.mask), so this now uses replace(), matching the
# RNA cell.
stageI_protein = stageI_protein.replace([np.inf, -np.inf], np.nan).dropna(axis="columns")
stageIII_protein = stageIII_protein.replace([np.inf, -np.inf], np.nan).dropna(axis="columns")

# a name that still maps to several columns is ambiguous, so it is skipped.
# NOTE(review): presumably this is the case the old bare `except:` was silently
# swallowing - confirm; it is now handled explicitly and nothing else is hidden.
stageI_protein = stageI_protein.loc[:, ~stageI_protein.columns.duplicated(keep=False)]
stageIII_protein = stageIII_protein.loc[:, ~stageIII_protein.columns.duplicated(keep=False)]

# proteins that survived the filtering in both groups, in their original column order
shared_proteins = stageI_protein.columns[stageI_protein.columns.isin(stageIII_protein.columns)]

# absolute difference of the per-protein group means (vectorized, one value per protein)
protein_DE = (stageI_protein[shared_proteins].mean() - stageIII_protein[shared_proteins].mean()).abs()

# only strictly positive differences can rank; ties keep the earlier protein
top5_protein = protein_DE[protein_DE > 0].nlargest(5)

# results, largest difference first (these names are used by later cells)
top5_protein_list = top5_protein.index.tolist()
protein_top5_DE = top5_protein.tolist()

# display results
print('Top 5 DE proteins: ', top5_protein_list)
print('DE values: ', protein_top5_DE)

Top 5 DE proteins:  ['FTL', 'HBZ', 'HBA2', 'CMA1', 'HBB']
DE values:  [0.8621652797318512, 0.6034531374509484, 0.5891002256474992, 0.5834638847268123, 0.5573026444527898]


In [255]:
# Build the ML inputs: x_data (expression of the top 5 RNAs and top 5 proteins)
# and y_data (pathological tumor stage), restricted to Stage I / Stage III.
#
# Top 5 RNA (from the recorded run): 'AJAP1', 'DPEP1', 'GALNT5', 'IL20RB', 'RYR2'
# Top 5 Protein (from the recorded run): 'FTL', 'HBZ', 'HBA2', 'CMA1', 'HBB'

TARGET_STAGES = ['Stage I', 'Stage III']

# apply log2 to rna_data, then drop every gene with a NaN or +/-inf value
rna_data_copy = np.log2(rna_data).replace([np.inf, -np.inf], np.nan).dropna(axis=1)
rna_x = rna_data_copy.loc[:, top5_rna_list]

# drop every protein with a NaN or +/-inf value
protein_data_copy = protein_data.replace([np.inf, -np.inf], np.nan).dropna(axis=1)
protein_x = protein_data_copy.loc[:, top5_protein_list]

# patient IDs present in the clinical, RNA and protein tables (clinical_data's row order)
in_all_tables = clinical_data.index.isin(rna_x.index) & clinical_data.index.isin(protein_x.index)
shared_ids = clinical_data.index[in_all_tables]

# assemble all rows at once instead of concatenating one row per loop iteration
features = pd.concat([rna_x.loc[shared_ids], protein_x.loc[shared_ids]], axis=1)
stage = clinical_data.loc[shared_ids, 'tumor_stage_pathological']

# one shared row filter keeps x_data and y_data aligned row for row:
# keep target stages only, and only rows whose features are all finite
is_target_stage = stage.isin(TARGET_STAGES)
is_complete = features.replace([np.inf, -np.inf], np.nan).notna().all(axis=1)
keep = (is_target_stage & is_complete).to_numpy()

x_data = features.loc[keep]
# single-column DataFrame named 'tumor_stage_pathological', as later cells expect
y_data = stage.loc[keep].to_frame()

# the two tables must describe exactly the same patients, in the same order
assert x_data.index.equals(y_data.index)

print(x_data)
print(y_data)

              AJAP1     DPEP1    GALNT5    IL20RB      RYR2       FTL  \
C3L-00004 -2.519207 -1.239811 -5.281123  2.685910 -0.721420 -1.724339   
C3L-00010 -0.069282  4.515644 -1.685391  0.394540 -2.134346 -0.363228   
C3L-00026  0.177869  4.635430  2.729468  1.139609 -1.565029  1.301930   
C3L-00079 -1.161677  0.376214 -3.413559  0.441469 -0.421905 -1.496648   
C3L-00088 -1.951992  0.384952 -0.749469  0.716994 -1.197209 -1.895355   
...             ...       ...       ...       ...       ...       ...   
C3N-01361  1.600187  1.924839  0.110036  0.307398 -1.436694 -0.461986   
C3N-01522 -1.398691  2.166432 -6.167662  0.298801  2.159864 -1.675052   
C3N-01646 -1.251963 -0.946572 -0.635367  7.399686 -0.645290 -1.817120   
C3N-01649 -0.097150  3.354027 -1.442680 -0.220637  2.057367 -1.066512   
C3N-01808 -0.590156  4.861775 -2.466955  0.540075 -1.611685  1.895297   

                HBZ      HBA2      CMA1       HBB  
C3L-00004 -0.370098 -0.786307 -0.918273 -0.762287  
C3L-00010  0.240576

In [256]:
# Standardize the feature columns of x_data.
# NOTE(review): the scaler is fit on every row here, and the train/test split
# in the next cell is made on the already-scaled data, so test rows influence
# the scaling - consider fitting the scaler on the training rows only.
scaler = StandardScaler()
scaled_x_data = scaler.fit_transform(x_data)

# Stage labels as a plain Python list, in y_data's row order
y_data_list = y_data['tumor_stage_pathological'].tolist()

In [257]:
# Test four classifiers on repeated random train/test splits and report the
# mean test accuracy of each.

N_SIMULATIONS = 10    # number of random train/test splits
TRAIN_FRACTION = 0.7  # share of the rows used for training in each split
SEED = 0              # base seed so that re-running the cell reproduces the numbers

classifiers_names = [
    'KNeighborsClassifier',
    'DecisionTreeClassifier',
    'MLPClassifier',
    'GaussianNB'
]

# the tree and the MLP are seeded because they are constructed with their own
# random state; the other two take no seed
classifiers = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(random_state=SEED),
    MLPClassifier(random_state=SEED),
    GaussianNB()
]

# accuracies per classifier, keyed by the classifier's position in `classifiers`
classifiers_perf = {i: [] for i in range(len(classifiers))}

# run prediction models N_SIMULATIONS times
for n in range(N_SIMULATIONS):

    # Train-test split: a different but reproducible split for every simulation
    X_train, X_test, y_train, y_test = train_test_split(
        scaled_x_data, y_data_list, train_size=TRAIN_FRACTION, random_state=SEED + n
    )

    # run all four classifiers on the same split
    for i, model in enumerate(classifiers):
        model.fit(X_train, y_train)
        # score() on a classifier is the fraction of correctly predicted test rows
        accuracy = model.score(X_test, y_test)
        classifiers_perf[i].append(accuracy)  # record accuracy

    # display progress (intervals of two)
    if ((n + 1) % 2) == 0:
        print(f'Completed {n+1}/{N_SIMULATIONS} simulations')

# calculate average accuracies and print
print(f'\nAfter {N_SIMULATIONS} simulations, the average accuracy of each classifier is as follows:')
for i, name in enumerate(classifiers_names):
    print(f'\t{name} : {np.mean(classifiers_perf[i])}')



Completed 2/10 simulations
Completed 4/10 simulations
Completed 6/10 simulations
Completed 8/10 simulations
Completed 10/10 simulations

After 10 simulations, the average accuracy of each classifier is as follows:
	KNeighborsClassifier : 0.7269230769230769
	DecisionTreeClassifier : 0.6230769230769231
	MLPClassifier : 0.7269230769230769
	GaussianNB : 0.773076923076923


In [None]:
# KNeighborsClassifier, MLPClassifier, and GaussianNB all seem to have an average accuracy between 0.7 and 0.8 after 10 simulations.
# DecisionTreeClassifier seems to be the lowest, with an average accuracy between 0.55 and 0.7.