In [20]:
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [9]:
# Redefine path when not using Nansen
df = pd.read_csv('/Users/david.hedges/projects/shodair/gsa_uncommented.vcf', sep='\t')
df_labels = pd.read_excel('/Users/david.hedges/projects/shodair/regDocs/Copy of PGX risperidone only Erica7_23_2019.xlsx')

In [10]:
# Encode data as 0,1,2 and clear out non-genetic data
df = df.fillna(0)
df = df.replace('0/0',0)
df = df.replace('0/1',1)
df = df.replace('1/1',2)
df = df.replace('./.',0)
df = df.drop(columns=['#CHROM','POS','REF','ALT','QUAL','FILTER','INFO','FORMAT'])

In [11]:
# Rename Headers in VCF to only have Shodair ID
header = list(df)
vals = []
for i in header:
    try:
        x = re.search(r'SCH-(\d+)', i).group(0)
        vals.append(x)
    except:
        vals.append(i)

column_dict = dict(zip(header,vals))
df = df.rename(index=str, columns=column_dict)


# Reset Index to be Shodair ID and transpose dataframe
df = df.set_index('ID')
df_T = df.T
df_T.shape


# Rename headers in labels to be identical to headers in VCF
ID = ['SCH-'+str(i) for i in df_labels['Random ID']]
df_labels['ID'] = ID

# Clean out unnecessary headers
df_labels = df_labels.drop(columns=['Random ID','Plate','Sample Order','Plate ID Location','Gender','Concentration',
                         '260/2030 via nanodrop','Enzyme','Gene1','Gene2','Phenotype','Race','COMMENT',
                         'Allele Activity Score 1','Allele Activity Score 2','Diplotype Activity Score',
                         'Metabollizer Status'])
df_labels = df_labels.set_index('ID')
df_labels

# Combine dataframes to get drug info in dataframe 
df_merged = df_T.join(df_labels)
df_merged

Unnamed: 0,GSA-rs114420996,rs10458597,ilmnseq_rs9701296,rs9701055,GSA-rs9283150,GSA-rs9326622,ilmnseq_rs9651229_F2BT,ilmnseq_rs9701872,ilmnseq_rs9701872_ilmndup1,ilmnseq_rs11497407,...,chrM_16465,chrM_16465_ilmndup1,200610-37,rs3937033,ilmnseq_rs386829315,ilmnseq_rs386829316,Aripiprazole,Olanzapine,Quetiapine,Risperidone
SCH-845,0,0,0,0,0,0,0,0,0,0,...,0,0,0,2,0,0,1,1,1,1
SCH-31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
SCH-463,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
SCH-253,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
SCH-522,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
SCH-794,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
SCH-187,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1
SCH-615,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
SCH-911,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
SCH-807,0,0,0,0,0,0,0,0,0,0,...,0,0,2,2,0,0,0,0,1,1


In [13]:
# Select features to learn from (genotyping only)
drugs = ['Aripiprazole','Olanzapine','Quetiapine','Risperidone']
X = df_merged.drop(columns=drugs)

# Iterate through drugs as outcome features
for i in drugs:
    Y = df_merged[i]
    X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(X_train,y_train)
    y_pred = rf.predict(X_test)
    score = accuracy_score(y_test,y_pred)
    print('Outcome: '+str(i))
    print('Classifier: '+'RandomForest')
    print('Accuracy Score: '+str(score))
    print('----------------------------------')

Outcome: Aripiprazole
Classifier: RandomForest
Accuracy Score: 0.4117647058823529
----------------------------------
Outcome: Olanzapine
Classifier: RandomForest
Accuracy Score: 0.7647058823529411
----------------------------------
Outcome: Quetiapine
Classifier: RandomForest
Accuracy Score: 0.5588235294117647
----------------------------------
Outcome: Risperidone
Classifier: RandomForest
Accuracy Score: 1.0
----------------------------------


In [15]:
# Basic Optimization using RandomSearchCV

n_estimators = [int(i) for i in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['auto', 'sqrt']
max_depth = [int(i) for i in np.linspace(10, 110, num = 11)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}


In [16]:
for i in drugs:
    Y = df_merged[i]
    X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
    rf = RandomForestClassifier()
    rf_rand = RandomizedSearchCV(estimator=rf,
                                 param_distributions=random_grid,
                                 n_iter=100,
                                 cv=3,
                                 verbose=10,
                                 random_state=42,
                                 n_jobs=4)
    rf_rand.fit(X_train,y_train)
    print('Outcome: '+str(i))
    print('Classifier: '+'RandomForest')
    print(rf_rand.best_params_)
    print('----------------------------------')

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:   22.9s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   35.8s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   48.4s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   59.7s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:  2.9min
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  3.4min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  3.8min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  4.2min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  4.5min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  5.1min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  5.9min
[Parallel(

Outcome: Aripiprazole
Classifier: RandomForest
{'n_estimators': 800, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 30, 'bootstrap': False}
----------------------------------


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:   14.6s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   27.7s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   41.0s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   53.1s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  3.7min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  4.3min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  4.9min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  5.5min
[Parallel(

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Outcome: Olanzapine
Classifier: RandomForest
{'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 30, 'bootstrap': True}
----------------------------------
Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:   14.8s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   28.0s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   41.5s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   52.2s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  3.3min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  3.8min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  4.2min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  4.6min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  5.2min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  5.9min
[Parallel(

Outcome: Quetiapine
Classifier: RandomForest
{'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'auto', 'max_depth': 10, 'bootstrap': True}
----------------------------------
Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    9.3s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   14.9s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   22.4s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   29.0s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   38.1s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   49.2s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   59.9s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  3.2min
[Parallel(

Outcome: Risperidone
Classifier: RandomForest
{'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 30, 'bootstrap': True}
----------------------------------


In [17]:
print('Randomly Optimized Models')
print('*************************')
print('')
Y = df_merged['Aripiprazole']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(n_estimators=800, min_samples_split=5, min_samples_leaf=4, max_features='sqrt', 
                            max_depth=30, bootstrap=False)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Aripiprazole')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Y = df_merged['Olanzapine']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(n_estimators=400, min_samples_split=5, min_samples_leaf=1, max_features='sqrt',
                            max_depth=30, bootstrap=True)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Olanzapine')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Y = df_merged['Quetiapine']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(n_estimators=200, min_samples_split=5, min_samples_leaf=4, max_features='auto', 
                            max_depth=10, bootstrap=True)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Quetiapine')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Randomly Optimized Models
*************************

Outcome: Aripiprazole
Accuracy Score: 0.5
----------------------------------
Outcome: Olanzapine
Accuracy Score: 0.7647058823529411
----------------------------------
Outcome: Quetiapine
Accuracy Score: 0.5
----------------------------------


In [23]:
# Basic Optimization using GridSearchCV

param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]}

for i in drugs:
    Y = df_merged[i]
    X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
    rf = RandomForestClassifier()
    rf_grid = GridSearchCV(estimator=rf,
                                 param_grid=param_grid,
                                 cv=3,
                                 verbose=10,
                                 n_jobs=4)
    rf_grid.fit(X_train,y_train)
    print('Outcome: '+str(i))
    print('Classifier: '+'RandomForest')
    print(rf_grid.best_params_)
    print('----------------------------------')

Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    6.4s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   10.7s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   14.8s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   20.1s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   26.1s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   33.4s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   41.5s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   50.0s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:   59.6s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  2.2min
[Parallel(

Outcome: Aripiprazole
Classifier: RandomForest
{'bootstrap': True, 'max_depth': 80, 'max_features': 3, 'min_samples_leaf': 4, 'min_samples_split': 8, 'n_estimators': 100}
----------------------------------
Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    5.5s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    9.7s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   14.1s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   19.9s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   25.8s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   32.3s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   40.5s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   48.7s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:   58.0s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  2.1min
[Parallel(

Outcome: Olanzapine
Classifier: RandomForest
{'bootstrap': True, 'max_depth': 80, 'max_features': 2, 'min_samples_leaf': 3, 'min_samples_split': 8, 'n_estimators': 100}
----------------------------------
Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   10.1s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   14.7s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   20.3s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   26.4s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   33.4s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   41.9s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   50.5s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:   60.0s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  2.2min
[Parallel(

Outcome: Quetiapine
Classifier: RandomForest
{'bootstrap': True, 'max_depth': 90, 'max_features': 3, 'min_samples_leaf': 5, 'min_samples_split': 8, 'n_estimators': 100}
----------------------------------
Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   10.2s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   14.6s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   20.4s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   26.8s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   33.8s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   42.3s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   50.6s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  2.2min
[Parallel(

Outcome: Risperidone
Classifier: RandomForest
{'bootstrap': True, 'max_depth': 80, 'max_features': 2, 'min_samples_leaf': 3, 'min_samples_split': 8, 'n_estimators': 100}
----------------------------------


In [24]:
print('Grid Search Optimized Models')
print('*************************')
print('')
Y = df_merged['Aripiprazole']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(bootstrap=True, max_depth=80, max_features=3, min_samples_leaf=4,
                            min_samples_split=8, n_estimators=100)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Aripiprazole')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Y = df_merged['Olanzapine']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(bootstrap=True, max_depth=80, max_features=2, min_samples_leaf=3,
                            min_samples_split=8, n_estimators=100)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Olanzapine')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Y = df_merged['Quetiapine']
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
rf = RandomForestClassifier(bootstrap=True, max_depth=90, max_features=3, min_samples_leaf=5,
                            min_samples_split=8, n_estimators=100)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
score = accuracy_score(y_test,y_pred)
print('Outcome: Quetiapine')
print('Accuracy Score: '+str(score))
print('----------------------------------')

Grid Search Optimized Models
*************************

Outcome: Aripiprazole
Accuracy Score: 0.5588235294117647
----------------------------------
Outcome: Olanzapine
Accuracy Score: 0.7647058823529411
----------------------------------
Outcome: Quetiapine
Accuracy Score: 0.6470588235294118
----------------------------------
