In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [37]:
df=pd.read_csv('../DataCleaning/train_imputed.csv')

df.columns

Index(['admission_age', 'temperature', 'heartrate', 'resprate', 'o2sat', 'sbp',
       'dbp', 'acuity', 'stay_length_minutes', 'pain_cleaned_advanced',
       'gender_F', 'gender_M', 'arrival_transport_AMBULANCE',
       'arrival_transport_HELICOPTER', 'arrival_transport_OTHER',
       'arrival_transport_UNKNOWN', 'arrival_transport_WALK IN',
       'race_condensed_AMERICAN INDIAN/ALASKA NATIVE', 'race_condensed_ASIAN',
       'race_condensed_BLACK', 'race_condensed_HISPANIC/LATINO',
       'race_condensed_Missing',
       'race_condensed_NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER',
       'race_condensed_OTHER', 'race_condensed_White'],
      dtype='object')

In [38]:
df_pca=np.load('../chief_complaint_data/train_pca.npy')
cc_columns= [f'cc_{i}' for i in range(100)]
df[cc_columns]=df_pca

  df[cc_columns]=df_pca


In [43]:
features=['admission_age', 'temperature', 'heartrate', 'resprate', 'o2sat', 'sbp',
       'dbp', 'acuity', 'pain_cleaned_advanced',
       'gender_F', 'gender_M', 'arrival_transport_AMBULANCE',
       'arrival_transport_HELICOPTER', 'arrival_transport_OTHER',
       'arrival_transport_UNKNOWN', 'arrival_transport_WALK IN',
       'race_condensed_AMERICAN INDIAN/ALASKA NATIVE', 'race_condensed_ASIAN',
       'race_condensed_BLACK', 'race_condensed_HISPANIC/LATINO',
       'race_condensed_Missing',
       'race_condensed_NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER',
       'race_condensed_OTHER', 'race_condensed_White'
       ]+cc_columns

triage_physical_features=['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp',
       'dbp']
triage_other_features=['pain_cleaned_advanced','acuity', 'arrival_transport_AMBULANCE',
       'arrival_transport_HELICOPTER', 'arrival_transport_OTHER',
       'arrival_transport_UNKNOWN', 'arrival_transport_WALK IN']
demographic_features=['admission_age',
       'gender_F', 'gender_M',
       'race_condensed_AMERICAN INDIAN/ALASKA NATIVE', 'race_condensed_ASIAN',
       'race_condensed_BLACK', 'race_condensed_HISPANIC/LATINO',
       'race_condensed_Missing',
       'race_condensed_NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER',
       'race_condensed_OTHER', 'race_condensed_White']

In [39]:
from sklearn.model_selection import KFold

kfold = KFold(n_splits = 5,
              shuffle = True,
              random_state = 111)


In [None]:
#Trying out linear regression with various predictor variable sets.

from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error

mean=np.mean(df['stay_length_minutes'])
mlr_phys = LinearRegression()
mlr_phys_other=LinearRegression()
mlr_phys_other_dem=LinearRegression()
mlr_all=LinearRegression()

#rmses will hold the cross validation root mean squared errors of each model. 
rmses = np.zeros((5, 5))

for i, (train_index, test_index) in enumerate(kfold.split(df)):
    ## get the kfold training data
    X_train_train = df[features].iloc[train_index,:]
    y_train_train = df['stay_length_minutes'].iloc[train_index]
    
    ## get the holdout data
    X_holdout = df[features].iloc[test_index,:]
    y_holdout = df['stay_length_minutes'].iloc[test_index]

    ## Fit models
    mlr_phys.fit(X_train_train[triage_physical_features], y_train_train)
    mlr_phys_other.fit(X_train_train[triage_physical_features+triage_other_features], y_train_train)
    mlr_phys_other_dem.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)
    mlr_all.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features+cc_columns], y_train_train)
    
    ## Use models to generate predictions on the holdout set
    mean_preds = mean*np.ones(len(y_holdout))
    mlr_phys_preds = mlr_phys.predict(X_holdout[triage_physical_features])
    mlr_phys_other_preds = mlr_phys_other.predict(X_holdout[triage_physical_features+triage_other_features])
    mlr_phys_other_dem_preds = mlr_phys_other_dem.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    mlr_all_preds = mlr_all.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features+cc_columns])


    ## Record the rmses
    rmses[0,i] = root_mean_squared_error(y_holdout, mean_preds)
    rmses[1,i] = root_mean_squared_error(y_holdout, mlr_phys_preds)
    rmses[2,i] = root_mean_squared_error(y_holdout, mlr_phys_other_preds)
    rmses[3,i] = root_mean_squared_error(y_holdout, mlr_phys_other_dem_preds)
    rmses[4,i] = root_mean_squared_error(y_holdout, mlr_all_preds)

rmses

array([[1.00183184, 1.00403026, 1.00182546, 0.98867385, 1.00355666],
       [0.99812281, 1.00133898, 0.99832365, 0.98559571, 1.00046329],
       [0.97253621, 0.97361566, 0.97287354, 0.95961193, 0.97224261],
       [0.96859696, 0.96977646, 0.96917978, 0.95586109, 0.96837689],
       [0.93349465, 0.93194395, 0.93080743, 0.91640876, 0.93057846]])

In [45]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.compose import ColumnTransformer

In [46]:
# Create a ColumnTransformer
polyprocessor = ColumnTransformer(
    transformers=[
        ('poly', PolynomialFeatures(degree=2), triage_physical_features+triage_other_features),
        ('passthrough', 'passthrough', demographic_features+cc_columns)
    ],
    remainder='drop')

In [None]:
#Adding in polynomial terms up to degree 3 for the non-chief-complaint terms.

mean=np.mean(df['stay_length_minutes'])
mlr_phys = LinearRegression()
mlr_phys_other=LinearRegression()
mlr_phys_other_dem=LinearRegression()
mlr_all=LinearRegression()
mlr_poly=LinearRegression()

#rmses will hold the cross validation root mean squared errors of each model. 
rmses = np.zeros((6, 5))

for i, (train_index, test_index) in enumerate(kfold.split(df)):
    ## get the kfold training data
    X_train_train = df[features].iloc[train_index,:]
    y_train_train = df['stay_length_minutes'].iloc[train_index]
    X_train_transformed=polyprocessor.fit_transform(X_train_train)
    
    ## get the holdout data
    X_holdout = df[features].iloc[test_index,:]
    y_holdout = df['stay_length_minutes'].iloc[test_index]
    X_holdout_transformed=polyprocessor.fit_transform(X_holdout)

    ## Fit models
    mlr_phys.fit(X_train_train[triage_physical_features], y_train_train)
    mlr_phys_other.fit(X_train_train[triage_physical_features+triage_other_features], y_train_train)
    mlr_phys_other_dem.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)
    mlr_all.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features+cc_columns], y_train_train)
    mlr_poly.fit(X_train_transformed, y_train_train)

    ## Use models to generate predictions on the holdout set
    mean_preds = mean*np.ones(len(y_holdout))
    mlr_phys_preds = mlr_phys.predict(X_holdout[triage_physical_features])
    mlr_phys_other_preds = mlr_phys_other.predict(X_holdout[triage_physical_features+triage_other_features])
    mlr_phys_other_dem_preds = mlr_phys_other_dem.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    mlr_all_preds = mlr_all.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features+cc_columns])
    mlr_poly_preds=mlr_poly.predict(X_holdout_transformed)

    ## Record the rmses
    rmses[0,i] = root_mean_squared_error(y_holdout, mean_preds)
    rmses[1,i] = root_mean_squared_error(y_holdout, mlr_phys_preds)
    rmses[2,i] = root_mean_squared_error(y_holdout, mlr_phys_other_preds)
    rmses[3,i] = root_mean_squared_error(y_holdout, mlr_phys_other_dem_preds)
    rmses[4,i] = root_mean_squared_error(y_holdout, mlr_all_preds)
    rmses[5,i] = root_mean_squared_error(y_holdout, mlr_poly_preds)

rmses

array([[1.00183184, 1.00403026, 1.00182546, 0.98867385, 1.00355666],
       [0.99812281, 1.00133898, 0.99832365, 0.98559571, 1.00046329],
       [0.97253621, 0.97361566, 0.97287354, 0.95961193, 0.97224261],
       [0.96859696, 0.96977646, 0.96917978, 0.95586109, 0.96837689],
       [0.93349465, 0.93194395, 0.93080743, 0.91640876, 0.93057846],
       [0.9174154 , 0.91518426, 0.91069693, 0.90204174, 0.91144705]])

In [52]:
np.mean(rmses, axis=1)

array([0.99998361, 0.99676889, 0.97017599, 0.96635824, 0.92864665,
       0.91135708])

In [53]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
#Applying decision trees with a range of max depths.

tree2 = DecisionTreeRegressor(max_depth=2, random_state=108)
tree2_nocc = DecisionTreeRegressor(max_depth=2, random_state=108)
tree5 = DecisionTreeRegressor(max_depth=5, random_state=108)
tree5_nocc = DecisionTreeRegressor(max_depth=5, random_state=108)
tree10 = DecisionTreeRegressor(max_depth=10, random_state=108)
tree10_nocc = DecisionTreeRegressor(max_depth=10, random_state=108)

rmses = np.zeros((7, 5))

for i, (train_index, test_index) in enumerate(kfold.split(df)):
    ## get the kfold training data
    X_train_train = df[features].iloc[train_index,:]
    y_train_train = df['stay_length_minutes'].iloc[train_index]
    
    ## get the holdout data
    X_holdout = df[features].iloc[test_index,:]
    y_holdout = df['stay_length_minutes'].iloc[test_index]

    ## Fit models
    tree2.fit(X_train_train, y_train_train)
    tree2_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features],y_train_train)
    tree5.fit(X_train_train, y_train_train)
    tree5_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)
    tree10.fit(X_train_train, y_train_train)
    tree10_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)

    ## Use models to generate predictions on the holdout set
    mean_preds = mean*np.ones(len(y_holdout))
    tree2_preds = tree2.predict(X_holdout)
    tree2_nocc_preds=tree2_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    tree5_preds = tree5.predict(X_holdout)
    tree5_nocc_preds=tree5_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    tree10_preds = tree10.predict(X_holdout)
    tree10_nocc_preds=tree10_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    

    ## Record the rmses
    rmses[0,i] = root_mean_squared_error(y_holdout, mean_preds)
    rmses[1,i] = root_mean_squared_error(y_holdout, tree2_preds)
    rmses[2,i]=root_mean_squared_error(y_holdout, tree2_nocc_preds)
    rmses[3,i] = root_mean_squared_error(y_holdout, tree5_preds)
    rmses[4,i] = root_mean_squared_error(y_holdout, tree5_nocc_preds)
    rmses[5,i] = root_mean_squared_error(y_holdout, tree10_preds)
    rmses[6,i] = root_mean_squared_error(y_holdout, tree10_nocc_preds)


rmses


array([[1.00183184, 1.00403026, 1.00182546, 0.98867385, 1.00355666],
       [0.94999768, 0.94783958, 0.94585772, 0.93617221, 0.94363494],
       [0.94999768, 0.94783958, 0.94585772, 0.93617221, 0.94363494],
       [0.88142499, 0.88222369, 0.88233063, 0.8753496 , 0.88258166],
       [0.86943762, 0.87109872, 0.86894691, 0.86147213, 0.86848175],
       [0.77082506, 0.76955162, 0.76735617, 0.76565038, 0.76195427],
       [0.70438008, 0.71555458, 0.7106086 , 0.7004532 , 0.70405932]])

In [None]:
#More decision trees for different depths. This time I also compare performance on 
# the test set to the training set in the cross validation.

tree5 = DecisionTreeRegressor(max_depth=5, random_state=108)
tree5_nocc = DecisionTreeRegressor(max_depth=5, random_state=108)
tree10 = DecisionTreeRegressor(max_depth=10, random_state=108)
tree10_nocc = DecisionTreeRegressor(max_depth=10, random_state=108)
tree20 = DecisionTreeRegressor(max_depth=20, random_state=108)
tree20_nocc = DecisionTreeRegressor(max_depth=20, random_state=108)

rmses = np.zeros((12, 5))

for i, (train_index, test_index) in enumerate(kfold.split(df)):
    ## get the kfold training data
    X_train_train = df[features].iloc[train_index,:]
    y_train_train = df['stay_length_minutes'].iloc[train_index]
    
    ## get the holdout data
    X_holdout = df[features].iloc[test_index,:]
    y_holdout = df['stay_length_minutes'].iloc[test_index]

    ## Fit models
    tree5.fit(X_train_train, y_train_train)
    tree5_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)
    tree10.fit(X_train_train, y_train_train)
    tree10_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)
    tree20.fit(X_train_train, y_train_train)
    tree20_nocc.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)


    ## Use models to generate predictions on the holdout set
    tree5_preds = tree5.predict(X_holdout)
    tree5_preds_train = tree5.predict(X_train_train)
    tree5_nocc_preds=tree5_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    tree5_nocc_preds_train=tree5_nocc.predict(X_train_train[triage_physical_features+triage_other_features+demographic_features])
    tree10_preds = tree10.predict(X_holdout)
    tree10_preds_train = tree10.predict(X_train_train)
    tree10_nocc_preds=tree10_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    tree10_nocc_preds_train=tree10_nocc.predict(X_train_train[triage_physical_features+triage_other_features+demographic_features])
    tree20_preds = tree20.predict(X_holdout)
    tree20_preds_train = tree20.predict(X_train_train)
    tree20_nocc_preds=tree20_nocc.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
    tree20_nocc_preds_train=tree20_nocc.predict(X_train_train[triage_physical_features+triage_other_features+demographic_features])
    

    ## Record the rmses
    rmses[0,i] = root_mean_squared_error(y_holdout, tree5_preds)
    rmses[1,i] = root_mean_squared_error(y_train_train, tree5_preds_train)
    rmses[2,i] = root_mean_squared_error(y_holdout, tree5_nocc_preds)
    rmses[3,i] = root_mean_squared_error(y_train_train, tree5_nocc_preds_train)
    rmses[4,i] = root_mean_squared_error(y_holdout, tree10_preds)
    rmses[5,i] = root_mean_squared_error(y_train_train, tree10_preds_train)
    rmses[6,i] = root_mean_squared_error(y_holdout, tree10_nocc_preds)
    rmses[7,i] = root_mean_squared_error(y_train_train, tree10_nocc_preds_train)
    rmses[8,i] = root_mean_squared_error(y_holdout, tree20_preds)
    rmses[9,i] = root_mean_squared_error(y_train_train, tree20_preds_train)
    rmses[10,i] = root_mean_squared_error(y_holdout, tree20_nocc_preds)
    rmses[11,i] = root_mean_squared_error(y_train_train, tree20_nocc_preds_train)


rmses

array([[0.88142499, 0.88222369, 0.88233063, 0.8753496 , 0.88258166],
       [0.87725392, 0.87779858, 0.87782022, 0.87929478, 0.87714591],
       [0.86943762, 0.87109872, 0.86894691, 0.86147213, 0.86848175],
       [0.86420684, 0.86449354, 0.86527167, 0.86659834, 0.86447826],
       [0.77082506, 0.76955162, 0.76735617, 0.76565038, 0.76195427],
       [0.74847335, 0.74765683, 0.74829975, 0.74869558, 0.74515222],
       [0.70438008, 0.71555458, 0.7106086 , 0.7004532 , 0.70405932],
       [0.68788935, 0.69587668, 0.69456647, 0.69504697, 0.68455973],
       [0.70751588, 0.69332476, 0.68553154, 0.70162094, 0.70690797],
       [0.40732721, 0.40012604, 0.40673543, 0.40261036, 0.39656905],
       [0.63183937, 0.63065551, 0.64236393, 0.62494255, 0.63003946],
       [0.34198266, 0.35625367, 0.35597227, 0.36987347, 0.35979516]])

In [None]:
#A more thorough check of different max depths, including comparison of performance on test and training.
rmses = np.zeros((60, 5))

for i, (train_index, test_index) in enumerate(kfold.split(df)):
    ## get the kfold training data
    X_train_train = df[features].iloc[train_index,:]
    y_train_train = df['stay_length_minutes'].iloc[train_index]
    
    ## get the holdout data
    X_holdout = df[features].iloc[test_index,:]
    y_holdout = df['stay_length_minutes'].iloc[test_index]

    ## Fit models
    for k in range(15):
        # Initialize models with dynamic max_depth values
        model_tree_k = DecisionTreeRegressor(max_depth=k+5, random_state=108)
        model_tree_nocc_k = DecisionTreeRegressor(max_depth=k+5, random_state=108)
    
        # Fit the models to the training data
        model_tree_k.fit(X_train_train, y_train_train)
        model_tree_nocc_k.fit(X_train_train[triage_physical_features+triage_other_features+demographic_features], y_train_train)

        ## Use models to generate predictions on the holdout set
        model_tree_preds_k = model_tree_k.predict(X_holdout)
        model_tree_preds_train_k = model_tree_k.predict(X_train_train)

        model_tree_nocc_preds_k = model_tree_nocc_k.predict(X_holdout[triage_physical_features+triage_other_features+demographic_features])
        model_tree_nocc_preds_train_k = model_tree_nocc_k.predict(X_train_train[triage_physical_features+triage_other_features+demographic_features])

        ## Record the rmses
        rmses[4*k,i] = root_mean_squared_error(y_holdout, model_tree_preds_k)
        rmses[4*k+1,i] = root_mean_squared_error(y_train_train, model_tree_preds_train_k)
        rmses[4*k+2,i] = root_mean_squared_error(y_holdout, model_tree_nocc_preds_k)
        rmses[4*k+3,i] = root_mean_squared_error(y_train_train, model_tree_nocc_preds_train_k)

        print('Round', k, 'completed')
        k=k+1

rmses


Round 0 completed
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed
Round 6 completed
Round 7 completed
Round 8 completed
Round 9 completed
Round 10 completed
Round 11 completed
Round 12 completed
Round 13 completed
Round 14 completed
Round 0 completed
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed
Round 6 completed
Round 7 completed
Round 8 completed
Round 9 completed
Round 10 completed
Round 11 completed
Round 12 completed
Round 13 completed
Round 14 completed
Round 0 completed
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed
Round 6 completed
Round 7 completed
Round 8 completed
Round 9 completed
Round 10 completed
Round 11 completed
Round 12 completed
Round 13 completed
Round 14 completed
Round 0 completed
Round 1 completed
Round 2 completed
Round 3 completed
Round 4 completed
Round 5 completed
Round 6 completed
Round 7 completed
Round 8 completed
Round 9 compl

array([[0.88142499, 0.88222369, 0.88233063, 0.8753496 , 0.88258166],
       [0.87725392, 0.87779858, 0.87782022, 0.87929478, 0.87714591],
       [0.86943762, 0.87109872, 0.86894691, 0.86147213, 0.86848175],
       [0.86420684, 0.86449354, 0.86527167, 0.86659834, 0.86447826],
       [0.86340532, 0.8651636 , 0.86648112, 0.85680474, 0.86400179],
       [0.85822781, 0.85860002, 0.85850904, 0.8606057 , 0.85781784],
       [0.83955299, 0.83616671, 0.8367221 , 0.83683891, 0.83670047],
       [0.83301221, 0.83469761, 0.83423929, 0.83442696, 0.83333844],
       [0.84363622, 0.84458914, 0.84359773, 0.83725371, 0.84214111],
       [0.83571415, 0.8362921 , 0.83690867, 0.83815312, 0.83509821],
       [0.80651324, 0.80660989, 0.80449923, 0.80163389, 0.80325781],
       [0.79941313, 0.80165528, 0.80059583, 0.79908011, 0.79749225],
       [0.82372958, 0.81839448, 0.82336112, 0.81995529, 0.81922862],
       [0.8115156 , 0.81267704, 0.81189198, 0.81245876, 0.81105757],
       [0.76532426, 0.77010456, 0.

In [88]:
rmses_means=np.mean(rmses, axis=1)

In [89]:
rmses_means

array([0.88078211, 0.87786269, 0.86788743, 0.86500973, 0.86317132,
       0.85875208, 0.83719624, 0.8339429 , 0.84224358, 0.83643325,
       0.80450281, 0.79964732, 0.82093382, 0.81192019, 0.76540381,
       0.75685912, 0.80124903, 0.7886608 , 0.73566896, 0.72373727,
       0.7670675 , 0.74765555, 0.70701116, 0.69158784, 0.74496117,
       0.71387647, 0.67975494, 0.65989531, 0.7234476 , 0.68137784,
       0.6556011 , 0.62588506, 0.70438876, 0.64615554, 0.63440772,
       0.59307397, 0.6896951 , 0.60873158, 0.62134028, 0.56251209,
       0.68091038, 0.57284768, 0.61238617, 0.53088858, 0.67549006,
       0.53729157, 0.60742624, 0.49754215, 0.67682036, 0.50191976,
       0.60846823, 0.46357455, 0.68185945, 0.46719068, 0.61333792,
       0.4285637 , 0.6882715 , 0.43402984, 0.61816833, 0.39306346])

In [None]:
#Looking at the percent change in RMSE going from test data to train data, from above cross validation, 
#for different math depths. Ideally we would get some minimum (I think) but here we just get that 
#the percent changes are increasing monotonically.

for i in range(15):
    diff_i=(rmses_means[4*i+1]-rmses_means[4*i])/rmses_means[4*i]
    diff_nocc_i=(rmses_means[4*i+3]-rmses_means[4*i+2])/rmses_means[4*i+2]
    print("Percent drop in RMSE from test to training for depth", i+5,"full model:", diff_i)
    print("Percent drop in RMSE from test to training for depth", i+5,"no CC model:", diff_nocc_i)
    i=i+1

Percent drop in RMSE from test to training for depth 5 full model: -0.003314587403475457
Percent drop in RMSE from test to training for depth 5 no CC model: -0.003315746257876459
Percent drop in RMSE from test to training for depth 6 full model: -0.005119765098484738
Percent drop in RMSE from test to training for depth 6 no CC model: -0.00388598710747646
Percent drop in RMSE from test to training for depth 7 full model: -0.0068986340740119214
Percent drop in RMSE from test to training for depth 7 no CC model: -0.006035394372794602
Percent drop in RMSE from test to training for depth 8 full model: -0.010979724283583464
Percent drop in RMSE from test to training for depth 8 no CC model: -0.011163632398782063
Percent drop in RMSE from test to training for depth 9 full model: -0.015710759375987766
Percent drop in RMSE from test to training for depth 9 no CC model: -0.01621882387846026
Percent drop in RMSE from test to training for depth 10 full model: -0.025306704199588588
Percent drop in 