In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.linear_model import LinearRegression

In [2]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

In [3]:
training = pd.read_csv("acute_vars.csv")
testing = pd.read_csv("mgh_vars.csv")

In [4]:
var_names = ["lnm_tot", "lnm_pos", "lnm_neg", "vlsm_tot", "wm_total", "lnm_tot_avg", "lnm_pos_avg", "lnm_neg_avg", "vlsm_tot_avg", "vlsm_pos_avg", "wm_avg", "sp_corr_lnm", "sp_corr_vlsm", "sp_corr_wm", "l_size"]
full_names = ["LNM Total", "LNM Pos", "LNM Neg", "VLSM Total", "WM Total", "LNM Total Avg", "LNM Pos Avg", "LNM Neg Avg", "VLSM Total Avg", "VLSM Pos Avg", "WM Total Avg", "Sp Corr LNM", "Sp Corr VLSM", "Sp Corr WM", "Lesion Size"]

In [5]:
tr_y = np.array(training["nih"])
te_y = np.array(testing["nih"])

In [9]:
single_var_output = pd.DataFrame(columns=["formula", "training_r2", "testing_r2", "cv_shrinkage"])

In [10]:
for p in var_names:
    tr_x = np.array(training[p]).reshape(-1, 1)
    model = sklearn.linear_model.LinearRegression().fit(tr_x, tr_y)
    te_x = np.array(testing[p]).reshape(-1, 1)
    row = ["NIHSS="+str(model.intercept_)[:7]+"+"+str(model.coef_[0])[:10]+"*x1", model.score(tr_x, tr_y), model.score(te_x, te_y), (model.score(tr_x, tr_y)-model.score(te_x, te_y))/model.score(tr_x, tr_y)*100]
    single_var_output.loc[p] = row

In [11]:
single_var_output

Unnamed: 0,formula,training_r2,testing_r2,cv_shrinkage
lnm_tot,NIHSS=2.84956+0.00019213*x1,0.41053,0.20569,49.896527
lnm_pos,NIHSS=2.64695+0.00018513*x1,0.416424,0.241724,41.952441
lnm_neg,NIHSS=4.19804+-0.0002889*x1,0.032559,0.100816,-209.646053
vlsm_tot,NIHSS=2.25699+0.00069485*x1,0.522224,0.157803,69.782408
wm_total,NIHSS=2.53737+0.00020911*x1,0.415963,0.244378,41.250034
lnm_tot_avg,NIHSS=3.08428+1.14866737*x1,0.180912,0.000951,99.47432
lnm_pos_avg,NIHSS=0.67834+1.86186921*x1,0.224842,0.049759,77.869403
lnm_neg_avg,NIHSS=5.32448+0.77852515*x1,0.008311,-0.038081,558.219064
vlsm_tot_avg,NIHSS=3.83523+0.94552950*x1,0.045455,-0.048492,206.680629
vlsm_pos_avg,NIHSS=1.66804+1.20621646*x1,0.222215,0.051953,76.620243


In [15]:
two_var_output = pd.DataFrame(columns=["predictors", "formula", "training_r2", "testing_r2", "cv_shrinkage"])

In [16]:
ind = 0
for i in range(len(var_names)):
    for ii in range(len(var_names)):
        if i < ii:
            tr_x = training[[var_names[i], var_names[ii]]].to_numpy()
            model = sklearn.linear_model.LinearRegression().fit(tr_x, tr_y)
            te_x = testing[[var_names[i], var_names[ii]]].to_numpy()
            row = [str(full_names[i]+" & "+full_names[ii]), "NIHSS = "+str(model.intercept_)[:7]+" + "+str(model.coef_[0])[:10]+"*x1 + "+str(model.coef_[1])+"*x2", round(model.score(tr_x, tr_y), 3), round(model.score(te_x, te_y), 3), round(model.score(tr_x, tr_y)-model.score(te_x, te_y), 3)]
            two_var_output.loc[ind] = row
            ind += 1

In [12]:
sr_training = pd.read_csv("acute_seed_region_-36_-4.csv")
sr_testing = pd.read_csv("mgh_seed_region.csv")

In [13]:
sr_tr_y = np.array(training["nih"])
sr_te_y = np.array(testing["nih"])

In [14]:
sr_vars = ["sr_total", "sr_pos", "sr_neg", "sr_total_avg", "sr_pos_avg", "sr_neg_avg", "sr_sp_corr"]

In [15]:
sr_var_output = pd.DataFrame(columns=["formula", "training_r2", "testing_r2", "cv_shrinkage"])

In [16]:
for p in sr_vars:
    tr_x = np.array(sr_training[p]).reshape(-1, 1)
    model = sklearn.linear_model.LinearRegression().fit(tr_x, sr_tr_y)
    te_x = np.array(sr_testing[p]).reshape(-1, 1)
    row = ["NIHSS="+str(model.intercept_)[:7]+"+"+str(model.coef_[0])[:10]+"*x1", model.score(tr_x, sr_tr_y), model.score(te_x, sr_te_y), (model.score(tr_x, sr_tr_y)-model.score(te_x, sr_te_y))/model.score(tr_x, sr_tr_y)*100]
    sr_var_output.loc[p] = row

In [17]:
sr_var_output

Unnamed: 0,formula,training_r2,testing_r2,cv_shrinkage
sr_total,NIHSS=2.96077+0.00801017*x1,0.370487,0.218843,40.93106
sr_pos,NIHSS=2.81843+0.00714283*x1,0.373297,0.249997,33.030013
sr_neg,NIHSS=3.90282+-0.0158193*x1,0.095071,0.157486,-65.651668
sr_total_avg,NIHSS=3.22022+46.2927174*x1,0.132896,0.01672,87.418978
sr_pos_avg,NIHSS=1.04315+69.5060282*x1,0.190444,0.080272,57.850086
sr_neg_avg,NIHSS=4.56698+-1.9739547*x1,7.3e-05,-0.008981,12390.774498
sr_sp_corr,NIHSS=2.91147+5.47093157*x1,0.156068,0.031855,79.58886
