# Support Vector Regression

We will use support vector regression (SVR) to predict risk factors from each subject's parameters produced by the HDDM.

In [109]:
from sklearn import svm
from sklearn.preprocessing import scale
import pandas as pd

## Importing Data

##### Behavioral Analysis Parameters

In [110]:
# Read the behavioral-analysis parameters; the first CSV column becomes the row index.
behav_params = pd.read_csv(
    'cleaned_data/behavioral_analysis_parameters.csv',
    index_col=0,
)
behav_params.head()  # preview the first five rows

Unnamed: 0_level_0,sw_costRT,sw_avg_ac,sw_inc_ac,sw_acc,sw_iacc
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5001,179.882353,0.755556,0.736264,0.747253,0.704545
5002,301.574983,0.955556,0.921348,0.934066,0.883721
5003,48.140212,0.916667,0.911111,0.923077,0.906977
5004,221.275594,0.972222,0.966667,0.967033,0.933333
5005,504.860435,0.638889,0.641304,0.488889,0.54


In [111]:
# Standardize each column with sklearn's `scale`, then re-wrap the returned array
# in a DataFrame so the original row index and column labels are preserved.
# NOTE(review): scaling uses every row, including rows later held out for testing.
behav_params = pd.DataFrame(
    data=scale(behav_params),
    index=behav_params.index,
    columns=behav_params.columns,
)
behav_params.head()  # preview the first five rows

Unnamed: 0_level_0,sw_costRT,sw_avg_ac,sw_inc_ac,sw_acc,sw_iacc
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5001,0.332148,-0.771636,-0.445791,-0.610195,-0.476258
5002,1.479069,1.074566,1.006806,1.029839,0.921216
5003,-0.909488,0.715582,0.926462,0.933366,1.102599
5004,0.722268,1.228416,1.362478,1.319256,1.308166
5005,3.394981,-1.848587,-1.19106,-2.878373,-1.759625


##### Constrained Model Parameters

In [112]:
# Read the constrained-model parameters; the first CSV column becomes the row index.
con_params = pd.read_csv(
    'cleaned_data/constrained_parameters.csv',
    index_col=0,
)
con_params.head()  # preview the first five rows

Unnamed: 0,a_mean,a_std,t_mean,t_std,v_ConStay_mean,v_ConStay_std,v_ConSwitch_mean,v_ConSwitch_std,v_IncStay_mean,v_IncStay_std,v_IncSwitch_mean,v_IncSwitch_std
5001,1.654276,0.050684,0.009033,0.004389,0.752308,0.191112,0.675413,0.168773,0.799912,0.187332,0.55332,0.175814
5002,2.162169,0.139902,0.256666,0.01941,2.397274,0.240931,1.419568,0.174521,2.103839,0.247926,1.136038,0.183101
5003,2.019375,0.128783,0.388972,0.019373,1.555974,0.225187,1.469078,0.198279,1.820493,0.230318,1.611098,0.223071
5004,2.003204,0.135867,0.257029,0.015485,2.329027,0.275167,1.628504,0.198968,2.793962,0.274371,1.568955,0.215547
5005,2.155007,0.109106,0.323836,0.0231,1.694907,0.221075,0.427972,0.186792,1.147412,0.20147,0.54257,0.162237


In [113]:
# Standardize each column with sklearn's `scale`, then re-wrap the returned array
# in a DataFrame so the original row index and column labels are preserved.
con_params = pd.DataFrame(
    data=scale(con_params),
    index=con_params.index,
    columns=con_params.columns,
)
con_params.head()  # preview the first five rows

Unnamed: 0,a_mean,a_std,t_mean,t_std,v_ConStay_mean,v_ConStay_std,v_ConSwitch_mean,v_ConSwitch_std,v_IncStay_mean,v_IncStay_std,v_IncSwitch_mean,v_IncSwitch_std
5001,-0.611438,-1.446421,-3.579457,-1.436717,-1.89006,-1.51575,-1.40006,-1.059698,-0.947642,-1.200592,-0.878557,-0.861569
5002,1.065821,1.171093,-0.459354,0.101134,1.089382,0.083619,0.316908,-0.872078,1.216455,0.695172,0.576932,-0.577119
5003,0.59426,0.844871,1.207668,0.097341,-0.43442,-0.421833,0.431142,-0.096659,0.746192,0.144282,1.763516,0.983137
5004,0.540857,1.052718,-0.454772,-0.300665,0.965771,1.182708,0.79898,-0.074153,2.361834,1.522561,1.658252,0.689424
5005,1.04217,0.267568,0.38697,0.478919,-0.182778,-0.553832,-1.970974,-0.471576,-0.370904,-0.758245,-0.905409,-1.391539


##### Unconstrained Model Parameters

In [114]:
# Read the unconstrained-model parameters; the first CSV column becomes the row index.
uncon_params = pd.read_csv(
    'cleaned_data/unconstrained_parameters.csv',
    index_col=0,
)
uncon_params.head()  # preview the first five rows

Unnamed: 0,a_mean,a_std,t_mean,t_std,v_mean,v_std
5001,1.650353,0.050208,0.008911,0.004356,0.651766,0.091077
5002,2.101413,0.130796,0.254941,0.018952,1.628839,0.128445
5003,2.028412,0.134851,0.387673,0.019372,1.624307,0.136343
5004,1.979925,0.135455,0.254277,0.016284,1.987344,0.148719
5005,2.054512,0.098032,0.325563,0.025738,0.823684,0.097384


In [115]:
# Standardize each column with sklearn's `scale`, then re-wrap the returned array
# in a DataFrame so the original row index and column labels are preserved.
uncon_params = pd.DataFrame(
    data=scale(uncon_params),
    index=uncon_params.index,
    columns=uncon_params.columns,
)
uncon_params.head()  # preview the first five rows

Unnamed: 0,a_mean,a_std,t_mean,t_std,v_mean,v_std
5001,-0.538052,-1.26771,-3.547986,-1.457964,-1.357838,-1.575656
5002,0.935731,0.796316,-0.469228,0.020779,0.766522,0.233223
5003,0.697207,0.900185,1.191747,0.063267,0.75667,0.615574
5004,0.538784,0.915655,-0.477537,-0.249593,1.545988,1.214642
5005,0.782486,-0.042834,0.414507,0.708259,-0.984051,-1.270324


##### Repetitive Negative Thinking Risk Factor

In [116]:
# Read the RNT risk-factor scores (the regression target); the first CSV column
# becomes the row index.
RNT = pd.read_csv(
    'cleaned_data/RNT_risk_factor.csv',
    index_col=0,
)
RNT.head()  # preview the first five rows

Unnamed: 0_level_0,RNT_primaryRisk
ID,Unnamed: 1_level_1
5001,40
5002,41
5003,40
5004,30
5005,26


In [117]:
# Standardize the target column as well, so the models below are fit and scored
# on the scaled RNT values rather than on the raw scores.
RNT = pd.DataFrame(
    data=scale(RNT),
    index=RNT.index,
    columns=RNT.columns,
)
RNT.head()  # preview the first five rows

Unnamed: 0_level_0,RNT_primaryRisk
ID,Unnamed: 1_level_1
5001,1.178059
5002,1.256172
5003,1.178059
5004,0.396925
5005,0.084471


## Regression Models

In [118]:
from sklearn.model_selection import train_test_split

# A single call splits all four arrays together so their rows stay aligned;
# 20% of the rows are held out and the seed is fixed for reproducibility.
(
    bp_train, bp_test,
    cp_train, cp_test,
    up_train, up_test,
    RNT_train, RNT_test,
) = train_test_split(
    behav_params.values,
    con_params.values,
    uncon_params.values,
    RNT.values,
    test_size=0.2,
    random_state=5100,
)

In [119]:
# Three independent SVR estimators with default hyperparameters, named for the
# behavioral, constrained and unconstrained parameter sets.
svr_clf_behav, svr_clf_con, svr_clf_uncon = (svm.SVR() for _ in range(3))

##### Test-Set R<sup>2</sup> Values for Both Mean and Std Dev Parameters

In [120]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
# `.ravel()` flattens the single-column target into the 1-D shape SVR expects.
svr_clf_behav.fit(bp_train, RNT_train.ravel()).score(bp_test, RNT_test.ravel())

-0.094429644364719678

In [121]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
# `.ravel()` flattens the single-column target into the 1-D shape SVR expects.
svr_clf_con.fit(cp_train, RNT_train.ravel()).score(cp_test, RNT_test.ravel())

0.0063312595122545501

In [122]:
# Select the columns whose names end in "_mean" by label instead of taking every
# other column by position, so the selection does not silently depend on the
# column order of the CSV. `.copy()` keeps the subset independent of `con_params`.
con_mean_params = con_params.filter(regex=r'_mean$').copy()
con_mean_params.head(5)

Unnamed: 0,a_mean,t_mean,v_ConStay_mean,v_ConSwitch_mean,v_IncStay_mean,v_IncSwitch_mean
5001,-0.611438,-3.579457,-1.89006,-1.40006,-0.947642,-0.878557
5002,1.065821,-0.459354,1.089382,0.316908,1.216455,0.576932
5003,0.59426,1.207668,-0.43442,0.431142,0.746192,1.763516
5004,0.540857,-0.454772,0.965771,0.79898,2.361834,1.658252
5005,1.04217,0.38697,-0.182778,-1.970974,-0.370904,-0.905409


In [123]:
# Select the columns whose names end in "_mean" by label instead of taking every
# other column by position, so the selection does not silently depend on the
# column order of the CSV. `.copy()` keeps the subset independent of
# `uncon_params`, matching how the constrained subset is built.
uncon_mean_params = uncon_params.filter(regex=r'_mean$').copy()
uncon_mean_params.head(5)

Unnamed: 0,a_mean,t_mean,v_mean
5001,-0.538052,-3.547986,-1.357838
5002,0.935731,-0.469228,0.766522
5003,0.697207,1.191747,0.75667
5004,0.538784,-0.477537,1.545988
5005,0.782486,0.414507,-0.984051


In [124]:
# Same 80/20 split and seed as before, now over the mean-only parameter subsets.
(
    bp_mean_train, bp_mean_test,
    cp_mean_train, cp_mean_test,
    up_mean_train, up_mean_test,
    RNT_mean_train, RNT_mean_test,
) = train_test_split(
    behav_params.values,
    con_mean_params.values,
    uncon_mean_params.values,
    RNT.values,
    test_size=0.2,
    random_state=5100,
)

In [125]:
# Fresh default-configured SVR estimators for the mean-only experiments.
svr_clf_behav_mean, svr_clf_con_mean, svr_clf_uncon_mean = (
    svm.SVR() for _ in range(3)
)

##### Test-Set R<sup>2</sup> Values for Mean Parameters

In [126]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
svr_clf_behav_mean.fit(bp_mean_train, RNT_mean_train.ravel()).score(
    bp_mean_test, RNT_mean_test.ravel()
)

-0.094429644364719678

In [127]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
svr_clf_con_mean.fit(cp_mean_train, RNT_mean_train.ravel()).score(
    cp_mean_test, RNT_mean_test.ravel()
)

0.071180496576256136

In [128]:
# Select the columns whose names end in "_std" by label instead of taking every
# other column by position, so the selection does not silently depend on the
# column order of the CSV. `.copy()` keeps the subset independent of `con_params`.
con_std_params = con_params.filter(regex=r'_std$').copy()
con_std_params.head(5)

Unnamed: 0,a_std,t_std,v_ConStay_std,v_ConSwitch_std,v_IncStay_std,v_IncSwitch_std
5001,-1.446421,-1.436717,-1.51575,-1.059698,-1.200592,-0.861569
5002,1.171093,0.101134,0.083619,-0.872078,0.695172,-0.577119
5003,0.844871,0.097341,-0.421833,-0.096659,0.144282,0.983137
5004,1.052718,-0.300665,1.182708,-0.074153,1.522561,0.689424
5005,0.267568,0.478919,-0.553832,-0.471576,-0.758245,-1.391539


In [129]:
# Select the columns whose names end in "_std" by label instead of taking every
# other column by position, so the selection does not silently depend on the
# column order of the CSV. `.copy()` keeps the subset independent of
# `uncon_params`, matching how the constrained subset is built.
uncon_std_params = uncon_params.filter(regex=r'_std$').copy()
uncon_std_params.head(5)

Unnamed: 0,a_std,t_std,v_std
5001,-1.26771,-1.457964,-1.575656
5002,0.796316,0.020779,0.233223
5003,0.900185,0.063267,0.615574
5004,0.915655,-0.249593,1.214642
5005,-0.042834,0.708259,-1.270324


In [130]:
# Same 80/20 split and seed as before, now over the std-dev-only parameter subsets.
(
    bp_std_train, bp_std_test,
    cp_std_train, cp_std_test,
    up_std_train, up_std_test,
    RNT_std_train, RNT_std_test,
) = train_test_split(
    behav_params.values,
    con_std_params.values,
    uncon_std_params.values,
    RNT.values,
    test_size=0.2,
    random_state=5100,
)

In [131]:
# Fresh default-configured SVR estimators for the std-dev-only experiments.
svr_clf_behav_std, svr_clf_con_std, svr_clf_uncon_std = (
    svm.SVR() for _ in range(3)
)

##### Test-Set R<sup>2</sup> Values for Std Dev Parameters

In [132]:
# Fit on the training rows of the std-dev split, then score on the held-out rows
# of that same split: `bp_std_test`, not `bp_mean_test` from the earlier
# mean-parameter split.
# `.ravel()` flattens the single-column target into the 1-D shape SVR expects.
svr_clf_behav_std.fit(bp_std_train, RNT_std_train.ravel())
svr_clf_behav_std.score(bp_std_test, RNT_std_test.ravel())

-0.094429644364719678

In [133]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
svr_clf_con_std.fit(cp_std_train, RNT_std_train.ravel()).score(
    cp_std_test, RNT_std_test.ravel()
)

-0.083911497457235029

In [134]:
# Keep only the two switch-condition mean columns, as an independent copy.
con_switch_mean_params = con_params.loc[
    :, ['v_ConSwitch_mean', 'v_IncSwitch_mean']
].copy()
con_switch_mean_params.head()  # preview the first five rows

Unnamed: 0,v_ConSwitch_mean,v_IncSwitch_mean
5001,-1.40006,-0.878557
5002,0.316908,0.576932
5003,0.431142,1.763516
5004,0.79898,1.658252
5005,-1.970974,-0.905409


In [135]:
# Same 80/20 split and seed as before, now including the switch-mean subset.
(
    bp_switch_mean_train, bp_switch_mean_test,
    cp_switch_mean_train, cp_switch_mean_test,
    RNT_switch_mean_train, RNT_switch_mean_test,
) = train_test_split(
    behav_params.values,
    con_switch_mean_params.values,
    RNT.values,
    test_size=0.2,
    random_state=5100,
)

In [136]:
# Default-configured SVR estimator for the switch-mean-only experiment.
svr_clf_con_switch_mean = svm.SVR()

##### Test-Set R<sup>2</sup> Value Using Only Switch Mean Parameters

In [137]:
# `fit` returns the estimator itself, so the held-out score is chained onto it.
svr_clf_con_switch_mean.fit(
    cp_switch_mean_train, RNT_switch_mean_train.ravel()
).score(cp_switch_mean_test, RNT_switch_mean_test.ravel())

-0.1175623258351659