In [65]:
%pylab inline
import pandas as pd
import statsmodels.api as sm

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Subject IDs to analyse; the loading cell reads '<ID>_data.csv' for each of them.
SUBJECTS = ['RS02','RS03','RS04']

Neeraj's super clear naming scheme for the `session` numbers in the data files:

* 1 = familiarisation
* 2 = baseline
* 3 = day 1 training rotation
* 4 = day 1 end recall
* 5 = day 2 recall
* 6 = day 2 retraining


Let's take the following sets of trials (direction at vmax) from each subject:

* `recall1` End day 1 no feedback   (session 4)
* `trainnof1` Train day 1 no-feedback trials (session 3, `noFeedback` trials only)
* `trainfb1` Train day 1 feedback trials (session 3, `feedback` trials only, keeping just the last `N_TRIALS` trials)

And, for every subject, the set of trials we are trying to predict:

* `recall2` Recall day 2 no feedback (session 5)

In [3]:
# Names of the per-subject trial samples (keys of each entry in `samples`)
# that are averaged and used as predictors.
PREDICTORS = ['recall1','trainnof1','trainfb1']

In [4]:
# Name of the sample we are trying to predict: the session-5 (day 2 recall) trials.
TARGET = 'recall2'

In [5]:
# How many trials to keep from the END of the day-1 (session 3) feedback
# trials when building the `trainfb1` sample.
N_TRIALS = 20

In [6]:
# Load every subject's trial table and extract the hand-angle samples that are
# used further down as predictors and target.
# NOTE(review): the tables also carry a `trialvalid` column; nothing is
# filtered on it here -- confirm that all trials are meant to be included.
tables = []   # one DataFrame per subject, pooled after the loop
samples = {}  # subject ID -> {sample name -> list of hand angles}
for subj in SUBJECTS:
    tab = pd.read_csv('{}_data.csv'.format(subj))
    tables.append(tab)

    angle = tab['handAngle']
    in_training = tab['session'] == 3
    without_feedback = tab['trialtype'] == 'noFeedback'
    with_feedback = tab['trialtype'] == 'feedback'

    samples[subj] = {
        "trainnof1": list(angle[in_training & without_feedback]),
        # only the last N_TRIALS feedback trials of the training session
        "trainfb1": list(angle[in_training & with_feedback])[-N_TRIALS:],
        "recall1": list(angle[tab['session'] == 4]),
        "recall2": list(angle[tab['session'] == 5]),
    }

# Pool all subjects into one table and write it to 'tmp.csv'.
alldata = pd.concat(tables)
alldata.to_csv('tmp.csv')

In [7]:
#samples

In [8]:
alldata.head()

Unnamed: 0,cumultrial,groupid,groupnum,handAngle,session,subjectid,subjectnum,sx,sy,trial,trialtype,trialvalid,tx,ty,vmax,vmax_x,vmax_y
0,1,recall,1,-8.482259,1,RS02,2,0.023,-0.05,1,feedback,1,0.023,0.1,0.288874,0.009687,0.039271
1,2,recall,1,-5.948008,1,RS02,2,0.023,-0.05,2,feedback,1,0.023,0.1,0.365116,0.013358,0.042547
2,3,recall,1,-8.41348,1,RS02,2,0.023,-0.05,3,feedback,1,0.023,0.1,0.408926,0.015271,0.002254
3,4,recall,1,0.138782,1,RS02,2,0.023,-0.05,4,feedback,1,0.023,0.1,0.394741,0.023158,0.015186
4,5,recall,1,-5.988708,1,RS02,2,0.023,-0.05,5,feedback,1,0.023,0.1,0.398419,0.014179,0.034089


# First approach: taking average
For each predictor, we take the per-subject average hand angle and use it to try to predict that subject's individual movements during the target phase.

In [43]:
# One row per subject: the subject ID followed by the mean of each predictor
# sample. `mean` comes from the namespace populated by %pylab.
rows = []
for subj, values in samples.items():
    row = {"subject": subj}
    row.update((name, mean(values[name])) for name in PREDICTORS)
    rows.append(row)
preds = pd.DataFrame(rows)

In [44]:
preds

Unnamed: 0,recall1,subject,trainfb1,trainnof1
0,-16.830574,RS02,-30.54686,-28.87492
1,-16.820204,RS03,-29.372498,-27.860196
2,-24.256984,RS04,-31.714755,-32.345843


In [57]:
# Long-format target table: one row per TARGET trial, labelled with the
# subject it belongs to.
targ = pd.concat([
    pd.DataFrame({TARGET: values[TARGET], "subject": subj})
    for subj, values in samples.items()
])

In [59]:
#targ

In [63]:
# Attach each subject's predictor means to every one of that subject's target
# trials. The join key is spelled out: without `on`, pandas joins on ALL
# shared column names, so an overlap between predictor and target names would
# silently change the key. `validate` makes the merge fail loudly if `preds`
# ever contains more than one row per subject.
alldat = pd.merge(preds, targ, on="subject", validate="one_to_many")

In [76]:
alldat.head()

Unnamed: 0,recall1,subject,trainfb1,trainnof1,recall2
0,-16.830574,RS02,-30.54686,-28.87492,-2.195953
1,-16.830574,RS02,-30.54686,-28.87492,-18.054329
2,-16.830574,RS02,-30.54686,-28.87492,-15.169154
3,-16.830574,RS02,-30.54686,-28.87492,-16.131772
4,-16.830574,RS02,-30.54686,-28.87492,-9.773434


In [77]:
PREDICTORS

['recall1', 'trainnof1', 'trainfb1']

In [95]:
# Predictor column of `alldat` used for the single-predictor regression.
predictor = 'recall1'

In [108]:
# Ordinary least squares fit of the target on a single predictor plus an
# intercept (the constant column is added explicitly with sm.add_constant).
# NOTE(review): the predictor is a per-subject mean repeated on every trial
# row of `alldat`, so it takes one distinct value per subject -- confirm that
# trial-level standard errors are what is wanted here.
Y = alldat[TARGET]
X = sm.add_constant(alldat[predictor])
model = sm.OLS(Y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                recall2   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     2.475
Date:                Wed, 10 Apr 2019   Prob (F-statistic):              0.121
Time:                        16:24:38   Log-Likelihood:                -178.91
No. Observations:                  60   AIC:                             361.8
Df Residuals:                      58   BIC:                             366.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         21.6965     20.028      1.083      0.2

In [101]:
results.params

recall1    0.491253
dtype: float64

In [109]:
# Fit the target on each predictor separately and print, per predictor, its
# name, the fitted coefficient(s) and the matching t-value(s).
# NOTE(review): no constant column is added in this loop, so these are
# through-the-origin fits, unlike the single-predictor cell that calls
# sm.add_constant -- confirm that omitting the intercept is intended.
for predictor in PREDICTORS:
    Y = alldat[TARGET]
    X = alldat[predictor]
    model = sm.OLS(Y, X)
    results = model.fit()
    # one print call; the trailing "" reproduces the blank separator line
    print(predictor, list(results.params), list(results.tvalues), "", sep="\n")

recall1
[0.4912532717814287]
[14.29641460123765]

trainnof1
[0.3294066154095447]
[15.492577344843667]

trainfb1
[0.3213691553011379]
[15.648330411305666]



In [None]:
import statsmodels.api as sm

In [None]:
# TODO: 