## Imports and Data Loading

In [None]:

from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd  # optional

%matplotlib notebook
ltpFR2_events = np.load('/data/exp_events/ltpFR2_events.npy')
ltpFR2_df = pd.DataFrame(ltpFR2_events)
print('loading done')

## Question 1

In [None]:
# Serial position curve: probability that a studied word is later recalled,
# as a function of its position in the study list.

# Keep only word-presentation events and drop session 24.
# NOTE(review): the reason session 24 is excluded is not stated in this notebook.
ltpFR2_word = ltpFR2_df[(ltpFR2_df['type'] == 'WORD') & (ltpFR2_df['session'] != 24)]

# Mean of the `recalled` flag per serial position. This must be computed on the
# filtered frame; grouping the full event table would mix in non-WORD events
# and session 24.
prec_by_serialpos = ltpFR2_word.groupby(by='serialpos')['recalled'].mean()

fig, ax = plt.subplots()
ax.plot(prec_by_serialpos.index, prec_by_serialpos.values, marker='o')
ax.set_ylabel('Probability of Later Recall')
ax.set_xlabel('Serial Position')
# NOTE(review): N=76 is hard-coded; confirm it matches the number of subjects in the data.
ax.set_title('Mean Serial Position Curve (N=76)')
ax.set_xlim(1, 24)
fig.savefig('SR_mean')

## Question 2

In [None]:
# Inter-response time (IRT) as a function of output position, with one curve
# per total number of correct recalls made in a trial.

ltpFR2_rec_word = ltpFR2_df[ltpFR2_df['type'] == 'REC_WORD']
# Correct recalls only (rows whose `intrusion` field is 0).
ltpFR2_word_corr = ltpFR2_rec_word[ltpFR2_rec_word['intrusion'] == 0]

# Maps "total recalls in the trial" -> list of per-trial IRT lists (seconds).
# Every list stored under a given key has the same length (key - 1).
TRtoIRT = defaultdict(list)

# Session 24 is left out of this analysis.
included_recalls = ltpFR2_word_corr[ltpFR2_word_corr['session'] != 24]

# A single groupby pass visits each existing (subject, session, trial) list once,
# instead of re-masking the whole frame for every possible combination.
# Row order inside a group is the original event order.
for _, rectimes in included_recalls.groupby(['subject', 'session', 'trial'])['rectime']:
    irts = np.diff(rectimes.to_numpy()) / 1000  # the plot below reports IRT in seconds
    TRtoIRT[len(rectimes)].append(irts.tolist())

fig, ax = plt.subplots()
for total_recalls in sorted(TRtoIRT):
    # Fewer than two recalls gives no interval to plot; more than 24 is skipped.
    if total_recalls < 2 or total_recalls > 24:
        continue
    # Average across trials, separately for each output-position interval.
    mean_irts = np.mean(np.array(TRtoIRT[total_recalls]), axis=0)
    ax.plot(mean_irts)

ax.set_title('Output Position vs. Inter Response Time ')
ax.set_ylabel('IRT (s)')
ax.set_xlabel('Index of Output Position Interval')
fig.savefig('Q2')

## Question 3
#### For an individual subject, the same code is run with one additional filter on the events: `subject == <subject name>`



In [None]:
# All recall events; each (subject, session, trial) combination is one recalled list.
data2 = ltpFR2_df[ltpFR2_df['type'] == 'REC_WORD']

# Lag of each transition: difference in serial position between successive
# recalls within a list (NaN for the first recall of every list).
lagArray = data2.groupby(by=['subject', 'session', 'trial'])['serialpos'].diff()

# Serial positions grouped per list, used to count the possible transitions.
# The grouping must include 'subject' (as lagArray does); otherwise lists from
# different subjects that share a session and trial number are merged into one.
preArray = data2.groupby(by=['subject', 'session', 'trial'])['serialpos']


# Calculate the denominator for the CRP analysis.
# all_chances collects, over every recalled list, the lag of each transition
# that was possible at the moment a transition was made.
all_chances=[]
def getPossible(group, list_length=24):
    """Append the possible transition lags for one recalled list to ``all_chances``.

    For every recall that is followed by another recall, the possible lags are
    the signed distances from that recall's serial position to every list
    position (1..list_length) that is not the position itself and has not
    already been recalled in this list.

    Parameters
    ----------
    group : iterable of numbers
        Serial positions of one list's recalls, in output order. NaN entries
        are ignored.
    list_length : int, optional
        Number of study positions per list (default 24).

    Returns
    -------
    None
        Results are accumulated in the module-level ``all_chances`` list.
    """
    positions = [pos for pos in group if not np.isnan(pos)]
    recalled = set()

    # The final recall has no outgoing transition, so it contributes no
    # opportunities (the numerator likewise has one transition fewer than recalls).
    for pos in positions[:-1]:
        # Only a real list position can be the origin of a within-list lag.
        # NOTE(review): intrusions are presumably coded with a serial position
        # outside 1..list_length - confirm against the event schema.
        if 1 <= pos <= list_length:
            all_chances.extend(
                int(target - pos)
                for target in range(1, list_length + 1)
                # skip the item itself and anything already recalled (repetitions)
                if target != pos and target not in recalled
            )
        recalled.add(pos)
            
# Rebuild the denominator from scratch so re-running this part never double-counts.
# getPossible is called only for its side effect on all_chances, so use an
# explicit loop over the groups rather than groupby.apply.
all_chances.clear()
for _, trial_positions in preArray:
    getPossible(trial_positions)

# One histogram bin per integer lag in [-MAX_LAG, MAX_LAG]. Half-integer edges
# keep each lag alone in its own bin; with integer edges the last bin is closed
# on the right and would merge two lags.
MAX_LAG = 15
lags = np.arange(-MAX_LAG, MAX_LAG + 1)
lag_edges = np.arange(-MAX_LAG - 0.5, MAX_LAG + 1.5)

possibleLags = np.histogram(all_chances, lag_edges)
actualLags = np.histogram(lagArray.dropna(), lag_edges)

# Conditional probability = transitions made / transitions possible.
# Lags that were never possible stay NaN instead of dividing by zero.
possible_counts = possibleLags[0].astype(float)
yvals = np.full(lags.shape, np.nan)
np.divide(actualLags[0], possible_counts, out=yvals, where=possible_counts > 0)
yvals[lags == 0] = np.nan  # lag 0 is not a transition

# Use a fresh figure so the previous cell's plot is not wiped.
fig, ax = plt.subplots()
# Negative and positive lags are drawn as separate lines, leaving a gap at lag 0.
ax.plot(lags[lags < 0], yvals[lags < 0])
ax.plot(lags[lags > 0], yvals[lags > 0])

ax.set_title('Lag CRP Curve')
ax.set_xlabel('Lag (Position)')
ax.set_ylabel('Conditional Probability')
fig.savefig('Lag CRP - Everyone')

## Question 4

In [None]:
# Lag-CRL: mean inter-response time as a function of the lag of the transition.

# To restrict the analysis to the first `pos` recalls of each list, replace the
# two default assignments below with the following lines. Both arrays must use
# the same slice so that they stay aligned row for row.
#pos = 4
#lagArray = data2.groupby(by=['subject','session', 'trial'])['serialpos'].apply(lambda x: x.iloc[:pos].diff())
#responseArray = data2.groupby(by=['subject','session', 'trial'])['rectime'].apply(lambda x: x.iloc[:pos].diff())

trial_keys = ['subject', 'session', 'trial']
lagArray = data2.groupby(by=trial_keys)['serialpos'].diff()
responseArray = data2.groupby(by=trial_keys)['rectime'].diff()

# Pair every transition's lag with its IRT. Both series share data2's index, so
# they align row for row. Rows where either value is NaN (the first recall of
# each list) are dropped.
transitions = pd.DataFrame({'lag': lagArray, 'irt_ms': responseArray}).dropna()

# Mean IRT per lag, converted from msec to sec.
mean_irt_by_lag = transitions.groupby('lag')['irt_ms'].mean() / 1000

xvals = mean_irt_by_lag.index.to_numpy()
yvals = mean_irt_by_lag.to_numpy(dtype=float, copy=True)

# Blank out lag 0 (not a real transition) so the curve has a gap there.
# Select it by value, not by a fixed list index: the position of lag 0
# depends on which lags occur in the data.
yvals[xvals == 0] = np.nan

fig, ax = plt.subplots()
ax.plot(xvals, yvals)
ax.set_xlim(-15, 15)
ax.set_title('Lag CRL')
ax.set_ylabel('IRT(s)')
ax.set_xlabel('Lag in Transition')
fig.savefig('CRL')



## Question 5

In [None]:
# Prior-list intrusions (PLIs): how many intrusions came from 1, 2, ... lists back.

ltpFR2_rec_word = ltpFR2_df[ltpFR2_df['type'] == 'REC_WORD']

# Keep recalls from trials after the 9th whose `intrusion` value is between 1 and 8.
# NOTE(review): presumably `intrusion` holds the number of lists back, and the
# trial cutoff ensures every counted distance was available - confirm.
intrusionData = ltpFR2_rec_word[(ltpFR2_rec_word['trial'] > 9) &
                                (ltpFR2_rec_word['intrusion'] > 0) &
                                (ltpFR2_rec_word['intrusion'] < 9)]

# Number of intrusion events for each `intrusion` value.
toPlot = intrusionData.groupby(by='intrusion')['subject'].count()

fig, ax = plt.subplots()
ax.plot(toPlot.index, toPlot.values)
ax.set_title('Prior List Intrusions (PLIs)')
ax.set_ylabel('Number of Intrusions')
ax.set_xlabel('Source of Intrusion Word (# of Lists Back)')
# Save and show only after the figure is fully labelled.
fig.savefig('PLIs')
plt.show()

