In [None]:
def tempClustScore(list_serialpos, rec_words_serialpos):
    """Return the temporal-clustering percentile rank of each recall transition.

    For every transition from recalled word ``i`` to recalled word ``i + 1``,
    the absolute serial-position lag that was actually taken is compared with
    the lags from word ``i`` to every list item that has not been recalled up
    to and including word ``i + 1`` (so the taken transition is never compared
    with itself). The score is ``1 - (fraction of candidate lags strictly
    smaller than the taken lag)``; 1.0 means no remaining item was closer.

    Parameters
    ----------
    list_serialpos : array-like
        Serial positions of the items presented in the list.
    rec_words_serialpos : array-like
        Serial positions of the recalled words, in recall order.

    Returns
    -------
    numpy.ndarray or float
        Array of length ``len(rec_words_serialpos) - 1`` holding one score per
        transition (NaN where no un-recalled item was left to compare with),
        or the scalar ``np.nan`` when fewer than two words were recalled.
    """
    from scipy.stats import percentileofscore

    if len(rec_words_serialpos) < 2:
        return np.nan

    # Coerce to arrays so that plain Python lists can be mask-indexed below.
    list_serialpos = np.asarray(list_serialpos)
    rec_words_serialpos = np.asarray(rec_words_serialpos)

    n_transitions = len(rec_words_serialpos) - 1
    taken_lags = np.abs(np.diff(rec_words_serialpos))
    percentileRank = np.full(n_transitions, np.nan)

    for i in range(n_transitions):
        # Items not yet recalled once the (i+1)-th recall has been made.
        still_available = ~np.isin(list_serialpos, rec_words_serialpos[:i + 2])
        candidates = list_serialpos[still_available]
        if candidates.size == 0:
            continue

        candidate_lags = np.abs(rec_words_serialpos[i] - candidates)
        # 'strict' counts only candidate lags strictly below the taken lag.
        percentileRank[i] = 1.0 - percentileofscore(
            candidate_lags, taken_lags[i], kind='strict') / 100.0

    return percentileRank
 
 
def semClustScore(list_vecs_reduced, rec_words_serialpos, metric='euc'):
    """Return the semantic-clustering percentile rank of each recall transition.

    For every transition from recalled word ``i`` to recalled word ``i + 1``,
    the vector distance that was actually travelled is compared with the
    distances from word ``i`` to every list item that has not been recalled up
    to and including word ``i + 1``. The score is ``1 - (fraction of candidate
    distances strictly smaller than the taken distance)``.

    Parameters
    ----------
    list_vecs_reduced : numpy.ndarray
        2-D array with one row per list item; row ``k`` belongs to serial
        position ``k + 1``. The list length is taken from the number of rows.
    rec_words_serialpos : array-like of int
        1-based serial positions of the recalled words, in recall order.
        Positions outside ``1..n_items`` keep an all-zero vector, so distances
        involving them are measured against the origin.
    metric : {'euc', 'cos'}, optional
        Distance used between vectors: Euclidean or cosine distance.

    Returns
    -------
    numpy.ndarray or float
        Array of length ``len(rec_words_serialpos) - 1`` holding one score per
        transition (NaN where no un-recalled item was left to compare with),
        or the scalar ``np.nan`` when fewer than two words were recalled.

    Raises
    ------
    ValueError
        If ``metric`` is not ``'euc'`` or ``'cos'`` (only checked when at
        least two words were recalled).
    """
    from scipy.spatial.distance import cosine, euclidean
    from scipy.stats import percentileofscore

    if len(rec_words_serialpos) < 2:
        return np.nan

    distance_funcs = {'euc': euclidean, 'cos': cosine}
    if metric not in distance_funcs:
        raise ValueError("metric must be 'euc' or 'cos', got %r" % (metric,))
    dist = distance_funcs[metric]

    list_vecs_reduced = np.asarray(list_vecs_reduced)
    # Serial positions are 1-based and follow the row order of the vectors.
    list_serialpos = np.arange(1, list_vecs_reduced.shape[0] + 1)
    rec_words_serialpos = np.array(rec_words_serialpos, dtype=int)

    # Look up the vector of every recalled word that belongs to the list.
    rec_vecs_reduced = np.zeros((len(rec_words_serialpos), list_vecs_reduced.shape[1]))
    in_list = np.isin(rec_words_serialpos, list_serialpos)
    rec_vecs_reduced[in_list] = list_vecs_reduced[rec_words_serialpos[in_list] - 1]

    n_transitions = len(rec_words_serialpos) - 1
    taken_dists = np.array([
        dist(rec_vecs_reduced[t], rec_vecs_reduced[t + 1])
        for t in range(n_transitions)
    ])
    percentileRank = np.full(n_transitions, np.nan)

    for i in range(n_transitions):
        # Items not yet recalled once the (i+1)-th recall has been made.
        still_available = ~np.isin(list_serialpos, rec_words_serialpos[:i + 2])
        candidates = list_vecs_reduced[still_available]
        if len(candidates) == 0:
            continue

        candidate_dists = [dist(rec_vecs_reduced[i], vec) for vec in candidates]
        # 'strict' counts only candidate distances strictly below the taken one.
        percentileRank[i] = 1.0 - percentileofscore(
            candidate_dists, taken_dists[i], kind='strict') / 100.0

    return percentileRank
 
 
def getClustRankZscore(percentileRanks, percentileRanks_perm):
    """Z-score clustering ranks against a permutation null, per session.

    Ranks are averaged over items within each list, then over lists within
    each session (both NaN-aware). The same is done for every permutation,
    giving a per-session null distribution. Each session's observed average
    is z-scored against the mean and (population) standard deviation of its
    null distribution, and the z-scores are finally averaged over sessions.

    Parameters
    ----------
    percentileRanks : numpy.ndarray
        Observed ranks, shaped ``(sessions, lists, items)`` for temporal
        clustering or ``(sessions, lists, items, pcadim)`` for semantic
        clustering.
    percentileRanks_perm : numpy.ndarray
        Permuted ranks with the same leading axes as ``percentileRanks`` plus
        a trailing permutation axis.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Mean z-score across sessions: a scalar for 3-D input, an array of
        length ``pcadim`` for 4-D input.

    Raises
    ------
    ValueError
        If ``percentileRanks`` is not 3-D or 4-D, or if
        ``percentileRanks_perm`` does not have exactly one more axis.
    """
    percentileRanks = np.asarray(percentileRanks)
    percentileRanks_perm = np.asarray(percentileRanks_perm)

    if percentileRanks.ndim not in (3, 4):
        raise ValueError(
            "percentileRanks must be 3-D (temporal) or 4-D (semantic), got %d-D"
            % percentileRanks.ndim)
    if percentileRanks_perm.ndim != percentileRanks.ndim + 1:
        raise ValueError(
            "percentileRanks_perm must have one more axis than percentileRanks "
            "(the permutation axis)")

    # Axis 2 is items within a list, axis 1 is lists within a session; any
    # trailing axes (pcadim, permutation) are carried through untouched.
    clustScore_avg = np.nanmean(np.nanmean(percentileRanks, axis=2), axis=1)
    clustScore_avg_perm = np.nanmean(np.nanmean(percentileRanks_perm, axis=2), axis=1)

    # The last axis of the permuted averages is the null distribution.
    null_mean = np.nanmean(clustScore_avg_perm, axis=-1)
    null_std = np.nanstd(clustScore_avg_perm, axis=-1)
    sessClustZscore = (clustScore_avg - null_mean) / null_std

    # Average over sessions (axis 0).
    return np.nanmean(sessClustZscore, axis=0)

def getTransCatAvail(rec_words, rec_words_serialpos, list_words, list_serialpos):
    """Flag recall transitions for which the required transition types exist.

    For each transition out of recalled word ``i``, the candidate next words
    are all list items not recalled up to and including word ``i``. Each
    candidate is classified as adjacent (serial-position lag of exactly 1) or
    non-adjacent, and as same-category or different-category relative to
    word ``i``. The transition is flagged True only if at least one
    non-adjacent same-category candidate AND at least one non-adjacent
    different-category candidate exist. Adjacent candidates do not enter
    the criterion.

    Parameters
    ----------
    rec_words : object with ``len()`` and a ``category_num`` attribute
        Recalled words, in recall order.
    rec_words_serialpos : array-like
        Serial positions of the recalled words, in recall order.
    list_words : object with a ``category_num`` attribute
        Presented words; entry ``k`` belongs to serial position ``k + 1``.
    list_serialpos : array-like of int
        1-based serial positions of the presented list items.

    Returns
    -------
    numpy.ndarray of bool
        One flag per transition, i.e. length ``len(rec_words) - 1`` (empty
        when fewer than two words were recalled).
    """
    rec_words_catnum = np.array(rec_words.category_num)
    catEncList = np.array(list_words.category_num)
    # Coerce to arrays so membership tests compare values and plain Python
    # lists can be mask-indexed.
    rec_words_serialpos = np.asarray(rec_words_serialpos)
    list_serialpos = np.asarray(list_serialpos)

    n_transitions = max(len(rec_words) - 1, 0)
    transCatAvailList = np.ones(n_transitions, dtype='bool')

    for i in range(n_transitions):
        # Serial positions of the words that could still be recalled next.
        not_yet_recalled = ~np.isin(list_serialpos, rec_words_serialpos[:i + 1])
        possrec_words_serpos = list_serialpos[not_yet_recalled]
        if possrec_words_serpos.size == 0:
            # Nothing left to transition to, so no transition type is available.
            transCatAvailList[i] = False
            continue

        # Categories of those candidates (serial positions are 1-based).
        possrec_words_cat = catEncList[possrec_words_serpos - 1]

        curr_word_serpos = rec_words_serialpos[i]
        curr_word_cat = rec_words_catnum[i]

        nonadj = np.abs(curr_word_serpos - possrec_words_serpos) != 1
        samecat = possrec_words_cat == curr_word_cat

        nonadjsame_num = np.sum(nonadj & samecat)
        nonadjdiff_num = np.sum(nonadj & ~samecat)

        # Both non-adjacent transition types must be possible.
        transCatAvailList[i] = bool(nonadjsame_num > 0 and nonadjdiff_num > 0)

    return transCatAvailList