In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pdb
import scipy
from scipy.optimize import minimize, fmin
from scipy.stats import multivariate_normal
from tqdm.notebook import tqdm
from scipy import io
import os

import glmnet_python
import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings
from glmnet import glmnet; from glmnetPlot import glmnetPlot 
from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict
from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef
from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict

from PIL import Image
from sklearn.model_selection import train_test_split,KFold
from pyglmnet import GLM, GLMCV, datasets, utils

import matplotlib
from mpl_toolkits import mplot3d
# Global figure defaults: font type 42 for PDF/PS output (keeps text as
# editable TrueType in vector editors, per the matplotlib rcParams docs)
# and Arial as the sans-serif face.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': ['arial'],
})

def makeAxesPretty(ax):
    """
    Apply the notebook's minimal axis style to `ax` in place.

    The bottom and left spines get a line width of 1; the top and right
    spines are hidden. Returns None.
    """
    spines = ax.spines
    spines['bottom'].set_linewidth(1)
    spines['left'].set_linewidth(1)
    for hidden_side in ('top', 'right'):
        spines[hidden_side].set_visible(False)

In [None]:
def extractData(csv_test, csv_data, exptTotalLength, exptLengthWithBreaks):
    """
    Recover the tones, correct answers and key presses for the trials of one
    subject file.

    Parameters
    ----------
    csv_test : DataFrame with 'Name' and 'Tones' columns. Only its first
        `exptTotalLength` rows are used as a name -> tone-string lookup. Each
        'Tones' entry is a bracketed, comma-separated string (the first and
        last characters are stripped before splitting on ',').
    csv_data : DataFrame with 'Name', 'corrAns' and 'test_resp.keys' columns.
        Rows are read starting at row label 46.
        NOTE(review): the 46/47 offsets presumably skip practice/instruction
        rows -- confirm against the experiment output format.
    exptTotalLength : number of rows of `csv_test` to use for the lookup.
    exptLengthWithBreaks : number of consecutive `csv_data` rows (from row 46)
        to scan; rows whose 'Name' is not a string are skipped.

    Returns
    -------
    (exptTones, exptCorrans, exptKeys) restricted to the rows that had a
    string 'Name'. `exptTones` has one row per kept trial and `n_tones`
    columns.

    Relies on the module-level global `n_tones`.
    """
    first_trial_row = 46
    n_trials = csv_data.shape[0] - 47

    # Name -> tone-string lookup taken from the reference trial list.
    lookup_columns = list(csv_test.columns)
    name_col = lookup_columns.index('Name')
    tones_col = lookup_columns.index('Tones')
    lookup_names = csv_test.iloc[0:exptTotalLength, name_col].values
    lookup_tones = csv_test.iloc[0:exptTotalLength, tones_col].values

    tones_per_row = np.zeros((n_trials, n_tones))
    kept_rows = []
    played_names = csv_data['Name']

    for row in range(exptLengthWithBreaks):
        wav_name = played_names[row + first_trial_row]
        if not isinstance(wav_name, str):
            # Non-string names (e.g. missing values) are not trials; skip.
            continue
        match_idx = np.where(wav_name == lookup_names)[0]
        tone_string = lookup_tones[match_idx][0]
        tones_per_row[row, :] = np.array(tone_string[1:-1].split(',')).astype(float)
        kept_rows.append(row)

    n_rows = csv_data.shape[0]
    exptTones = np.copy(tones_per_row[kept_rows, :])
    exptCorrans = np.copy(csv_data['corrAns'][first_trial_row:n_rows])[kept_rows]
    exptKeys = np.copy(csv_data['test_resp.keys'][first_trial_row:n_rows])[kept_rows]

    return exptTones, exptCorrans, exptKeys
    

In [None]:
def identifyResponseTrials(keysPressed, correctAns, tonesPlayed, exptTotalLength):
    """
    Keep only the trials the subject responded to and code answers as 0/1.

    Parameters
    ----------
    keysPressed : 1-D array-like of key presses; 'h' and 'l' are responses,
        anything else (missing value, other key) counts as no response.
    correctAns : 1-D array-like of correct answers, aligned with keysPressed.
    tonesPlayed : 2-D array (trials x tones), aligned with keysPressed.
    exptTotalLength : only the first `exptTotalLength` trials are considered.

    Returns
    -------
    trialTonesResponded : rows of `tonesPlayed` for responded trials.
    trialBehaviourResponded : integer array, 1 where the key was 'h', else 0.
    corransResponded : integer array, 1 where the correct answer was 'h',
        else 0.
    """
    # Truncate FIRST, then look for non-responses: indices found on the
    # untruncated arrays can point past exptTotalLength and make the
    # subsequent selection fail with an IndexError.
    # dtype=object guarantees an element-wise comparison with the key strings
    # even when the column holds only missing values (float NaN).
    keys = np.array(keysPressed, dtype=object).ravel()[:exptTotalLength]
    corrans = np.array(correctAns, dtype=object).ravel()[:exptTotalLength]
    tones_array = tonesPlayed[:exptTotalLength]

    # Convert keys ['l','h'] to [0,1]. Explicit integer arrays are built
    # instead of np.zeros_like(input), which would inherit the input dtype
    # (object, or str -> '' / '1').
    pressed_high = (keys == 'h')
    pressed_low = (keys == 'l')
    keys_num = pressed_high.astype(int)
    corrans_num = (corrans == 'h').astype(int)

    idxs_with_response = np.where(pressed_high | pressed_low)[0]

    trialTonesResponded = tones_array[idxs_with_response, :]
    trialBehaviourResponded = keys_num[idxs_with_response]
    corransResponded = corrans_num[idxs_with_response]

    return trialTonesResponded, trialBehaviourResponded, corransResponded
    

In [None]:
def _buildToneFreqTable(trialTones, uniqueTones):
    """
    One-hot lookup table of shape (n_trials, n_positions * n_unique).

    Column `pos * n_unique + k` is 1 when the tone at position `pos` of the
    trial equals `uniqueTones[k]`, and 0 otherwise.
    """
    n_trials, n_positions = trialTones.shape
    matches = trialTones[:, :, None] == uniqueTones[None, None, :]
    return np.reshape(matches.astype(float),
                      (n_trials, n_positions * len(uniqueTones)))


def _buildSmoothnessTau(n_unique, n_positions):
    """
    Block-diagonal matrix with one tridiagonal (2 on the diagonal, -1 on the
    first off-diagonals) block per tone position.
    """
    Tau_simple = np.zeros((n_unique, n_unique))
    for ii in range(n_unique):
        Tau_simple[ii, ii] = 2
        if ii < n_unique - 1:
            Tau_simple[ii, ii + 1] = -1
        if ii > 0:
            Tau_simple[ii, ii - 1] = -1

    Tau = np.zeros((n_unique * n_positions, n_unique * n_positions))
    for i_pos in range(n_positions):
        block = slice(i_pos * n_unique, (i_pos + 1) * n_unique)
        Tau[block, block] = Tau_simple
    return Tau


def betaValuesSim(tones_array,keys_num,
                  alpha,noTau, 
                  permutations):
    """
    Fit a binomial GLM (pyglmnet GLMCV) on one-hot tone-by-position features
    with K-fold cross-validation and collect the fitted weights.

    Parameters
    ----------
    tones_array : 2-D ndarray (trials x tone positions) of tone values.
    keys_num : per-trial responses coded 0/1, aligned with `tones_array`.
    alpha : passed to GLMCV as `alpha`.
    noTau : the matrix handed to GLMCV as `Tau` is scaled by (1 - noTau), so
        noTau=1 passes an all-zero matrix.
    permutations : number of K-fold splits (KFold(n_splits=permutations)).

    Returns
    -------
    uniqueTones : sorted unique tone values found in `tones_array`.
    glmBetas : array (permutations, 1 + n_positions * n_unique); column 0 is
        the intercept (`beta0_`), the rest are `beta_`, one row per fold.
    meanScore : test-fold score averaged over the folds.

    Also prints `alpha` and the mean test score.
    """
    # Fold-independent quantities are computed once. Sizes are derived from
    # the data instead of assuming 30 unique tones x 3 positions (91 betas).
    uniqueTones = np.unique(tones_array)
    n_unique = len(uniqueTones)
    n_positions = tones_array.shape[1]
    Tau = _buildSmoothnessTau(n_unique, n_positions)

    glmBetas = np.zeros((permutations, 1 + n_unique * n_positions))
    PyGLMScoreTt = 0

    kf = KFold(n_splits=permutations)

    for nn, (train_index, test_index) in enumerate(kf.split(tones_array)):
        # Lookup tables (one-hot tone x position) for the train / test folds.
        freqTableTr = _buildToneFreqTable(tones_array[train_index,:], uniqueTones)
        freqTableTt = _buildToneFreqTable(tones_array[test_index,:], uniqueTones)
        trial_behaviourTr = np.reshape(keys_num[train_index], -1)
        trial_behaviourTt = np.reshape(keys_num[test_index], -1)

        # Cross-validation using pyglmnet.
        # use the default value for reg_lambda
        glm = GLMCV(distr='binomial', alpha=alpha, Tau=(1-noTau)*Tau, score_metric='accuracy',tol=1e-4,
                   max_iter=3000)

        # fit model
        glm.fit(freqTableTr, trial_behaviourTr.astype(float))

        # score the test set prediction
        PyGLMScoreTt += glm.score(freqTableTt, trial_behaviourTt)

        glmBetas[nn,0] = glm.beta0_
        glmBetas[nn,1:] = glm.beta_

    print(alpha, PyGLMScoreTt/permutations)

    return(uniqueTones, np.array(glmBetas), PyGLMScoreTt/permutations)


In [None]:
n_tones = 3  # tones per trial; read as a global by extractData

# Obtaining data from a given expt
Test = pd.read_csv('subjectDataForPlots/allTrials_noBias.csv')

# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the subject -> row mapping reproducible across machines and runs.
# Hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are not subject files.
SubjectFiles = sorted(entry for entry in os.listdir('subjectDataForPlots/noContextData')
                      if not entry.startswith('.'))
if len(SubjectFiles) < 56:
    # Fail before the long per-subject fits rather than part-way through.
    raise ValueError("Expected at least 56 subject files in "
                     "subjectDataForPlots/noContextData, found %d" % len(SubjectFiles))

# One row per subject, one column per tone position.
weightTonePositions = np.zeros((56,n_tones))

for subjectIdx in range(56):  
    filename = 'subjectDataForPlots/noContextData/'+SubjectFiles[subjectIdx]  
    print(filename)
    Data = pd.read_csv(filename)

    # Get tones and values of keys pressed from no context expt
    df_tones, df_corrans, df_keys = extractData(csv_test=Test, 
                                                csv_data=Data, 
                                                exptTotalLength=600, 
                                                exptLengthWithBreaks=603) 

    # Find no response cases in the no context expt
    trial_tones_expt, trial_behaviour_expt, corrans_expt = identifyResponseTrials(keysPressed = df_keys, 
                                                                                correctAns = df_corrans, 
                                                                                tonesPlayed = df_tones, 
                                                                                exptTotalLength = 600)

    # GLM with chosen alpha = 0 since that is the most common alpha value
    [uniqueTonesSmall, pyGLMbetasSmall_no, score_no] = betaValuesSim(trial_tones_expt,
                                                                     trial_behaviour_expt,
                                                                     alpha = 0,
                                                                     noTau=0,
                                                                     permutations=10)

    # Average the non-intercept weights over folds, then remove their mean.
    foldMeanWeights_no = np.mean(pyGLMbetasSmall_no[:,1:],axis=0)
    meanShift_no = np.mean(foldMeanWeights_no)
    glmWeights_no = foldMeanWeights_no - meanShift_no

    # Influence of a tone position = summed |weight| over that position's
    # block of unique-tone weights (block width taken from the data rather
    # than assuming 30 unique tones).
    nUniqueTones = len(uniqueTonesSmall)
    for tonePos in range(n_tones):
        posWeights = glmWeights_no[tonePos*nUniqueTones:(tonePos+1)*nUniqueTones]
        weightTonePositions[subjectIdx,tonePos] = np.sum(np.abs(posWeights))



In [None]:
"""
Stats for weights of tone positions.
"""

# Write the subjects-by-tone-position weight matrix to a .mat file; the dict
# key is the name the array is stored under.
weightOfTonePositions = dict(weightOfTonePositions=weightTonePositions)
scipy.io.savemat("weightOfTonePositions.mat", weightOfTonePositions)


In [None]:
import pingouin as pg

# Subject count comes from the data so this cell stays consistent with
# whatever the fitting cell produced (one row per subject).
nSubjects = weightTonePositions.shape[0]

# Long-format table for the repeated-measures ANOVA: one row per
# (subject, tone position) pair, ordered position 0, then 1, then 2.
df = pd.DataFrame(columns=['Subject','TonePosition','Weight'])
df['Weight'] = np.concatenate([weightTonePositions[:,0],weightTonePositions[:,1],weightTonePositions[:,2]])
df['TonePosition'] = [0]*nSubjects + [1]*nSubjects + [2]*nSubjects
df['Subject'] = list(range(1,nSubjects+1))*3
df.index += 1
print(pg.rm_anova(dv='Weight',within=['TonePosition'],
                  subject='Subject', data=df, effsize='np2'))

# Pairwise paired comparisons between tone positions.
for posA, posB in [(0,1),(1,2),(0,2)]:
    print(f"Wilcoxon test on weights of tone positions {posA} and {posB}", 
          pg.wilcoxon(weightTonePositions[:,posA], weightTonePositions[:,posB]))

# Standard error of the mean per tone position. The denominator counts the
# non-NaN subjects so it matches the NaN-aware mean and std.
nValidSubjects = np.sum(~np.isnan(weightTonePositions),axis=0)
semTonePositions = np.nanstd(weightTonePositions,axis=0)/np.sqrt(nValidSubjects)

fig, ax = plt.subplots(1,1,figsize=(8,6))
ax.errorbar([1,2,3], np.nanmean(weightTonePositions,axis=0), 
            yerr=semTonePositions,color='k',linewidth=1)

ax.set_xticks(ticks=[1,2,3])
ax.set_xticklabels(labels=['First\nTone','Second\nTone','Third\nTone'])
ax.set_yticks(ticks=np.arange(0,40,10))
ax.set_yticklabels(labels=np.around(np.arange(0,40,10),1))
ax.tick_params(axis='both',labelsize=35,length=10,width=1)
ax.set_xlabel('Tone Position',fontsize=37)
ax.set_ylabel('Influence Of Tone Position',fontsize=37)
makeAxesPretty(ax)

# Create the output folder if needed so the save cannot fail on a fresh
# checkout after the long fitting run.
figurePath = 'figures/FromProlific/illustrations/InfluenceOfTonePosition_glmAnalysis.pdf'
os.makedirs(os.path.dirname(figurePath), exist_ok=True)
fig.savefig(figurePath,
             bbox_inches='tight',transparent=True)