### General-purpose functions used in multiple notebooks stored in multiple directories

In [2]:
def subidivide_by_group_and_condition(df):
    '''
    Split df into one subset per (group, stimulation condition) pair
    Arg1, df, Pandas DataFrame with 'group' and 'stim_condition' columns
    Returns:
    ctrls - dict of CTRL DataFrame subsets, keyed 'None', 'Sample', 'Test'
    nphrs - dict of NPHR DataFrame subsets, keyed 'None', 'Sample', 'Test'
    '''
    # stim_condition code selected for each dictionary label
    condition_codes = {'None': 0, 'Sample': 1, 'Test': 3}

    def split_by_condition(group_name):
        # Rows of one experimental group, split by stimulation condition
        in_group = df['group'] == group_name
        return {
            label: df[in_group & (df['stim_condition'] == code)]
            for label, code in condition_codes.items()
        }

    ctrls = split_by_condition('CTRL')
    nphrs = split_by_condition('NPHR')
    return ctrls, nphrs

In [3]:
from scipy.stats import skew, kurtosis

def check_variables_distribution_parameters(df, col):
    '''
    Plot the histogram of one column and print its skewness and kurtosis
    Arg1, df, Pandas DataFrame
    Arg2, col - name of the column to describe
    Returns: Void.
    '''

    # Histogram of the column
    sns.set(context='talk')
    sns.displot(data=df, x=col, kind='hist')

    # Shape statistics of the same column; fisher=True selects scipy's
    # Fisher definition of kurtosis
    values = df[col]
    summary = '{}: skew={}, kurtosis={}'.format(
        col, skew(values), kurtosis(values, fisher=True))
    print(summary)
    
def add_session_nr(group):
    """
    Number the sessions of group starting at 1, following the sorted order of
    the unique 'session' values, and store the number in 'session_nr'.
    arg1, group, Pandas DataFrame - contains the data to add the session number to.
    Returns group, which is modified in place.
    """
    ordered_sessions = np.sort(group['session'].unique())
    for session_nr, session in enumerate(ordered_sessions, start=1):
        rows_of_session = group['session'] == session
        group.loc[rows_of_session, 'session_nr'] = session_nr
    return group

def create_var_shifted_column(df, var, shift):
    """
    Creates a 'shifted' variable column named var + '_shifted'.
    Values are shifted by `shift` rows within each (group, rat, session), so
    the first `shift` rows of every session are NaN and nothing is carried
    over from one session to the next.
    arg1, df, Pandas DataFrame - Contains the data (modified in place)
    arg2, var, str - Column name of the variable to shift
    arg3, shift, int - Magnitude of shift
    Returns df. No column is added when shift is smaller than 1.
    """
    if shift < 1:
        # Nothing to shift; df is returned untouched
        return df
    # GroupBy.shift keeps the index of df, so the result can be assigned
    # straight back as a column (no per-group apply / lambda required).
    sessions = df.groupby(['group', 'rat', 'session'])
    df[var + '_shifted'] = sessions[var].shift(shift)
    return df

In [None]:
def check_linear_model_assumptions(mdf, df):
    '''
    Plot the distribution of residuals, qqplot and residuals vs. predicted values
       arg1 -  mdf, model object instance that resulted from mixedLM.fit()
       arg2 -  df, Pandas DataFrame, containing the data
               (its 'stim_condition' and 'group' columns are used)
    Returns: Void. Draws a 1x3 figure: residuals vs. predicted values,
    histogram of the residuals, and normal QQ plot of the residuals.
    '''

    import scipy.stats as stats

    # Add residuals data to performance dataframe.
    # NOTE(review): "residuals" is assigned positionally while the other
    # columns are aligned on index labels, so this presumably expects df
    # (and mdf.fittedvalues) to carry a default 0..n-1 index - confirm.
    performance = pd.DataFrame()
    performance["residuals"] = mdf.resid.values
    performance["stim_condition"] = df.stim_condition
    performance['group'] = df.group
    performance["predicted"] = mdf.fittedvalues

    fig, ax = plt.subplots(1, 3, figsize=(20, 5))
    # Plot residuals vs. Predicted
    sns.residplot(x="predicted", y="residuals", data=performance, ax=ax[0])
    # Plot distribution of residuals
    sns.histplot(x='residuals', data=performance, ax=ax[1])
    # Plot QQ plot explicitly on the third panel rather than on whatever
    # axes pyplot currently considers active
    stats.probplot(performance['residuals'], dist="norm", plot=ax[2])
    sns.despine()

In [3]:
def get_file_list(path, filetype):

    '''
    Creates a list containing all filenames of file type in path,
    ordered by modification time (oldest first)
    arg1, path <str>: path to the files
    arg2, filetype <str>: extension of the file with a wildcard. Example: "*.csv"
    Returns: list of file names relative to path; empty list when no file
    matches.
    Side effect: the working directory is changed to path.
    '''

    # The pattern is matched relative to path, hence the directory change
    os.chdir(path)
    # sorted() always returns a list, so an empty match yields [] instead of
    # failing on an undefined variable
    return sorted(glob.glob(filetype), key=os.path.getmtime)

In [None]:
def add_condition_trial_nr(group):
    '''
    Number the rows of group 1..len(group), in their current order, in a
    'cond_trial_nr' column
    Arg1 - group, Pandas DataFrame (modified in place)
    Return group
    '''
    n_trials = len(group)
    trial_numbers = range(1, n_trials + 1)
    group['cond_trial_nr'] = trial_numbers
    return group


In [1]:
def remove_outliers(df, var, upper_limit):
    '''
    Keep only the rows of df whose var is strictly below the upper limit
    that corresponds to the group, rat and stim_condition of its first row
    Arg1 - df, Pandas DataFrame holding the rows of a single
           (group, rat, stim_condition) combination - presumably one chunk
           of a groupby; verify against caller
    Arg2 - var, str : column of the variable to remove outliers from
    Arg3 - upper_limit, limits indexed by (group, rat, stim_condition)
    Return the filtered df
    '''
    # One-element Series taken from the first row; they are kept as Series
    # (not scalars) so the .loc lookup returns a Series for .iloc[0]
    first_row = df.head(1)
    group_key = first_row['group']
    rat_key = first_row['rat']
    cond_key = first_row['stim_condition']

    threshold = upper_limit.loc[group_key, rat_key, cond_key].iloc[0]
    below_threshold = df[var] < threshold
    return df[below_threshold]

def calculate_upper_limit_using_iqr(df, var):
    '''
    Calculate the outlier upper limit using IQR (Q3 + 1.5 * IQR), separately
    for every (group, rat, stim_condition) combination
    Arg1 - df, Pandas DataFrame
    Arg2 - var, str - variable name
    Return upper_limit, Pandas Series indexed by (group, rat, stim_condition)
    '''

    # Quartiles are taken from the df argument, grouped once for both
    per_condition = df.groupby(['group', 'rat', 'stim_condition'])[var]
    q3 = per_condition.quantile(0.75)
    q1 = per_condition.quantile(0.25)
    iqr = q3 - q1
    # Calculate outlier limit
    upper_limit = q3 + (iqr * 1.5)
    return upper_limit

In [5]:
def qqplot_within_condition(q1, q2, color):
    '''
    Scatter q2 against q1 in a single color on a new 4x4 figure
    Arg1 - q1, values for the x axis
    Arg2 - q2, values for the y axis
    Arg3 - color, color of the markers
    Return the object returned by sns.scatterplot
    '''
    sns.set(style='white', context='talk')
    plt.figure(dpi=300, figsize=(4, 4))
    marker_style = dict(s=50, alpha=.6)
    scatter = sns.scatterplot(x=q1, y=q2, color=color, **marker_style)
    sns.despine()
    return scatter

In [7]:
def qqplot_within_group(q1, q2, q3, colors):
    '''
    Scatter q2 and q3 against q1 on a new 3x3 figure
    Arg1 - q1, values for the x axis
    Arg2 - q2, y values drawn with colors[0]
    Arg3 - q3, y values drawn with colors[1]
    Arg4 - colors, sequence with at least two colors
    Return the object returned by the first sns.scatterplot call
    '''
    sns.set(style='white', context='talk')
    plt.figure(dpi=300, figsize=(3, 3))
    marker_style = dict(s=50, alpha=.6)
    first_scatter = sns.scatterplot(x=q1, y=q2, color=colors[0], **marker_style)
    sns.scatterplot(x=q1, y=q3, color=colors[1], **marker_style)
    sns.despine()
    return first_scatter