# Reliability

In [None]:
from scipy.stats import pearsonr
import matplotlib.pyplot as plt 
import random 
import os
import pandas as pd
import seaborn as sns
import numpy as np
from IPython.core.display import display, HTML, Image
%reload_ext rpy2.ipython


def df_to_r(df, varname):
    filename = "%s.feather"%random.randint(0,1000000)
    df.to_parquet(filename, index = False, use_deprecated_int96_timestamps=True)
    %R command = sprintf('%s <- as.data.frame(read_parquet("%s"))', varname, filename) -i varname -i filename
    %R require('arrow')
    %R eval(parse(text = command))
    os.remove(filename)
    
    
def df_from_r(varname):
    filename = "%s.feather"%random.randint(0,1000000)
    %R require('arrow')
    %R command = sprintf('write_parquet(%s, "%s")', varname, filename) -i varname -i filename
    %R eval(parse(text = command))
    df = pd.read_parquet(filename)
    os.remove(filename)
    return df

def get_psych_iccs(biases):
    """Calculate intraclass correlations with R's ``psych::ICC``.

    ``biases`` is copied into the R session (as the R variable ``biases``),
    passed to ``psych::ICC`` and the ``results`` and ``lme`` elements of the
    returned object are copied back to pandas.
    Presumably rows are participants and columns are sessions, matching the
    labels assigned below -- TODO confirm against the caller.

    Returns a tuple ``(iccs, lmers)``: ``iccs`` is the ``results`` table as
    returned by R; ``lmers`` is the ``lme`` table reduced to the columns
    ``type``, ``variance`` and ``Percent``, where ``type`` is filled with the
    labels 'Participant', 'Session', 'Residual' and 'Total' in row order.
    """
    df_to_r(biases, 'biases')
    %R require(psych)
    # Silence R warnings while the ICC is computed.
    %R options(warn=-1)
    #%R -i biases 
    %R result = psych::ICC(biases)
    # Adding the quotes seems to be important because of a glitch that leaks python globals into r
    %R iccs = result$"results"# -o iccs
    %R lmers = result$"lme"# -o lmers
    # Switch R warnings back on (sets warn to 0, not to the previous value).
    %R options(warn=0)
    iccs = df_from_r('iccs')
    lmers = df_from_r('lmers')
    # Label the variance components; assumes the lme table has exactly these
    # four rows in this order -- TODO confirm against psych::ICC output.
    lmers['type'] = pd.Series(['Participant','Session','Residual','Total'])
    lmers = lmers[['type','variance','Percent']]
    return iccs,lmers

def icc_description_dict(iccs):
    """Build "estimate (lower, upper)" strings for every ICC type.

    Parameters:
        iccs: Sequence whose first element is a DataFrame with the columns
            'type', 'ICC', 'lower bound' and 'upper bound'.

    Returns:
        A dict mapping each value of the 'type' column to a string such as
        ".85 (.70, .93)".

    Side effect: a 'description' column holding these strings is added to
    ``iccs[0]`` in place (kept for backward compatibility with callers that
    display the table afterwards).
    """
    def format_icc(value):
        # Two decimals without the leading zero (".85").  Only the zero is
        # removed, so the sign of negative estimates ("-.12") and values
        # that round to 1.00 are reported correctly.
        text = "%.2f" % value
        if text.startswith("-0."):
            return "-" + text[2:]
        if text.startswith("0."):
            return text[1:]
        return text

    table = iccs[0]
    table['description'] = (
        table['ICC'].apply(format_icc)
        + ' (' + table['lower bound'].apply(format_icc)
        + ', ' + table['upper bound'].apply(format_icc) + ')'
    )
    return dict(zip(table['type'], table['description']))

def describe_iccs(iccs, description = ""):
    """Display a one-sentence ICC summary followed by both result tables.

    ``iccs`` is the pair of tables produced by ``get_psych_iccs``;
    ``description`` names the measure inside the summary sentence.  Returns
    the dict from ``icc_description_dict`` extended with a 'description' key.
    """
    summary_fields = icc_description_dict(iccs)
    summary_fields['description'] = description
    sentence = "The ICC(1) for %(description)s was %(ICC1)s.  The ICC(1,k) for %(description)s was %(ICC1k)s."%summary_fields
    display(HTML(sentence))
    # Show the ICC table first, then the variance components, rounded.
    for table in (iccs[0], iccs[1]):
        display(table.round(3))
    return summary_fields
    
def corrfunc(x, y, ax=None, **kws):
    """Annotate an axes with the Pearson correlation of ``x`` and ``y``.

    The label is placed near the top left corner (axes coordinates .1, .9).
    When ``ax`` is not given, the current matplotlib axes are used.  Extra
    keyword arguments are accepted and ignored.
    """
    coefficient = pearsonr(x, y)[0]
    target = ax if ax else plt.gca()
    target.annotate(
        f'ρ = {coefficient:.2f}',
        xy=(.1, .9),
        xycoords=target.transAxes,
    )
    
def spearman_brown(x, y):
    """Return the Spearman-Brown corrected split-half reliability.

    The Pearson correlation ``r`` between the two halves is stepped up with
    ``2r / (1 + r)``.
    """
    half_correlation = pearsonr(x, y)[0]
    return 2 * half_correlation / (1 + half_correlation)
    
def retest_plot(df, add_jitter):
    """Plot the first two columns of ``df`` against each other.

    The figure has a histogram for each of the two columns on the top row
    and a scatter plot of column 0 (x) versus column 1 (y) underneath.

    Parameters:
        df: DataFrame whose first two columns are plotted; the column labels
            must be strings when ``add_jitter`` is true because '_j' is
            appended to them.
        add_jitter: When true, the scatter plot shows jittered copies of the
            two columns (named '<column>_j') instead of the raw values.

    Returns:
        The matplotlib figure.

    ``df`` itself is never modified: the jittered columns live on a copy, so
    they cannot end up in analyses the caller runs on the same frame later.
    """
    fig = plt.figure(figsize = (10,8))
    gs = fig.add_gridspec(3,2)
    # Two small panels on top, one wide panel spanning the two lower rows.
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[0, 1]),
        fig.add_subplot(gs[1:, :]),
    ]
    for i in range(2):
        df.iloc[:,i].hist(ax = axs[i])
        axs[i].title.set_text(df.columns[i])
        axs[i].grid(False)
    if add_jitter:
        x_name = df.columns[0]+'_j'
        y_name = df.columns[1]+'_j'
        # Work on a copy so the caller's frame keeps its original columns.
        jittered = df.copy()
        jittered[x_name] = jitter(df.iloc[:,0])
        jittered[y_name] = jitter(df.iloc[:,1])
        ax = jittered.plot.scatter(x=x_name, y=y_name, ax = axs[2], alpha = .8)
    else:
        ax = df.plot.scatter(x=df.columns[0],y=df.columns[1], ax = axs[2])
    ax.grid(False)
    sns.despine()
    return fig

def jitter(arr):
    """Return ``arr`` plus Gaussian noise scaled to a tenth of its spread.

    The noise is drawn from numpy's global random state and multiplied by
    ``arr.std() / 10``.
    """
    noise = np.random.randn(len(arr))
    spread = arr.std()
    return arr + (noise * spread) / 10

def analyze_reliability(df, dependent, add_jitter = False, title = ''):
    """Plot and report the test-retest reliability of ``dependent``.

    ``df`` is reshaped to one row per participant and one column per
    baseline session (rows with missing values are dropped), shown with
    ``retest_plot`` and analysed with ``get_psych_iccs``.  The ICC summary is
    displayed via ``describe_iccs``.

    Returns a dict with the figure ('fig') and the ICC tables ('iccs').
    """
    by_session = df.set_index(['participant','baseline_session'])
    wide = by_session.unstack()[dependent].dropna()
    fig = retest_plot(wide, add_jitter)
    # Fall back to a generic title when none was supplied.
    if not len(title):
        title = "Test-retest reliability: %s"%dependent
    fig.suptitle(title)
    plt.show()
    iccs = get_psych_iccs(wide)
    describe_iccs(iccs,dependent)
    return {'fig':fig, 'iccs':iccs}

def split_half_plot(df, even, uneven, add_jitter):
    """Plot even-trial against uneven-trial scores for both sessions.

    The top row holds overlaid per-session histograms of the even and the
    uneven scores; the panel underneath is a scatter plot of even (x) versus
    uneven (y) with one colour per session.

    Parameters:
        df: DataFrame with a 'baseline_session' column containing
            'Session 1' and 'Session 2' plus the two score columns.
        even: Name of the column with the even-trial score.
        uneven: Name of the column with the uneven-trial score.
        add_jitter: When true, the scatter plot shows jittered copies of the
            scores instead of the raw values.

    Returns:
        The matplotlib figure.
    """
    s1 = df.query("baseline_session=='Session 1'")
    s2 = df.query("baseline_session=='Session 2'")
    df_1 = pd.DataFrame({"Even":s1[even],"Uneven":s1[uneven]})
    df_2 = pd.DataFrame({"Even":s2[even],"Uneven":s2[uneven]})

    fig = plt.figure(figsize = (10,8))
    gs = fig.add_gridspec(3,2)
    # Two small panels on top, one wide panel spanning the two lower rows.
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[0, 1]),
        fig.add_subplot(gs[1:, :]),
    ]
    for i in range(2):
        df_1.iloc[:,i].hist(ax = axs[i], alpha = .5)
        df_2.iloc[:,i].hist(ax = axs[i], alpha = .5)

        axs[i].title.set_text(df_1.columns[i])
        axs[i].grid(False)
    session_2_color = sns.color_palette()[1]
    if add_jitter:
        # df_1 / df_2 are local frames, so adding columns here is safe.
        df_1[df_1.columns[0]+'_j'] = jitter(df_1.iloc[:,0])
        df_1[df_1.columns[1]+'_j'] = jitter(df_1.iloc[:,1])
        df_2[df_2.columns[0]+'_j'] = jitter(df_2.iloc[:,0])
        df_2[df_2.columns[1]+'_j'] = jitter(df_2.iloc[:,1])
        df_1.plot.scatter(x=df_1.columns[0]+'_j',y=df_1.columns[1]+'_j', ax = axs[2], alpha = .8, label = "Session 1")
        df_2.plot.scatter(x=df_2.columns[0]+'_j',y=df_2.columns[1]+'_j', ax = axs[2], alpha = .8, color = session_2_color, label = "Session 2")
    else:
        # Labels are required here as well, otherwise the legend is empty.
        df_1.plot.scatter(x=df_1.columns[0],y=df_1.columns[1], ax = axs[2], label = "Session 1")
        df_2.plot.scatter(x=df_2.columns[0],y=df_2.columns[1], ax = axs[2], color = session_2_color, label = "Session 2")
    plt.legend(bbox_to_anchor=(1.2, 1.5))
    sns.despine()
    return fig
    

def analyze_split_half(df, even, uneven, add_jitter = False, description = '', title = ''):
    """Plot and report the split-half reliability for both sessions.

    Draws the figure with ``split_half_plot``, computes the Spearman-Brown
    corrected correlation between the ``even`` and ``uneven`` columns within
    'Session 1' and within 'Session 2', and displays both coefficients.

    Returns a dict with the figure ('fig') and the two coefficients
    ('spearman_browns', session 1 first).
    """
    fig = split_half_plot(df, even, uneven, add_jitter)
    # Fall back to a generic title when none was supplied.
    if not len(title):
        title = "Split-half reliability: %s"%description
    fig.suptitle(title)
    plt.show()

    session_queries = ("baseline_session=='Session 1'", "baseline_session=='Session 2'")
    coefficients = []
    for query in session_queries:
        session_rows = df.query(query)
        coefficients.append(spearman_brown(session_rows[even], session_rows[uneven]))

    report = "<p><i>r_sb<sub>_session_1</sub></i> = %.3f</p><p><i>r_sb_<sub>session_2</sub></i> = %.3f</p>"%tuple(coefficients)
    display(HTML(report))
    return {'fig':fig, 'spearman_browns':coefficients}
    

R[write to console]: Loading required package: Matrix

R[write to console]: 
Attaching package: ‘lmerTest’


R[write to console]: The following object is masked from ‘package:lme4’:

    lmer


R[write to console]: The following object is masked from ‘package:stats’:

    step


