### p-value-walk

##### Template:
https://colab.research.google.com/github/pachterlab/bibecs183/blob/master/Colab_Notebooks/p_value_walk.ipynb#scrollTo=ZH2kLPq7Ol5c
*(Programming language: R)*

#### reinterpretation in python 3:
Interactive visualization of statistical t-tests.

**Interactive controls:**
* Test selection: one-sample t-test, two-sample t-test, or dependent t-test
* Normal distribution: sample size, expected value, standard deviation
* Limit: slider for visualizing the limit
* Recalculation: calculate new values with the current settings
* Chart: test results versus sample size

**Calculation process:**
- Following the template, 2 vectors are filled iteratively with random, normally distributed elements from an unknown population.
- In each iteration, the elements present in the vectors so far are checked with a t-test.
- The vector size is determined using the 'Sample size' slider.

In [None]:
# resources
#
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import ipywidgets as widgets

%matplotlib nbagg

In [None]:
# Warnings are issued while the samples are being drawn and the t-tests are
# being performed. They are switched off here.
# NOTE: filterwarnings("ignore") without a category silences *every* warning
# for the rest of the session, including deprecation warnings from libraries.
import warnings

warnings.filterwarnings("ignore")

In [None]:
# Initialize the interactive controls
#
# Sample size (number of p-values that are simulated and plotted)
style = {'description_width': 'initial'}
n_total = widgets.IntSlider(min=50, max=5000, value=1000, description="Stichprobengröße",
                            style=style, continuous_update=False)
# Limit against which the p-values of the t-test are compared
limit = widgets.SelectionSlider(options=[0.001, 0.005, 0.010, 0.020, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5],
                                value=0.001,    description='limit',    disabled=False,
                                continuous_update=False,  orientation='vertical', readout=True)
#
# Available test forms; the order of this list is relied upon by index
# (testForm[0], testForm[1], testForm[2]) when a test is selected.
testForm = ['One-sample t-test', 'Two-sample t-test', 'Dependent t-test']
testDropdown = widgets.Dropdown(description='select test form', options=testForm, value='Two-sample t-test')
#
# Expected value and standard deviation of the normal distribution
# NOTE(review): 'tooltips' does not look like a FloatSlider trait (the keyword
# is probably 'tooltip' or 'description_tooltip', depending on the ipywidgets
# version) - confirm against the installed version before changing it.
mu = widgets.FloatSlider(min=-15, max=15, value=0.0, description="µ:",
                         continuous_update=False, tooltips="expected value of the normal distribution")
# Raw string: '\s' is not a valid escape sequence in a normal string literal.
sigma = widgets.FloatSlider(min=0.1, max=5, value=1, description=r'$\sigma$:',
                            continuous_update=False, tooltips="standard deviation")
#
# Buttons
#
BtnReload = widgets.Button(description="recalculate",button_style='info', tooltip="calculate new values with the current settings")
BtnReset = widgets.Button(description="Reset", button_style='warning', tooltip="reset to default settings")
#
# Output info
info_1 = widgets.Label(value="")
info_2 = widgets.Label(value="") # a Label shows a single line only, therefore 2 labels

In [None]:
# One-sample t-test https://de.wikipedia.org/wiki/Einstichproben-t-Test
#
# Carry out a one-sample t-test for a normally distributed sample
# - following the template, a vector is filled iteratively with random, normally distributed elements from an unknown population.
# - The vector size (sample size) is determined via 'n_total'.
# - In each iteration, the elements present in the vector so far are checked in a t-test.
#
# Calculation methods:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html#scipy.stats.norm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html#scipy.stats.ttest_1samp
# - the one-sample t-test checks, two-sided, against the same expected value that is used to draw the elements.
#
def one_sample_t_test(samples, mu, sigma):
    """Simulate a p-value walk for the two-sided one-sample t-test.

    Draws ``samples + 1`` values from a normal distribution and, for every
    prefix of at least two draws, tests that prefix against ``mu``.

    Args:
        samples: Number of p-values to produce (non-negative int).
        mu: Mean of the sampling distribution; also the hypothesised mean
            passed to the t-test.
        sigma: Standard deviation of the sampling distribution.

    Returns:
        1-D float array of length ``samples``. Element ``k`` is the p-value
        of the test on the first ``k + 2`` draws.
    """
    # A t-test needs at least two observations, so one extra value is drawn
    # to let the very first returned p-value rest on two real draws.
    x = stats.norm.rvs(mu, sigma, size=samples + 1)
    p = np.zeros(samples)
    for k in range(samples):
        # Only values that were actually drawn enter the test. The earlier
        # slice x[1:i] contained a never-filled zero placeholder and left
        # out the newest draw, which distorted every p-value.
        p[k] = stats.ttest_1samp(x[:k + 2], mu).pvalue
    return p

In [None]:
# Two-sample t-test https://de.wikipedia.org/wiki/Zweistichproben-t-Test
#
# - following the template, 2 vectors are filled iteratively with random, normally distributed elements from an unknown population.
# - The vector size (sample size) is determined via 'n_total'.
# - In each iteration, the elements present in the vectors so far are checked in a t-test.
#
# Calculation methods:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html#scipy.stats.norm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
# - the two-sample t-test checks whether the means of the two samples are equal.
# The p-value of each test is stored as the result.
#
def independent_t_test(samples,mu, sigma):
    """Simulate a p-value walk for the independent two-sample t-test.

    Draws two series of ``samples + 1`` values each from the same normal
    distribution and, for every prefix of at least two draws, compares the
    two prefixes with a t-test that assumes equal variances.

    Args:
        samples: Number of p-values to produce (non-negative int).
        mu: Mean of the sampling distribution of both series.
        sigma: Standard deviation of the sampling distribution of both series.

    Returns:
        1-D float array of length ``samples``. Element ``k`` is the p-value
        of the test on the first ``k + 2`` draws of each series.
    """
    # A t-test needs at least two observations per group, so one extra value
    # is drawn for each series.
    x = stats.norm.rvs(mu, sigma, size=samples + 1)  # sample vector 1
    y = stats.norm.rvs(mu, sigma, size=samples + 1)  # sample vector 2
    p = np.zeros(samples)
    for k in range(samples):
        # Only values that were actually drawn enter the test. The earlier
        # slices x[1:i] / y[1:i] contained a never-filled zero placeholder
        # and left out the newest draw.
        p[k] = stats.ttest_ind(x[:k + 2], y[:k + 2], equal_var=True).pvalue
    return p

In [None]:
# Dependent (paired) t-test
#
# - following the template, 2 vectors are filled iteratively with random, normally distributed elements from an unknown population.
# - The vector size (sample size) is determined via 'n_total'.
# - In each iteration, the elements present in the vectors so far are checked in a t-test.
#
# Calculation methods:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html#scipy.stats.norm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html
# - the dependent t-test checks whether the means of dependent or repeated samples are equal.
# The p-value of each test is stored as the result.
def paired_t_test(samples, mu, sigma):
    """Simulate a p-value walk for the dependent (paired) t-test.

    Draws two series of ``samples + 1`` values each from the same normal
    distribution and, for every prefix of at least two pairs, runs a paired
    t-test on the two prefixes.

    Args:
        samples: Number of p-values to produce (non-negative int).
        mu: Mean of the sampling distribution of both series.
        sigma: Standard deviation of the sampling distribution of both series.

    Returns:
        1-D float array of length ``samples``. Element ``k`` is the p-value
        of the test on the first ``k + 2`` pairs.
    """
    # A paired t-test needs at least two pairs, so one extra value is drawn
    # for each series.
    x = stats.norm.rvs(mu, sigma, size=samples + 1)  # x-values
    y = stats.norm.rvs(mu, sigma, size=samples + 1)  # y-values
    p = np.zeros(samples)
    for k in range(samples):
        # Only values that were actually drawn enter the test. The earlier
        # slices x[1:i] / y[1:i] contained a never-filled zero placeholder
        # and left out the newest draw.
        p[k] = stats.ttest_rel(x[:k + 2], y[:k + 2]).pvalue
    return p

In [None]:
#
# Visualization
#
# - draw diagram:
# Figure and axes that update_view() clears and redraws on every call.
fig, ax = plt.subplots()
# Module-level state that update_view() reads and rebinds via `global`:
# limit_old holds the limit value seen at the last redraw, so a pure limit
# change can be told apart from any other trigger.
limit_old = limit.value
# Initial p-value series, computed with the two-sample test and the current
# slider values.
pVal = independent_t_test(n_total.value, mu.value, sigma.value)
#
# 1. Define the callback function that is executed when the value of a switch changes
def update_view(*args):
    """Recalculate the p-value series if required and redraw chart and labels.

    Registered as the observer of all controls and as the click handler of
    the recalculate button; also called directly for the first drawing.
    A new series is simulated unless the limit value differs from the one
    seen at the previous call; in that case only the limit line, the markers
    and the labels are refreshed for the existing series.

    Args:
        *args: Ignored. Accepts whatever the registering widget passes to
            its callback.
    """
    global limit_old
    global pVal

    # Recalculate only when the limit is unchanged: a limit change alone must
    # not replace the series that is currently displayed.
    if limit_old == limit.value:
        simulations = {
            testForm[0]: one_sample_t_test,   # One-sample t-test
            testForm[1]: independent_t_test,  # Two-sample t-test
            testForm[2]: paired_t_test,       # Dependent t-test
        }
        pVal = simulations[testDropdown.value](n_total.value, mu.value, sigma.value)

    # Both positions are plain indices into pVal, i.e. exactly the
    # x-coordinates that ax.plot(pVal) uses below. (An extra offset of 10 was
    # previously added twice to the limit position, which moved its marker
    # 20 places away from the point on the curve it refers to.)
    pMin = int(pVal.argmin())
    pUnderLimit = (pVal < limit.value).nonzero()[0]
    below_limit = len(pUnderLimit) > 0
    #
    #----------------------------------------------
    ax.clear()
    ax.plot(pVal, 'r-', lw=3)
    ax.set_xlim(0, n_total.value)
    ax.set_ylim(0, 1.5)
    ax.set_ylabel("p-value")
    ax.set_xlabel("Stichprobengröße")
    #
    # Horizontal line at the limit. axhline's xmin/xmax are fractions of the
    # axes width, so the defaults (0 and 1) already span the whole chart.
    ax.axhline(y=limit.value, ls='--', color="darkgrey", linewidth=2)
    #
    # Vertical markers for the minimum and for the first point below the
    # limit; the annotations are anchored at the vertical middle of the axes.
    y_low, y_high = ax.get_ylim()
    yPos = (y_low + y_high) / 2
    arrow = dict(arrowstyle="->", connectionstyle="arc,angleA=0, armA=50,rad=10")
    # The minimum always exists, also at index 0, so it is always marked.
    ax.axvline(x=pMin, ymin=0, ymax=0.5, ls='-', color="blue", lw=2)
    ax.annotate('minimum', xy=(pMin, yPos), xycoords='data',
                xytext=(-90, -50), textcoords='offset points',
                bbox=dict(boxstyle="round", fc=(0.1, 0.1, 1), ec="0.5", alpha=0.9),
                arrowprops=arrow)
    #
    if below_limit:
        first_below = pUnderLimit[0]
        ax.axvline(x=first_below, ymin=0, ymax=0.5, ls='-', lw=2, color="darkgreen")
        ax.annotate('first time < limit',
                    xy=(first_below, yPos),
                    xycoords='data', xytext=(-90, 50),
                    textcoords='offset points',
                    bbox=dict(boxstyle="round", fc=(0.2, 0.8, 0.3), ec="0.5", alpha=0.9),
                    arrowprops=arrow)
    #
    # x-axis ticks
    ax.set_xticks(np.arange(10, (n_total.value+10), (n_total.value/4)))
    #
    # horizontal grid lines
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                  alpha=0.5)
    #
    plt.show()
    # --------------------
    # Output text (always refreshed so that no stale message remains)
    info_1.value = "The smallest test result was found at the location {}.".format(pMin)
    #
    if below_limit:
        s2 = "For the first time the limit of {} at the point {} was undershot.".format((limit.value), pUnderLimit[0])
    else:
        s2 = "No test result fell below the limit of {}".format(limit.value)
    info_2.value = s2

    # Remember the limit so the next call can detect a pure limit change.
    limit_old = limit.value
    
def reset_controls(*btn):
    """Put every control and both info labels back to their defaults, then redraw.

    Args:
        *btn: Ignored. Accepts whatever the Reset button passes to its
            click handler.
    """
    # The order matches the sequence in which the values are assigned, one
    # after the other; each assignment may notify the observers of a control.
    defaults = (
        (n_total, 1000),
        (limit, 0.001),
        (mu, 0),
        (sigma, 1),
        (info_1, ""),
        (info_2, ""),
        (testDropdown, 'Two-sample t-test'),
    )
    for control, default in defaults:
        control.value = default
    update_view()

#--------------------------------------------------------
# 2. Assign the callback function to the individual buttons using the 'observe' method
# Every control whose 'value' change must trigger a redraw
for _control in (n_total, mu, sigma, limit, testDropdown):
    _control.observe(update_view, 'value')
BtnReload.on_click(update_view)
BtnReset.on_click(reset_controls)

# --------------------------------------------------------
# Start the application
#
# Draw the diagram once
update_view()
#
# Arrange the controls with 'widgets.VBox / .HBox'; the VBox expression has
# to stay the last statement of the cell so that the notebook displays it.
box_layout = widgets.Layout(display='flex', flex_flow='column', align_items='stretch')
#
widgets.VBox(
    [
        info_1,
        info_2,
        widgets.HBox([limit, testDropdown, widgets.VBox([BtnReload, n_total, mu, sigma])]),
        BtnReset,
    ],
    layout=box_layout,
)

Copyright © 2020 IUBH Internationale Hochschule