In [2]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sst

In [3]:
###p_adjust functionality
def test_p_adjust():
    """
    The purpose of this test is evaluating the p_adjust with more real world data using Pandas dataframes under
    different environments.
    """

    ##basic vector functionality"
    d = {"p_value": [0.07], "adjusted": [0.07]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert df.equals(p_adjust(data=[0.07], method="bonf")), "p_adjust 1 values vector for bonferoni"
    assert df.equals(p_adjust(data=[0.07], method="bh")), "p_adjust 1 values vector for bh"


    d = {"p_value": [0.07, 0.2], "adjusted": [0.14, 0.4]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert df.equals(p_adjust(data=[0.07, 0.2], method="bh")), "p_adjust 2 values vector for bonferoni"

    d = {"p_value": [0.07, 0.2], "adjusted": [0.14, 0.2]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert p_adjust(data=[0.07, 0.2], method="bh") == df, "p_adjust 2 values vector value for bh"


In [72]:
def p_adjust(data, pv_index=0, method='bonf', alpha=0.05):                                         
    """                                                                                            
    A summary dataframe with columns for the p-values, adjusted p-values for both Bonferroni and   
    Benjamini-Hochberg (BH), adjusted significancelevel for Bonferroni and the critical value for B
                                                                                                   
   Args:                                                                                           
        - data (dataframe): dataframe containing at least a column of p-values to be adjusted      
        - pv_index (int): original p-value column index from existing input dataframe              
        - alpha (int): significance level as a value between 0 and 1                               
                                                                                                   
   Returns:                                                                                        
        Dataframe: appends to input dataframe both adjusted p-values and significance levels (Bonfe
        in ascending raw p-value order.Includes following columns:                                 
            - bonf_val (int): Bonferroni adjusted significance level (same for all)                
            - Bonf_significant (bool): True if significant p-value or False if not                 
            - bh_val (int): Benjamini-Hochberg (BH) critical value                                 
            - BH_significant (bool): True if significant p-value or False if not                   
    """                                                                                            
                                                                                                   
    #     ####if it's a pd.dataframe, rename to col header                                         
    #     if isinstance(data, pd.DataFrame):                                                       
    #         data.rename({pv_index: "p_value"})                                                   
    #         if np.issubdtype(data['p_value'].dtypes, np.number):                                 
                                                                                                   
    #     ###or make a vector a pd.dataframe                                                       
    #     else:                                                                                    
    #         data = pd.DataFrame({"p_value": data})  
    
    if isinstance(data, pd.DataFrame):                                                             
        data.rename({pv_index: "p_value"})                                                         
        ## error for non-numeric data frame column                                                 
        if not (np.issubdtype(data['p_value'].dtypes, np.number)):                                 
            raise TypeError("Please ensure you have specified the column index of numeric p-values.")
    else:                                                                                          
        data = pd.DataFrame({"p_value": data})                                                     
        # set the size of the data                                                                 
                                                                                                   
    m = data.shape[0]                                                                              
                                                                                                   
    # sort p-values                                                                                
    df = data.sort_values(by=['p_value'])                                                          
    df["rank"] = round(df.rank(axis=0, method='min')["p_value"])                                   
    df["bh_value"] = alpha * df["rank"] / m                                                        
                                                                                                   
    ### generate final data frame                                                                  
    df["bonf_pvalue"] = np.where(df['p_value'] * m < 1, df['p_value'] * m, 1)                      
    df["bh_pvalue"] = df['p_value'] / df['rank']* m                                               
                                                                                                   
                                                                                                   
    if method == 'bh' or method == 'fdr':                                                          
        df["adjusted"] = df['p_value'] / df['rank']* m  
        return (df[['p_value', 'adjusted']])                                                       
    if method == 'bonf' or method == 'bonferroni':                                                 
        df["adjusted"] = df['p_value'] * m                                           
        return (df[['p_value', 'adjusted']])                                                       
    else:                                                                                          
        raise ValueError("Method should be set as 'bonf' or 'bh' corrections")
                            

In [10]:
###p_adjust functionality
def test_p_adjust():
    """
    The purpose of this test is evaluating the p_adjust with more real world data using Pandas dataframes under
    different environments.
    """

    ##basic vector functionality"
    d = {"p_value": [0.07], "adjusted": [0.07]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert df.equals(p_adjust(data=[0.07], method="bonf")), "p_adjust 1 values vector for bonferoni"
    assert df.equals(p_adjust(data=[0.07], method="bh")), "p_adjust 1 values vector for bh"


    d = {"p_value": [0.07, 0.2], "adjusted": [0.14, 0.4]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert df.equals(p_adjust(data=[0.07, 0.2], method="bh")), "p_adjust 2 values vector for bonferoni"

    d = {"p_value": [0.07, 0.2], "adjusted": [0.14, 0.2]}
    df = pd.DataFrame(data=d)
    df = df[["p_value", "adjusted"]]
    assert p_adjust(data=[0.07, 0.2], method="bh") == df, "p_adjust 2 values vector value for bh"


In [83]:
d = {"p_value": [0.07, 0.2], "adjusted": [0.14, 0.2]}
df = pd.DataFrame(data=d)
df = df[["p_value", "adjusted"]]
assert df.equals(p_adjust(data=[0.07, 0.2], method="bh")), "p_adjust 2 values vector for bonferoni"

In [82]:
p_adjust(data=[0.07, 0.2], method="bh")

Unnamed: 0,p_value,adjusted
0,0.07,0.14
1,0.2,0.2


In [79]:
p_adjust(data=[0.07, 0.2], method="bh")

Unnamed: 0,p_value,adjusted
0,0.07,0.14
1,0.2,0.2


In [77]:
data=[0.07, 0.2]
data = pd.DataFrame({"p_value": data})
alpha = 0.05
m = data.shape[0]
df["rank"] = round(df.rank(axis=0, method='min')["p_value"])                                   
df["bh_value"] = df['p_value'] * df["rank"] / m                                                        
                                                                                               
### generate final data frame                                                                  
df["bonf_pvalue"] = np.where(df['p_value'] * m < 1, df['p_value'] * m, 1)                      
df["bh_pvalue"] = df['p_value'] / df['rank']* m   
df = df[["p_value", "adjusted"]]

In [78]:
df


Unnamed: 0,p_value,adjusted
0,0.07,0.14
1,0.2,0.2


In [84]:
0.20 * 2 /2
0.07 *2/1

0.14

In [88]:
d = {"test": ["test 1"], "p_value": [0.05], "adjusted": [0.05]}
df = pd.DataFrame(data=d)
df = df[["p_value", "adjusted"]]
ad = {"test": ["test 1"], "p_value": [0.05], "adjusted": [0.05]}
adf = pd.DataFrame(data=ad)
adf = adf[["test","p_value", "adjusted"]]
assert adf.equals(p_adjust(data=df, method="bonf")), "bonferroni single value df under p_adjust"
assert adf.equals(p_adjust(data=df, method="bh")), "bh single value df under p_adjust"

AssertionError: bonferroni single value df under p_adjust

In [92]:
df
adf
p_adjust(data=df, method="bonf")

Unnamed: 0,p_value,adjusted
0,0.05,0.05
