In [1]:
import scipy.stats as ss
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Load the table of probabilities to fit; later cells read its
# "mean", "lower" and "upper" columns.
to_fit = pd.read_csv(filepath_or_buffer="Data/prob_random_effects_output.csv")

In [3]:
# Show the loaded frame as this cell's output.
to_fit

Unnamed: 0.1,Unnamed: 0,variable,mean,lower,upper
0,1,pReadmission,4e-05,8e-06,7.2e-05
1,2,pLTSCSFPos,0.261986,0.114716,0.409255
2,3,pOtotoxicity,0.013475,0.001287,0.025662
3,4,pLTSAfterReadmission,0.084746,0.011358,0.158134
4,5,pDeathAfterReadmission,0.002301,0.002251,0.00235
5,6,pYellow,0.094112,0.058442,0.129782
6,7,pRed,0.03864,0.030604,0.046676
7,8,pIllAppearingBCPosYellow,0.153846,0.056264,0.251428
8,9,pIllAppearingNoChorio,0.053712,0.048688,0.058737
9,10,pIllAppearingGreen,0.014423,0.004635,0.024211


In [4]:
def fit_encompassing_beta_distr(value, lower_bound, upper_bound, verbose=True):
    """Fit a Beta distribution with mean `value` whose central 95% interval
    encompasses [lower_bound, upper_bound].

    The distribution is parameterised as ``alpha = value * scale`` and
    ``beta = (1 - value) * scale``, so alpha / (alpha + beta) equals `value`
    for every scale. Candidate scales are tried from largest to smallest on
    a fixed grid that gets finer as the scale shrinks:

    * 2000000 down to 110000 in steps of 10000
    * 100000 down to 2000 in steps of 1000
    * 1000 down to 1 in steps of 1

    The first candidate whose 2.5th percentile is strictly below
    `lower_bound` AND whose 97.5th percentile is strictly above
    `upper_bound` wins. Because the grid is coarse at large scales, the
    result is the largest matching scale *on the grid*, not necessarily the
    tightest possible fit.

    Parameters
    ----------
    value : float
        Desired mean of the Beta distribution.
    lower_bound : float
        Value the fitted 2.5th percentile must fall below.
    upper_bound : float
        Value the fitted 97.5th percentile must exceed.
    verbose : bool, optional
        When True (the default) print the alpha/beta of the match.

    Returns
    -------
    (alpha, beta)
        Parameters of the first matching candidate. If no candidate on the
        grid matches, "No match found" is printed and ``(0, 0)`` is
        returned; note that (0, 0) is a sentinel, not a usable pair of Beta
        parameters, so callers should check for it.
    """
    mean_alpha = value
    mean_beta = 1 - mean_alpha

    # (start, stop, step) triples for range(); stop is exclusive, so each
    # tier ends one step above the start of the next tier.
    scale_grid = (
        (2000000, 100000, -10000),
        (100000, 1000, -1000),
        (1000, 0, -1),
    )

    for start, stop, step in scale_grid:
        for scale in range(start, stop, step):
            alpha = mean_alpha * scale
            beta = mean_beta * scale
            # One call evaluates both tail percentiles.
            ppf_025, ppf_975 = ss.beta.ppf((0.025, 0.975), alpha, beta)

            if ppf_975 > upper_bound and ppf_025 < lower_bound:
                if verbose:
                    print("alpha: %.2f \t beta: %.2f" % (alpha, beta))
                return alpha, beta

    # Always reported, even when verbose is False, so a failed fit is not
    # silently turned into the (0, 0) sentinel.
    print("No match found")
    return 0, 0

In [5]:
# Fit one Beta distribution per row. otypes is given explicitly so that
# np.vectorize does not make an extra probing call on the first row to infer
# the output types (which repeats the fit and its printed line), and so the
# outputs are always float even if the first row returns the integer
# (0, 0) "no match" sentinel.
to_fit["alpha"], to_fit["beta"] = np.vectorize(
    fit_encompassing_beta_distr, otypes=[float, float]
)(to_fit["mean"], to_fit["lower"], to_fit["upper"])

alpha: 2.85 	 beta: 70997.15
alpha: 2.85 	 beta: 70997.15
alpha: 6.55 	 beta: 18.45
alpha: 1.72 	 beta: 126.28
alpha: 2.03 	 beta: 21.97
alpha: 4601.20 	 beta: 1995398.80
alpha: 19.58 	 beta: 188.42
alpha: 38.64 	 beta: 961.36
alpha: 5.23 	 beta: 28.77
alpha: 375.98 	 beta: 6624.02
alpha: 4.87 	 beta: 333.12
alpha: 779.79 	 beta: 18220.21
alpha: 2.06 	 beta: 45.94
alpha: 0.31 	 beta: 7.69
alpha: 0.31 	 beta: 12.69
alpha: 4.08 	 beta: 207.92
alpha: 3.96 	 beta: 607.04
alpha: 196.52 	 beta: 22803.48
alpha: 71.69 	 beta: 58928.31
alpha: 45.87 	 beta: 429954.13
alpha: 2.75 	 beta: 7997.25
alpha: 59.01 	 beta: 940.99


In [6]:
# Store the mean and the 2.5th / 97.5th percentiles of each fitted Beta
# distribution next to the input columns (presumably so they can be compared
# with "mean", "lower" and "upper").
to_fit["beta_mean"] = ss.beta.mean(a=to_fit["alpha"], b=to_fit["beta"])
to_fit["beta_2.5pct"] = ss.beta.ppf(0.025, a=to_fit["alpha"], b=to_fit["beta"])
to_fit["beta_97.5pct"] = ss.beta.ppf(0.975, a=to_fit["alpha"], b=to_fit["beta"])

In [7]:
# Show the frame again, now including the fitted alpha/beta and the
# beta_* summary columns added above.
to_fit

Unnamed: 0.1,Unnamed: 0,variable,mean,lower,upper,alpha,beta,beta_mean,beta_2.5pct,beta_97.5pct
0,1,pReadmission,4e-05,8e-06,7.2e-05,2.854973,70997.15,4e-05,7.847246e-06,9.8e-05
1,2,pLTSCSFPos,0.261986,0.114716,0.409255,6.549638,18.45036,0.261986,0.1131239,0.44681
2,3,pOtotoxicity,0.013475,0.001287,0.025662,1.724776,126.2752,0.013475,0.001283878,0.039365
3,4,pLTSAfterReadmission,0.084746,0.011358,0.158134,2.033898,21.9661,0.084746,0.01118659,0.221674
4,5,pDeathAfterReadmission,0.002301,0.002251,0.00235,4601.199851,1995399.0,0.002301,0.002234675,0.002367
5,6,pYellow,0.094112,0.058442,0.129782,19.575228,188.4248,0.094112,0.05838101,0.137217
6,7,pRed,0.03864,0.030604,0.046676,38.63994,961.3601,0.03864,0.02759919,0.051426
7,8,pIllAppearingBCPosYellow,0.153846,0.056264,0.251428,5.230769,28.76923,0.153846,0.05527232,0.290686
8,9,pIllAppearingNoChorio,0.053712,0.048688,0.058737,375.984506,6624.015,0.053712,0.04855303,0.059113
9,10,pIllAppearingGreen,0.014423,0.004635,0.024211,4.875,333.125,0.014423,0.004627424,0.029579


In [8]:
# Persist the fitted table. The default index=True is kept, so the row index
# is written as an extra leading column.
to_fit.to_csv(path_or_buf="Data/prob_output.csv")