In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
import math
import scipy as stats
import scipy.stats

In [2]:
from stat1 import *

In [3]:
%matplotlib inline

In [4]:
class Sampling_Distibution(Probability_Distribution):
    """
    Stratified Sampling: use when each group has small variation
        within itself but there is a wide variation between the 
        groups.
        
    Cluster Sampling: use when there is considerable variation
        within each group but the groups are essentially similar
        to each other.
    """
    
    def __init__(self):
        pass
    
    
    def standard_error(self, sigma, n):
        """
        To  get sampling distribution standard deviation
        for infinite population or from finite sample with
        replacements.
        
        
        sigma: population standard deviation
        n: sample size
        """
        return sigma/(math.sqrt(n))
    
    
    def probability(self, mu, sigma, n, N=0, X1=-math.inf, X2=math.inf, infinite=True):
        """
        mu = population mean
        sigma = population standard deviation
        infinte = bool, if true then find probaility for infinite 
        population, otherwise for finite population

        """
        if infinite is True:
            std_error = self.standard_error(sigma, n)
        elif infinite is False:
            std_error = self.standard_error_finite(sigma, N, n)
        print("std error:", std_error)
        
        return self.normal_distribution(mu, std_error, X1, X2)
    
    
    """
    CENTRAL LIMIT THEOREM:
    
    1. The mean of the sampling distribution of the mean
        will be equal to the population mean.
    2. As the sample size increases, the sampling distribution
        of the mean will approach normality.
        
    Significance:
        it permits use sample statics to make inferences about
        population parameterswithout knowing anything about
        the shape of the frequency distribution of that population
        other than what we can get from the sample.
    """
        
    def finite_population_multiplier(self, N, n):
        """
        N: Size of the population
        n: Size of sample
        
        Note: When sampling fraction(n/N) is less than 0.05, the
            finite population multiplier need not be used.
        """
        return math.sqrt((N-n)/(N-1))
    
    """
    Ststistician reccommend that in estimation, n be large
    enough, atlest n/N > 0.5 to use Normal distribution
    as a substitute for the binomial distribution.
    """
    
    def standard_error_finite(self, sigma, N, n):
        """
        To  get sampling distribution standard deviation
        for finite sample without replacements
        
        sigma: population standard deviation
        N: Size of the population
        n: Size of sample
        
        if n/N > 0.05 is True then use standard_error_finite
        otherwise use standard_error
        """
        return self.standard_error(sigma, n) * self.finite_population_multiplier(N, n)
    

In [5]:
class Estimation(Sampling_Distibution):
    
    def __init__(self, x):
        """
        x = np.array of samples
        """
        self.sample = x
    
    
    def point_estimate_mean(self):
        return (self.sample.sum())/len(self.sample)
        
        
    def point_estimate_variance(self):
        variance = ((self.sample - self.point_estimate_mean())**2).sum()/(len(self.sample)-1)
        return variance
        
    
    def point_estimate_deviation(self):
        return math.sqrt(self.point_estimate_variance())
    
    
    def interval_estimate(self, mu, sigma, n, N=0, infinite=True):
        """
        68–95–99.7 rule:
        
        In statistics, the 68–95–99.7 rule, also known as the empirical rule,
        is a shorthand used to remember the percentage of values that lie 
        within a band around the mean in a normal distribution with a width 
        of two, four and six standard deviations, respectively; more accurately,
        68.27%, 95.45% and 99.73% of the values lie within one, two and three 
        standard deviations of the mean, respectively.

        In mathematical notation, these facts can be expressed as follows, where
        Χ is an observation from a normally distributed random variable, μ is 
        the mean of the distribution, and σ is its standard deviation:
        
        Pr(μ - σ ≤ X ≤ μ + σ) ≈ 0.6827
        Pr(μ - 2σ ≤ X ≤ μ + 2σ) ≈ 0.9545
        Pr(μ - 2σ ≤ X ≤ μ + 2σ) ≈ 0.9973
        """
        #n = len(self.sample)
        result = []
        for i in range(1,4):
            if infinite is True:
                t = (mu - i*self.standard_error(sigma,n), mu + i*self.standard_error(sigma,n))
            elif infinite is False:
                t = (mu - i*self.standard_error_finite(sigma, N, n), mu + i*self.standard_error_finite(sigma, N, n))
                
            result.append(t)
        
        return result
    
    
    def standard_error_from_mean(self, confidence_lavel):
        """
        confidence_lavel = percentage of confidence
        """
        
        file = 'cumulative_from_mean_0toZ.csv'
        standard_table = self.standard_normal_table(file, 'z')


        dfr = standard_table[standard_table < confidence_lavel/200]
        max_columns = dfr.idxmax(axis=1)
        index = max_columns.last_valid_index()
        column = max_columns.loc[index]

        return round(index + float(column), 2)
    
    
    def confidence_interval_by_std_error(self, confidence, mean, std_error):
        """
        confidence = percentage
        mean = sample mean or population mean
        std_error = 
        """
        return (mean - (confidence * std_error), mean + (confidence * std_error)) 
    
    
    def confidence_interval(self, confidence_lavel, x_bar, sigma, n, N=0, infinite=True):
        """
        confidence_lavel = percentage of confidence
        x_bar: sample mean
        sigma: standard deviation
        """

        conf = self.standard_error_from_mean(confidence_lavel)
        
        if infinite is True:
            std_err = self.standard_error(sigma,n)
        elif infinite is False:
            std_err = self.standard_error_finite(sigma, N, n)
        
        return self.confidence_interval_by_std_error(conf, x_bar, std_err)
    
    """
    Ststistician reccommend that in estimation, n be large
    enough, atlest n/N > 0.5 to use Normal distribution
    as a substitute for the binomial distribution.
    """
    
    #========================================================
    # INTERVAL ESTIMATES OF THE PROPORTION FROM LARGE SAMPLES
    #========================================================
    
    def estimate_mean_of_proportion(self, p_bar):
        """
        p_bar = sample proportion in favour
        """
        return p_bar
    
    
    def estimate_standard_error_of_proportion(self, p_bar, q_bar, n, N=0, infinite=True):
        """
        p_bar = sample proportion in favour
        q_bar = sample proportion not in favour
        n = sample size
        N = Population mean
        """
        
        if infinite is True:
            return math.sqrt((p_bar * q_bar)/n)
        elif infinite is False:
            return math.sqrt((p_bar * q_bar)/n) * self.finite_population_multiplier(N, n)
    
    
    
    def confidence_interval_of_proportion(self, confidence, p_bar, q_bar, n, N=0, infinite=True):
        """
        confidence = percentage
        p_bar = sample proportion in favour
        q_bar = sample proportion not in favour
        n = sample size
        """
        return self.confidence_interval_by_std_error(self.standard_error_from_mean(confidence), p_bar, self.estimate_standard_error_of_proportion(p_bar, q_bar, n, N, infinite))
    
    
    def t_distribution(self, x_bar, sigma_hat, t):
        """
        x_bar = sample mean
        sigma_hat = sample standard deviation
        t = value
        
        Condition of usage:
            1. sample <= 30
            2. population standard deviation is unknown
            3. assume: population is nomal or approximately normal
        """
        return x_bar - (sigma_hat * t), x_bar + (sigma_hat * t)
    
    
    def degree_of_freedom(self, n):
        return n-1
    
    
    def t_value(self, file, n, confidence_interval):
        
        t_table = self.standard_normal_table(file, 'df')
        
        return t_table.loc[str(self.degree_of_freedom(n))][str(confidence_interval)]
    
    
    def t_value_confidence(self, n, confidence_interval):
        conf = str(confidence_interval)+"%"
        return self.t_value('t-table-confidence.csv', n, conf)
    
    
    def t_value_cumulative(self, n, confidence_interval):
        return self.t_value('t-table-cumulative.csv', n, confidence_interval)
    
    
    def t_value_one_tail(self, n, confidence_interval):
        return self.t_value('t-table-one-tail.csv', n, confidence_interval)
    
    
    def t_value_two_tail(self, n, confidence_interval):
        return self.t_value('t-table-two-tail.csv', n, confidence_interval)
    
    
    def confidence(self, n, t_val, file):
        t_table = self.standard_normal_table(file, 'df')
        
        t_val_series = round(t_table.loc[str(self.degree_of_freedom(n))], 3)
        t_index = t_val_series[t_val_series == t_val].index

        return t_index[0]
    
    
    def confidence_without_sample_size(self, t_val, file):
        t_table = round(self.standard_normal_table(file, 'df'), 3)
        
        t_table_na = t_table[t_table == t_val]
        t_tab = t_table_na.fillna(0.0)
        
        t_max = t_tab.max()
        t_index = t_max[t_max == t_val].index

        return int(t_tab.idxmax().max())+1, t_index[0]
    
        
    def confidence_lavel_from_t_value(self, n, t_val):
        file = 't-table-confidence.csv'
        return self.confidence(n, t_val, file)
    
    def confidence_lavel_from_t_value_without_sample_size(self, t_val):
        file = 't-table-confidence.csv'
        return self.confidence_without_sample_size(t_val, file)
    
    
    def one_tail_confidence_level_from_t_value(self, n, t_val):
        file = 't-table-one-tail.csv'
        return self.confidence(n, t_val, file)
    
    def one_tail_confidence_level_from_t_value_without_sample_size(self, t_val):
        file = 't-table-one-tail.csv'
        return self.confidence_without_sample_size(t_val, file)
    
    
    def two_tail_confidence_level_from_t_value(self, n, t_val):
        file = 't-table-two-tail.csv'
        return self.confidence(n, t_val, file)
    
    def two_tail_confidence_level_from_t_value_without_sample_size(self, t_val):
        file = 't-table-two-tail.csv'
        return self.confidence_without_sample_size(t_val, file)
    
    
    def cumulative_confidence_level_from_t_value(self, n, t_val):
        file = 't-table-cumulative.csv'
        return self.confidence(n, t_val, file)
    
    def cumulative_confidence_level_from_t_value_withou_sample_size(self, t_val):
        file = 't-table-cumulative.csv'
        return self.confidence_without_sample_size(t_val, file)
    
    
# ===========================================  
 # Determining the sample size of estimation
# ===========================================

    def sample_size_mean(self, t_val_mean, z_val, sigma):
        
        return ((z_val*sigma)/t_val_mean)**2
    
    
    def sample_size_proportion(self, t_val, z_val, p, q):
        
        return ((z_val * math.sqrt(p*q))/t_val)**2
    
    
# ===========================================  
 # Confidence level associated with interval
# ===========================================

    def confidence_level_from_z(self, z_val):
        
        return self.probability_of_normal_distribution(0, z_val) * 200

In [56]:
x = np.array([19, 25,29,13,26,28,30])*0.1

In [57]:
est = Estimation(x)

In [63]:
x_bar = est.point_estimate_mean()
s = est.point_estimate_deviation()

n = len(x)
N = 300

c = 98

x_bar, s, est.confidence_interval(c, x_bar, s, n)

(2.4285714285714284,
 0.6156684327197709,
 (1.8887055848179077, 2.968437272324949))

In [45]:
p = 40/120
q = 1-p

est.confidence_interval_of_proportion(c, p, q, n)

(0.5323431885476456, 0.8750642188597618)

In [64]:
p = np.array([40/120])
q = 1-p
#t_val = 0.05
c = 95
n = 120
N = 900

res = []
for i in range(len(p)):
    z_val = est.standard_error_from_mean(c)
    #n = est.sample_size_proportion(t_val, z_val, p[i], q[i])
    #res.append((z_val, n))
    ci = est.confidence_interval_of_proportion(c, p[i], q[i], n)
    res.append((z_val, ci))

res

[(1.95, (0.24941869416550594, 0.4172479725011607))]

In [55]:
t_val_mean = 4.6
c = 68.3
#z_val = est.standard_error_from_mean(c)
sigma = 1.4

#n = est.sample_size_mean(t_val_mean, z_val, sigma)
n = 200
#cl = est.confidence_interval(c, t_val_mean, sigma, n)
ste = est.standard_error(sigma, n)
#z_val, n
#ci = est.confidence_interval(c, t_val_mean, sigma, n)
it = est.interval_estimate(5.2, sigma, n)
ste, it

(0.09899494936611665,
 [(5.1010050506338835, 5.298994949366117),
  (5.002010101267767, 5.3979898987322334),
  (4.90301515190165, 5.49698484809835)])

In [31]:
est.confidence_level_from_z(2.4)

a5
z string: 2.4
first, last: 2.4 0


98.36

In [66]:
n = 9
x = 18.3
s = 3.6
c = 95

e = est.standard_error(s, n)

ci = est.confidence_interval(c, x, s, n)

e, ci

(1.2, (15.96, 20.64))

In [68]:
n = np.array([2,3 ,5,1])
#c = 95.5 * 0.01

sigma = 5/n
x_bar = np.array([25,15,38,20])

#ci = []
ie = []
for i in range(len(n)):
    ie.append((x_bar[i] - (1.96 * sigma[i]), x_bar[i] + (1.67 * sigma[i])))
    
ie

[(20.1, 29.175),
 (11.733333333333334, 17.78333333333333),
 (36.04, 39.67),
 (10.2, 28.35)]

In [135]:
est.point_estimate_mean()

16.933333333333334

In [136]:
est.point_estimate_variance()

125.92380952380952

In [137]:
est.point_estimate_deviation()

11.221577853573423

In [97]:
sigma = 41000
#muh = 30.3
mu = 250000
N = 12368
n = 750
#cd = np.array([95,99])
cd = 90

a = est.standard_error_finite(sigma,N, n)
b = est.confidence_interval(cd, mu, sigma, n, N, infinite=False)
c = est.confidence(cd)
"""
b = []
c = []
for i in cd:
    b.append(est.confidence_interval(i, mu, sigma, n, N, infinite=True))
    c.append(est.confidence(i))
"""
    
a,b,c

(1451.0645447337886, (247620.25414663658, 252379.74585336342), 1.64)

In [32]:
n = 800
N = 3000
p = 0.25
q = 1-p
cd = 90

a = est.estimate_standard_error_of_proportion(p, q, n)
b = est.confidence_interval_of_proportion(cd, p,q,n)
c = est.standard_error_from_mean(cd)
a,b,c

(0.015309310892394862, (0.22489273013647243, 0.27510726986352757), 1.64)

In [33]:
a = est.estimate_standard_error_of_proportion(p, q, n, N, infinite=False)
b = est.confidence_interval_of_proportion(cd, p, q, n,N, infinite=False)
c = est.standard_error_from_mean(cd)

a,b,c

(0.013112296166966972, (0.22849583428617418, 0.27150416571382585), 1.64)

In [22]:
n/N > 0.05

True

In [10]:
mu = 8.2
sigma = 2.1
N = 80
n = np.array([16,25,49])

res = []
for i in n:
    res.append(sd.standard_error_finite(sigma, N, i))
    
res

[0.4725369183889301, 0.350442757925283, 0.1879267295948145]

In [17]:
a1 = np.arange(0.00, 4.10, 0.01)
a2 = np.arange(0.00, 4.10, 0.01)
# a1<a2

In [8]:
file = 'cumulative_from_mean_0toZ.csv'
#file = 'complementary_cumulative.csv'
df_file = pd.read_csv(file)
df_file.index = df_file['z']
df_file.drop('z', axis=1, inplace=True)

In [9]:
df_file.head()

Unnamed: 0_level_0,0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09
z,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,0.0,0.00399,0.00798,0.01197,0.01595,0.01994,0.02392,0.0279,0.03188,0.03586
0.1,0.03983,0.0438,0.04776,0.05172,0.05567,0.05962,0.06356,0.06749,0.07142,0.07535
0.2,0.07926,0.08317,0.08706,0.09095,0.09483,0.09871,0.10257,0.10642,0.11026,0.11409
0.3,0.11791,0.12172,0.12552,0.1293,0.13307,0.13683,0.14058,0.14431,0.14803,0.15173
0.4,0.15542,0.1591,0.16276,0.1664,0.17003,0.17364,0.17724,0.18082,0.18439,0.18793


In [10]:
df1 = df_file[df_file < 0.90/2]

In [13]:
df1

Unnamed: 0_level_0,0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09
z,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,0.0,0.00399,0.00798,0.01197,0.01595,0.01994,0.02392,0.0279,0.03188,0.03586
0.1,0.03983,0.0438,0.04776,0.05172,0.05567,0.05962,0.06356,0.06749,0.07142,0.07535
0.2,0.07926,0.08317,0.08706,0.09095,0.09483,0.09871,0.10257,0.10642,0.11026,0.11409
0.3,0.11791,0.12172,0.12552,0.1293,0.13307,0.13683,0.14058,0.14431,0.14803,0.15173
0.4,0.15542,0.1591,0.16276,0.1664,0.17003,0.17364,0.17724,0.18082,0.18439,0.18793
0.5,0.19146,0.19497,0.19847,0.20194,0.2054,0.20884,0.21226,0.21566,0.21904,0.2224
0.6,0.22575,0.22907,0.23237,0.23565,0.23891,0.24215,0.24537,0.24857,0.25175,0.2549
0.7,0.25804,0.26115,0.26424,0.2673,0.27035,0.27337,0.27637,0.27935,0.2823,0.28524
0.8,0.28814,0.29103,0.29389,0.29673,0.29955,0.30234,0.30511,0.30785,0.31057,0.31327
0.9,0.31594,0.31859,0.32121,0.32381,0.32639,0.32894,0.33147,0.33398,0.33646,0.33891


In [35]:
def z_number(percentage):
    
    #file = 'complementary_cumulative.csv'
    file = 'cumulative_from_mean_0toZ.csv'
    df_file = pd.read_csv(file)
    df_file.index = df_file['z']
    df_file.drop('z', axis=1, inplace=True)
    
    dfr = df_file[df_file < percentage]
    df2 = dfr.idxmax(axis=1)
    n1 = df2.last_valid_index()
    n2 = df2.loc[n1]
    
    return round(n1 + float(n2), 2)

In [39]:
z_number(0.5)

4.06

In [14]:
df1.columns

Index(['0', '0.01', '0.02', '0.03', '0.04', '0.05', '0.06', '0.07', '0.08',
       '0.09'],
      dtype='object')

In [16]:
df1['0.04'].max()

0.4495

In [21]:
df2 = df1.idxmax(axis=1)

In [25]:
df2.last_valid_index()

1.6

In [27]:
df2.index

Float64Index([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2,
              1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5,
              2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8,
              3.9, 4.0],
             dtype='float64', name='z')

In [32]:
df2.loc[1.6]

'0.04'

In [53]:
def standard_deviation_from_mean(probability):
    n = len(str(probability))-2
    
    file = 'complementary_cumulative.csv'
    df_file = pd.read_csv(file)
    df_file.index = df_file['z']
    df_file.drop('z', axis=1, inplace=True)
    
    dfr = df_file.round(n)
    
    l1 = []
    sd0 = set([])
    for i in dfr.columns:
        sd = dfr[dfr[i]==probability].index.tolist()
        print(sd)
        if len(sd)>0:
            for j in sd:
                #print("value: ", j, float(i))
                t = j + float(i)
                if i not in l1:
                    l1.append(round(t, 2))
            
              
    return np.array(sorted(l1))

In [54]:
standard_deviation_from_mean(0.05)

array([1.6 , 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69])

In [62]:
#dfr[dfr == 0.02]
sd.standard_deviation_from_mean(0.082, 'complementary_cumulative.csv')

array([1.39])

In [11]:
g = np.arange(0,20,3)
g

array([ 0,  3,  6,  9, 12, 15, 18])

In [9]:
g-g.sum()

array([-63, -60, -57, -54, -51, -48, -45])

In [13]:
((g-g.sum())**2).sum()

20664

In [19]:
f = open('t-table.txt')

In [20]:
x = []
for i in f:
    x.append(i.split())
    
x

[['cum.-prob',
  't.50',
  't.75',
  't.80',
  't.85',
  't.90',
  't.95',
  't.975',
  't.99',
  't.995',
  't.999',
  't.9995'],
 ['one-tail',
  '0.50',
  '0.25',
  '0.20',
  '0.15',
  '0.10',
  '0.05',
  '0.025',
  '0.01',
  '0.005',
  '0.001',
  '0.0005'],
 ['two-tails',
  '1.00',
  '0.50',
  '0.40',
  '0.30',
  '0.20',
  '0.10',
  '0.05',
  '0.02',
  '0.01',
  '0.002',
  '0.001'],
 ['df'],
 ['1',
  '0.000',
  '1.000',
  '1.376',
  '1.963',
  '3.078',
  '6.314',
  '12.71',
  '31.82',
  '63.66',
  '318.31',
  '636.62'],
 ['2',
  '0.000',
  '0.816',
  '1.061',
  '1.386',
  '1.886',
  '2.920',
  '4.303',
  '6.965',
  '9.925',
  '22.327',
  '31.599'],
 ['3',
  '0.000',
  '0.765',
  '0.978',
  '1.250',
  '1.638',
  '2.353',
  '3.182',
  '4.541',
  '5.841',
  '10.215',
  '12.924'],
 ['4',
  '0.000',
  '0.741',
  '0.941',
  '1.190',
  '1.533',
  '2.132',
  '2.776',
  '3.747',
  '4.604',
  '7.173',
  '8.610'],
 ['5',
  '0.000',
  '0.727',
  '0.920',
  '1.156',
  '1.476',
  '2.015',
  '2.57

In [24]:
x_cum_col  = x[0] 
x_one_tail = x[1]
x_two_tail = x[2]

In [42]:
x_index = []
for i in x[:-1]:
    try:
        ind = i[0]
        x_index.append(ind)
    except:
        x_index.append("error:1a")
        print("get arror")
    
x_index

['cum.-prob',
 'one-tail',
 'two-tails',
 'df',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '40',
 '60',
 '80',
 '100',
 '1000',
 'z',
 'Confidence-Level']

In [52]:
x_val = []
for j in x[4:]:
    x_val.append(j[1:])
    
x_val

[['0.000',
  '1.000',
  '1.376',
  '1.963',
  '3.078',
  '6.314',
  '12.71',
  '31.82',
  '63.66',
  '318.31',
  '636.62'],
 ['0.000',
  '0.816',
  '1.061',
  '1.386',
  '1.886',
  '2.920',
  '4.303',
  '6.965',
  '9.925',
  '22.327',
  '31.599'],
 ['0.000',
  '0.765',
  '0.978',
  '1.250',
  '1.638',
  '2.353',
  '3.182',
  '4.541',
  '5.841',
  '10.215',
  '12.924'],
 ['0.000',
  '0.741',
  '0.941',
  '1.190',
  '1.533',
  '2.132',
  '2.776',
  '3.747',
  '4.604',
  '7.173',
  '8.610'],
 ['0.000',
  '0.727',
  '0.920',
  '1.156',
  '1.476',
  '2.015',
  '2.571',
  '3.365',
  '4.032',
  '5.893',
  '6.869'],
 ['0.000',
  '0.718',
  '0.906',
  '1.134',
  '1.440',
  '1.943',
  '2.447',
  '3.143',
  '3.707',
  '5.208',
  '5.959'],
 ['0.000',
  '0.711',
  '0.896',
  '1.119',
  '1.415',
  '1.895',
  '2.365',
  '2.998',
  '3.499',
  '4.785',
  '5.408'],
 ['0.000',
  '0.706',
  '0.889',
  '1.108',
  '1.397',
  '1.860',
  '2.306',
  '2.896',
  '3.355',
  '4.501',
  '5.041'],
 ['0.000',
  '0.70

In [56]:
x_index = x_index[3:]

In [64]:
"""
x_cum_col  = x[0] 
x_one_tail = x[1]
x_two_tail = x[2]
"""
t_table_cum = pd.DataFrame(data=x_val[:-1], index=x_index[1:], columns=x_cum_col[1:])
t_table_one_tail = pd.DataFrame(data=x_val[:-1], index=x_index[1:], columns=x_one_tail[1:])
t_table_two_tail = pd.DataFrame(data=x_val[:-1], index=x_index[1:], columns=x_two_tail[1:])

In [67]:
t_table_cum.to_csv('t-table-cumulative.csv') #index=False)
t_table_one_tail.to_csv('t-table-one-tail.csv')
t_table_two_tail.to_csv('t-table-two-tail.csv')

In [74]:
t_table_conf = pd.DataFrame(data=x_val[:-2], index=x_index[1:-1], columns=x_val[-2])

In [75]:
t_table_conf.to_csv('t-table-confidence.csv')

In [71]:
x_val[-2]

['0%',
 '50%',
 '60%',
 '70%',
 '80%',
 '90%',
 '95%',
 '98%',
 '99%',
 '99.8%',
 '99.9%']

In [11]:
dft1 = pd.read_csv('t-table-confidence.csv')

In [15]:
dft1.head()

Unnamed: 0_level_0,0%,50%,60%,70%,80%,90%,95%,98%,99%,99.80%,99.90%
df,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,1.0,1.376,1.963,3.078,6.314,12.71,31.82,63.66,318.31,636.62
2,0,0.816,1.061,1.386,1.886,2.92,4.303,6.965,9.925,22.327,31.599
3,0,0.765,0.978,1.25,1.638,2.353,3.182,4.541,5.841,10.215,12.924
4,0,0.741,0.941,1.19,1.533,2.132,2.776,3.747,4.604,7.173,8.61
5,0,0.727,0.92,1.156,1.476,2.015,2.571,3.365,4.032,5.893,6.869


In [13]:
dft1.index = dft1['df']

In [14]:
dft1.drop('df', axis=1, inplace=True)

In [16]:
dft2 = dft1.loc['4']

In [20]:
dft3 = round(dft2, 3)

In [25]:
dft4 = dft3[dft3 == 2.132]. index

In [27]:
dft4[0]

'90%'

In [73]:
def t_val1(cl, n):
    file = 't-table-confidence.csv'
    dft1 = pd.read_csv(file)
    dft1.index = dft1['df']
    dft1.drop('df', axis=1, inplace=True)
    
    dft2 = dft1.loc[str(n-1)]
    dft3 = round(dft2, 3)
    dft4 = dft3[dft3 == cl]. index
    
    return dft4[0]


def t_val2(cl):
    file = 't-table-confidence.csv'
    dft1 = pd.read_csv(file)
    dft1.index = dft1['df']
    dft1.drop('df', axis=1, inplace=True)
    
    dft5 = round(dft1, 3)
    dft6 = dft5[dft5 == cl]
    dft7 = dft6.fillna(0.0)
    dft8 = dft7.max()
    dft9 = dft8[dft8 == cl].index
    
    return int(dft7.idxmax().max())+1, dft9[0]

In [74]:
t_val1(2.132, 5)

'90%'

In [76]:
t_val2(2.056)

(27, '95%')

In [68]:
dft5 = round(dft1, 3)
dft5

Unnamed: 0_level_0,0%,50%,60%,70%,80%,90%,95%,98%,99%,99.80%,99.90%
df,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1.0,0,1.0,1.376,1.963,3.078,6.314,12.71,31.82,63.66,318.31,636.62
2.0,0,0.816,1.061,1.386,1.886,2.92,4.303,6.965,9.925,22.327,31.599
3.0,0,0.765,0.978,1.25,1.638,2.353,3.182,4.541,5.841,10.215,12.924
4.0,0,0.741,0.941,1.19,1.533,2.132,2.776,3.747,4.604,7.173,8.61
5.0,0,0.727,0.92,1.156,1.476,2.015,2.571,3.365,4.032,5.893,6.869
6.0,0,0.718,0.906,1.134,1.44,1.943,2.447,3.143,3.707,5.208,5.959
7.0,0,0.711,0.896,1.119,1.415,1.895,2.365,2.998,3.499,4.785,5.408
8.0,0,0.706,0.889,1.108,1.397,1.86,2.306,2.896,3.355,4.501,5.041
9.0,0,0.703,0.883,1.1,1.383,1.833,2.262,2.821,3.25,4.297,4.781
10.0,0,0.7,0.879,1.093,1.372,1.812,2.228,2.764,3.169,4.144,4.587


In [34]:
dft6 = dft5[dft5 == 2.132]

In [37]:
dft7 = dft6.fillna(0.0)

In [41]:
dft8 = dft7.max()

In [44]:
dft8[dft8 == 2.132].index

Index(['90%'], dtype='object')

In [56]:
dft10 = dft7.idxmax()

In [58]:
dft10.max()

'4'

In [60]:
dft10[dft10==dft10.max()].index

Index(['90%'], dtype='object')

In [32]:
dft2 = dft1[dft1 == 1.156]

In [37]:
dl2 = dft1.loc['5'].to_list()

In [38]:
dl2.index(1.156)

3

In [41]:
dft1.columns

Index(['0%', '50%', '60%', '70%', '80%', '90%', '95%', '98%', '99%', '99.80%',
       '99.90%'],
      dtype='object')

In [10]:
bq = np.array([1,2,3,4,5])

In [11]:
1-bq

array([ 0, -1, -2, -3, -4])