In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
import math
import scipy as stats
import scipy.stats

In [2]:
from stat1 import *

In [3]:
%matplotlib inline

In [7]:
class Sampling_Distibution(Probability_Distribution):
    """
    Helpers for the sampling distribution of the sample mean.

    Provides the standard error for infinite populations (or sampling
    with replacement) and for finite populations sampled without
    replacement, and passes that standard error to the inherited
    ``normal_distribution`` method to obtain a probability.

    Stratified Sampling: use when each group has small variation
        within itself but there is a wide variation between the
        groups.

    Cluster Sampling: use when there is considerable variation
        within each group but the groups are essentially similar
        to each other.

    CENTRAL LIMIT THEOREM:

    1. The mean of the sampling distribution of the mean
        will be equal to the population mean.
    2. As the sample size increases, the sampling distribution
        of the mean will approach normality.

    Significance:
        it permits us to use sample statistics to make inferences
        about population parameters without knowing anything about
        the shape of the frequency distribution of that population
        other than what we can get from the sample.
    """

    def __init__(self):
        """
        No per-instance state is set up here; the parent initialiser
        is not invoked.
        """
        pass


    def standard_error(self, sigma, n):
        """
        To get sampling distribution standard deviation
        for infinite population or from finite sample with
        replacements

        sigma: population standard deviation
        n: sample size (must be positive)

        Returns sigma / sqrt(n).
        Raises ValueError if n is not positive.
        """
        if n <= 0:
            raise ValueError("sample size n must be positive, got %r" % (n,))
        return sigma / math.sqrt(n)


    def probability(self, mu, sigma, n, X1=-math.inf, X2=math.inf, infinite=True, N=None):
        """
        Probability for the sample mean between X1 and X2, as computed
        by the inherited ``normal_distribution`` method (defined outside
        this class; its return value is passed through unchanged).

        mu: population mean
        sigma: population standard deviation
        n: sample size
        X1: lower bound of the interval (default: -infinity)
        X2: upper bound of the interval (default: +infinity)
        infinite: bool, if true then find probability for infinite
            population, otherwise for finite population
        N: size of the population; only used when ``infinite`` is false

        Raises ValueError if a finite population is requested and no
        population size can be found.
        """
        # Truthiness rather than ``is True`` so that e.g. numpy booleans
        # select the intended branch.
        if infinite:
            std_error = self.standard_error(sigma, n)
        else:
            if N is None:
                # Backward compatibility: earlier versions of this method
                # silently read a module-level ``N``. Keep honouring it so
                # existing calls that do not pass N continue to work.
                N = globals().get("N")
            if N is None:
                raise ValueError(
                    "population size N is required when infinite is False"
                )
            std_error = self.standard_error_finite(sigma, N, n)

        return self.normal_distribution(mu, std_error, X1, X2)


    def finite_population_multiplier(self, N, n):
        """
        Finite population correction factor sqrt((N - n) / (N - 1)).

        N: Size of the population
        n: Size of sample

        Note: When sampling fraction(n/N) is less than 0.05, the
            finite population multiplier need not be used.

        Raises ValueError if N is not greater than 1 or n exceeds N.
        """
        if N <= 1:
            raise ValueError("population size N must be greater than 1, got %r" % (N,))
        if n > N:
            raise ValueError(
                "sample size n (%r) cannot exceed population size N (%r)" % (n, N)
            )
        return math.sqrt((N - n) / (N - 1))


    def standard_error_finite(self, sigma, N, n):
        """
        To get sampling distribution standard deviation
        for finite sample without replacements

        sigma: population standard deviation
        N: Size of the population
        n: Size of sample

        Returns the infinite-population standard error scaled by the
        finite population multiplier.
        """
        return self.standard_error(sigma, n) * self.finite_population_multiplier(N, n)

In [8]:
# Shared Sampling_Distibution instance used by the example cells that follow.
sd = Sampling_Distibution()

In [13]:
# Worked example: finite population, sampling without replacement.
# mu / sigma are the population mean and standard deviation,
# N is the population size and n the sample size.
mu, sigma = 105, 17
N, n = 125, 64

# Standard error with the finite population multiplier applied.
print(sd.standard_error_finite(sigma, N, n))
print("-------------------------------")
# Probability for the sample mean between 107.5 and 109; N is not passed
# here, so the finite branch relies on the notebook-level N defined above.
print(sd.probability(mu, sigma, n, X1=107.5, X2=109, infinite=False))

1.4904348926923439
-------------------------------
1.68 2.68
a6
z string: 2.68
first, last: 2.6 8
z string: 1.68
first, last: 1.6 8
probabilities 1 & 2 0.9963200000000001 0.95352
0.04280000000000006


In [14]:
# Probability for the sample mean falling in each [x1, x2] interval,
# for every sample size in n, with population mean 64 and variance 17.6.
n = np.array([35])
mu = 64
sigma = math.sqrt(17.6)
x1 = np.array([72,64,64,74,100])
x2 = np.array([math.inf, 72,64,math.inf,100])

prob = []
for size in n:
    pr = []
    for lower, upper in zip(x1, x2):
        try:
            pr.append(sd.probability(mu, sigma, size, lower, upper))
        except Exception:
            # Only ordinary errors are turned into a placeholder; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            pr.append("Can't be determined")
        print("------------------------------")
    prob.append(pr)
# NOTE(review): in the recorded run the degenerate interval [64, 64] came back
# as None rather than a number — worth checking in normal_distribution.
prob

11.28 inf
a3
z string: 11.28
first, last: 11.2 8
------------------------------
0.0 11.28
a5
z string: 11.28
first, last: 11.2 8
------------------------------
0.0 0.0
------------------------------
14.1 inf
a3
z string: 14.1
first, last: 14.0 1
------------------------------
50.77 50.77
a6
z string: 50.77
first, last: 50.7 7
z string: 50.77
first, last: 50.7 7
probabilities 1 & 2 0.0 0.0
------------------------------


[[0.0, 0.0, None, 0.0, 0.0]]

In [14]:
# Probability for the sample mean at or below 160 (mu=150, sigma=16, n=16),
# using the infinite-population standard error.
sd.probability(mu=150, sigma=16, n=16, X1=-math.inf, X2=160)

-inf 2.5
a1
z string: 2.5
first, last: 2.5 0


0.99379

In [25]:
# Standard error of the mean for sigma = 12 and a sample of 60.
sd.standard_error(sigma=12, n=60)

1.5491933384829668

In [29]:
# NOTE(review): this call only constructs a frozen standard-normal
# distribution object; the numeric output recorded for this cell does not
# correspond to this expression, so the cell was presumably edited after it
# last ran — re-run it or restore the intended computation.
scipy.stats.norm()

4.1952353926806065