In [2]:
# required imports and initial settings
from scipy.optimize import curve_fit, minimize
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
plt.style.use('ggplot')
%matplotlib inline

 List of the five distributions to analyze: 'normal', 'poisson', 'binomial', 'chi' (chi-squared), 'triangular'

In [28]:
# Target population parameters used by the sampling cells.
# pop_mean feeds every distribution; in the cells visible in this notebook,
# pop_sd is only passed to the normal sampler.
pop_mean, pop_sd = 500, 100

# Results container: one key per distribution, each mapping to a list that the
# sampling cells fill with one {'size', 'mean', 'sd'} dictionary per sample size.
distro = {name: [] for name in ('normal', 'poisson', 'binomial', 'chi', 'triangular')}

# 100 evenly spaced sample sizes from 10 to 2000 (cast to int), reused for each distribution
sample_sizes = np.linspace(10, 2000, num=100).astype(int)

Because each distribution has its own parameters, each of the five distributions needs its own sampling call.

In [29]:
# NORMAL
# Collect the per-size summaries in a fresh list and assign it once at the end,
# so re-running this cell replaces distro['normal'] instead of appending a
# second batch of entries to it.
normal_stats = []
for n in sample_sizes:
    # loc = center of the distribution (mean)
    # scale = standard deviation
    sample = np.random.normal(loc=pop_mean, scale=pop_sd, size=n)
    # .std() is called with NumPy's defaults (ddof=0)
    sample_dict = {'size': n, 'mean': sample.mean(), 'sd': sample.std()}
    normal_stats.append(sample_dict)
distro['normal'] = normal_stats

In [31]:
# POISSON
# Collect the per-size summaries in a fresh list and assign it once at the end,
# so re-running this cell replaces distro['poisson'] instead of appending a
# second batch of entries to it.
poisson_stats = []
for n in sample_sizes:
    # lam = lambda, the expected count per interval, i.e. the distribution mean.
    # pop_sd is not passed here: this sampler takes no separate spread parameter.
    sample = np.random.poisson(lam=pop_mean, size=n)
    sample_dict = {'size': n, 'mean': sample.mean(), 'sd': sample.std()}
    poisson_stats.append(sample_dict)
distro['poisson'] = poisson_stats

In [37]:
# BINOMIAL
# Collect the per-size summaries in a fresh list and assign it once at the end,
# so re-running this cell replaces distro['binomial'] instead of appending a
# second batch of entries to it.
binomial_stats = []
for n in sample_sizes:
    # binomial's `n` keyword = number of trials per draw (not the loop variable
    # n, which is the sample size passed as `size`).
    # p = probability of success --> with p = 0.5 and pop_mean*2 trials, the
    # expected number of successes per draw is pop_mean.
    sample = np.random.binomial(n=pop_mean*2, p=0.5, size=n)
    sample_dict = {'size': n, 'mean': sample.mean(), 'sd': sample.std()}
    binomial_stats.append(sample_dict)
distro['binomial'] = binomial_stats

In [38]:
# CHI SQUARED
# Collect the per-size summaries in a fresh list and assign it once at the end,
# so re-running this cell replaces distro['chi'] instead of appending a
# second batch of entries to it.
chi_stats = []
for n in sample_sizes:
    # df = degrees of freedom; a chi-squared distribution's mean equals its df,
    # so df=pop_mean targets the shared mean.
    sample = np.random.chisquare(df=pop_mean, size=n)
    sample_dict = {'size': n, 'mean': sample.mean(), 'sd': sample.std()}
    chi_stats.append(sample_dict)
distro['chi'] = chi_stats

In [41]:
# TRIANGULAR
# Collect the per-size summaries in a fresh list and assign it once at the end,
# so re-running this cell replaces distro['triangular'] instead of appending a
# second batch of entries to it.
triangular_stats = []
for n in sample_sizes:
    # left  = left edge of the triangle
    # mode  = peak of the distribution
    # right = right edge of the triangle
    # The mean of a triangular distribution is (left + mode + right) / 3, so it
    # equals the mode only when the triangle is symmetric. Deriving the right
    # edge from pop_mean (instead of hard-coding 1000) keeps the triangle
    # symmetric about pop_mean if that constant is ever changed.
    sample = np.random.triangular(left=0, mode=pop_mean, right=2 * pop_mean, size=n)
    sample_dict = {'size': n, 'mean': sample.mean(), 'sd': sample.std()}
    triangular_stats.append(sample_dict)
distro['triangular'] = triangular_stats

In [27]:
# Scratch check: draw one normal sample of 50 values and print it together
# with its mean and standard deviation.
# NOTE: this rebinds the notebook-level names `n` and `sample`, which the
# sampling loops also use.
n = 50
sample = np.random.normal(loc=pop_mean, scale=pop_sd, size=n)
# A parenthesized single argument prints identically under the Python 2 print
# statement and the Python 3 print function; the bare statement form is a
# SyntaxError on Python 3.
print(sample)
print(sample.mean())
print(sample.std())

[ 663.93610487  597.55731231  481.82945437  552.67251787  659.56254013
  496.15370488  408.18488312  396.10477024  446.76834485  421.95615359
  430.38669299  411.92137695  517.22143659  413.09752151  579.30534241
  402.68681993  417.82770947  363.04301201  479.99389801  596.74574547
  441.4869502   637.66315554  427.93430077  513.99717112  495.13090676
  646.13205879  549.88456422  475.55817316  589.71383662  446.53853601
  563.41949649  640.15108522  550.44184385  323.73995702  470.89565618
  462.38422783  569.86240113  404.2026564   461.32589171  521.95635899
  614.03123255  625.84299735  547.01192726  531.18335128  579.25942574
  516.37965549  451.87021654  578.95925783  438.9100122   726.38169847]
510.784086885
89.3980285637


In [40]:
# Display the list of summary dictionaries stored under the 'chi' key.
distro['chi']
# Leftover scratch code below is commented out and never executed.
#s = np.random.binomial(10, 0.5, 1000)
#print s

[{'mean': 488.86296125344387, 'sd': 25.608384714176179, 'size': 10},
 {'mean': 499.54839960021786, 'sd': 29.933933964573598, 'size': 30},
 {'mean': 498.68165329736524, 'sd': 29.721718897503351, 'size': 50},
 {'mean': 493.83349152721559, 'sd': 31.281288165415717, 'size': 70},
 {'mean': 498.32163363880085, 'sd': 32.433635406819491, 'size': 90},
 {'mean': 502.5017547582159, 'sd': 31.204368489968225, 'size': 110},
 {'mean': 495.41345708361041, 'sd': 31.508032915838051, 'size': 130},
 {'mean': 499.32302806752887, 'sd': 32.599624668185356, 'size': 150},
 {'mean': 501.35955956681556, 'sd': 30.165976413095265, 'size': 170},
 {'mean': 498.82930642684897, 'sd': 31.99857333333102, 'size': 190},
 {'mean': 503.64662325024727, 'sd': 32.044154055379472, 'size': 211},
 {'mean': 498.06301615818035, 'sd': 30.46420554403019, 'size': 231},
 {'mean': 503.10880162581242, 'sd': 31.267464221543399, 'size': 251},
 {'mean': 499.17789515682216, 'sd': 29.972456576419987, 'size': 271},
 {'mean': 498.21751172742501

In [17]:
# Scratch mapping used by the following cells to try out list mutation.
# NOTE: binding the name `dict` shadows the built-in type from here on.
dict = {
    'a': [1, 2],
    'b': [3, 4],
}

In [13]:
# Show the type of the value stored under key 'a'.
type(dict['a'])

list

In [15]:
# Append 3 to the list stored under 'a'. list.append mutates the list in place
# and returns None, so its result must not be assigned back to dict['a']:
# doing so replaces the list with None and breaks any later dict['a'].append.
# NOTE: a later cell in this notebook also appends 3; running both adds it twice.
dict['a'].append(3)

In [18]:
# Append 3 in place to the list stored under 'a'; the call's return value is not used.
dict['a'].append(3)

In [19]:
# Display the current contents of the scratch mapping.
dict

{'a': [1, 2, 3], 'b': [3, 4]}