In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as scs
%matplotlib inline

https://docs.scipy.org/doc/scipy/reference/tutorial/

# NumPy Polynomials

In [4]:
# Build the polynomial 3x^2 + 4x + 5 (coefficients listed from highest degree down)
p = np.poly1d([3, 4, 5])
# print(...) with a single argument behaves identically on Python 2 and Python 3
print(p)

   2
3 x + 4 x + 5


In [7]:
# Polynomial arithmetic: a poly1d can be multiplied by another poly1d or by a scalar
print(p * p)
print(2 * p)

   4      3      2
9 x + 24 x + 46 x + 40 x + 25
   2
6 x + 8 x + 10


### Integral

In [None]:
# Indefinite integral (antiderivative) of order m, using k as the integration constant C
print(p.integ(m=1, k=6))

### Derivative

In [17]:
# First derivative of the polynomial
print(p.deriv())

 
6 x + 4


### Vectorized Functions

In [18]:
def addsubtract(a, b):
    """Return ``a - b`` when ``a`` is strictly greater than ``b``, otherwise ``a + b``."""
    return a - b if a > b else a + b

In [20]:
# Wrap the scalar addsubtract so it can be called on whole sequences
vec_addsubtract = np.vectorize(addsubtract)
# Each result element comes from the inputs at the same position: (0,1), (3,3), (6,5), (9,7)
vec_addsubtract([0,3,6,9],[1,3,5,7])

array([1, 6, 1, 2])

Note that vectorized functions apply the operation element-wise to corresponding pairs of inputs

# Normal Distributions

### Random Variables

In [27]:
# scs.norm is SciPy's normal distribution object; .a and .b hold the bounds of its support
norm_dist = scs.norm
# Single pre-formatted argument, so print(...) is identical on Python 2 and Python 3
print('lower-bound: %s, upper-bound: %s' % (norm_dist.a, norm_dist.b))

lower-bound: -inf, upper-bound: inf


### Common Methods

In [33]:
# Bind SciPy's normal distribution object again so the cells in this section can use norm_dist
norm_dist = scs.norm

- rvs: Random Variates
- pdf: Probability Density Function
- cdf: Cumulative Distribution Function
- sf: Survival Function (1 - cdf)
- ppf: Percent Point Function (inverse of cdf)
- isf: Inverse Survival Function (inverse of sf)
- stats: return mean, variance, skew, kurtosis
- moment: non-central moments of the distribution

In [35]:
# CDF at 0 covers half the bell curve; at +infinity it covers the full bell curve
print(norm_dist.cdf(0))
# np.inf states the intent directly instead of a large stand-in number
print(norm_dist.cdf(np.inf))

0.5
1.0


In [38]:
# To compute the cdf at several points at once, pass a list or a NumPy array
print(norm_dist.cdf(np.array([-1, 0, 1])))

[ 0.15865525  0.5         0.84134475]


Note that the basic methods such as pdf, cdf, etc. are vectorized functions

In [40]:
# Mean, standard deviation and variance of the distribution.
# Formatting into one string keeps the space-separated output the same on Python 2 and 3.
print('%s %s %s' % (norm_dist.mean(), norm_dist.std(), norm_dist.var()))

 0.0 1.0 1.0


In [42]:
# Request selected moments of the distribution: 'm' = mean, 'v' = variance
print(norm_dist.stats(moments='mv'))

(array(0.0), array(1.0))


In [44]:
# Median via the dedicated method
print(norm_dist.median())

# Equivalent: the median is the point where the cdf reaches 0.5, i.e. ppf(0.5)
print(norm_dist.ppf(0.5))

0.0
0.0


In [71]:
# ppf (inverse cdf) gives the z-scores at the 2.5% and 97.5% points, i.e. about -1.96 and 1.96.
# Formatting into one string keeps the space-separated output the same on Python 2 and 3.
print('%s %s' % (norm_dist.ppf(0.025), norm_dist.ppf(0.975)))

-1.95996398454 1.95996398454


In [52]:
# Draw three random variates from the distribution; no seed is given, so values differ between runs
norm_dist.rvs(size=3)

array([ 2.19264645, -0.98050994,  0.99610267])

In [59]:
# For reproducible random variates, pass a fixed seed through random_state
norm_dist.rvs(size=5, random_state=1234)

array([ 0.47143516, -1.19097569,  1.43270697, -0.3126519 , -0.72058873])

### Shifting and Scaling

In [64]:
# Freeze a normal distribution with mean 3 (loc) and standard deviation 2 (scale)
new_norm_dist = scs.norm(loc=3, scale=2)
# The variance reported is scale**2 = 4
print(new_norm_dist.stats(moments='mv'))

(array(3.0), array(4.0))


In [75]:
# The frozen distribution offers the same summary methods.
# Formatting into one string keeps the space-separated output the same on Python 2 and 3.
print('%s %s %s' % (new_norm_dist.mean(), new_norm_dist.var(), new_norm_dist.median()))

# ppf on the shifted/scaled distribution returns a value in data units, not a z-score
print(new_norm_dist.ppf(0.025))
# Standardise with (x - mean) / std to recover the z-score. The whole expression sits inside
# print(...): on Python 3, `print (a - b) / c` would print a - b and then divide None by c.
print((new_norm_dist.ppf(0.025) - new_norm_dist.mean()) / new_norm_dist.std())



3.0 4.0 3.0
-0.91992796908
-1.95996398454


### Fitting Distributions