# Random sampling and pseudo random numbers
See https://docs.python.org/dev/library/random.html#random.random

In [1]:
import random

#random.seed(123)              # uncomment to initialize the generator and make every draw below repeatable

# Sampling WITHOUT replacement: a list of k unique elements chosen from the population.
# The results are kept in named variables so they can be inspected; they are not printed.
digit_sample = random.sample(range(10), k=6)
# random.sample() requires a sequence. Passing a set was deprecated in Python 3.9
# and raises TypeError from Python 3.11 on, so the set is converted to a sorted
# list first (sorting also gives the population a well-defined order).
letter_sample = random.sample(sorted({'a', 'b', 'c', 'd', 'e'}), k=1)

# Sampling WITH replacement: the same element may be drawn more than once.
seq = [i+1 for i in range(20)]
print(seq)
print(random.choices(seq, k=3))

letters = ["A", "B", "C", "D", "E", "F", "G"]
print(random.choices(letters, k=3))

# Pseudo random numbers
#random.gauss(mu=0,sigma=1)
# sample of 10 random Gaussian numbers with mean 0 and standard deviation 1
for i in range(10):
    value = random.gauss(mu=0, sigma=1)
    print(value)

#random.uniform(a=0, b=3)
# sample of 10 random uniform numbers with a <= x <= b
# (whether the end point b itself can occur depends on floating-point rounding)
for i in range(10):
    value = random.uniform(a=0, b=3)
    print(value)


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
[9, 5, 18]
['C', 'E', 'C']
0.5861338785846442
-0.9677060884435433
-2.1654994516112116
1.6013078329054355
-2.1312052668266657
-0.46923216096207176
0.7002121692978481
-0.04894610671913604
-1.598107032022175
-0.5264576191995657
1.7960397428920907
2.5560296834078353
1.191752788809563
1.004760720341837
2.4310064432525857
2.3665468426279244
2.166459753531627
2.988510185071689
0.32255525697907483
1.1644595153971826


# Statistical functions
Complete list at https://docs.python.org/3.7/library/statistics.html
    
Let $X_1,\ldots,X_n$ be an independent and identically distributed (i.i.d.) random sample from distribution function $F(x)=\mathbb{P}(X\leq x)$.
    
    
* Sample mean

$$\bar{X} = \frac{1}{n}\sum_{i=1}^{n}X_i$$

* Sample median

$$\mathrm{Med}(X_1,\ldots,X_n) =
\begin{cases}
\frac{1}{2}(X_{n/2:n}+X_{n/2+1:n}),&n\ \mathrm{even}\\
X_{(n+1)/2:n},&n\ \mathrm{odd}
\end{cases}
$$

* Sample variance

$$\widehat{\mathrm{Var}(X)}= S^2 = \frac{1}{n-1}\sum_{i=1}^{n}\left(X_i - \bar{X}\right)^2$$

* Sample standard deviation

$$\sqrt{\widehat{\mathrm{Var}(X)}}=S=\sqrt{S^2}$$

* Minimum

$$\min \left\lbrace X_1,\ldots,X_n\right\rbrace$$

* Maximum

$$\max \left\lbrace X_1,\ldots,X_n\right\rbrace$$

* Quantile

$$q(\alpha)=F^{-1}(\alpha)=\inf \left\{x\in\mathbb{R}\colon F(x)\geq \alpha \right\},\quad 0\leq\alpha\leq1.$$

* Sample covariance

$$
\widehat{\mathrm{Cov}(X,Y)}=\frac{1}{n-1}\sum_{i=1}^{n}\left(X_i-\bar{X}\right)\left(Y_i-\bar{Y}\right)
$$

* Sample correlation

$$\widehat{\mathrm{Corr}(X,Y)}=\frac{\widehat{\mathrm{Cov}(X,Y)}}{\sqrt{\widehat{\mathrm{Var}(X)}}\sqrt{\widehat{\mathrm{Var}(Y)}}}=\frac{\sum_{i=1}^{n}\left(X_i-\bar{X}\right)\left(Y_i-\bar{Y}\right)}{\sqrt{\sum_{i=1}^{n}\left(X_{i}-\bar{X}\right)^{2}}\sqrt{\sum_{i=1}^{n}\left(Y_{i}-\bar{Y}\right)^{2}}}$$



In [2]:
import pandas as pd
import numpy as np

# 1000 draws from the standard normal distribution, kept as a NumPy array.
# `d` stays an ndarray because the later cells pass it to the `statistics`
# module and to NumPy functions.
d = np.random.randn(1000)

# The same values wrapped in a pandas Series. The summary methods used below
# (.median(), .var(), .std(), .quantile(), .describe()) are Series methods;
# several of them (.median, .quantile, .describe) do not exist on an ndarray.
d_series = pd.Series(d)

#from matplotlib import pyplot
#pyplot.hist(d)
#pyplot.show()

d_series.mean()
#d_series.median()
#d_series.var()      # sample variance (divides by n-1)
#d_series.std()      # sample standard deviation (divides by n-1)
#d_series.quantile([0, 0.25, 0.5, 0.75, 1]) # Quartiles
#d_series.describe()

0.006469584102866545

In [3]:
import statistics
# Arithmetic mean computed with the standard-library `statistics` module;
# being the last expression, its value is what the cell displays.
statistics.mean(data=d)

# Further summaries -- uncomment a line to display that value instead.
#statistics.median(d)    # median
##statistics.mode(d)     # mode (most common value)
#statistics.variance(d)  # sample variance
#statistics.stdev(d)     # sample standard deviation
#min(d)                  # minimum
#max(d)                  # maximum

0.006469584102866547

In [4]:
# NumPy offers the same statistics (see https://numpy.org/doc/stable/reference/routines.statistics.html)
import numpy as np
# Arithmetic mean computed by NumPy; as the last expression it is the cell's output.
np.mean(a=d)

# Further summaries -- uncomment a line to display that value instead.
# NOTE: np.var and np.std divide by n by default (ddof=0); pass ddof=1 to get
# the sample variance / standard deviation that divide by n-1.
#np.median(d)
#np.var(d)
#np.std(d)
#np.amin(d)
#np.amax(d)
#np.quantile(d, [0, 0.25, 0.5, 0.75, 1])
#np.cov(d,d)      # covariance matrix
#np.corrcoef(d,d) # correlation matrix

0.006469584102866545