In [1]:
import warnings
# Install a blanket "ignore" filter: from here on no warning of any category
# is shown in the cell outputs (presumably to keep the notebook output tidy).
warnings.filterwarnings(action='ignore')

In [2]:
import math
import statistics
import numpy as np
import scipy.stats
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Ten integer observations used as the running example in the cells below.
data = [25, 30, 22, 27, 24, 28, 31, 29, 26, 23]

# The same observations as a NumPy array, for the ndarray-method examples.
np_data = np.array(data)

# The same observations with a single NaN inserted just before the last value,
# used to show how each library treats missing data.
nan_data = [*data[:-1], math.nan, *data[-1:]]

# pandas view of the NaN-containing list.
pd_nan_data = pd.Series(data=nan_data)


## Variance:
### With Bessel's correction, the sample variance is
### $ s^2 = \frac{1}{n-1}\sum_{i=1}^{n}(x_i - \bar{x})^2 $
### where $x_i$ is the i-th element of the dataset $x$ with $n$ elements and $\bar{x}$ is the mean of $x$
### (to calculate the population variance $\sigma^2$, replace $n-1$ with $n$)

In [5]:
# Arithmetic mean of the dataset, computed by hand.
mean1 = sum(data) / len(data)

# Sample variance by hand: squared deviations from the mean, summed and
# divided by n - 1 (Bessel's correction).
variance = sum([(x - mean1) ** 2 for x in data]) / (len(data) - 1)

# Compare the manual result with the statistics module.
(
    variance,
    statistics.variance(data, mean1),  # accepts a precomputed mean as second argument
    statistics.pvariance(data),        # population variance (divides by n)
)

(9.166666666666666, 9.166666666666666, 8.25)

In [6]:
# NumPy divides the summed squared deviations by (n - ddof), where ddof is the
# "delta degrees of freedom": ddof=1 yields the sample variance, the default
# ddof=0 yields the population variance.
(
    np.var(data, ddof=1),   # module-level function, sample variance
    np_data.var(ddof=1),    # ndarray method, sample variance
    np.var(data),           # default ddof -> population variance
)

(9.166666666666666, 9.166666666666666, 8.25)

In [7]:
# How a NaN in the input affects each variance function (see the output below).
(
    statistics.variance(nan_data),   # NaN is not filtered out -> nan
    np.var(nan_data, ddof=1),        # NaN propagates -> nan
    np.nanvar(nan_data, ddof=1),     # NaN-aware variant ignores the missing value
)

(nan, nan, 9.166666666666666)

In [8]:
# pd.Series.var uses ddof=1 and skips NaN values by default; override either
# default to see the effect.
(
    pd_nan_data.var(ddof=0),        # population variance, NaN still skipped
    pd_nan_data.var(skipna=False),  # sample variance with NaN kept -> nan
)

(8.25, nan)

-----------

In [9]:
# The sample standard deviation s is the positive square root of the sample
# variance s². `variance` is the hand-computed sample variance from the
# variance section above.
standard_deviation = pow(variance, 0.5)

# Manual result next to the statistics-module equivalent.
(standard_deviation, statistics.stdev(data))

(3.0276503540974917, 3.0276503540974917)

In [10]:
# Sample standard deviation (ddof=1) with NumPy. np.nanstd is fed nan_data so
# that it actually has a NaN to skip, mirroring the np.nanvar example in the
# variance section; on the NaN-free `data` it would be identical to np.std.
(
    np.std(data, ddof=1),          # module-level function
    np_data.std(ddof=1),           # ndarray method
    np.nanstd(nan_data, ddof=1),   # NaN-aware variant ignores the missing value
)

(3.0276503540974917, 3.0276503540974917, 3.0276503540974917)

In [11]:
# pd.Series.std follows the same defaults as pd.Series.var: ddof=1, NaN skipped.
(
    pd_nan_data.std(),              # sample standard deviation, NaN skipped
    pd_nan_data.std(skipna=False),  # NaN kept -> nan
)

(3.0276503540974917, nan)

In [12]:
# Population standard deviation (divide by n instead of n - 1).
# statistics.pstdev and np.std compute it by default; pd.Series.std defaults
# to ddof=1 (sample), so ddof=0 must be passed explicitly.

statistics.pstdev(data), np.std(data), pd_nan_data.std(ddof=0)

(2.8722813232690143, 2.8722813232690143, 2.8722813232690143)

-------