### Variance Covariance Matrix

In [38]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing

In [39]:
# Fetch the dataset once and reuse the returned object, instead of calling
# fetch_california_housing() separately for the data, target and feature names.
housing = fetch_california_housing()
Y = housing.target

# Feature table with the target appended as a 'Price' column, so the covariance
# matrix computed below covers the features and the target together.
X = pd.DataFrame(data=housing.data, columns=housing.feature_names)
X['Price'] = Y

# Plain NumPy view of the same table for the array-based covariance routine.
X1 = X.values

In [40]:
def calc_cov(X):
    """Compute the sample variance-covariance matrix of the columns of X.

    Every row of ``X`` is one observation and every column is one variable.
    Entry ``[i, j]`` of the result is
    ``sum((X[:, i] - mean_i) * (X[:, j] - mean_j)) / (m - 1)``,
    i.e. the unbiased estimator that divides by ``m - 1``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Numeric data with ``m`` observations (rows) and ``n`` variables
        (columns). Anything ``np.asarray`` can convert to a 2-D float array
        is accepted (NumPy array, nested list, pandas DataFrame, ...).

    Returns
    -------
    numpy.ndarray of shape (n, n)
        The covariance matrix. With fewer than two observations the division
        by ``m - 1`` yields non-finite values (NumPy emits a RuntimeWarning)
        rather than raising.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(
            "calc_cov expects a 2-D array of shape (m, n), got {} dimension(s)".format(X.ndim)
        )
    m = X.shape[0]

    # Centre every column once; the original loop recomputed each column mean
    # n times inside the inner loop.
    centered = X - X.mean(axis=0)

    # (centered.T @ centered)[i, j] is the sum over observations of the product
    # of deviations of columns i and j, so one matrix product fills all entries.
    return centered.T @ centered / (m - 1)

In [41]:
# Covariance matrix of the housing table (features plus the 'Price' column),
# computed with the custom implementation so it can be checked against library results.
cov = calc_cov(X1)

In [43]:
# Wrap the covariance matrix in a DataFrame for readable printing; the nine
# variables are labelled with the letters A through I.
df = pd.DataFrame(data=cov, columns=list('ABCDEFGHI'))
print(df)

           A            B           C          D             E           F  \
0   3.609323    -2.846140    1.536568  -0.055858  1.040098e+01    0.370289   
1  -2.846140   158.396260   -4.772882  -0.463718 -4.222271e+03    1.724298   
2   1.536568    -4.772882    6.121533   0.993868 -2.023337e+02   -0.124689   
3  -0.055858    -0.463718    0.993868   0.224592 -3.552723e+01   -0.030424   
4  10.400979 -4222.270582 -202.333712 -35.527225  1.282470e+06  821.712002   
5   0.370289     1.724298   -0.124689  -0.030424  8.217120e+02  107.870026   
6  -0.323860     0.300346    0.562235   0.070575 -2.631378e+02    0.052492   
7  -0.057765    -2.728244   -0.136518   0.012670  2.263778e+02    0.051519   
8   1.508475     1.533988    0.433826  -0.025539 -3.221249e+01   -0.284494   

            G           H          I  
0   -0.323860   -0.057765   1.508475  
1    0.300346   -2.728244   1.533988  
2    0.562235   -0.136518   0.433826  
3    0.070575    0.012670  -0.025539  
4 -263.137814  226.37783

In [44]:
# Cross-check against NumPy: rowvar=False tells np.cov that variables are in columns.
np_covar = np.cov(X1, rowvar=False)
# Sum of absolute element-wise differences; a value near zero means the matrices agree.
print(
    "Difference between numpy covariance matrix and custom covariance matrix: {:.15f}".format(
        np.abs(cov - np_covar).sum()
    )
)

Difference between numpy covariance matrix and custom covariance matrix: 0.000000000699255


In [45]:
# Cross-check against pandas: DataFrame.cov() computes the pairwise covariance of the columns.
pd_covar = X.cov().values
# The message now names pandas; it previously said "numpy" (copied from the cell above)
# even though the comparison is against the pandas result.
print("Difference between pandas covariance matrix and custom covariance matrix: {:.15f}".format(np.sum(np.abs(cov-pd_covar))))

Difference between numpy covariance matrix and custom covariance matrix: 0.000000000699255


#### The custom covariance matrix agrees with the NumPy and pandas covariance matrices, up to floating-point rounding error.