In [8]:
import numpy as np

In [1]:
import pandas as pd

# UCI Iris data set: four numeric measurements per flower plus a class label.
IRIS_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
IRIS_COLUMNS = ['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']

# The file has no header row, so the column names are passed to read_csv directly
# instead of being assigned afterwards.
# dropna(how="all") removes a row only when every field in it is missing (guards
# against an empty line at the end of the file); it is chained rather than applied
# with inplace=True so the frame is never mutated after it has been bound to `df`.
df = pd.read_csv(
    filepath_or_buffer=IRIS_URL,
    header=None,
    names=IRIS_COLUMNS,
    sep=',',
).dropna(how="all")
df.tail()

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [3]:
# Split the frame into the numeric feature matrix and the label vector.
# to_numpy() is the accessor pandas recommends over .values and always yields
# a NumPy ndarray, whatever the column dtype.
X = df.iloc[:, 0:4].to_numpy()  # first four columns: the measurements
y = df.iloc[:, 4].to_numpy()    # fifth column: the class label

```
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
```

In [4]:
# Shape of the feature matrix as (rows, columns).
X.shape

(150, 4)

In [5]:
# Shape of the label vector; its length should match the number of rows in X.
y.shape

(150,)

In [9]:
# Covariance between the columns of X. rowvar=False tells NumPy that variables
# are stored in columns (one observation per row), so no explicit transpose is needed.
cov_mat = np.cov(X, rowvar=False)
cov_mat

array([[ 0.68569351, -0.03926846,  1.27368233,  0.5169038 ],
       [-0.03926846,  0.18800403, -0.32171275, -0.11798121],
       [ 1.27368233, -0.32171275,  3.11317942,  1.29638747],
       [ 0.5169038 , -0.11798121,  1.29638747,  0.58241432]])

In [10]:
# Reference result: NumPy's own correlation matrix, treating each column of X as a variable.
np.corrcoef(X, rowvar=False)

array([[ 1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])

#### How to convert a covariance matrix to a correlation matrix?

In [11]:
# The main diagonal of the covariance matrix holds each variable's variance;
# the square root of its real part is the per-variable standard deviation.
variance = cov_mat.diagonal()
stddev = np.sqrt(np.real(variance))

In [12]:
# Per-variable standard deviations (square roots of the covariance diagonal).
stddev

array([0.82806613, 0.43359431, 1.76442042, 0.76316074])

In [13]:
# np.newaxis (an alias of None) appends a length-1 axis, turning the 1-D
# vector into a column vector.
print(stddev[:, np.newaxis].shape)
stddev[:, np.newaxis]

(4, 1)


array([[0.82806613],
       [0.43359431],
       [1.76442042],
       [0.76316074]])

In [15]:
# Display the covariance matrix again as the starting point for the division below.
cov_mat

array([[ 0.68569351, -0.03926846,  1.27368233,  0.5169038 ],
       [-0.03926846,  0.18800403, -0.32171275, -0.11798121],
       [ 1.27368233, -0.32171275,  3.11317942,  1.29638747],
       [ 0.5169038 , -0.11798121,  1.29638747,  0.58241432]])

In [14]:
# Broadcasting the column vector against the square matrix divides row i by
# stddev[i] (equivalently, every column is divided element-wise by the stddev vector).
matrix1 = cov_mat / stddev[:, np.newaxis]
matrix1

array([[ 0.82806613, -0.04742188,  1.53814084,  0.6242301 ],
       [-0.09056497,  0.43359431, -0.74196719, -0.27210045],
       [ 0.72187009, -0.18233339,  1.76442042,  0.73473842],
       [ 0.6773197 , -0.15459549,  1.69870828,  0.76316074]])

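<b> The diagonal elements of the covariance matrix are variances, i.e. the squares of the standard deviations. <br>
Notice that, after the division, each diagonal element has been divided by its own standard deviation, <br>
so it is now equal to that standard deviation.
</b>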

In [27]:
# Same division restricted to the last column of cov_mat (kept 2-D by slicing):
# entry i is divided by stddev[i], which reproduces the last column of matrix1.
cov_mat[:, 3:4] / stddev[:, np.newaxis]

array([[ 0.6242301 ],
       [-0.27210045],
       [ 0.73473842],
       [ 0.76316074]])

In [29]:
# Display matrix1 again: the covariance matrix with row i divided by stddev[i].
matrix1

array([[ 0.82806613, -0.04742188,  1.53814084,  0.6242301 ],
       [-0.09056497,  0.43359431, -0.74196719, -0.27210045],
       [ 0.72187009, -0.18233339,  1.76442042,  0.73473842],
       [ 0.6773197 , -0.15459549,  1.69870828,  0.76316074]])

In [30]:
# Prepending a length-1 axis gives the standard deviations as a row vector.
stddev[np.newaxis, :]

array([[0.82806613, 0.43359431, 1.76442042, 0.76316074]])

In [31]:
# NumPy's correlation matrix once more, as the target for the manual reconstruction.
np.corrcoef(X, rowvar=False)

array([[ 1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])

In [36]:
# Dividing the last row of matrix1 by the stddev row vector divides entry j by
# stddev[j]; the result is the last row of the correlation matrix.
matrix1[3, :] / stddev[np.newaxis, :]

array([[ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])

In [47]:
# Denominator of the correlation formula: deno[i, j] = stddev[i] * stddev[j],
# i.e. the outer product of the standard-deviation vector with itself.
deno = np.outer(stddev, stddev)
deno

array([[0.68569351, 0.35904476, 1.46105679, 0.63194756],
       [0.35904476, 0.18800403, 0.76504266, 0.33090216],
       [1.46105679, 0.76504266, 3.11317942, 1.3465364 ],
       [0.63194756, 0.33090216, 1.3465364 , 0.58241432]])

In [48]:
# Covariance matrix (the numerator), shown next to the denominator matrix.
np.cov(X, rowvar=False)

array([[ 0.68569351, -0.03926846,  1.27368233,  0.5169038 ],
       [-0.03926846,  0.18800403, -0.32171275, -0.11798121],
       [ 1.27368233, -0.32171275,  3.11317942,  1.29638747],
       [ 0.5169038 , -0.11798121,  1.29638747,  0.58241432]])

In [49]:
# corr[i, j] = cov[i, j] / (stddev[i] * stddev[j]).
# Reuse the covariance matrix computed earlier instead of recomputing it from X,
# and keep the result under a name so it can be inspected or reused.
corr_from_cov = cov_mat / deno

# Guard the derivation: it must agree with NumPy's own correlation matrix.
np.testing.assert_allclose(corr_from_cov, np.corrcoef(X.T))
corr_from_cov

array([[ 1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])