Sample covariance (Cov) formula:

$$
\text{Cov}(X, Y) = \frac{\sum_{i=1}^{N} (X_i - \bar{X})(Y_i - \bar{Y})}{N - 1}
$$

Where:
- \( X \) and \( Y \) are two random variables.
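- \( N \) is the number of samples; dividing by \( N - 1 \) rather than \( N \) (Bessel's correction) gives the unbiased sample covariance.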
- \( X_i \) and \( Y_i \) are the \( i \)-th values of \( X \) and \( Y \).
- \( \bar{X} \) and \( \bar{Y} \) are the means of \( X \) and \( Y \), respectively.


In [1]:
def calculate_covariance_matrix(vectors: list[list[float]]) -> list[list[float]]:
    """Compute the sample covariance matrix of a set of features.

    Each inner list of ``vectors`` holds every observation of one feature.
    Entry ``[i][j]`` of the result is the covariance between feature ``i``
    and feature ``j``, normalised by ``n_points - 1``.

    Args:
        vectors: One list of observations per feature. All features must
            contain the same number of observations.

    Returns:
        An ``n_features x n_features`` matrix as a list of rows. An empty
        ``vectors`` yields an empty matrix.

    Raises:
        ValueError: If the features differ in length, or if there are fewer
            than two observations per feature (the ``n_points - 1``
            denominator would be zero or negative).
    """
    n_features = len(vectors)
    if n_features == 0:
        return []

    n_points = len(vectors[0])
    for feature in vectors:
        # A ragged input would otherwise give a wrong mean (divided by the
        # first feature's length) instead of failing loudly.
        if len(feature) != n_points:
            raise ValueError(
                "all features must have the same number of observations"
            )
    if n_points < 2:
        raise ValueError(
            "at least two observations per feature are required"
        )

    # Centre every feature around its own mean.
    centered = []
    for feature in vectors:
        mean = sum(feature) / n_points
        centered.append([value - mean for value in feature])

    # The matrix is symmetric, so compute the upper triangle once and mirror
    # each value into the lower triangle.
    covariance_matrix = [[0.0] * n_features for _ in range(n_features)]
    for i in range(n_features):
        for j in range(i, n_features):
            total = 0
            for a, b in zip(centered[i], centered[j]):
                total += a * b
            covariance = total / (n_points - 1)
            covariance_matrix[i][j] = covariance
            covariance_matrix[j][i] = covariance

    return covariance_matrix

In [2]:
# Two features with three observations each; the second is the first
# shifted by a constant, so every variance and covariance equals 1.
vectors = [
    [1, 2, 3],
    [4, 5, 6],
]
output = [
    [1.0, 1.0],
    [1.0, 1.0],
]

my_output = calculate_covariance_matrix(vectors)
assert my_output == output

In [3]:
# Optimized version 
def calculate_covariance_matrix(vectors: list[list[float]]) -> list[list[float]]:
    """Compute the sample covariance matrix of a set of features.

    Each inner list of ``vectors`` holds every observation of one feature.
    Entry ``[i][j]`` of the result is the covariance between feature ``i``
    and feature ``j``, normalised by ``n_points - 1``.

    Args:
        vectors: One list of observations per feature. All features must
            contain the same number of observations.

    Returns:
        An ``n_features x n_features`` matrix as a list of rows. An empty
        ``vectors`` yields an empty matrix.

    Raises:
        ValueError: If the features differ in length, or if there are fewer
            than two observations per feature (the ``n_points - 1``
            denominator would be zero or negative).
    """
    if len(vectors) == 0:
        return []

    n_points = len(vectors[0])
    # A ragged input would otherwise give a wrong mean (divided by the first
    # feature's length) instead of failing loudly.
    if any(len(feature) != n_points for feature in vectors):
        raise ValueError(
            "all features must have the same number of observations"
        )
    if n_points < 2:
        raise ValueError(
            "at least two observations per feature are required"
        )

    feature_mean = [sum(feature) / n_points for feature in vectors]

    # Centre every feature around its own mean.
    centered = [
        [value - mean for value in feature]
        for feature, mean in zip(vectors, feature_mean)
    ]

    return [
        [
            sum(a * b for a, b in zip(row_i, row_j)) / (n_points - 1)
            for row_j in centered
        ]
        for row_i in centered
    ]

In [4]:
# Repeat the same check against the redefined (optimized) implementation.
my_output = calculate_covariance_matrix(vectors)
assert my_output == output

# Numpy check

In [5]:
import numpy as np

# Cross-check: the hand-written expected matrix must agree, within
# floating-point tolerance, with what np.cov computes for the same vectors.
np_output = np.cov(vectors)
assert np.allclose(output, np_output)