# Variance and Covariance - Code Examples

In [9]:
import numpy as np

# --- 1. Variance Calculation ---
# Sample data: six observations (mean 15) shared by the manual and NumPy
# variance examples in the cells below.
data = [10, 12, 15, 15, 18, 20]

In [10]:
# --- Manual Calculation of Sample Variance (s^2) ---
# Formula: s^2 = sum((x_i - mean)^2) / (n - 1)
def manual_sample_variance(data):
    """Compute the unbiased sample variance of a collection of numbers.

    Implements s^2 = sum((x_i - mean)^2) / (n - 1).

    Args:
        data: A sized collection of numbers that can be iterated more than
            once (e.g. a list or tuple).

    Returns:
        The sample variance as a float. When ``data`` holds fewer than two
        values the result is ``nan``: with an n - 1 denominator the statistic
        is undefined, and this mirrors what ``np.var(data, ddof=1)`` yields.
    """
    n = len(data)
    if n < 2:
        # Undefined, not zero: returning 0.0 would wrongly report "no spread"
        # for input that carries no information about spread at all.
        return float("nan")

    mean = sum(data) / n
    # Total squared distance of every observation from the mean
    squared_diff_total = sum((x - mean) ** 2 for x in data)
    # n - 1 (Bessel's correction) makes this an unbiased estimate of the
    # population variance when `data` is only a sample.
    return squared_diff_total / (n - 1)

In [11]:
# Compute once, then emit the whole report with a single print call.
manual_var = manual_sample_variance(data)
print(
    "--- Variance ---",
    f"Sample Data: {data}",
    f"Manual Sample Variance: {manual_var:.4f}",
    sep="\n",
)

--- Variance ---
Sample Data: [10, 12, 15, 15, 18, 20]
Manual Sample Variance: 13.6000


In [12]:
# --- Using NumPy for Variance ---
# np.var defaults to the population variance (ddof=0, denominator n).
# Passing ddof=1 ("delta degrees of freedom") switches the denominator to
# n - 1, which gives the sample variance.
np_sample_var = np.var(data, ddof=1)
print(f"NumPy Sample Variance (ddof=1): {np_sample_var:.4f}")

# Checked example: the hand-written implementation must agree with NumPy.
# Recomputed here instead of reusing a variable from an earlier cell so the
# check cannot pass or fail because of stale kernel state.
np.testing.assert_allclose(np_sample_var, manual_sample_variance(data))

# Population variance (ddof=0 is the default; spelled out for clarity)
np_population_var = np.var(data, ddof=0)
print(f"NumPy Population Variance (ddof=0): {np_population_var:.4f}")

NumPy Sample Variance (ddof=1): 13.6000
NumPy Population Variance (ddof=0): 11.3333


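* `np.var(data, ddof=1)`: This line calculates the **sample variance**. The `ddof=1` argument tells NumPy to use a denominator of $n-1$ instead of $n$ in the variance formula (Bessel's correction). Because the deviations are measured from the sample mean rather than the unknown population mean, dividing by $n$ would systematically underestimate the spread; dividing by $n-1$ is the standard approach for getting an unbiased estimate of the variance of a **population** based on a **sample**. The result is stored in the `np_sample_var` variable.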

* `np.var(data, ddof=0)`: This line calculates the **population variance**. The `ddof=0` argument (or simply omitting the `ddof` parameter, as 0 is the default) tells NumPy to use a denominator of $n$. This is the correct formula when you have the entire **population** of data and want to calculate its true variance. The result is stored in the `np_population_var` variable.

In [13]:
# --- 2. Covariance Calculation ---
# Two variables observed together: X[i] and Y[i] form one paired observation,
# so both lists must have the same length.
X = [2, 3, 5, 6]
Y = [1, 2, 4, 3]

In [14]:
# --- Manual Calculation of Covariance ---
# Formula: Cov(X,Y) = sum((x_i - mean_x) * (y_i - mean_y)) / (n - 1)
def manual_covariance(X, Y):
    """Compute the unbiased sample covariance between two paired variables.

    Implements Cov(X, Y) = sum((x_i - mean_x) * (y_i - mean_y)) / (n - 1).

    Args:
        X: A sized collection of numbers (e.g. a list).
        Y: A sized collection of numbers with the same length as ``X``;
            ``Y[i]`` is paired with ``X[i]``.

    Returns:
        The sample covariance as a float. When fewer than two pairs are
        supplied the result is ``nan``: with an n - 1 denominator the
        statistic is undefined, and this mirrors what ``np.cov`` yields.

    Raises:
        ValueError: If ``X`` and ``Y`` differ in length.
    """
    if len(X) != len(Y):
        raise ValueError("Inputs must have the same length.")
    n = len(X)
    if n < 2:
        # Undefined, not zero: 0.0 would wrongly suggest the variables are
        # uncorrelated when there is simply not enough data to tell.
        return float("nan")

    mean_x = sum(X) / n
    mean_y = sum(Y) / n

    # Sum, over every pair, of how far x and y sit from their own means
    deviation_product_total = sum(
        (x - mean_x) * (y - mean_y) for x, y in zip(X, Y)
    )
    # n - 1 (Bessel's correction) gives the unbiased sample covariance
    return deviation_product_total / (n - 1)

In [15]:
print("--- Covariance ---")
print(f"Variable X: {X}")
print(f"Variable Y: {Y}")
manual_cov = manual_covariance(X, Y)
print(f"Manual Sample Covariance: {manual_cov:.4f}")

# --- Using NumPy for Covariance ---
# np.cov returns a 2x2 covariance matrix
# C[0,0] is the variance of X
# C[1,1] is the variance of Y
# C[0,1] and C[1,0] are the covariance between X and Y
# Note: unlike np.var (which defaults to ddof=0), np.cov normalizes by n - 1
# by default, so no extra argument is needed to get the *sample* covariance.
covariance_matrix = np.cov(X, Y)
print("\nNumPy Covariance Matrix:")
print(covariance_matrix)

# We can extract the covariance value from the matrix
np_cov = covariance_matrix[0, 1]
print(f"NumPy Sample Covariance: {np_cov:.4f}")

# Checked example: the hand-written implementation must agree with NumPy.
np.testing.assert_allclose(np_cov, manual_cov)


--- Covariance ---
Variable X: [2, 3, 5, 6]
Variable Y: [1, 2, 4, 3]
Manual Sample Covariance: 2.0000

NumPy Covariance Matrix:
[[3.33333333 2.        ]
 [2.         1.66666667]]
NumPy Sample Covariance: 2.0000
