In [1]:
import numpy as np
from scipy.stats import linregress
from typing import Dict

class Dict2Obj:
  """Attribute-style view over a dictionary.

  Every key/value pair of the mapping passed to the constructor
  is copied into the instance namespace, so a value can be read
  as `obj.key` instead of `mapping["key"]`.
  """

  def __init__(self, properties_dict):
    # vars(self) is the instance's attribute dictionary
    vars(self).update(properties_dict)

def get_properties(file_path) -> Dict:
  """Load paired samples from a CSV file and compute their
  descriptive statistics and least-squares regression line.

  The file is parsed with `np.genfromtxt`, skipping one header
  line and splitting on commas. Column 0 is used as x and
  column 1 as y.

  Args:
    file_path: Source handed to `np.genfromtxt`.

  Returns:
    A dictionary with these keys, in this order:
    `file_path`, `pairCSV`, `x_observed`, `y_observed`, `n`,
    `x_sum`, `y_sum`, `x_mean`, `y_mean`, `x_variance`,
    `y_variance`, `xy_covariance`, `x_std_dev`, `y_std_dev`,
    `m_slope`, `b_intercept`, `r_value`, `p_value`,
    `std_err_slope`, `y_fit`, `y_err`.
    Variances, covariance and standard deviations use ddof=1.

  Raises:
    ValueError: If the parsed data is not a table with at
      least two rows and two columns.
  """
  # Getting Matrix Values
  pairCSV = np.genfromtxt(file_path,
    skip_header=1, delimiter=",", dtype=float)

  # genfromtxt collapses a single row or a single column to a
  # 1-D array, which would otherwise fail on the column slices
  # below with an unhelpful IndexError
  if pairCSV.ndim != 2 or min(pairCSV.shape) < 2:
    raise ValueError(
      f'{file_path!r} must contain at least two data rows '
      f'with two columns (x, y); parsed shape is '
      f'{pairCSV.shape}')

  # Extract x and y values from CSV data
  x_observed = pairCSV[:, 0]
  y_observed = pairCSV[:, 1]

  # Number of data points
  n = len(x_observed)

  # Calculate slope (m), intercept (b),
  # and other regression parameters
  (m_slope, b_intercept, r_value, p_value,
   std_err_slope) = linregress(x_observed, y_observed)

  # Create regression line and its residuals
  y_fit = m_slope * x_observed + b_intercept
  y_err = y_observed - y_fit

  # Explicit result instead of locals(): a temporary added to
  # this function later cannot silently become part of the
  # returned contract. Keys and order match the original.
  return {
    'file_path': file_path,
    'pairCSV': pairCSV,
    'x_observed': x_observed,
    'y_observed': y_observed,
    'n': n,
    'x_sum': np.sum(x_observed),
    'y_sum': np.sum(y_observed),
    'x_mean': np.mean(x_observed),
    'y_mean': np.mean(y_observed),
    # Sample statistics (ddof=1)
    'x_variance': np.var(x_observed, ddof=1),
    'y_variance': np.var(y_observed, ddof=1),
    'xy_covariance': np.cov(
      x_observed, y_observed, ddof=1)[0, 1],
    'x_std_dev': np.std(x_observed, ddof=1),
    'y_std_dev': np.std(y_observed, ddof=1),
    'm_slope': m_slope,
    'b_intercept': b_intercept,
    'r_value': r_value,
    'p_value': p_value,
    'std_err_slope': std_err_slope,
    'y_fit': y_fit,
    'y_err': y_err,
  }

def display(properties: Dict) -> None:
  """Print a formatted summary of regression properties.

  Args:
    properties: Mapping providing the entries `n`, `x_sum`,
      `y_sum`, `x_mean`, `y_mean`, `x_variance`, `y_variance`,
      `xy_covariance`, `x_std_dev`, `y_std_dev`, `m_slope`
      and `b_intercept`.
  """
  p = Dict2Obj(properties)

  # (line prefix, attribute name) pairs; values are looked up
  # one at a time so lines are emitted in order as they resolve
  basic_rows = (
    ('∑x (total) = ', 'x_sum'),
    ('∑y (total) = ', 'y_sum'),
    ('x̄ (mean)   = ', 'x_mean'),
    ('ȳ (mean)   = ', 'y_mean'),
  )
  statistic_rows = (
    ('sₓ² (variance) = ', 'x_variance'),
    ('sy² (variance) = ', 'y_variance'),
    ('covariance     = ', 'xy_covariance'),
    ('sₓ (std dev)   = ', 'x_std_dev'),
    ('sy (std dev)   = ', 'y_std_dev'),
    ('m (slope)      = ', 'm_slope'),
    ('b (intercept)  = ', 'b_intercept'),
  )

  # Basic properties: the sample size is the only integer row
  print(f'{"n":10s} = {p.n:4d}')
  for prefix, attr in basic_rows:
    print(f'{prefix}{getattr(p, attr):7.2f}')
  print()

  # Statistics properties
  for prefix, attr in statistic_rows:
    print(f'{prefix}{getattr(p, attr):9.2f}')
  print()

  # Fitted line, intercept first
  equation = (
    f'Equation     y = {p.b_intercept:5.2f}'
    f' + {p.m_slope:5.2f}.x')
  print(equation)
  print()
  


In [2]:
import numpy as np

# Compute the regression properties with get_properties()
# and print their summary with display()
properties = get_properties("50-samples.csv")
display(properties)

# Publish every entry as a notebook-level variable for the
# cells below. globals() is the explicit namespace to write
# to; mutating the dict returned by locals() is discouraged
# by the Python documentation.
globals().update(properties)

n          =   13
∑x (total) =   78.00
∑y (total) = 2327.00
x̄ (mean)   =    6.00
ȳ (mean)   =  179.00

sₓ² (variance) =     15.17
sy² (variance) =  25768.17
covariance     =    606.67
sₓ (std dev)   =      3.89
sy (std dev)   =    160.52
m (slope)      =     40.00
b (intercept)  =    -61.00

Equation     y = -61.00 + 40.00.x



In [3]:
# Coefficient of determination (R²): square of Pearson's r
r_squared = r_value ** 2

# Report the spread of both variables together with the
# correlation figures
print('sₓ (std dev)   = {:9.2f}'.format(x_std_dev))
print('sy (std dev)   = {:9.2f}'.format(y_std_dev))
print('r (pearson)    = {:9.2f}'.format(r_value))
print('R²             = {:9.2f}'.format(r_squared))
print()

sₓ (std dev)   =      3.89
sy (std dev)   =    160.52
r (pearson)    =      0.97
R²             =      0.94



In [4]:
# Fitted values of the regression line and their residuals
y_fit = m_slope * x_observed + b_intercept
y_err = y_observed - y_fit

# Calculate variance of residuals (MSE); n - 2 degrees of
# freedom because slope and intercept are both estimated
var_residuals = np.sum(y_err ** 2) / (n - 2)

# Calculate t-value: slope estimate over its standard error
t_value = m_slope / std_err_slope

# Output the results.
# Sₓₓ = ∑(x - x̄)² is the sum of squared deviations of x; the
# slope standard error is √(MSE/Sₓₓ), not MSE divided by the
# standard deviation sₓ, so the label names Sₓₓ.
print(f'MSE = ∑ϵ²/(n-2)     = {var_residuals:9.2f}')
print(f'SE(β₁) = √(MSE/Sₓₓ) = {std_err_slope:9.2f}')
print(f't-value = β̅₁/SE(β₁) = {t_value:9.2f}')
print()

MSE = ∑ϵ²/(n-2)     =   1638.00
SE(β₁)  = √(MSE/sₓ) =      3.00
t-value = β̅₁/SE(β₁) =     13.33

