In [20]:
from csv import reader
from random import seed, randrange


# Seed Python's `random` module with a fixed value so that any random draws
# made later in the notebook are repeatable from run to run.
seed(42)


def load_csv(file_path):
  """Read a CSV file into a list of rows, skipping empty rows.

  Args:
    file_path: Path of the CSV file to read.

  Returns:
    A list with one entry per non-empty row. Each row is the list of string
    fields produced by ``csv.reader``; no type conversion is performed.

  Raises:
    OSError: If the file cannot be opened.
  """
  # newline="" passes line endings to the csv module untouched, as the csv
  # documentation requires; without it, "\r\n" inside a quoted field is
  # rewritten to "\n" before the reader ever sees it.
  with open(file_path, "r", newline="") as file:
    # csv.reader yields an empty list for a blank line; keep only real rows.
    return [row for row in reader(file) if row]


def str_col_to_float(dataset, col):
  """Convert one column of every row from text to ``float``, in place.

  Args:
    dataset: Iterable of mutable rows (e.g. a list of lists) whose entry at
      ``col`` is a string.
    col: Index of the column to convert.

  Returns:
    None. The rows of ``dataset`` are modified directly.

  Raises:
    ValueError: If a stripped entry cannot be parsed as a float.
  """
  for record in dataset:
    raw_text = record[col]
    # Drop surrounding whitespace before parsing the number.
    record[col] = float(raw_text.strip())


def mean(vals):
  """Return the arithmetic mean of ``vals`` as a float.

  Args:
    vals: A sized collection of numbers (it must support ``len``).

  Returns:
    The sum of the values divided by how many there are.

  Raises:
    ZeroDivisionError: If ``vals`` is empty.
  """
  total = sum(vals)
  count = float(len(vals))
  return total / count


def variance(vals):
  """Return the sum of squared deviations of ``vals`` from their mean.

  Note that the total is NOT divided by the number of values, so this is
  the un-normalised sum of squares rather than a per-sample variance.

  Args:
    vals: A sized collection of numbers.

  Returns:
    The sum over all values of ``(value - mean(vals)) ** 2``.
  """
  center = mean(vals)
  return sum((v - center) ** 2 for v in vals)


def covariance(vals1, vals2):
  """Return the sum of products of paired deviations from each mean.

  The total is not divided by the number of pairs. Values are paired
  positionally, and pairing stops at the end of the shorter input, while
  each mean is still taken over its whole input.

  Args:
    vals1: A sized collection of numbers.
    vals2: A sized collection of numbers, paired with ``vals1`` by position.

  Returns:
    The sum over pairs of ``(v1 - mean(vals1)) * (v2 - mean(vals2))``.
  """
  center_1 = mean(vals1)
  center_2 = mean(vals2)
  return sum((a - center_1) * (b - center_2) for a, b in zip(vals1, vals2))


# Small worked example: every row is an [x, y] pair.
dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]]
x_vals = [row[0] for row in dataset]
y_vals = [row[1] for row in dataset]

# Summary statistics for each column, computed with the helpers above.
mean_x = mean(x_vals)
mean_y = mean(y_vals)
var_x = variance(x_vals)
var_y = variance(y_vals)
display(f"x-stats: mean={mean_x:.2f}, var={var_x:.2f}")
display(f"y-stats: mean={mean_y:.2f}, var={var_y:.2f}")
display(f"covar={covariance(x_vals, y_vals)}")

'x-stats: mean=3.00, var=10.00'

'y-stats: mean=2.80, var=8.80'

'covar=8.0'

In [25]:
def coefficients(dataset):
  """Compute the two coefficients of the line ``y = b1 * x + b0``.

  Args:
    dataset: Rows whose first entry is the x value and second entry is the
      y value.

  Returns:
    A ``(b0, b1)`` tuple, where ``b1`` is ``covariance(x, y) / variance(x)``
    and ``b0`` is ``mean(y) - b1 * mean(x)``.

  Raises:
    ZeroDivisionError: If ``variance`` of the x values is zero (for example
      when every x is the same) or the dataset is empty.
  """
  xs = [row[0] for row in dataset]
  ys = [row[1] for row in dataset]
  slope = covariance(xs, ys) / variance(xs)
  intercept = mean(ys) - slope * mean(xs)
  return intercept, slope


# Same toy [x, y] rows as the earlier example cell, re-declared here.
dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]]
# coefficients() returns (b0, b1); the message below uses b1 as the
# multiplier of x and b0 as the constant term.
b0, b1 = coefficients(dataset)
display(f"coefficients: b0={b0:.2f}, b1={b1:.2f}; formula: y = {b1:.2f}x + {b0:.2f}")

'coefficients: b0=0.40, b1=0.80; formula: y = 0.80x + 0.40'