In [None]:
import numpy as np
import pandas as pd


def variance(x):
    """Return the sample variance of ``x``.

    The sum of squared deviations from the mean is divided by ``n - 1``
    (Bessel's correction) instead of ``n``.

    Args:
        x: Array-like of numbers. It is converted with ``np.asarray`` so
            plain sequences are accepted as well as NumPy arrays.

    Returns:
        The sample variance of ``x``.

    Raises:
        ValueError: If ``x`` has fewer than two elements, because the
            ``n - 1`` denominator would then be zero or negative.
    """
    x = np.asarray(x)
    n = len(x)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    deviations = x - x.mean()
    return np.sum(deviations ** 2) / (n - 1)


def covariance(x, y):
    """Return the sample covariance of ``x`` and ``y``.

    The sum of the products of paired deviations from each mean is divided
    by ``n - 1``, the same denominator ``variance`` uses.

    Args:
        x: Array-like of numbers. It is converted with ``np.asarray`` so
            plain sequences are accepted as well as NumPy arrays.
        y: Array-like of numbers with the same length as ``x``.

    Returns:
        The sample covariance of ``x`` and ``y``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, or if they hold
            fewer than two elements (the ``n - 1`` denominator would be
            zero or negative).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    if n != len(y):
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise ValueError('Error in input. X, y size mismatch')
    if n < 2:
        raise ValueError('covariance requires at least two data points')
    return np.sum((x - x.mean()) * (y - y.mean())) / (n - 1)


def read_data(path):
    """Load a CSV file and return its ``Year`` and ``Sales`` columns.

    Args:
        path: Passed straight through to ``pandas.read_csv``.

    Returns:
        A ``(years, sales)`` tuple holding the ``.values`` of the ``Year``
        and ``Sales`` columns, in that order.
    """
    table = pd.read_csv(path)
    years = table['Year'].values
    sales = table['Sales'].values
    return years, sales


def evaluate(a, b, x_train, y_train):
    """Print the line's coefficients and its fit quality on the given data.

    Predictions are computed as ``x_train * b + a``. Three reports are
    printed in order: the coefficients, the root-mean-squared error of the
    predictions, and the R-squared score ``1 - ss_res / ss_tot``.

    Args:
        a: Intercept of the line (printed as ``alpha``).
        b: Slope of the line (printed as ``beta``).
        x_train: Inputs the predictions are computed from.
        y_train: Observed values the predictions are compared against.

    Returns:
        None. The results are only printed.
    """
    print(f"alpha: {a}, beta: {b}")

    fitted = x_train * b + a
    # The residual sum of squares feeds both metrics, so compute it once.
    residual_sum_of_squares = np.sum((fitted - y_train) ** 2)

    rmse = (residual_sum_of_squares / len(x_train)) ** 0.5
    print(f"RMSE: {rmse}")

    total_sum_of_squares = np.sum((y_train - y_train.mean()) ** 2)
    r_score = 1 - (residual_sum_of_squares / total_sum_of_squares)

    print(f"R-Squared Score: {r_score}\n\n")


# Load the (Year, Sales) observations from a CSV file chosen by the user.
path = input('Enter path to CSV file: ')
X, Y = read_data(path)

# Closed-form least-squares fit of Y = alpha + beta * X:
#   beta  = cov(X, Y) / var(X)
#   alpha = mean(Y) - beta * mean(X)
beta = covariance(X, Y) / variance(X)
alpha = Y.mean() - (beta * X.mean())

evaluate(alpha, beta, X, Y)

# Interactive prediction loop: keep estimating until the user enters -1.
while True:
    raw_year = input('Input Year (-1 to end): ')
    try:
        year = int(raw_year)
    except ValueError:
        # A typo should not kill the session; report it and ask again.
        print(f"Invalid year {raw_year!r}; please enter an integer.")
        continue
    if year == -1:
        break

    estimate = alpha + beta * year
    print(f"Estimate for {year}: {estimate}")
    



Enter path to CSV file: ~/PythonProjects/data/company_sales/sales.csv
alpha: -8988.2, beta: 4.5
RMSE: 2.0149441679609885
R-Squared Score: 0.908886894075404


Input Year (-1 to end): 2012
Estimate for 2012: 65.79999999999927
Input Year (-1 to end): 2013
Estimate for 2013: 70.29999999999927
Input Year (-1 to end): 2015
Estimate for 2015: 79.29999999999927
