#### Listing 8.1 - Define simple linear regression with one variable

In [None]:
#Listing 8.1 - function for simple linear regression with one variable
def slr(x, y):
    """Fit a one-variable linear regression ``y = a0 + a1*x`` by least squares.

    The slope is the ratio of the centred cross-product of ``x`` and ``y`` to
    the centred sum of squares of ``x``; the intercept makes the line pass
    through the point of means. The means, slope and intercept are printed
    as a side effect (slope and intercept rounded to two decimals for
    display only).

    Parameters
    ----------
    x, y : array-like
        Observed predictor and target values of equal length. Lists, tuples
        and NumPy arrays are all accepted.

    Returns
    -------
    tuple
        ``(a0, a1)``: the unrounded intercept and slope, in that order.

    Raises
    ------
    ValueError
        If ``x`` has no variation (all values equal, a single point, or
        empty input), in which case the slope is undefined.
    """
    # Convert once so plain Python sequences support element-wise arithmetic.
    x = np.asarray(x)
    y = np.asarray(y)

    mean_x = np.mean(x)
    print('The average x is:', mean_x)
    mean_y = np.mean(y)
    print('The average y is:', mean_y)

    centred_x = x - mean_x
    dot_product_1 = np.dot(centred_x, y - mean_y)
    dot_product_2 = np.dot(centred_x, centred_x)

    # A zero denominator would otherwise yield a silent NaN/inf slope.
    # The ndim guard keeps the check to the scalar (1-D input) case.
    if np.ndim(dot_product_2) == 0 and dot_product_2 == 0:
        raise ValueError('x must contain at least two distinct values '
                         'to estimate a slope')

    a1 = dot_product_1/dot_product_2
    a0 = mean_y-a1*mean_x
    print('The slope is:', np.round(a1,2))
    print('The intercept is:', np.round(a0,2))
    return a0, a1

#### Listing 8.2

In [None]:
#Listing 8.2 - use of function for simple linear regression with one variable
import numpy as np
import matplotlib.pyplot as plt

# Three observed (x, y) points to fit.
x_obs = np.array([1, 2, 3])
y_obs = np.array([2, 4, 3])

# slr returns the intercept first, then the slope.
intercept, slope = slr(x_obs, y_obs)

# Evaluate the fitted line on a dense grid wider than the observed range.
x = np.linspace(-2, 4, 400)
y = intercept + slope * x

# Draw the observations and the fitted line on a single set of axes.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x_obs, y_obs, c='red', marker='x')
ax.plot(x, y, label=f'y = {slope}x + {intercept}')

ax.set_xlabel('x', fontsize="15")
ax.set_ylabel('y', fontsize="15")
ax.legend(fontsize="15")

ax.grid(True)
fig.savefig('line.eps', format='eps', dpi=600)

#### Listing 8.3 - Linear regression with multiple variables

In [None]:
#Listing 8.3 - Linear Regression with multiple variables
import sympy as sp

# Symbolic dimensions: X is N x (d+1), a is (d+1) x 1 and y is N x 1.
# Presumably the extra column of X carries the intercept term.
N, d = sp.symbols('N d')
n_params = d + 1
X = sp.MatrixSymbol('X', N, n_params)
a = sp.MatrixSymbol('a', n_params, 1)
y = sp.MatrixSymbol('y', N, 1)

# Q is the squared length of the residual vector y - X*a.
residual = y - X*a
Q = residual.T * residual

# Matrix derivative of Q with respect to the parameter vector a
# (left as the last expression so a notebook displays it).
sp.diff(Q, a)

#### Listing 8.4

In [None]:
#Listing 8.4
import sympy as sp
# Declare the symbols as non-commutative so products keep their written
# order, as matrix products must. XT presumably stands for the transpose
# of X, treated here as an independent symbol.
a, X, XT,y = sp.symbols("a, X, XT, y", commutative=False)
# Expression to be set to zero and solved for a.
expr2 = 2*XT*X*a - 2*XT*y
# sp.solve treats a bare expression as "expression == 0"; the result is the
# last value of the cell, so a notebook displays it.
sp.solve(expr2, a)

#### Listing 8.5 - Define linear regression with multiple variables

In [None]:
#Listing 8.5
def lrmvar(X,y):
    """Estimate linear-regression parameters by ordinary least squares.

    Solves the normal equations ``(X^T X) a = X^T y``, which is
    mathematically the closed form ``a = (X^T X)^{-1} X^T y``.

    Parameters
    ----------
    X : array-like, shape (N, p)
        Design matrix with one row per observation. Include a column of
        ones if an intercept is wanted.
    y : array-like, shape (N,) or (N, 1)
        Observed target values.

    Returns
    -------
    numpy.ndarray
        Estimated parameters, shape ``(p,)`` for 1-D ``y`` or ``(p, 1)``
        for column-vector ``y``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular (for example, linearly dependent columns).
    """
    # Accept nested lists as well as arrays (plain lists have no .T).
    X = np.asarray(X)
    y = np.asarray(y)
    # Solve the linear system directly instead of forming the explicit
    # inverse: same solution, but cheaper and numerically more accurate.
    a = np.linalg.solve(X.T @ X, X.T @ y)
    return a

#### Listing 8.6

In [None]:
#Listing 8.6
import numpy as np

# Design matrix: a column of ones next to the x values 1, 2, 3.
X = np.column_stack((np.ones(3, dtype=int), np.arange(1, 4)))
# Observed targets, one per row of X.
y = np.array([2, 4, 3])

# Estimate the parameter vector from the design matrix and targets.
a = lrmvar(X, y)

print('The X matrix is:\n', X)
print('The y array is:', y)
print('The estimated parameters are:', a)

#### Listing 8.7 - Sklearn LinearRegression

In [None]:
#Listing 8.7 - Sklearn LinearRegression
import numpy as np
from sklearn.linear_model import LinearRegression

# Targets, and the single feature as a 1-D array to begin with.
y_obs = np.array([2, 4, 3])
x_obs = np.array([1, 2, 3])
print("x_obs as input:\n", x_obs)

# Turn the feature into a single-column 2-D array before fitting.
x_obs = x_obs.reshape(-1, 1)
print("x_obs as converted:\n", x_obs)

# Fit the model, then evaluate it on the training inputs.
reg = LinearRegression()
reg.fit(x_obs, y_obs)
estimated_y = reg.predict(x_obs)

print('\nThe fitted model is saved in an instance of ', reg)
print('The estimated coefficient (gradient) is: ', reg.coef_)
print('The estimated intercept is: ', np.round(reg.intercept_, 2))
print('The estimated target values of X as given \n\tby the \'line\' of best fit are: ', estimated_y)
print('The coefficient of determination is: ', reg.score(x_obs, y_obs))
print('The predicted target value for x=2.5 is: ', reg.predict([[2.5]]))


#### Listing 8.8

In [None]:
#Listing 8.8 - Sklearn LinearRegression
import numpy as np
from sklearn.linear_model import LinearRegression

# Three observations with two features each, and their targets.
y = np.array([1, 2, -2])
X = np.array([[1, 2], [2, 1], [2, 2]])
print('X as input:\n', X)

# Fit the model, then evaluate it on the training inputs.
reg = LinearRegression()
reg.fit(X, y)
estimated_y = reg.predict(X)

print('The estimated coefficients are: ', reg.coef_)
print('The estimated intercept is: ', np.round(reg.intercept_, 2))
print('The estimated target values of X are: ', estimated_y)
print('The coefficient of determination is: ', reg.score(X, y))
print('The predicted target value for x=[1.5,1] is: ', reg.predict([[1.5, 1]]))

#### Listing 8.9 - Sklearn SGDRegressor

In [None]:
#Listing 8.9 - Sklearn SGDRegressor
import numpy as np  
from sklearn import preprocessing
from sklearn.linear_model import SGDRegressor 

# One raw feature as a single-column array, plus the targets.
raw_X = np.array([295, 305, 300]).reshape(-1, 1)
y = np.array([10, 30, 20])

# Rescale the feature with min-max scaling before gradient descent.
scaler = preprocessing.MinMaxScaler()
scaledX = scaler.fit(raw_X).transform(raw_X)
print('The scaled raw_X are:\n ', scaledX)

# Starting parameters and the predictions/error they produce by hand.
init_intercept = 5
init_coef = 1
pred_y = init_coef*scaledX + init_intercept
print('The predicted values of y are:\n', pred_y)
# Error measure used here: half the sum of squared residuals.
residuals = y - pred_y.ravel()
init_error = np.sum(residuals**2)/2
print('The initial error is: ', init_error)

# A single epoch (max_iter=1) with a constant learning rate and alpha=0,
# started from the same initial parameters as the hand calculation above.
sgd_reg = SGDRegressor(max_iter=1, alpha=0, learning_rate='constant')
sgd_reg.fit(scaledX, y, intercept_init=init_intercept, coef_init=init_coef)
est_y = sgd_reg.predict(scaledX)
updated_error = np.round(np.sum((y - est_y)**2)/2, 3)

print('The estimated coefficient is: ', np.round(sgd_reg.coef_, 3))
print('The estimated intercept is: ', np.round(sgd_reg.intercept_, 3))
print('The estimated values of y are:\n', np.round(est_y, 3))
print('The updated error is: ', updated_error)

#### Listing 8.10

In [None]:
#Listing 8.10 - Sklearn SGDRegressor
import numpy as np  
from sklearn import preprocessing
from sklearn.linear_model import SGDRegressor 
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# One raw feature as a single-column array, plus the targets.
raw_X = np.array([295, 305, 300]).reshape(-1, 1)
y = np.array([10, 30, 20])

scaler = preprocessing.MinMaxScaler()
scaledX = scaler.fit(raw_X).transform(raw_X)

# Training settings: at most n_iterations single-epoch fits, stopping early
# once the training MSE drops below tol.
init_intercept = 5
init_coef = 1
n_iterations = 1000
tol = 0.1
mses = []

# warm_start=True makes every later fit() continue from the previous
# solution, so each call below advances training by one epoch (max_iter=1).
sgd_reg = SGDRegressor(max_iter=1, alpha=0, learning_rate='constant', warm_start=True)

# Iteration 1: start from the explicit initial parameters.
sgd_reg.fit(scaledX, y, intercept_init=init_intercept, coef_init=init_coef)
est_y = sgd_reg.predict(scaledX)
mse = mean_squared_error(y, est_y)
mses.append(mse)

# Iterations 2..n_iterations: keep refining and record the training MSE.
for i in range(2, n_iterations + 1):
    est_y = sgd_reg.fit(scaledX, y).predict(scaledX)
    mse = mean_squared_error(y, est_y)
    mses.append(mse)
    if not mse < tol:
        continue
    print(f"Training stopped at iteration {i} due to convergence")
    break

print('The estimated intercept is: ', np.round(sgd_reg.intercept_, 2))
print('The estimated coefficient is: ', np.round(sgd_reg.coef_, 2))
print('The coefficient of determination is: ', sgd_reg.score(scaledX, y))

# Learning curve: training MSE against the 1-based iteration number.
iteration_numbers = np.arange(1, len(mses) + 1)
plt.plot(iteration_numbers, mses, marker='o')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.grid(True)
plt.savefig('LearningCurve_fit.eps', dpi=600)

#### Listing 8.11

In [None]:
#Listing 8.11 - Sklearn SGDRegressor
import numpy as np  
from sklearn import preprocessing
from sklearn.linear_model import SGDRegressor 
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# One raw feature as a single-column array, plus the targets.
raw_X = np.array([295,305,300])
raw_X = raw_X[:,np.newaxis]
y = np.array([10,30,20])

scaler = preprocessing.MinMaxScaler()
scaledX = scaler.fit_transform(raw_X)

sgd_reg = SGDRegressor(alpha=0, learning_rate='constant', warm_start=True)
mses = []
n_iterations = 1000
tol = 0.1

# Iterations are numbered from 1 so that the convergence message agrees with
# the x-axis of the learning curve plotted below.
for iteration in range(1, n_iterations + 1):
    # One pass over the data, updating the model one sample at a time.
    for x, target in zip(scaledX, y):
        # reshape(1, -1) always yields a single row (one sample), whatever
        # the number of features; partial_fit expects 2-D X and 1-D y.
        sgd_reg.partial_fit(x.reshape(1, -1), np.atleast_1d(target))
    est_y = sgd_reg.predict(scaledX)
    mse = mean_squared_error(y, est_y)
    mses.append(mse)
    if mse < tol:
        print(f"Training stopped at iteration {iteration} due to convergence")
        break

print('The estimated intercept is: ', np.round(sgd_reg.intercept_,2))
print('The estimated coefficient is: ', np.round(sgd_reg.coef_,2))
print('The coefficient of determination is: ', sgd_reg.score(scaledX, y))

# Learning curve: training MSE against the 1-based iteration number.
plt.plot(range(1, len(mses)+1), mses, marker='o')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.grid(True)
plt.savefig('LearningCurve_fit1.eps', format='eps', dpi=600)
