1. [Linear Regression Using Normal Equation](#Linear-Regression-Using-Normal-Equation)
2. [Detect Overfitting or Underfitting](#Detect-Overfitting-or-Underfitting)
3. [Calculate Mean Absolute Error (MAE)](#calculate-mean-absolute-error-mae)

In [1]:
import numpy as np

### Linear Regression Using Normal Equation

In [2]:
# Test case from DeepML: column 0 is the bias column of ones, column 1 the feature.

X = [[1, 1], [1, 2], [1, 3]]
y = [1, 2, 3]

# Convert to NumPy arrays so they can be sliced and used in array arithmetic.

X = np.array(X)
y = np.array(y)

print(X.shape, y.shape)
print(y)
print(X)

(3, 2) (3,)
[1 2 3]
[[1 1]
 [1 2]
 [1 3]]


In [3]:
# Keep only column 1 of X (dropping column 0, the bias column) so the problem
# becomes simple linear regression on a single 1-D feature array.

X = X[:, 1]

''' 
For calculating slope and intercept we need to implement following equation:

    m = (Σ(X - X̄)(Y - Ȳ)) / (Σ(X - X̄)²)
    c = Ȳ - mX̄

Each Σ runs over every data point, so both sums can be accumulated with a loop.

'''

class LinearRegression:
    """Simple (one-feature) linear regression fitted by least squares.

    Both attributes are ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        # Slope of the fitted line.
        self.coef_ = None
        # Value of the fitted line at X = 0.
        self.intercept_ = None

    def fit(self, X, y):
        """Fit ``y = intercept_ + coef_ * X`` and store both parameters.

        Uses the closed form
        ``coef_ = sum((X - mean(X)) * (y - mean(y))) / sum((X - mean(X)) ** 2)``
        and ``intercept_ = mean(y) - coef_ * mean(X)``.

        Args:
            X: Array-like of feature values; flattened to 1-D.
            y: Array-like of target values, one per entry of ``X``.

        Returns:
            The tuple ``(intercept_, coef_)``.

        Raises:
            ValueError: If the inputs are empty, differ in length, or every
                value of ``X`` is identical (the slope is then undefined).
        """
        X = np.asarray(X, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()

        if X.size == 0:
            raise ValueError("X and y must contain at least one sample")
        if X.size != y.size:
            raise ValueError(
                f"X and y must have the same length, got {X.size} and {y.size}"
            )

        # Compute each mean once and reuse it for the slope and the intercept.
        X_mean = X.mean()
        y_mean = y.mean()
        x_dev = X - X_mean

        den = np.dot(x_dev, x_dev)
        if den == 0:
            # Without any spread in X the division below would yield nan/inf.
            raise ValueError("X has zero variance; the slope is undefined")

        self.coef_ = float(np.dot(x_dev, y - y_mean) / den)
        self.intercept_ = float(y_mean - self.coef_ * X_mean)

        return self.intercept_, self.coef_
    

# Fit the model on the current X and y; fit() returns (intercept_, coef_),
# which is converted to a list for display.
lr = LinearRegression()
list(lr.fit(X, y))

[0.0, 1.0]

Normal Equation (Multiple Linear Regression)

The coefficients are computed as $\theta = (X^\top X)^{-1} X^\top y$.


In [4]:
# Now keep the bias column (column 0) so every coefficient is solved for at once.

X = [[1, 1], [1, 2], [1, 3]]
y = [1, 2, 3]


def linear_regression_normal_equation(X: list[list[float]], y: list[float]) -> np.ndarray:
    """Solve a least-squares linear regression with the normal equation.

    Finds ``theta`` such that ``(X^T X) theta = X^T y``.

    Args:
        X: Design matrix with one row per sample and one column per
            coefficient. The same matrix wrapped in one extra leading axis of
            length 1 (for example a 1-tuple holding it) is also accepted.
        y: Target values, one per row of ``X``.

    Returns:
        NumPy array of coefficients rounded to 4 decimal places.

    Raises:
        ValueError: If ``X`` is not two-dimensional after unwrapping.
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    # Unwrap only a leading length-1 axis. A blanket squeeze() would also
    # collapse a legitimate single-column design matrix into a 1-D vector.
    if X.ndim == 3 and X.shape[0] == 1:
        X = X[0]
    if X.ndim != 2:
        raise ValueError(f"X must be a 2-D design matrix, got shape {X.shape}")

    # Solve the linear system directly rather than forming an explicit inverse.
    theta = np.linalg.solve(X.T @ X, X.T @ y)

    # Adding 0.0 turns a rounded -0.0 into a plain 0.0.
    return np.round(theta, 4) + 0.0

# Run the normal-equation solver on the X and y defined at the top of this cell.
linear_regression_normal_equation(X, y)

array([-0.,  1.])

-----

### Detect Overfitting or Underfitting

In [5]:
# Example scores: accuracy on the training set is well above accuracy on the test set.
training_accuracy = 0.95 
test_accuracy = 0.65

def isit_fit(X,y):
    """Print a fit diagnosis for a pair of accuracy scores.

    X is called with the training accuracy and y with the test accuracy.
    Prints 'Underfitting' when both scores are below 0.7, otherwise
    'Overfitting' when X exceeds y by more than 0.2, otherwise 'Nothing'.
    Nothing is returned.
    """
    train_is_low = X < 0.7
    test_is_low = y < 0.7

    # The underfitting check comes first, so it wins when both rules would match.
    if train_is_low and test_is_low:
        verdict = 'Underfitting'
    elif X - y > 0.2:
        verdict = 'Overfitting'
    else:
        verdict = 'Nothing'

    print(verdict)

# Diagnose the example scores defined at the top of this cell.
isit_fit(training_accuracy, test_accuracy)

Overfitting


---

### Calculate Mean Absolute Error (MAE)

In [6]:
import numpy as np

# 1-D example for the MAE calculation: true values and the matching predictions.
y_true = np.array([3, -0.5, 2, 7])
y_pred = np.array([2.5, 0.0, 2, 8])


print(y_true)
print(y_pred)



[ 3.  -0.5  2.   7. ]
[2.5 0.  2.  8. ]


![](https://miro.medium.com/v2/resize:fit:1400/format:webp/1*0j3BZ0lPRX86aTjLWDnhwQ.png)

In [7]:
# Accumulate the absolute error over every (true, predicted) pair.
# The accumulator is deliberately not named `sum`, so the built-in sum()
# remains usable in every later cell.
abs_error_total = 0
count = 0
for actual, predicted in zip(y_true, y_pred):
    abs_error_total += abs(actual - predicted)
    count += 1

# Both divisors give the same mean: count ends up equal to len(y_true).
print(abs_error_total/len(y_true))
print(abs_error_total, count, abs_error_total/count)

0.5
2.0 4 0.5


In [8]:
# 2-D example: two arrays with 3 rows and 2 columns each, compared element by element below.

X = np.array([[0.5, 1], [-1, 1], [7, -6]])
y = np.array([[0, 2], [-1, 2], [8, -5]])


print(X)
print(y)

[[ 0.5  1. ]
 [-1.   1. ]
 [ 7.  -6. ]]
[[ 0  2]
 [-1  2]
 [ 8 -5]]


In [9]:
# Target value: the six absolute differences between the 2-D X and y,
# written out by hand and summed. The loop in the next cell should reproduce it.

print(abs(0.5 - 0) + abs(1-2) + abs(-1 + 1) + abs(2 - 1) + abs(7-8) + abs(-6 + 5))

4.5


In [10]:
# Walk X cell by cell, adding the absolute difference to the matching cell
# of y and counting how many cells were visited.
count = 0
total = 0

for i, x_row in enumerate(X):
    for j, x_val in enumerate(x_row):
        total += abs(x_val - y[i][j])
        count += 1

print(total)
print(count)

# Mean absolute error: accumulated error divided by the number of cells.
total/count

4.5
6


0.75

In [11]:
# Logic that works for both 1-D and 2-D arrays

def get_mae(X, y):
    """Return the mean absolute error between two equally shaped inputs.

    Args:
        X: Array-like of numbers with any number of dimensions.
        y: Array-like of numbers with the same shape as ``X``.

    Returns:
        The mean of ``|X - y|`` over all elements, rounded to 3 decimal places.

    Raises:
        ValueError: If the shapes differ or the inputs contain no elements.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    # Reject mismatched shapes explicitly: NumPy broadcasting would otherwise
    # stretch one input and average over values that were never supplied.
    if X.shape != y.shape:
        raise ValueError(
            f"X and y must have the same shape, got {X.shape} and {y.shape}"
        )
    if X.size == 0:
        raise ValueError("cannot compute the mean absolute error of empty input")

    return round(np.abs(X - y).mean(), 3)

# Exercise both input ranks: the 2-D pair (X, y) and the 1-D pair (y_true, y_pred).
print(get_mae(X, y))
print(get_mae(y_true, y_pred))

0.75
0.5


----