In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import dash
import requests
%matplotlib inline

In [2]:
from sklearn import datasets

In [3]:
dir(datasets)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__getattr__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_arff_parser',
 '_base',
 '_california_housing',
 '_covtype',
 '_kddcup99',
 '_lfw',
 '_olivetti_faces',
 '_openml',
 '_rcv1',
 '_samples_generator',
 '_species_distributions',
 '_svmlight_format_fast',
 '_svmlight_format_io',
 '_twenty_newsgroups',
 'clear_data_home',
 'dump_svmlight_file',
 'fetch_20newsgroups',
 'fetch_20newsgroups_vectorized',
 'fetch_california_housing',
 'fetch_covtype',
 'fetch_kddcup99',
 'fetch_lfw_pairs',
 'fetch_lfw_people',
 'fetch_olivetti_faces',
 'fetch_openml',
 'fetch_rcv1',
 'fetch_species_distributions',
 'get_data_home',
 'load_breast_cancer',
 'load_diabetes',
 'load_digits',
 'load_files',
 'load_iris',
 'load_linnerud',
 'load_sample_image',
 'load_sample_images',
 'load_svmlight_file',
 'load_svmlight_files',
 'load_wine',
 'make_biclusters',
 'make_blobs',
 'make_checkerboard',
 'make_circl

In [4]:
# Synthetic single-feature regression problem. random_state pins the generated
# samples and noise so that re-running the notebook reproduces the same data.
X, y = datasets.make_regression(n_samples = 1000, n_features = 1, n_targets = 1, noise = 0.5, random_state = 42)

In [5]:
from sklearn.model_selection import train_test_split as tts

# Hold out 20% of the samples for evaluation. random_state fixes the shuffle so
# the same rows land in the train/test sets on every run.
X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.2, random_state = 42)

In [6]:
from sklearn.linear_model import LinearRegression as LR

In [7]:
model = LR()

In [8]:
model.fit(X_train, y_train)

In [9]:
# One row per test sample: the input value, the fitted model's prediction for
# it, and the true target, all flattened to 1-D columns.
df = pd.DataFrame(
    dict(
        X_test=X_test.flatten(),
        y_pred=model.predict(X_test).flatten(),
        y_test=y_test.flatten(),
    )
)
df.head()

Unnamed: 0,X_test,y_pred,y_test
0,0.319574,5.825591,5.365329
1,-0.385696,-7.029768,-7.518992
2,-0.63296,-11.53679,-11.372111
3,0.545318,9.940354,9.782161
4,-1.175753,-21.430582,-21.543614


In [10]:
class MeraLR:
    """Single-feature linear regression fitted by ordinary least squares.

    The model is ``y = m * x + b``. After ``fit``:

    * ``m`` is the slope: the centered cross-product of x and y divided by the
      centered sum of squares of x.
    * ``b`` is the intercept: ``mean(y) - m * mean(x)``.

    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.m = None  # slope, set by fit()
        self.b = None  # intercept, set by fit()

    def fit(self, X_train, y_train):
        """Estimate the slope and intercept from training data.

        Parameters
        ----------
        X_train : array-like
            Feature values. Flattened to 1-D, so ``(n,)`` and ``(n, 1)`` inputs
            are both accepted.
        y_train : array-like
            Target values. Flattened to 1-D.

        Returns
        -------
        MeraLR
            ``self``, so that calls can be chained.

        Raises
        ------
        ValueError
            If the inputs are empty, contain a different number of values, or
            every feature value is identical (the slope is then undefined).
        """
        X = np.asarray(X_train, dtype=float).ravel()
        y = np.asarray(y_train, dtype=float).ravel()

        if X.size == 0:
            raise ValueError('Cannot fit on empty input')
        if X.size != y.size:
            raise ValueError('X_train and y_train must contain the same number of values')

        X_mean = X.mean()
        y_mean = y.mean()
        X_centered = X - X_mean

        # Sum of squared deviations of x; zero means x is constant and the
        # least-squares slope would be a 0/0 division.
        denominator = X_centered @ X_centered
        if denominator == 0:
            raise ValueError('Cannot fit a slope when all X_train values are identical')

        self.m = (X_centered @ (y - y_mean)) / denominator
        self.b = y_mean - (self.m) * X_mean
        return self

    def predict(self, X_test):
        """Apply the fitted line to new feature values.

        Parameters
        ----------
        X_test : array-like
            Feature values to predict for.

        Returns
        -------
        numpy.ndarray
            ``X_test * m + b``, with the same shape as ``X_test``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.m is None or self.b is None:
            raise RuntimeError('MeraLR is not fitted yet; call fit() first')
        return np.asarray(X_test) * self.m + self.b

In [11]:
my_model = MeraLR()

In [12]:
my_model.fit(X_train, y_train)

In [13]:
my_model.m

18.22756166515334

In [14]:
model.coef_

array([18.22756167])

In [15]:
model.intercept_

0.0005298703429479357

In [16]:
my_model.b

0.0005298703429482687

# Metrics

In [17]:
from sklearn.metrics import mean_absolute_error as MAE

In [18]:
MAE(model.predict(X_test), y_test)

0.4056803241377623

In [19]:
MAE(my_model.predict(X_test), y_test)

0.4056803241377617

In [20]:
from sklearn.metrics import mean_squared_error as MSE

In [21]:
MSE(model.predict(X_test), y_test), MSE(my_model.predict(X_test), y_test)

(0.2517675764888911, 0.2517675764888904)

In [22]:
def my_mean_absolute_error(y_actual, y_pred):
    """Return the mean absolute error between two collections of values.

    Both inputs are converted to float arrays and flattened before comparison,
    so a column vector of shape ``(n, 1)`` and a 1-D array of shape ``(n,)``
    are compared element by element instead of being broadcast against each
    other into an ``(n, n)`` matrix.

    Parameters
    ----------
    y_actual : array-like
        First set of values.
    y_pred : array-like
        Second set of values; must contain as many values as ``y_actual``.

    Returns
    -------
    float
        ``sum(|y_actual - y_pred|) / n`` where ``n`` is the number of values.

    Raises
    ------
    ValueError
        If the inputs contain a different number of values, or are empty.
    """
    actual = np.asarray(y_actual, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.shape[0] != predicted.shape[0]:
        raise ValueError('Shapes are not equal')
    if predicted.shape[0] == 0:
        raise ValueError('Cannot compute an error on empty input')

    return np.sum(np.abs(actual - predicted)) / predicted.shape[0]

In [23]:
my_mean_absolute_error(model.predict(X_test), y_test)

0.4056803241377623

In [24]:
def my_mean_squared_error(y_actual,y_pred):
    """Return the mean squared error between two collections of values.

    Both inputs are converted to float arrays and flattened before comparison,
    so a column vector of shape ``(n, 1)`` and a 1-D array of shape ``(n,)``
    are compared element by element rather than broadcast into an ``(n, n)``
    matrix (which would also turn the ``@`` below into a matrix product).

    Parameters
    ----------
    y_actual : array-like
        First set of values.
    y_pred : array-like
        Second set of values; must contain as many values as ``y_actual``.

    Returns
    -------
    float
        ``sum((y_actual - y_pred) ** 2) / n`` where ``n`` is the number of values.

    Raises
    ------
    ValueError
        If the inputs contain a different number of values, or are empty.
    """
    actual = np.asarray(y_actual, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.shape[0] != predicted.shape[0]:
        raise ValueError('Shapes are not equal')

    n = predicted.shape[0]
    if n == 0:
        raise ValueError('Cannot compute an error on empty input')

    residuals = actual - predicted
    # Dot product of the residual vector with itself = sum of squared residuals.
    return (residuals @ residuals) / n

In [25]:
my_mean_squared_error(model.predict(X_test), y_test), MSE(model.predict(X_test), y_test)

(0.2517675764888911, 0.2517675764888911)

In [26]:
# Pass only the first 100 predictions against the full set of test targets;
# any ValueError raised by the length mismatch is printed rather than left
# as a traceback.
truncated_predictions = model.predict(X_test)[:100]
try:
    my_mean_absolute_error(truncated_predictions, y_test)
except ValueError as mismatch_error:
    print(mismatch_error)

Shapes are not equal
