In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_regression

In [2]:
# Synthetic regression problem: 100 samples, 2 informative features, noisy target.
# random_state pins the generator so a fresh Restart & Run All reproduces the
# same data (and therefore the same fitted coefficients and R^2 score).
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [3]:
# Wrap the feature matrix in a DataFrame with named columns for readable display.
X = pd.DataFrame(X,columns=['col1','col2'])

In [4]:
# Preview the feature frame (pandas truncates the display of long frames).
X

Unnamed: 0,col1,col2
0,0.126146,-0.857182
1,1.406731,0.190607
2,1.487030,-0.619915
3,-0.635435,0.229802
4,-0.171364,0.130595
...,...,...
95,-0.398137,-0.714613
96,-1.430591,1.170308
97,-0.703867,0.065812
98,-1.826715,-1.161328


In [5]:
# Inspect the target values generated by make_regression.
y

array([ 2.53966595e+01,  3.22596494e+00,  9.56085678e+01,  3.65150461e+01,
        6.65032133e+00,  1.30963013e+02,  8.63160827e+01, -6.85169681e+01,
        7.34409389e+00, -8.35041521e+01,  1.17744455e+02, -2.86798741e+01,
        4.29590773e+00, -1.98567567e+02, -1.82994943e+02,  6.82769173e+01,
        4.69616096e+00,  9.75271848e+01,  2.97132174e+01, -7.71349270e+01,
       -3.50592009e+01,  1.15418473e+02,  8.64217518e+01, -5.47846680e-02,
        1.89921236e+02,  1.07533094e+02, -9.31415652e+01,  7.50830964e+01,
       -9.44141924e+00,  1.94671949e+02, -1.64056726e+02,  3.44877595e+01,
        1.64009803e+01, -1.60906441e+02,  5.04142830e+01,  7.09268253e+01,
        2.64980468e+01,  1.03231269e+01,  5.45207825e+01, -8.26867491e+00,
       -6.83219350e+01,  2.08393585e+01, -5.08480361e+00,  1.43360084e+02,
       -8.86334566e+01,  4.98912284e+00,  1.71399145e+02, -2.80661637e+01,
       -2.71826185e+02, -1.41168896e+01, -6.90402593e+01,  1.71512411e+02,
        3.32154512e+00,  

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)
# Report the shape of every partition as the cell's output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((80, 2), (20, 2), (80,), (20,))

![Image](linear_regression.png)

In [7]:
class MultiDimLinearRegression:
    """Ordinary least-squares linear regression for any number of features.

    The model is ``y ≈ intercept + X @ coef``. After ``fit`` the learned
    parameters are available as:

    * ``intercept`` - the bias term (first element of the solved beta vector)
    * ``coef``      - one weight per feature column (remaining elements)

    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.intercept = None
        self.coef = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Feature matrix (NumPy array, DataFrame or nested list).
        y_train : array-like with n_samples rows
            Target values.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its row count does not
            match ``y_train``.
        """
        # Convert explicitly so DataFrames / lists behave exactly like arrays.
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        if features.ndim != 2:
            raise ValueError(
                f"X_train must be 2-D (n_samples, n_features); got ndim={features.ndim}"
            )
        if features.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set")
        if features.shape[0] != targets.shape[0]:
            raise ValueError(
                "X_train and y_train have different numbers of rows: "
                f"{features.shape[0]} != {targets.shape[0]}"
            )

        # 1. Prepend a column of ones so the first solved weight is the intercept.
        design = np.column_stack([np.ones(features.shape[0]), features])

        # 2. Solve the least-squares problem for beta. lstsq avoids explicitly
        #    inverting X^T X, so collinear (rank-deficient) features yield the
        #    minimum-norm solution instead of a singular-matrix failure.
        beta, *_ = np.linalg.lstsq(design, targets, rcond=None)

        # 3. Split beta into intercept and per-feature coefficients.
        self.intercept = beta[0]
        self.coef = beta[1:]

    def predict(self, X_test):
        """Return predictions ``X_test @ coef + intercept``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef is None or self.intercept is None:
            raise RuntimeError("predict() called before fit()")
        return np.dot(np.asarray(X_test, dtype=float), self.coef) + self.intercept

In [8]:
# Create an unfitted instance of the from-scratch regressor.
lr = MultiDimLinearRegression()

In [9]:
# Estimate the intercept and coefficients from the training split.
lr.fit(X_train, y_train)

In [10]:
# Inspect the fitted parameters as (intercept, per-feature coefficients).
lr.intercept, lr.coef

(np.float64(5.319271372461177), array([73.89939884, 23.67857714]))

In [11]:
# Predict targets for the held-out rows and display them.
y_pred = lr.predict(X_test)
y_pred

array([  43.97670043, -115.1538863 ,  -40.56563366,  146.93860993,
         46.38353335,    8.00150781,  100.531183  ,  115.29628904,
        -24.92176828, -207.812492  ,  150.14966705,   42.13126019,
       -140.60077376,  -90.98926735,   -5.65544823,   42.8874757 ,
        -36.1976151 ,  128.75227627,   23.9337311 ,  -27.79971407])

In [12]:
from sklearn.metrics import r2_score
# R^2 (coefficient of determination) of the predictions on the held-out split.
r2_score(y_test, y_pred)

0.8284807884630648

### With More Than Two Features

In [13]:
from sklearn.datasets import load_diabetes
# Load the scikit-learn diabetes dataset as a (features, target) pair of arrays.
# NOTE: this rebinds X and y, replacing the synthetic data from the earlier cells.
X,y = load_diabetes(return_X_y=True)
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]], shape=(442, 10))

In [14]:
# Inspect the diabetes target values.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [17]:
# Hold out 20% of the diabetes rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)
# Report the shape of every partition as the cell's output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((353, 10), (89, 10), (353,), (89,))

In [18]:
# Refit a fresh model on the diabetes training split, predict the held-out
# rows, and report R^2 as the cell's output.
lr = MultiDimLinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
r2_score(y_test, y_pred)

0.4399338661568969