In [28]:
import numpy as np
np.random.seed(42)

In [29]:
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston

In [30]:
# Load the Boston housing data and pull out the feature matrix and targets.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell only runs on older scikit-learn releases.
dataset = load_boston()
x, y = dataset.data, dataset.target

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.30, random_state=23111
)

In [31]:
# Wrap the raw feature array in a DataFrame labelled with the dataset's
# feature names, then preview the first rows.
df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [32]:
# Fit an ordinary least-squares model on the training split
# (`fit` returns the estimator itself, so the call can be chained).
regr = LinearRegression().fit(x_train, y_train)

# Ground truth and predictions for the held-out test split.
y_true, y_pred = y_test, regr.predict(x_test)

#### Formeln: 

<font size=4>

  - $R^2 = 1 - \frac{\sum_{i=1}^n(y_i-\hat{y}_i)^2}{\sum_{i=1}^n(y_i-\bar{y})^2}$  
  
  - $MAPE = \frac{100}{n} \sum_{i=1}^n  \big|\frac{y_i - \hat{y}_i}{y_i}\big|$

</font>

In [33]:
def r_squared(y_true, y_pred):
    """Return the coefficient of determination (R^2) of a set of predictions.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    ``(y_i - y_hat_i)^2`` and SS_tot is the sum of squared deviations of
    ``y_true`` from its own mean.

    Parameters
    ----------
    y_true : array-like
        Observed target values (list, NumPy array, pandas Series, ...).
    y_pred : array-like
        Predicted target values; must hold as many values as ``y_true``.

    Returns
    -------
    float
        1.0 for a perfect fit, 0.0 for a model no better than predicting the
        mean, negative for a worse one. If ``y_true`` is constant, SS_tot is
        zero and the result is not finite (NumPy emits a RuntimeWarning).

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of values.
    """
    # Convert up front so all arithmetic is positional. Element access such as
    # ``series[i]`` is label-based on a pandas Series and fails (or silently
    # picks the wrong rows) when the index is not 0..n-1, e.g. after a
    # shuffled train/test split. Flattening also prevents an (n,) vs (n, 1)
    # pair from broadcasting to an (n, n) matrix.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if actual.size != predicted.size:
        raise ValueError(
            "y_true and y_pred must have the same number of values "
            "(got {} and {})".format(actual.size, predicted.size)
        )

    ss_res = np.sum((actual - predicted) ** 2)
    ss_tot = np.sum((actual - actual.mean()) ** 2)
    return 1.0 - (ss_res / ss_tot)

In [34]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    MAPE = (100 / n) * sum(|(y_i - y_hat_i) / y_i|)

    Parameters
    ----------
    y_true : array-like
        Observed target values (list, NumPy array, pandas Series, ...).
    y_pred : array-like
        Predicted target values; must hold as many values as ``y_true``.

    Returns
    -------
    float
        The average relative error as a percentage (e.g. 10.0 means 10 %).
        Entries of ``y_true`` equal to zero make the result non-finite
        (NumPy emits a RuntimeWarning); MAPE is undefined in that case.

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of values.
    """
    # Convert up front so all arithmetic is positional. Element access such as
    # ``series[i]`` is label-based on a pandas Series and fails when the index
    # is not 0..n-1. Flattening also prevents an (n,) vs (n, 1) pair from
    # broadcasting to an (n, n) matrix.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if actual.size != predicted.size:
        raise ValueError(
            "y_true and y_pred must have the same number of values "
            "(got {} and {})".format(actual.size, predicted.size)
        )

    relative_errors = np.abs((actual - predicted) / actual)
    return (100 / actual.size) * np.sum(relative_errors)

In [35]:
# Report both hand-written metrics on the held-out predictions.
for label, metric in (("R2: ", r_squared), ("MAPE: ", mape)):
    print(label, metric(y_true, y_pred))

R2:  0.7689168230348119
MAPE:  18.24427249550329


In [36]:
from sklearn.metrics import r2_score

# Cross-check the hand-written r_squared against scikit-learn's reference
# implementation; both should print the same value.
print("R2: ", r2_score(y_true=y_true, y_pred=y_pred))

R2:  0.7689168230348119
