In [197]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score

In [198]:
import numpy as np
class Metrics:
    @staticmethod
    def  mean_absolute_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs(y_true-predictions)))
    @staticmethod
    def mean_squared_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean((y_true-predictions)**2))
    @staticmethod
    def root_mean_squared_error(y_test, y_pred):
        return float(np.sqrt(Metrics.mean_squared_error(y_test, y_pred)))
    @staticmethod
    def mean_absolute_percentage_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs((y_true-predictions)/y_true)))
    @staticmethod
    def r_2_score( y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        mean_value = np.mean(y_test)
        return float(1 - np.sum((y_true - predictions)**2)/ np.sum((y_true - mean_value)**2))
    

In [199]:
import numpy as np
class MyLinearRegression:
    def __init__( self, learning_rate = 0.001, iterations =500 ) :
        self.learning_rate = learning_rate        
        self.iterations = iterations        

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y):
        return sum((y - np.dot(x, self.w)) ** 2  )/x.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-20
        X = self.transform_(x)

        self.w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iterations:
            loss = self.loss_func(X, y)
            y_pred = self.predict(x)
            dW = ( - ( 2 * ( X.T ).dot( y - y_pred ) ) ) / X.shape[0]
            self.w = self.w - self.learning_rate * dW
            dist = np.abs(loss - self.loss_func(X, y))
            iter += 1
            
            if(dist <= eps):
                break
        print(iter)
        return self
        

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [200]:
data= pd.read_csv("../data/trip_duration_task_m.csv")
data.drop(["Unnamed: 0"], axis=1, inplace=True)


In [201]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199494 entries, 0 to 199493
Data columns (total 5 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   pickup_longitude   199494 non-null  float64
 1   pickup_latitude    199494 non-null  float64
 2   dropoff_longitude  199494 non-null  float64
 3   dropoff_latitude   199494 non-null  float64
 4   trip_duration      199494 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 7.6 MB


In [202]:
y = data['trip_duration']
X = data.drop(['trip_duration'], axis=1)

In [203]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [204]:
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_std = pd.DataFrame(standardScaler.transform(X_train), columns=X.columns)
X_test_std = pd.DataFrame(standardScaler.transform(X_test), columns=X.columns)
X_train_std.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,-1.484125e-13,3.281083e-14,-1.223501e-13,6.230687e-14
std,1.000003,1.000003,1.000003,1.000003
min,-11.0326,-15.95875,-18.46559,-14.03303
25%,-0.4793743,-0.4755681,-0.49014,-0.4912182
50%,-0.2183098,0.1119779,-0.174554,0.08261896
75%,0.1551767,0.6154925,0.2818964,0.5521752
max,14.13905,20.01555,19.74854,17.33183


In [205]:
minMaxScaler = MinMaxScaler()
minMaxScaler.fit(X_train)
X_train_mms = pd.DataFrame(minMaxScaler.transform(X_train), columns=X.columns)
X_test_mms = pd.DataFrame(minMaxScaler.transform(X_test), columns=X.columns)
X_train_mms.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,0.438295,0.443615,0.483214,0.447412
std,0.039727,0.027798,0.026168,0.031883
min,0.0,0.0,0.0,0.0
25%,0.41925,0.430396,0.470387,0.431751
50%,0.429622,0.446728,0.478646,0.450047
75%,0.444459,0.460724,0.49059,0.465017
max,1.0,1.0,1.0,1.0


In [206]:
np.set_printoptions(suppress=True)

In [207]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_std, y_train)
y_pred_pf = myLinearRegression.predict(X_test_std)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

501
MAE: 572.1673892881752
MSE: 12680734.266096745
RMSE: 3561.0018626921196
MAPE: 0.9900343255479964
R^2: 0.0000448


In [208]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_mms, y_train)
y_pred_pf = myLinearRegression.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

501
MAE: 605.4899449424622
MSE: 12654692.575091526
RMSE: 3557.343471621981
MAPE: 1.2368453899949687
R^2: 0.0000478


In [209]:
import numpy as np
class MyRidge:
    def __init__( self, learning_rate, iterations, alpha ) :
        self.learning_rate = learning_rate        
        self.iterations = iterations        
        self.alpha = alpha

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y):
        return sum(y - np.dot(x, self.w)) ** 2  + self.alpha * (np.dot(self.w, self.w))/X.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-20
        X = self.transform_(x)

        self.w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iterations:
            loss = self.loss_func(X, y)
            y_pred = self.predict(x)
            dW = ( - ( 2 * ( X.T ).dot( y - y_pred ) ) +               
               ( 2 * self.alpha * self.w ) ) / X.shape[0]
            self.w = self.w - self.learning_rate * dW
            dist = np.abs(loss - self.loss_func(X, y))
            iter += 1
            
            if(dist <= eps):
                break
        print(iter)
        return self
        

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [210]:
ridge = MyRidge(0.001, 1000, 1)
model = ridge.fit(X_train_std, y_train)
y_pred = ridge.predict(X_test_std)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred):.7f}')

1001
MAE: 566.0066954176308
MSE: 12560123.592313694
RMSE: 3544.0264660853895
MAPE: 1.3264896484737243
R^2: 0.0000447


In [212]:
ridge = MyRidge(0.001, 1000, 1)
model = ridge.fit(X_train_mms, y_train)
y_pred = ridge.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred):.7f}')
print(f'R^2: {r2_score(y_test, y_pred):.7f}')

1001
MAE: 639.9604852426394
MSE: 12621389.02691843
RMSE: 3552.6594301900695
MAPE: 1.4780404632534825
R^2: 0.0000507
R^2: -0.0000791
