In [72]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [73]:
import numpy as np
class Metrics:
    @staticmethod
    def  mean_absolute_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs(y_true-predictions)))
    @staticmethod
    def mean_squared_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean((y_true-predictions)**2))
    @staticmethod
    def root_mean_squared_error(y_test, y_pred):
        return float(np.sqrt(Metrics.mean_squared_error(y_test, y_pred)))
    @staticmethod
    def mean_absolute_percentage_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs((y_true-predictions)/y_true)))
    @staticmethod
    def r_2_score( y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        mean_value = np.mean(predictions)
        return float(Metrics.mean_absolute_error(y_true, y_pred)/ np.mean((y_true-mean_value)**2))
    

In [74]:
import numpy as np
class MyLinearRegression:
    def __init__(self, lr = 10-20, iters = 5000):
        self.lr = lr
        self.iters = iters

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y, w):
        return sum((y - np.dot(x, w)) ** 2) / x.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-20
        X = self.transform_(x)

        w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iters:
            loss = self.loss_func(X, y, w)
            w = w - self.lr * 2 * np.dot(X.T, np.dot(X, w) - y) / X.shape[0]
            dist = np.abs(loss - self.loss_func(X, y, w))
            iter += 1
            
            if(dist <= eps):
                break
        print(iter)
        self.w = w

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [75]:
data= pd.read_csv("../data/trip_duration_task_m.csv")
data.drop(["Unnamed: 0"], axis=1, inplace=True)


In [76]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199494 entries, 0 to 199493
Data columns (total 5 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   pickup_longitude   199494 non-null  float64
 1   pickup_latitude    199494 non-null  float64
 2   dropoff_longitude  199494 non-null  float64
 3   dropoff_latitude   199494 non-null  float64
 4   trip_duration      199494 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 7.6 MB


In [77]:
y = data['trip_duration']
X = data.drop(['trip_duration'], axis=1)

In [78]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [79]:
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_std = pd.DataFrame(standardScaler.transform(X_train), columns=X.columns)
X_test_std = pd.DataFrame(standardScaler.transform(X_test), columns=X.columns)
X_train_std.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,-8.277459e-15,5.941183e-14,-1.374316e-13,5.197026e-14
std,1.000003,1.000003,1.000003,1.000003
min,-10.80097,-15.83435,-18.0399,-13.95213
25%,-0.4694727,-0.4735441,-0.4790271,-0.4853304
50%,-0.2133119,0.1093516,-0.1705333,0.08211502
75%,0.1517462,0.6089575,0.2741389,0.548689
max,87.91257,59.80931,92.6312,51.94664


In [80]:
minMaxScaler = MinMaxScaler()
minMaxScaler.fit(X_train)
X_train_mms = pd.DataFrame(minMaxScaler.transform(X_train), columns=X.columns)
X_test_mms = pd.DataFrame(minMaxScaler.transform(X_test), columns=X.columns)
X_train_mms.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,0.109417,0.209328,0.163005,0.211721
std,0.01013,0.01322,0.009036,0.015175
min,0.0,0.0,0.0,0.0
25%,0.104661,0.203068,0.158676,0.204356
50%,0.107256,0.210774,0.161464,0.212967
75%,0.110955,0.217379,0.165482,0.220047
max,1.0,1.0,1.0,1.0


In [81]:
np.set_printoptions(suppress=True)

In [82]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_std, y_train)
y_pred_pf = myLinearRegression.predict(X_test_std)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

  w = w - self.lr * 2 * np.dot(X.T, np.dot(X, w) - y) / X.shape[0]


5001
MAE: nan
MSE: nan
RMSE: nan
MAPE: nan
R^2: nan


In [83]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_mms, y_train)
y_pred_pf = myLinearRegression.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

  w = w - self.lr * 2 * np.dot(X.T, np.dot(X, w) - y) / X.shape[0]


5001
MAE: nan
MSE: nan
RMSE: nan
MAPE: nan
R^2: nan


In [84]:
import numpy as np
class MyRidge:
    def __init__( self, learning_rate, iterations, alpha ) :
          
        self.learning_rate = learning_rate        
        self.iterations = iterations        
        self.alpha = alpha

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y, w):
        sum((y - np.dot(x, w)) ** 2) / x.shape[0] + self.alpha * sum(w**2) /x.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-4
        X = self.transform_(x)

        w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iterations:
            loss = self.loss_func(X, y, w)
            y_pred = self.predict(x)
            dW = ( - ( 2 * ( X.T ).dot( y - y_pred ) ) +               
               ( 2 * self.alpha * w ) ) / X.shape[0]
            w = w - self.lr * dW
            dist = np.abs(loss - self.loss_func(X, y, w))
            iter += 1
            
            if(dist <+ eps):
                break
        print(iter)
        self.w = w

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [85]:
ridge = MyRidge(0.01, 5000, 0.5)
model = ridge.fit(X_train_mms, y_train)
y_pred = model.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred):.7f}')

AttributeError: 'MyRidge' object has no attribute 'w'