In [24]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

In [25]:
# Load the raw Uber fares CSV from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/uber-fares-dataset/uber.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0.1,Unnamed: 0,key,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,24238194,2015-05-07 19:52:06.0000003,7.5,2015-05-07 19:52:06 UTC,-73.999817,40.738354,-73.999512,40.723217,1
1,27835199,2009-07-17 20:04:56.0000002,7.7,2009-07-17 20:04:56 UTC,-73.994355,40.728225,-73.994710,40.750325,1
2,44984355,2009-08-24 21:45:00.00000061,12.9,2009-08-24 21:45:00 UTC,-74.005043,40.740770,-73.962565,40.772647,1
3,25894730,2009-06-26 08:22:21.0000001,5.3,2009-06-26 08:22:21 UTC,-73.976124,40.790844,-73.965316,40.803349,3
4,17610152,2014-08-28 17:47:00.000000188,16.0,2014-08-28 17:47:00 UTC,-73.925023,40.744085,-73.973082,40.761247,5
...,...,...,...,...,...,...,...,...,...
199995,42598914,2012-10-28 10:49:00.00000053,3.0,2012-10-28 10:49:00 UTC,-73.987042,40.739367,-73.986525,40.740297,1
199996,16382965,2014-03-14 01:09:00.0000008,7.5,2014-03-14 01:09:00 UTC,-73.984722,40.736837,-74.006672,40.739620,1
199997,27804658,2009-06-29 00:42:00.00000078,30.9,2009-06-29 00:42:00 UTC,-73.986017,40.756487,-73.858957,40.692588,2
199998,20259894,2015-05-20 14:56:25.0000004,14.5,2015-05-20 14:56:25 UTC,-73.997124,40.725452,-73.983215,40.695415,1


- **key**: a unique identifier for each trip
- **fare_amount**: the cost of each trip in usd
- **pickup_datetime**: date and time when the meter was engaged
- **pickup_longitude**: the longitude where the meter was engaged
- **pickup_latitude**: the latitude where the meter was engaged
- **dropoff_longitude**: the longitude where the meter was disengaged
- **dropoff_latitude**: the latitude where the meter was disengaged
- **passenger_count**: the number of passengers in the vehicle (driver entered value)

In [26]:
# Show the dtype pandas inferred for each column.
df.dtypes

Unnamed: 0             int64
key                   object
fare_amount          float64
pickup_datetime       object
pickup_longitude     float64
pickup_latitude      float64
dropoff_longitude    float64
dropoff_latitude     float64
passenger_count        int64
dtype: object

In [27]:
# Discard the two identifier columns, then remove every row that still
# contains a missing value; the bare `df` renders the result.
df = df.drop(['Unnamed: 0', 'key'], axis='columns').dropna()
df

Unnamed: 0,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,7.5,2015-05-07 19:52:06 UTC,-73.999817,40.738354,-73.999512,40.723217,1
1,7.7,2009-07-17 20:04:56 UTC,-73.994355,40.728225,-73.994710,40.750325,1
2,12.9,2009-08-24 21:45:00 UTC,-74.005043,40.740770,-73.962565,40.772647,1
3,5.3,2009-06-26 08:22:21 UTC,-73.976124,40.790844,-73.965316,40.803349,3
4,16.0,2014-08-28 17:47:00 UTC,-73.925023,40.744085,-73.973082,40.761247,5
...,...,...,...,...,...,...,...
199995,3.0,2012-10-28 10:49:00 UTC,-73.987042,40.739367,-73.986525,40.740297,1
199996,7.5,2014-03-14 01:09:00 UTC,-73.984722,40.736837,-74.006672,40.739620,1
199997,30.9,2009-06-29 00:42:00 UTC,-73.986017,40.756487,-73.858957,40.692588,2
199998,14.5,2015-05-20 14:56:25 UTC,-73.997124,40.725452,-73.983215,40.695415,1


In [28]:
# Print a summary of the frame: columns, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 199999 entries, 0 to 199999
Data columns (total 7 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   fare_amount        199999 non-null  float64
 1   pickup_datetime    199999 non-null  object 
 2   pickup_longitude   199999 non-null  float64
 3   pickup_latitude    199999 non-null  float64
 4   dropoff_longitude  199999 non-null  float64
 5   dropoff_latitude   199999 non-null  float64
 6   passenger_count    199999 non-null  int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 12.2+ MB


### Pickup datetime

The most important part of the pickup datetime is the hour at which the trip started. It can be in the morning, afternoon, evening, or rush hour, and that influences the Uber fare.

In [29]:
# Parse the pickup timestamps from strings into timezone-aware (UTC) datetimes.
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'], utc=True)

In [30]:
# Hour of the pickup, taken from the parsed timestamp.
# NOTE(review): the timestamps were parsed with utc=True, so this looks like the
# UTC hour rather than the local hour — confirm the time-of-day buckets are meant for UTC.
df['hour'] = df['pickup_datetime'].dt.hour

def get_time_of_day(hour):
    """Map an hour of the day to a coarse time-of-day label.

    The rush-hour windows (7-9 and 16-18, both inclusive) take precedence over
    the other buckets. Any value that matches no bucket is labelled 'Evening'.
    """
    in_morning_rush = 7 <= hour <= 9
    in_evening_rush = 16 <= hour <= 18
    if in_morning_rush or in_evening_rush:
        return 'Rush Hour'

    if 0 <= hour < 6:
        return 'Early Morning'
    if 6 <= hour < 12:
        return 'Morning'
    if 12 <= hour < 18:
        return 'Afternoon'
    return 'Evening'

# Label every trip with its time-of-day bucket and the weekday name of the pickup.
df['time_of_day'] = df['hour'].apply(get_time_of_day)
df['day_of_week'] = df['pickup_datetime'].dt.day_name()

# One-hot encode both labels as float columns named after the bare category
# (no prefix), then drop the raw timestamp.
df = pd.get_dummies(
    df,
    columns=['time_of_day', 'day_of_week'],
    prefix='',
    prefix_sep='',
    dtype='float',
).drop(columns=['pickup_datetime'])
df

Unnamed: 0,fare_amount,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,hour,Afternoon,Early Morning,Evening,Morning,Rush Hour,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,7.5,-73.999817,40.738354,-73.999512,40.723217,1,19,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,7.7,-73.994355,40.728225,-73.994710,40.750325,1,20,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,12.9,-74.005043,40.740770,-73.962565,40.772647,1,21,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,5.3,-73.976124,40.790844,-73.965316,40.803349,3,8,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,16.0,-73.925023,40.744085,-73.973082,40.761247,5,17,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,3.0,-73.987042,40.739367,-73.986525,40.740297,1,10,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
199996,7.5,-73.984722,40.736837,-74.006672,40.739620,1,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
199997,30.9,-73.986017,40.756487,-73.858957,40.692588,2,0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
199998,14.5,-73.997124,40.725452,-73.983215,40.695415,1,14,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [31]:
# Fraction of the rows used for training; the remainder is held out for testing.
training_percentage = 0.80
examples_size = len(df)
training_size = int(examples_size * training_percentage)

# The target is read from column 0 below, so fail loudly if the layout changes.
assert df.columns[0] == 'fare_amount', "expected the target in the first column"

# Shuffle a copy of the frame with a fixed seed so that the split, and every
# metric computed from it, is reproducible after a kernel restart.
data = np.array(df)
np.random.default_rng(42).shuffle(data)

# Kept transposed, i.e. one row per column of `df` and one column per example.
training_data = data[:training_size].T
test_data = data[training_size:].T

# Row 0 is the target (returned as a column vector); the other rows are the features.
Y_training = training_data[0].reshape(-1, 1)
X_training = training_data[1:].T

Y_test = test_data[0].reshape(-1, 1)
X_test = test_data[1:].T

# Every example must land in exactly one of the two splits.
assert len(X_training) + len(X_test) == examples_size

In [32]:
# Shapes of the training and test feature matrices.
X_training.shape, X_test.shape

((159999, 18), (40000, 18))

In [33]:
# Shapes of the training and test target arrays.
Y_training.shape, Y_test.shape

((159999, 1), (40000, 1))

## Scaling Features

In [34]:
# One scaler for the features and one for the target.
X_scaler, Y_scaler = StandardScaler(), StandardScaler()

# Fit on the training split only, then apply the same transform to both splits.
X_scaler.fit(X_training)
X_training_scaled = X_scaler.transform(X_training)
X_test_scaled = X_scaler.transform(X_test)

# The target is scaled as a single column and flattened back to 1-D afterwards.
Y_scaler.fit(Y_training.reshape(-1, 1))
Y_training_scaled = Y_scaler.transform(Y_training.reshape(-1, 1)).ravel()
Y_test_scaled = Y_scaler.transform(Y_test.reshape(-1, 1)).ravel()

In [35]:
# Shapes of the scaled training features and the flattened scaled training target.
X_training_scaled.shape, Y_training_scaled.shape

((159999, 18), (159999,))

In [36]:
# Shapes of the scaled test features and the flattened scaled test target.
X_test_scaled.shape, Y_test_scaled.shape

((40000, 18), (40000,))

### Basic hyperparameters

In [37]:
# Basic configuration, trained on the scaled features and scaled target.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 1000,   # boosting rounds
    'learning_rate': 0.1,   # shrinkage applied to each round
    'max_depth': 10,        # depth limit of each tree
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split; the target is standardized, so the MSE is in scaled units.
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

KeyboardInterrupt: 

### Without scaling

In [None]:
# Same basic configuration, but trained on the raw (unscaled) features and target.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 1000,   # boosting rounds
    'learning_rate': 0.1,   # shrinkage applied to each round
    'max_depth': 10,        # depth limit of each tree
})
model.fit(X_training, Y_training)

# Score on the held-out split using the unscaled target.
prediction = model.predict(X_test)

mse = mean_squared_error(Y_test, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test, prediction)
print(f"R²: {r2}")

### New hyperparameters

In [None]:
# Regularized configuration: more rounds, smaller steps, shallower trees,
# row/feature subsampling, and explicit split / weight penalties.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 2000,
    'learning_rate': 0.05,
    'max_depth': 6,
    'subsample': 0.8,          # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 0.1,          # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Reg Alpha: 0.1 -> 10

In [None]:
# This run uses reg_alpha = 10.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 2000,
    'learning_rate': 0.05,
    'max_depth': 6,
    'subsample': 0.8,          # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Learning Rate: 0.05 -> 0.01

In [None]:
# This run uses learning_rate = 0.01.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 2000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.8,          # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## N Estimators: 2000 -> 5000

In [None]:
# This run uses n_estimators = 5000.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.8,          # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Subsample: 0.8 -> 0.85

In [None]:
# This run uses subsample = 0.85.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.85,         # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Subsample: 0.85 -> 0.9

In [None]:
# This run uses subsample = 0.9.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.9,          # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Subsample: 0.9 -> 0.95

In [None]:
# This run uses subsample = 0.95.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.8,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Colsample bytree: 0.8 -> 0.9

In [None]:
# This run uses colsample_bytree = 0.9.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Max Depth: 6 -> 5

In [None]:
# This run uses max_depth = 5.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.1,              # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Gamma: 0.1 -> 0.01

In [None]:
# This run uses gamma = 0.01.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.01,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.01,             # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## Learning Rate: 0.01 -> 0.009

In [None]:
# This run uses learning_rate = 0.009.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 5000,
    'learning_rate': 0.009,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.01,             # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## N Estimators: 5000 -> 6000

In [None]:
# This run uses n_estimators = 6000.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 6000,
    'learning_rate': 0.009,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.01,             # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

## N Estimators: 6000 -> 7000

In [None]:
# This run uses n_estimators = 7000.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 7000,
    'learning_rate': 0.009,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.01,             # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

In [None]:
# This run uses n_estimators = 8000.
model = XGBRegressor(**{
    'objective': 'reg:squarederror',
    'n_estimators': 8000,
    'learning_rate': 0.009,
    'max_depth': 5,
    'subsample': 0.95,         # fraction of rows sampled per tree
    'colsample_bytree': 0.9,   # fraction of features sampled per tree
    'gamma': 0.01,             # minimum loss reduction needed to split
    'reg_alpha': 10,           # L1 penalty
    'reg_lambda': 0.1,         # L2 penalty
    'random_state': 42,
})
model.fit(X_training_scaled, Y_training_scaled)

# Score on the held-out split (standardized target).
prediction = model.predict(X_test_scaled)

mse = mean_squared_error(Y_test_scaled, prediction)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y_test_scaled, prediction)
print(f"R²: {r2}")

In [39]:
class NeuralNetwork:
    """Fully connected regression network trained with full-batch gradient descent.

    Hidden layers use LeakyReLU, the output layer is linear, and the loss is the
    mean squared error; the weight gradients carry an additional L2 term.

    Data layout: ``X_train`` / ``X_test`` have shape ``(n_features, n_examples)``
    (one example per column); ``Y_train`` / ``Y_test`` hold one target per example.
    ``layer_dimensions`` lists the width of every layer, input layer first and
    output layer last. The per-layer lists (``W``, ``B``, ``Z``, ``A``, ...) are
    indexed by layer number; index 0 is the input layer and has no parameters.

    ``verbose`` (default ``True``, matching the previous behaviour) controls the
    progress / NaN trace printed during training.
    """

    def __init__(self, X_train, Y_train, X_test, Y_test, LR, iterations, layer_dimensions, verbose=True):
        self.X_train = X_train
        self.Y_train = Y_train
        self.X_test = X_test
        self.Y_test = Y_test
        self.LR = LR
        self.iterations = iterations
        self.layer_dimensions = layer_dimensions
        self.verbose = verbose
        # n = number of input features, m = number of training examples.
        self.n, self.m = self.X_train.shape

        self.W, self.B = self.init_params(layer_dimensions)

        n_layers = len(layer_dimensions)
        # Pre-activations, activations and gradients, one slot per layer.
        self.Z = [None] * n_layers
        self.Z_test = [None] * n_layers
        self.A = [X_train] + [None] * (n_layers - 1)
        self.A_test = [X_test] + [None] * (n_layers - 1)
        self.dZ = [None] * n_layers
        self.dW = [None] * n_layers
        self.dB = [None] * n_layers

    def init_params(self, layer_dimensions):
        """Return ``(W, B)`` lists with randomly initialised parameters per layer.

        Weights are drawn from a standard normal scaled by ``sqrt(2 / fan_in)``;
        biases are drawn from a standard normal. Slot 0 of both lists is ``None``.
        """
        W = [None]
        B = [None]

        for l in range(1, len(layer_dimensions)):
            fan_out = layer_dimensions[l]
            fan_in = layer_dimensions[l - 1]
            W.append(np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in))
            B.append(np.random.randn(fan_out, 1))

        return W, B

    def LeakyReLU(self, Z, alpha=0.01):
        """Element-wise LeakyReLU: ``Z`` where positive, ``alpha * Z`` elsewhere."""
        return np.where(Z > 0, Z, alpha * Z)

    def derivative_of_LeakyReLU(self, Z, alpha=0.01):
        """Element-wise slope of LeakyReLU: 1 where ``Z > 0``, ``alpha`` elsewhere."""
        return np.where(Z > 0, 1, alpha)

    @staticmethod
    def _flatten(values):
        """Return ``values`` as a 1-D array so targets and predictions align element-wise."""
        return np.asarray(values).reshape(-1)

    def mean_squared_error(self, Y_true, Y_pred):
        """Mean of the squared differences between targets and predictions.

        Both inputs are flattened first, so row vectors, column vectors and 1-D
        arrays can be mixed without accidental broadcasting.
        """
        return np.mean((self._flatten(Y_true) - self._flatten(Y_pred)) ** 2)

    def r2_score(self, Y_true, Y_pred):
        """Coefficient of determination: ``1 - RSS / TSS``.

        Inputs are flattened first; previously a column-vector target combined
        with a row-vector prediction broadcast to an (m, m) matrix.
        """
        y_true = self._flatten(Y_true)
        y_pred = self._flatten(Y_pred)
        residual_sum_of_squares = np.sum((y_true - y_pred) ** 2)
        total_sum_of_squares = np.sum((y_true - np.mean(y_true)) ** 2)
        return 1 - (residual_sum_of_squares / total_sum_of_squares)

    def derivative_of_mse(self, Y_true, prediction):
        """Gradient of the squared error with respect to the prediction.

        f(Ŷ) = (Y - Ŷ)²  =>  f'(Ŷ) = 2(Ŷ - Y). The 1/m factor of the mean is
        applied later, when the parameter gradients are formed.
        """
        prediction = np.asarray(prediction)
        Y_true = np.asarray(Y_true)
        # Align the target with the prediction's layout when they hold the same
        # number of values, so (m, 1) vs (1, m) does not broadcast to (m, m).
        if Y_true.size == prediction.size:
            Y_true = Y_true.reshape(prediction.shape)
        return 2 * (prediction - Y_true)

    def _is_last_layer(self, layer):
        """True when ``layer`` is the index of the output layer."""
        return layer == len(self.layer_dimensions) - 1

    def forward_propagation(self):
        """Run the training data through the network, filling ``self.Z`` and ``self.A``."""
        for layer in range(1, len(self.layer_dimensions)):
            self.Z[layer] = self.W[layer].dot(self.A[layer - 1]) + self.B[layer]

            if self._is_last_layer(layer):
                # Linear output for regression.
                self.A[layer] = self.Z[layer]
            else:
                self.A[layer] = self.LeakyReLU(self.Z[layer])

            if self.verbose:
                # Debug trace: reports whether the activations contain NaN.
                has_nan = np.isnan(self.A[layer]).any()
                print(f'layer {layer}: ', has_nan)

    def backward_propagation(self, lambda_reg=0.01):
        """Back-propagate the loss, filling ``self.dZ``, ``self.dW`` and ``self.dB``.

        ``lambda_reg`` is the L2 coefficient applied to the weight gradients.
        """
        for layer in range(len(self.layer_dimensions) - 1, 0, -1):
            if self._is_last_layer(layer):
                self.dZ[layer] = self.derivative_of_mse(self.Y_train, self.A[layer])
            else:
                self.dZ[layer] = self.W[layer + 1].T.dot(self.dZ[layer + 1]) * self.derivative_of_LeakyReLU(self.Z[layer])

            self.dW[layer] = 1 / self.m * self.dZ[layer].dot(self.A[layer - 1].T) + (lambda_reg / self.m) * self.W[layer]
            # One bias gradient per unit: sum over the examples (columns) only.
            # Summing over every element would give all units of the layer the
            # same scalar gradient.
            self.dB[layer] = 1 / self.m * np.sum(self.dZ[layer], axis=1, keepdims=True)

    def update_params(self):
        """Take one gradient-descent step on every layer's weights and biases."""
        for layer in range(1, len(self.layer_dimensions)):
            self.W[layer] = self.W[layer] - self.LR * self.dW[layer]
            self.B[layer] = self.B[layer] - self.LR * self.dB[layer]

    def predict(self):
        """Run the test data through the network and return the output-layer activations."""
        for layer in range(1, len(self.layer_dimensions)):
            self.Z_test[layer] = self.W[layer].dot(self.A_test[layer - 1]) + self.B[layer]

            if self._is_last_layer(layer):
                self.A_test[layer] = self.Z_test[layer]
            else:
                self.A_test[layer] = self.LeakyReLU(self.Z_test[layer])

        return self.A_test[-1]

    def gradient_descent(self):
        """Train the network and return ``(train_losses, test_losses)``.

        The loop runs for ``iterations + 1`` passes (0 through ``iterations``
        inclusive). The training metrics of a pass come from the forward pass
        made before that pass's parameter update; the test metrics are computed
        after the update.
        """
        train = []
        test = []

        for i in range(self.iterations + 1):
            if self.verbose:
                print("Iteration: ", i)
            self.forward_propagation()
            self.backward_propagation()
            self.update_params()

            training_loss = self.mean_squared_error(self.Y_train, self.A[-1])
            training_r2 = self.r2_score(self.Y_train, self.A[-1])
            test_predictions = self.predict()
            test_loss = self.mean_squared_error(self.Y_test, test_predictions)
            test_r2 = self.r2_score(self.Y_test, test_predictions)

            train.append(training_loss)
            test.append(test_loss)

            if self.verbose and i % 10 == 0:
                print('- training_loss: ', training_loss)
                print('- training_r2: ', training_r2)
                print('- test_loss: ', test_loss)
                print('- test_r2: ', test_r2)

        return train, test

# Training configuration: learning rate, iteration count, and layer widths
# (18 inputs, five hidden layers of 18 units, one output).
LR = 0.1
ITERATIONS = 100
layer_dimensions = [18] * 6 + [1]

# The network expects one example per column, hence the transposes.
nn = NeuralNetwork(
    X_train=X_training_scaled.T,
    Y_train=Y_training_scaled.T,
    X_test=X_test_scaled.T,
    Y_test=Y_test_scaled.T,
    LR=LR,
    iterations=ITERATIONS,
    layer_dimensions=layer_dimensions,
)
train, test = nn.gradient_descent()

Iteration:  0
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
- training_loss:  1.5281260099465377
- training_r2:  -0.5281260099465377
- test_loss:  9.699960684725378
- test_r2:  -8.060421231891684
Iteration:  1
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  2
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  3
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  4
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  5
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  6
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
layer 6:  False
Iteration:  7
layer 1:  False
layer 2:  False
layer 3:  False
layer 4:  False
layer 5:  False
