In [28]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

import tensorflow as tf
from tensorflow import keras
from keras import metrics


from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [2]:
# calculate mse

def calculate_mse(y_pred, y_true):

    mse = np.array([])

    y_pred_im = np.reshape(y_pred, (y_true.shape[0], 94640))
    y_true_im = np.reshape(y_true,  (y_true.shape[0], 94640))

    for i in range(y_pred_im.shape[0]):

        image_pred = y_pred_im[i]
        image_true = y_true_im[i]
        diff = np.subtract(image_true, image_pred)
        se = np.square(diff)
        mse_im = np.average(se)

        mse = np.append(mse, mse_im)
    
    mse_overall = np.average(mse)
    return mse_overall

In [3]:
# calculate mae 

def calculate_mae(y_pred, y_true):

    mae = np.array([])

    y_pred_im = np.reshape(y_pred, (y_true.shape[0], 94640))
    y_true_im = np.reshape(y_true,  (y_true.shape[0], 94640))

    for i in range(y_pred_im.shape[0]):

        image_pred = y_pred_im[i]
        image_true = y_true_im[i]

        mae_for_image = np.average(np.abs(np.subtract(image_true, image_pred)))

    mae = np.append(mae, mae_for_image)

    mae_overall = np.average(mae)

    return mae_overall

In [4]:
# calculate r2

def calculate_r2(y_pred, y_true):

    r2_overall = r2_score(y_true, y_pred)
    
    return r2_overall

In [6]:
x_train_all = np.load('207_data/saved_data/x_train_3500_balanced.npy')
y_train_all = np.load('207_data/saved_data/y_train_3500_balanced.npy')

rows_id = random.sample(range(0, 3500), 350)
all_rows = np.arange(0, 3500)

train_rows_id = np.delete(all_rows, rows_id)

x_train = x_train_all[train_rows_id, :, :]
y_train_as_im = y_train_all[train_rows_id, :, :]

x_train = np.reshape(x_train/255, (298116000, 3))
y_train = y_train_as_im.flatten()

In [7]:
x_test = np.load('207_data/saved_data/x_test_1500_balanced.npy')
x_test = np.reshape(x_test/255, (141960000, 3))
y_test_as_im = np.load('207_data/saved_data/y_test_1500_balanced.npy')
y_test = y_test_as_im.flatten()

In [9]:
x_train_seq_all = np.load('207_data/saved_data/x_train_sequential_pixel.npy')
y_train_seq_all = np.load('207_data/saved_data/y_train_sequential.npy').flatten()

x_test_seq = np.load('207_data/saved_data/x_test_sequential_pixel.npy')
y_test_seq = np.load('207_data/saved_data/y_test_sequential.npy').flatten()

In [10]:
seq_train_rows = random.sample(range(0, 85176000), 8517600)

x_train_seq = x_train_seq_all[seq_train_rows, :]
y_train_seq = y_train_seq_all[seq_train_rows]

In [11]:
print(x_train_seq.shape, y_train_seq.shape)

(8517600, 4, 3) (8517600,)


In [12]:
%%time
grad_mod = HistGradientBoostingRegressor(learning_rate = 1, max_depth = 50,
                                         min_samples_leaf = 20, max_iter = 50, verbose = 0).fit(x_train, y_train)

Wall time: 28min 17s


In [13]:
y_pred = grad_mod.predict(x_train)

mse = calculate_mse(y_pred, y_train_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred, y_train_as_im)
r2 = calculate_r2(y_pred, y_train)

print("Training MSE", mse)
print("Training RMSE: ", rmse)
print("Training MAE: ", mae)
print("Training R2: ", r2)

Training MSE 137.24708672898618
Training RMSE:  11.715250177823185
Training MAE:  2.455432161247555
Training R2:  0.8825065303500381


In [14]:
y_pred_ts = grad_mod.predict(x_test)

mse = calculate_mse(y_pred_ts, y_test_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred_ts, y_test_as_im)
r2 = calculate_r2(y_pred_ts, y_test)

print("Testing MSE", mse)
print("Testing RMSE: ", rmse)
print("Testing MAE: ", mae)
print("Testing R2: ", r2)

Testing MSE 107.588253183262
Testing RMSE:  10.372475749948128
Testing MAE:  3.645828556415575
Testing R2:  0.9032085582141669


In [15]:
%%time
tree_mod = DecisionTreeRegressor(max_depth = 50, min_samples_leaf = 1, random_state = 0).fit(x_train, y_train)

Wall time: 17min 42s


In [16]:
y_pred = tree_mod.predict(x_train)

mse = calculate_mse(y_pred, y_train_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred, y_train_as_im)
r2 = calculate_r2(y_pred, y_train)

print("Training MSE", mse)
print("Training RMSE: ", rmse)
print("Training MAE: ", mae)
print("Training R2: ", r2)

Training MSE 135.9842729056451
Training RMSE:  11.66122947658801
Training MAE:  2.5232251129537704
Training R2:  0.8835875906563992


In [17]:
y_pred_ts = tree_mod.predict(x_test)

mse = calculate_mse(y_pred_ts, y_test_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred_ts, y_test_as_im)
r2 = calculate_r2(y_pred_ts, y_test)

print("Testing MSE", mse)
print("Testing RMSE: ", rmse)
print("Testing MAE: ", mae)
print("Testing R2: ", r2)

Testing MSE 106.99605463849102
Testing RMSE:  10.343889724783951
Testing MAE:  3.6332071157727848
Testing R2:  0.9037413278175013


In [18]:
tf.keras.backend.clear_session()
tf.random.set_seed(0)
np.random.seed(0)

rnn_model = tf.keras.Sequential()

    # Input layer
rnn_model.add(tf.keras.layers.SimpleRNN(
        units=150,                      # Number of neurons in the hidden layer
        input_shape=(4,3),             # Input dimension
        activation='tanh'             # Activation function (e.g., 'relu')
    ))
rnn_model.add(tf.keras.layers.Dense(
    units = 10,
    activation = 'relu'
))

    # Output layer
rnn_model.add(tf.keras.layers.Dense(
    units=1,                      # Output dimension (1 for regression)
    activation='linear'           # Linear activation for regression
))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

rnn_model.compile(loss='mse', optimizer=optimizer)
rnn_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 150)               23100     
_________________________________________________________________
dense (Dense)                (None, 10)                1510      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 11        
Total params: 24,621
Trainable params: 24,621
Non-trainable params: 0
_________________________________________________________________


In [19]:
%%time

history = rnn_model.fit(
  x = x_train_seq_all,
  y = y_train_seq_all,
  validation_split=0.1,  # use 10% of the examples as a validation set
  batch_size = 500,
  epochs = 3
)

history = pd.DataFrame(history.history)
display(history)

Epoch 1/3
Epoch 2/3
Epoch 3/3


Unnamed: 0,loss,val_loss
0,123.900177,92.534157
1,115.7304,83.582397
2,114.334999,83.890816


Wall time: 52min 53s


In [21]:
y_pred = rnn_model.predict(x_train_seq)

mse = mean_squared_error(y_pred, y_train_seq)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_pred, y_train_seq)
r2 = calculate_r2(y_pred, y_train_seq)

print("Training MSE", mse)
print("Training RMSE: ", rmse)
print("Training MAE: ", mae)
print("Training R2: ", r2)

Training MSE 110.45064288594631
Training RMSE:  10.509550080091264
Training MAE:  3.1421323874828047
Training R2:  0.9054602575591243


In [22]:
y_pred_ts = rnn_model.predict(x_test_seq)

mse = mean_squared_error(y_pred_ts, y_test_seq)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_pred_ts, y_test_seq)
r2 = calculate_r2(y_pred_ts, y_test_seq)

print("Testing MSE", mse)
print("Testing RMSE: ", rmse)
print("Testing MAE: ", mae)
print("Testing R2: ", r2)

Testing MSE 74.9488119085823
Testing RMSE:  8.657298187574591
Testing MAE:  2.7100338626821787
Testing R2:  0.9331857678748119


In [29]:
%%time
bagging_mod = BaggingRegressor(n_estimators = 3, max_samples = 0.5, verbose = 1).fit(x_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Wall time: 33min


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 33.0min finished


In [30]:
y_pred = bagging_mod.predict(x_train)

mse = calculate_mse(y_pred, y_train_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred, y_train_as_im)
r2 = calculate_r2(y_pred, y_train)

print("Training MSE", mse)
print("Training RMSE: ", rmse)
print("Training MAE: ", mae)
print("Training R2: ", r2)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.0min finished


Training MSE 136.20576273439048
Training RMSE:  11.670722459830431
Training MAE:  2.5233275519830656
Training R2:  0.883397979284007


In [31]:
y_pred_ts = bagging_mod.predict(x_test)

mse = calculate_mse(y_pred_ts, y_test_as_im)
rmse = np.sqrt(mse)
mae = calculate_mae(y_pred_ts, y_test_as_im)
r2 = calculate_r2(y_pred_ts, y_test)

print("Testing MSE", mse)
print("Testing RMSE: ", rmse)
print("Testing MAE: ", mae)
print("Testing R2: ", r2)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.2min finished


Testing MSE 107.00480244320266
Testing RMSE:  10.344312565037981
Testing MAE:  3.632696275472138
Testing R2:  0.903733457881839
