#### CSC 180 Intelligent Systems (Fall 2021)

#### Dr. Haiquan Chen, Dept of Computer Science

#### California State University, Sacramento

## Lab 6: Evaluating Neural Networks 


# Helpful Functions for Tensorflow (little gems)

The following functions will be used with TensorFlow to help preprocess the data.  They allow you to build the feature vector for a neural network. 

* Predictors/Inputs 
    * Fill any missing inputs with the median for that column.  Use **missing_median**.
    * Encode textual/categorical values with **encode_text_dummy**.
    * Encode numeric values with **encode_numeric_zscore**.
* Output
    * Discard rows with missing outputs.
    * Encode textual/categorical values with **encode_text_index**.
    * Do not encode output numeric values.
* Produce final feature vectors (x) and expected output (y) with **to_xy**.

In [1]:
from collections.abc import Sequence
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shutil
import os


# Encode text values to dummy variables(i.e. [1,0,0],[0,1,0],[0,0,1] for red,green,blue)
def encode_text_dummy(df, name):
    """Replace a categorical column with one indicator column per category.

    Every distinct value ``v`` in ``df[name]`` produces a new column called
    ``"<name>-<v>"``.  The original column is dropped afterwards.  ``df`` is
    modified in place and nothing is returned.
    """
    indicators = pd.get_dummies(df[name])
    for category, indicator in indicators.items():
        df["{}-{}".format(name, category)] = indicator
    df.drop(columns=name, inplace=True)


# Encode text values to zero-based integer indexes (i.e. [0],[1],[2] for three classes).
def encode_text_index(df, name):
    """Replace the labels in ``df[name]`` with integer codes, in place.

    The codes come from scikit-learn's ``LabelEncoder``.  The encoder's
    ``classes_`` array is returned so callers can translate a code back to
    the label it stands for.
    """
    encoder = preprocessing.LabelEncoder()
    codes = encoder.fit_transform(df[name])
    df[name] = codes
    return encoder.classes_


# Encode a numeric column as zscores
def encode_numeric_zscore(df, name, mean=None, sd=None):
    """Standardize ``df[name]`` in place to z-scores: ``(value - mean) / sd``.

    Parameters:
        df: DataFrame that is modified in place.
        name: Column to standardize.
        mean: Center to subtract; defaults to the column mean.
        sd: Spread to divide by; defaults to the column's sample standard
            deviation (``Series.std()``).

    A zero or undefined (NaN) spread, as produced by a constant or single-row
    column, is treated as 1 so the column becomes all zeros instead of NaN.
    """
    if mean is None:
        mean = df[name].mean()

    if sd is None:
        sd = df[name].std()

    # Dividing by a zero/NaN spread would fill the column with NaN (or inf),
    # which then poisons any model trained on the feature vector.
    if pd.isna(sd) or sd == 0:
        sd = 1.0

    df[name] = (df[name] - mean) / sd


# Convert all missing values in the specified column to the median
def missing_median(df, name):
    """Fill the missing entries of ``df[name]`` with that column's median, in place."""
    column = df[name]
    df[name] = column.fillna(column.median())


# Convert all missing values in the specified column to the default
def missing_default(df, name, default_value):
    """Fill the missing entries of ``df[name]`` with ``default_value``, in place."""
    filled = df[name].fillna(default_value)
    df[name] = filled


# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    """Split a DataFrame into the ``(x, y)`` float32 arrays used for training.

    Parameters:
        df: DataFrame holding the feature columns and the target column.
        target: Name of the target column; every other column is a feature.

    Returns:
        ``(x, y)`` where ``x`` is the 2-D float32 feature matrix and ``y`` is
        either
        * a 2-D one-hot float32 matrix (one column per distinct value) when
          the target has an integer dtype -- classification, or
        * a 1-D float32 vector otherwise -- regression.
    """
    feature_columns = [column for column in df.columns if column != target]
    x = df[feature_columns].values.astype(np.float32)

    # Any integer width counts as class labels, not just int32/int64, so
    # down-cast or unsigned label columns are still one-hot encoded.
    if pd.api.types.is_integer_dtype(df[target].dtype):
        # Classification
        y = pd.get_dummies(df[target]).values.astype(np.float32)
    else:
        # Regression
        y = df[target].values.astype(np.float32)
    return x, y

# Nicely formatted time string
def hms_string(sec_elapsed):
    """Format a duration in seconds as ``H:MM:SS.ss``.

    Parameters:
        sec_elapsed: Elapsed time in seconds (int or float); intended for
            non-negative durations.

    Returns:
        A string such as ``"1:01:01.50"``.

    The duration is rounded to hundredths of a second *before* it is split
    into fields, so a value like 59.999 rolls over to ``"0:01:00.00"``
    instead of printing an impossible ``"0:00:60.00"``.
    """
    total_centiseconds = int(round(sec_elapsed * 100))
    hours, remainder = divmod(total_centiseconds, 60 * 60 * 100)
    minutes, centiseconds = divmod(remainder, 60 * 100)
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, centiseconds / 100)


# Regression chart.
def chart_regression(pred, y, sort=True):
    """Plot expected against predicted regression outputs as two line series.

    Parameters:
        pred: Predicted values; any array-like.  It is flattened, so the
            ``(n, 1)`` array returned by ``model.predict`` is accepted.
        y: Expected values; any array-like, flattened the same way.
        sort: When true, rows are ordered by the expected value so the
            'expected' line rises monotonically and deviations are easy to see.

    Shows the figure with ``plt.show()`` and returns nothing.
    """
    # Flatten both inputs: a DataFrame column must be 1-D, and the original
    # flattened only ``y``.
    frame = pd.DataFrame({
        'pred': np.asarray(pred).ravel(),
        'y': np.asarray(y).ravel(),
    })
    if sort:
        frame.sort_values(by=['y'], inplace=True)
    plt.plot(frame['y'].tolist(), label='expected')
    plt.plot(frame['pred'].tolist(), label='prediction')
    plt.ylabel('output')
    plt.legend()
    plt.show()

# Drop every row whose value in the named column is sd or more standard deviations from the mean.
def remove_outliers(df, name, sd):
    """Remove outlier rows from ``df`` in place.

    A row is an outlier when the absolute distance between ``df[name]`` and
    the column mean is at least ``sd`` times the column's standard deviation.
    Nothing is returned.
    """
    column = df[name]
    distance = (column - column.mean()).abs()
    cutoff = sd * column.std()
    outlier_labels = df.index[distance >= cutoff]
    df.drop(index=outlier_labels, inplace=True)


# Encode a column to a range between normalized_low and normalized_high.
def encode_numeric_range(df, name, normalized_low=-1, normalized_high=1,
                         data_low=None, data_high=None):
    """Linearly rescale ``df[name]`` in place to a target range.

    Parameters:
        df: DataFrame that is modified in place.
        name: Column to rescale.
        normalized_low, normalized_high: Bounds of the output range.
        data_low, data_high: Input values mapped onto ``normalized_low`` and
            ``normalized_high``.  Each one defaults independently to the
            column's minimum / maximum (missing values are ignored).

    When ``data_high`` equals ``data_low`` the rescaling divides by zero.
    """
    # Resolve the two bounds separately: supplying only one of them must not
    # leave the other as None or overwrite the one that was given.
    if data_low is None:
        data_low = df[name].min()
    if data_high is None:
        data_high = df[name].max()

    df[name] = ((df[name] - data_low) / (data_high - data_low)) \
               * (normalized_high - normalized_low) + normalized_low


# Training with a Test Set with Early Stopping

**Overfitting** occurs when a neural network is trained to the point that it begins to memorize rather than generalize.  

![Training vs Validation Error for Overfitting](https://raw.githubusercontent.com/jeffheaton/t81_558_deep_learning/master/images/class_3_training_val.png "Training vs Validation Error for Overfitting")


### Split data into training and test using train_test_split

In [19]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split

from sklearn import metrics

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping

# Directory holding the data files, relative to the working directory.
path = "./data/"
    
filename = os.path.join(path,"iris.csv")    
# The strings 'NA' and '?' in the CSV are parsed as missing values.
df = pd.read_csv(filename,na_values=['NA','?'])

# Integer-encode the species column in place; `species` keeps the encoder's class labels.
species = encode_text_index(df,"species")

# x: float32 feature matrix; y: float32 target array built by to_xy.
x,y = to_xy(df,"species")

# Split into train/test
# 25% of the rows are held out; random_state fixes the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

model = Sequential()

# Two ReLU hidden layers (10 and 5 units); the input width is the number of feature columns.
model.add(Dense(10, input_dim=x.shape[1], activation='relu'))
model.add(Dense(5,activation='relu'))
# One softmax output unit per column of y.
model.add(Dense(y.shape[1],activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Watch val_loss; an epoch counts as an improvement only if it gains at least min_delta.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=2, verbose=2, mode='auto')  

# patience: number of epochs with no improvement after which training will be stopped

# The test set is checked during training to monitor progress for early stopping but is never used for gradient descent (model training)
# epochs=1000 is only an upper bound; the EarlyStopping callback can end training sooner.

model.fit(x_train, y_train, validation_data=(x_test,y_test), callbacks=[monitor], verbose=2, epochs=1000)  


Epoch 1/1000
4/4 - 0s - loss: 2.0644 - val_loss: 1.9591
Epoch 2/1000
4/4 - 0s - loss: 1.8332 - val_loss: 1.7179
Epoch 3/1000
4/4 - 0s - loss: 1.6111 - val_loss: 1.4978
Epoch 4/1000
4/4 - 0s - loss: 1.4143 - val_loss: 1.2985
Epoch 5/1000
4/4 - 0s - loss: 1.2385 - val_loss: 1.1274
Epoch 6/1000
4/4 - 0s - loss: 1.0782 - val_loss: 0.9886
Epoch 7/1000
4/4 - 0s - loss: 0.9564 - val_loss: 0.8837
Epoch 8/1000
4/4 - 0s - loss: 0.8787 - val_loss: 0.8123
Epoch 9/1000
4/4 - 0s - loss: 0.8279 - val_loss: 0.7712
Epoch 10/1000
4/4 - 0s - loss: 0.8032 - val_loss: 0.7494
Epoch 11/1000
4/4 - 0s - loss: 0.7865 - val_loss: 0.7357
Epoch 12/1000
4/4 - 0s - loss: 0.7766 - val_loss: 0.7269
Epoch 13/1000
4/4 - 0s - loss: 0.7668 - val_loss: 0.7193
Epoch 14/1000
4/4 - 0s - loss: 0.7570 - val_loss: 0.7128
Epoch 15/1000
4/4 - 0s - loss: 0.7485 - val_loss: 0.7076
Epoch 16/1000
4/4 - 0s - loss: 0.7414 - val_loss: 0.7036
Epoch 17/1000
4/4 - 0s - loss: 0.7344 - val_loss: 0.6997
Epoch 18/1000
4/4 - 0s - loss: 0.7300 - 

<keras.callbacks.History at 0x7f6f8938a490>

Now that the neural network is trained, we can make predictions about the test set.  The following code predicts the type of iris for the test set and displays the first five predictions.

In [3]:
# Run the trained model on the held-out rows; one output row per test row.
pred = model.predict(x_test)
print(pred[0:5]) # print first five predictions

[[3.1041566e-04 8.7565869e-01 1.2403085e-01]
 [6.5769815e-01 2.1284823e-01 1.2945360e-01]
 [2.6612356e-06 1.0935251e-02 9.8906201e-01]
 [4.2240298e-04 7.8712070e-01 2.1245690e-01]
 [1.7658851e-04 8.4447700e-01 1.5534645e-01]]


Each line provides the probability that the iris is one of the 3 types of iris in the data set. 

### Saving Best Weights

It would be a good idea to keep track of the best weights found during the entire training operation.  


An additional monitor, ModelCheckpoint,  is used and saves a copy of the neural network to **best_weights.hdf5** each time the validation score of the neural network improves.  

Once training is done, we just reload this file and we have the optimal training weights that were found.

In [4]:
import pandas as pd
import io
import requests
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

# Directory holding the data files, relative to the working directory.
path = "./data/"

filename = os.path.join(path,"iris.csv")
# The strings 'NA' and '?' in the CSV are parsed as missing values.
df = pd.read_csv(filename,na_values=['NA','?'])

# Integer-encode the species column in place; `species` keeps the encoder's class labels.
species = encode_text_index(df,"species")
x,y = to_xy(df,"species")

# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

model = Sequential()
model.add(Dense(10, input_dim=x.shape[1], activation='relu'))
model.add(Dense(5,activation='relu'))
model.add(Dense(y.shape[1],activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Watch val_loss; an epoch counts as an improvement only if it gains at least min_delta.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')

# One definition of the checkpoint location, shared by the save and the reload
# below so the two cannot drift apart.
weights_path = "dnn/best_weights.hdf5"
# Make sure the target directory exists before the first save; ModelCheckpoint
# may not create missing directories on every Keras version.
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

checkpointer = ModelCheckpoint(filepath=weights_path, verbose=0, save_best_only=True) # save best model

model.fit(x_train, y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer],verbose=2,epochs=1000)

model.load_weights(weights_path) # load weights from best model

Epoch 1/1000
4/4 - 0s - loss: 1.2744 - val_loss: 1.3051
Epoch 2/1000
4/4 - 0s - loss: 1.2373 - val_loss: 1.2699
Epoch 3/1000
4/4 - 0s - loss: 1.2084 - val_loss: 1.2342
Epoch 4/1000
4/4 - 0s - loss: 1.1826 - val_loss: 1.1993
Epoch 5/1000
4/4 - 0s - loss: 1.1537 - val_loss: 1.1697
Epoch 6/1000
4/4 - 0s - loss: 1.1301 - val_loss: 1.1418
Epoch 7/1000
4/4 - 0s - loss: 1.1088 - val_loss: 1.1147
Epoch 8/1000
4/4 - 0s - loss: 1.0863 - val_loss: 1.0914
Epoch 9/1000
4/4 - 0s - loss: 1.0693 - val_loss: 1.0665
Epoch 10/1000
4/4 - 0s - loss: 1.0502 - val_loss: 1.0445
Epoch 11/1000
4/4 - 0s - loss: 1.0335 - val_loss: 1.0244
Epoch 12/1000
4/4 - 0s - loss: 1.0165 - val_loss: 1.0059
Epoch 13/1000
4/4 - 0s - loss: 1.0011 - val_loss: 0.9879
Epoch 14/1000
4/4 - 0s - loss: 0.9868 - val_loss: 0.9690
Epoch 15/1000
4/4 - 0s - loss: 0.9718 - val_loss: 0.9519
Epoch 16/1000
4/4 - 0s - loss: 0.9589 - val_loss: 0.9354
Epoch 17/1000
4/4 - 0s - loss: 0.9447 - val_loss: 0.9199
Epoch 18/1000
4/4 - 0s - loss: 0.9325 - 

### Potential Keras Issue on Small Networks Regarding Saving Optimal Weights

You might occasionally see this error:

```
OSError: Unable to create file (Unable to open file: name = 'dnn/best_weights.hdf5', errno = 22, error message = 'invalid argument', flags = 13, o_flags = 302)
```

Usually you can just rerun the code and it goes away.  This is an unfortunate result of saving a file each time the validation score improves (as described in the previous section).  If the error improves too rapidly, you might try to save the file twice and get an error from these two saves overlapping.  For larger neural networks this will not be a problem because each training step will take longer, allowing for plenty of time for the previous save to complete.   

## Evaluating Classification Models

### (1) Calculate Classification Accuracy/Precision/Recall/F1-Score

By default, Keras will return the predicted probability for each class. We can change these prediction probabilities into the actual iris predicted with **argmax**.

In [20]:
# Raw model output for the test rows; the bare name on the last line makes the notebook display it.
pred = model.predict(x_test)
pred

array([[4.8701959e-03, 8.0260587e-01, 1.9252394e-01],
       [9.8935294e-01, 1.0646974e-02, 1.1855019e-07],
       [2.0035655e-07, 1.6603183e-02, 9.8339659e-01],
       [3.0396341e-03, 7.1296006e-01, 2.8400034e-01],
       [2.3184558e-03, 7.7407336e-01, 2.2360826e-01],
       [9.8045427e-01, 1.9544963e-02, 6.6490134e-07],
       [3.2584552e-02, 9.1902930e-01, 4.8386265e-02],
       [3.6308735e-05, 1.6353524e-01, 8.3642846e-01],
       [5.5003236e-04, 4.1163653e-01, 5.8781344e-01],
       [1.8376991e-02, 9.0531248e-01, 7.6310582e-02],
       [1.8772215e-04, 3.0934802e-01, 6.9046420e-01],
       [9.8539901e-01, 1.4600452e-02, 5.5713917e-07],
       [9.9354607e-01, 6.4538140e-03, 6.0609523e-08],
       [9.8530197e-01, 1.4697556e-02, 4.9057326e-07],
       [9.9201304e-01, 7.9868222e-03, 1.0094357e-07],
       [3.5408696e-03, 7.8793710e-01, 2.0852198e-01],
       [4.0408568e-06, 4.6928771e-02, 9.5306724e-01],
       [1.6913282e-02, 8.8458097e-01, 9.8505765e-02],
       [4.6869460e-03, 7.450

In [21]:
# `pred` is rebound here: from per-class probabilities to one class index per row.
pred = np.argmax(pred,axis=1) # raw probabilities to choose class (highest probability)
print(pred)

[1 0 2 1 1 0 1 2 2 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0]


Now that we have the actual iris flower predicted, we can calculate the percent accuracy (how many were correctly classified).

In [22]:
# Collapse each row of y_test to the index of its largest entry so it is comparable with `pred`.
y_true= np.argmax(y_test,axis=1) 

# Fraction of test rows whose predicted class equals the true class.
score = metrics.accuracy_score(y_true, pred)

print("Accuracy score: {}".format(score))

Accuracy score: 0.9736842105263158


In [23]:
# Multi-class precision; average="weighted" combines the per-class scores weighted by class support.
score = metrics.precision_score(y_true, pred, average= "weighted")
print("Precision score: {}".format(score))

Precision score: 0.9757085020242916


In [24]:
# Multi-class recall; average="weighted" combines the per-class scores weighted by class support.
score = metrics.recall_score(y_true, pred, average= "weighted")
print("Recall score: {}".format(score))

Recall score: 0.9736842105263158


In [25]:
# Multi-class F1; average="weighted" combines the per-class scores weighted by class support.
score = metrics.f1_score(y_true, pred, average= "weighted")
print("F1 score: {}".format(score))

F1 score: 0.9735839598997494


In [26]:
# Per-class precision, recall, F1 and support in one table, plus the overall averages.
print(metrics.classification_report(y_true, pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.91      0.95        11
           2       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



### (2) Calculate Classification Cross-Entropy Loss (Log Loss)  

Log loss is an error metric that is often used in place of accuracy for classification.  

Log loss allows for "partial credit". For example, a model might be used to classify A, B and C.  The correct answer might be A, however if the classification network chose B as having the highest probability, then accuracy gives the neural network no credit for this classification.  

However, with log loss, the probability of the correct answer is added to the score.  For example, the correct answer might be A, but if the neural network only predicted .4 probability of A being correct, then the value -log(.4) is added.

$$ logloss = -\frac{1}{N}\sum^N_{i=1}\sum^M_{j=1}y_{ij} \log(\hat{y}_{ij}) $$

The following code shows the logloss scores that correspond to the average probability for the correct item. The **pred** column specifies the average probability for the correct class.  The **logloss** column specifies the log loss for that probability.


Calculating log loss

In [27]:
# Generate predictions
# Log loss needs the per-class probabilities, so `pred` is recomputed from the model
# rather than reusing the argmax class indexes from the accuracy cells.
pred = model.predict(x_test)

print("Numpy array of predictions")
print(pred[0:5])
print()
print("y_test:")
print(y_test[0:5])

# Compare the one-hot targets with the predicted probabilities.
score = metrics.log_loss(y_test, pred)
print("Log loss score: {}".format(score))

Numpy array of predictions
[[4.8701959e-03 8.0260587e-01 1.9252394e-01]
 [9.8935294e-01 1.0646974e-02 1.1855019e-07]
 [2.0035655e-07 1.6603183e-02 9.8339659e-01]
 [3.0396341e-03 7.1296006e-01 2.8400034e-01]
 [2.3184558e-03 7.7407336e-01 2.2360826e-01]]

y_test:
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]
Log loss score: 0.12896662875764855


## Evaluating Regression Models

Regression results are evaluated differently than classification.  Consider the following code. 

In [28]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn import metrics

# Directory holding the data files, relative to the working directory.
path = "./data/"

filename_read = os.path.join(path,"auto-mpg.csv")
# The strings 'NA' and '?' in the CSV are parsed as missing values.
df = pd.read_csv(filename_read,na_values=['NA','?'])

# Keep the car names for reference, then remove that column from the feature frame.
cars = df['name']
# Pass axis by keyword: the positional form is deprecated (it emits a
# FutureWarning) and is rejected by pandas 2.x.
df.drop('name', axis=1, inplace=True)
missing_median(df, 'horsepower')

# 'origin' is categorical, so it is expanded into indicator columns.
encode_text_dummy(df, 'origin')

x,y = to_xy(df,"mpg")

# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=45)

model = Sequential()
model.add(Dense(10, input_dim=x.shape[1], activation='relu'))
# These hidden layers have no activation argument, i.e. they are linear.
model.add(Dense(10))
model.add(Dense(10))
model.add(Dense(10))

model.add(Dense(1))  # 1 output neuron


model.compile(loss='mean_squared_error', optimizer='adam')

# Watch val_loss; an epoch counts as an improvement only if it gains at least min_delta.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')

model.fit(x_train,y_train, validation_data=(x_test,y_test),callbacks=[monitor],verbose=2,epochs=1000)


  df.drop('name',1,inplace=True)


Epoch 1/1000
10/10 - 0s - loss: 558.9835 - val_loss: 460.3647
Epoch 2/1000
10/10 - 0s - loss: 204.8360 - val_loss: 181.8289
Epoch 3/1000
10/10 - 0s - loss: 136.9872 - val_loss: 81.7431
Epoch 4/1000
10/10 - 0s - loss: 110.7115 - val_loss: 78.3472
Epoch 5/1000
10/10 - 0s - loss: 103.3049 - val_loss: 76.0195
Epoch 6/1000
10/10 - 0s - loss: 101.0144 - val_loss: 73.6367
Epoch 7/1000
10/10 - 0s - loss: 97.3610 - val_loss: 71.5221
Epoch 8/1000
10/10 - 0s - loss: 95.1923 - val_loss: 95.8291
Epoch 9/1000
10/10 - 0s - loss: 100.7969 - val_loss: 74.0035
Epoch 10/1000
10/10 - 0s - loss: 100.2045 - val_loss: 96.9955
Epoch 11/1000
10/10 - 0s - loss: 115.4351 - val_loss: 101.0345
Epoch 12/1000
10/10 - 0s - loss: 113.8146 - val_loss: 62.5429
Epoch 13/1000
10/10 - 0s - loss: 84.4479 - val_loss: 89.7855
Epoch 14/1000
10/10 - 0s - loss: 111.8030 - val_loss: 104.4330
Epoch 15/1000
10/10 - 0s - loss: 124.0882 - val_loss: 78.8012
Epoch 16/1000
10/10 - 0s - loss: 90.5690 - val_loss: 78.8363
Epoch 17/1000
10/

<keras.callbacks.History at 0x7f6f892a1400>

### Mean Square Error

The mean square error (MSE) is the average of the squared differences between the prediction ($\hat{y}$) and the expected value ($y$).  MSE values are expressed in the squared units of the target rather than in the target's own units.  If an MSE value has decreased for a model, that is good. Low MSE values are desired.

$ \text{MSE} = \frac{1}{n} \sum_{i=1}^n \left(\hat{y}_i - y_i\right)^2 $


In [14]:
# Predict
pred = model.predict(x_test)

# Measure MSE error.  scikit-learn metrics take (y_true, y_pred) in that order;
# MSE itself is symmetric, but keeping the order avoids surprises if the metric
# is later swapped for an asymmetric one.
score = metrics.mean_squared_error(y_test, pred)
print("Final score (MSE): {}".format(score))

Final score (MSE): 501.094970703125


### Root Mean Square Error

The root mean square (RMSE) is essentially the square root of the MSE.  Because of this, the RMSE error is in the same units as the training data outcome. Low RMSE values are desired.

$ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^n \left(\hat{y}_i - y_i\right)^2} $

In [15]:
# Measure RMSE error.  RMSE is common for regression.
# Arguments follow scikit-learn's (y_true, y_pred) order.
score = np.sqrt(metrics.mean_squared_error(y_test, pred))
print("Final score (RMSE): {}".format(score))

Final score (RMSE): 22.385150909423828


# Performance Improvement by Normalizing Features and Tuning Hyperparameters

There are many different settings that you can use for a neural network.  These can affect performance.  The following code changes some of these, beyond their default values:

* **activation:** relu, sigmoid, tanh
* **Layers and Neuron Counts**
* **optimizer:** adam, sgd, rmsprop, and [others](https://keras.io/optimizers/)

In [16]:
import pandas as pd
import io
import requests
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

# Directory holding the data files, relative to the working directory.
path = "./data/"
# Toggle for the z-score normalization below; set to False to compare against raw features.
preprocess = True

filename_read = os.path.join(path,"auto-mpg.csv")
# The strings 'NA' and '?' in the CSV are parsed as missing values.
df = pd.read_csv(filename_read,na_values=['NA','?'])

# create feature vector
missing_median(df, 'horsepower')
encode_text_dummy(df, 'origin')
# Pass axis by keyword: the positional form is deprecated (it emits a
# FutureWarning) and is rejected by pandas 2.x.
df.drop('name', axis=1, inplace=True)

if preprocess:
    # Standardize every numeric predictor; the target 'mpg' is left in its own units.
    encode_numeric_zscore(df, 'horsepower')
    encode_numeric_zscore(df, 'weight')
    encode_numeric_zscore(df, 'cylinders')
    encode_numeric_zscore(df, 'displacement')
    encode_numeric_zscore(df, 'acceleration')
    encode_numeric_zscore(df, 'year')

# Encode to a 2D matrix for training
x,y = to_xy(df,'mpg')

# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

model = Sequential()
model.add(Dense(100, input_dim=x.shape[1], activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
# Single linear output unit for the regression target.
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

# Watch val_loss; an epoch counts as an improvement only if it gains at least min_delta.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=2, mode='auto')

model.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor],verbose=2,epochs=1000)


Epoch 1/1000


  df.drop('name',1,inplace=True)


10/10 - 0s - loss: 598.0247 - val_loss: 545.8450
Epoch 2/1000
10/10 - 0s - loss: 551.8244 - val_loss: 487.1441
Epoch 3/1000
10/10 - 0s - loss: 479.3139 - val_loss: 396.2396
Epoch 4/1000
10/10 - 0s - loss: 368.0444 - val_loss: 265.5640
Epoch 5/1000
10/10 - 0s - loss: 217.2141 - val_loss: 122.2944
Epoch 6/1000
10/10 - 0s - loss: 83.4488 - val_loss: 35.5629
Epoch 7/1000
10/10 - 0s - loss: 28.8215 - val_loss: 31.6495
Epoch 8/1000
10/10 - 0s - loss: 28.3959 - val_loss: 20.9232
Epoch 9/1000
10/10 - 0s - loss: 19.0158 - val_loss: 15.4415
Epoch 10/1000
10/10 - 0s - loss: 16.1865 - val_loss: 12.8007
Epoch 11/1000
10/10 - 0s - loss: 13.9584 - val_loss: 11.5890
Epoch 12/1000
10/10 - 0s - loss: 12.9410 - val_loss: 10.5097
Epoch 13/1000
10/10 - 0s - loss: 12.0190 - val_loss: 9.4069
Epoch 14/1000
10/10 - 0s - loss: 11.5248 - val_loss: 8.6842
Epoch 15/1000
10/10 - 0s - loss: 11.0827 - val_loss: 8.0587
Epoch 16/1000
10/10 - 0s - loss: 10.6274 - val_loss: 7.8248
Epoch 17/1000
10/10 - 0s - loss: 10.3487

<keras.callbacks.History at 0x7f6f8950c760>

In [17]:
# Predict and measure RMSE
pred = model.predict(x_test)
# Arguments follow scikit-learn's (y_true, y_pred) order.
score = np.sqrt(metrics.mean_squared_error(y_test, pred))
print("Score (RMSE): {}".format(score))

Score (RMSE): 2.3292784690856934


In [18]:
# print out prediction
# Put the expected and predicted values side by side, one row per test sample.
df_y = pd.DataFrame(y_test, columns=['ground_truth'])
df_pred = pd.DataFrame(pred, columns=['predicted'])
# axis=1 joins the two frames column-wise on their shared default integer index.
result = pd.concat([df_y, df_pred],axis=1)
# The bare name on the last line makes the notebook display the table.
result

Unnamed: 0,ground_truth,predicted
0,33.0,34.130035
1,28.0,31.279890
2,19.0,20.470285
3,13.0,14.781211
4,14.0,12.962220
...,...,...
75,19.9,20.778809
76,17.5,17.217361
77,28.0,30.890749
78,29.0,31.005367
