In [109]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [110]:
import pandas as pd
import numpy as np

In [111]:
# Where the training CSV lives, and the column names to assign to it
# (the file itself carries no header row, so pandas is told the names).
ABALONE_TRAIN_URL = "https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv"
ABALONE_COLUMNS = [
    "Length", "Diameter", "Height", "Whole weight", "Shucked weight",
    "Viscera weight", "Shell weight", "Age",
]

abalone_train = pd.read_csv(ABALONE_TRAIN_URL, names=ABALONE_COLUMNS)

# Preview the first five rows as the cell's output.
abalone_train.head()


Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Age
0,0.435,0.335,0.11,0.334,0.1355,0.0775,0.0965,7
1,0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6
2,0.655,0.51,0.16,1.092,0.396,0.2825,0.37,14
3,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16
4,0.545,0.42,0.13,0.879,0.374,0.1695,0.23,13


The code above loads the training data from a CSV file hosted online. If I want to read a file from my local system instead, I simply pass its path in place of the URL.

In [112]:
# Work on a copy so that removing the label column below leaves abalone_train untouched.
abalone_features = abalone_train.copy()
# pop() removes 'Age' from the feature frame and returns it, which gives us the labels.
abalone_labels = abalone_features.pop('Age')

This is standard machine-learning terminology. Features are the input data, the stuff we train our model on. Labels are the outputs we want our model to predict. It is important to note that the features do NOT contain the labels. The total data is the union of the features and the labels, and the two are mutually exclusive: no column belongs to both.

It is also important to understand what the above code is doing. The `pop` method not only returns the column that was popped, but also removes it from the DataFrame on which `pop` was called. That is why the code first makes a copy: the original `abalone_train` stays intact.

In [113]:
# Turn the feature frame and the label column into plain NumPy arrays.
abalone_features, abalone_labels = (
    np.array(abalone_features),
    np.array(abalone_labels),
)

Before we can feed the features into a model for training, we need to have them as a NumPy array. It is really easy to convert a pandas DataFrame to a NumPy array (as done above), simply because these are extremely popular tools with great interoperability. Both the features and the labels could still work as pandas objects, but it is safer to first convert them to NumPy arrays as I did above.

In [114]:
# Show the converted feature array as the cell's output.
abalone_features

array([[0.435 , 0.335 , 0.11  , ..., 0.1355, 0.0775, 0.0965],
       [0.585 , 0.45  , 0.125 , ..., 0.3545, 0.2075, 0.225 ],
       [0.655 , 0.51  , 0.16  , ..., 0.396 , 0.2825, 0.37  ],
       ...,
       [0.53  , 0.42  , 0.13  , ..., 0.3745, 0.167 , 0.249 ],
       [0.395 , 0.315 , 0.105 , ..., 0.1185, 0.091 , 0.1195],
       [0.45  , 0.355 , 0.12  , ..., 0.1145, 0.0665, 0.16  ]])

Now we are done with the data pre-processing. Now it is time to define a model.

In [115]:
# Fix the Python, NumPy and TensorFlow random seeds before any weights are
# created, so that a fresh Restart & Run All starts from the same initial
# weights instead of a different random draw each time.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# A small regression network: 7 input measurements -> 64 ReLU units -> 1 output.
abalone_model = tf.keras.Sequential([
    tf.keras.Input(shape=(7,)),           # one value per feature column
    layers.Dense(64, activation='relu'),  # single hidden layer
    layers.Dense(1)                       # no activation: a single raw output value
])

# Train against mean absolute percentage error, so the reported loss reads
# as a percentage, using the Adam optimizer with its default settings.
abalone_model.compile(
    loss=tf.keras.losses.MeanAbsolutePercentageError(),
    optimizer=tf.keras.optimizers.Adam()
)

In [116]:
# Fit the model on the training arrays for ten epochs.
abalone_model.fit(
    x=abalone_features,
    y=abalone_labels,
    epochs=10,
)

Epoch 1/10


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 874us/step - loss: 95.0043
Epoch 2/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 913us/step - loss: 45.0266
Epoch 3/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 832us/step - loss: 19.8204
Epoch 4/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 849us/step - loss: 18.3027
Epoch 5/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 830us/step - loss: 16.7902
Epoch 6/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 884us/step - loss: 16.1082
Epoch 7/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 826us/step - loss: 16.1688
Epoch 8/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 833us/step - loss: 15.8132
Epoch 9/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 844us/step - loss: 16.2812
Epoch 10/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

<keras.src.callbacks.history.History at 0x7f64de69e510>

It is important to choose a loss function that actually conveys meaningful information. In the case of mean absolute percentage error (MAPE), a loss of 15 means that the model's predictions are, on average, 15% off from what they should be. Keep in mind that the loss reported here is measured on the training data itself. This kind of score should be good enough for most purposes.

In [117]:
# Two known samples, copied from rows 0 and 2 of the training data preview
# (recorded ages 7 and 14), so the predictions can be compared with the labels.
# The first sample's shell weight is 0.0965 in the data; it was mistyped as 0.096.
sample_measurements = np.array([
    [0.435, 0.335, 0.110, 0.334, 0.1355, 0.0775, 0.0965],
    [0.655, 0.510, 0.160, 1.092, 0.3960, 0.2825, 0.3700],
])

# predict() is given a 2-D NumPy array: one row per sample, seven features each.
abalone_model.predict(sample_measurements)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


array([[ 7.4021535],
       [10.590408 ]], dtype=float32)

Always remember that the `predict` method should be given NumPy arrays, and not just the usual Python lists. Make sure to convert the lists to arrays before passing them in.