# Life Expectancy

Codecademy Exercise: Implementing Neural Networks

Predict life expectancy using a neural network to perform regression.

In [13]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

In [14]:
# read the life-expectancy CSV from the working directory into a DataFrame
dataset = pd.read_csv('life_expectancy.csv')

# preview the first five rows, then the per-column summary statistics
print(dataset.head(), dataset.describe(), sep='\n')

       country  year      status  adult mortality  infant deaths  alcohol  \
0  Afghanistan  2015  Developing            263.0             62     0.01   
1  Afghanistan  2014  Developing            271.0             64     0.01   
2  Afghanistan  2013  Developing            268.0             66     0.01   
3  Afghanistan  2012  Developing            272.0             69     0.01   
4  Afghanistan  2011  Developing            275.0             71     0.01   

   percentage expenditure  hepatitis b  measles   bmi  ...  total expenditure  \
0               71.279624         65.0     1154  19.1  ...               8.16   
1               73.523582         62.0      492  18.6  ...               8.18   
2               73.219243         64.0      430  18.1  ...               8.13   
3               78.184215         67.0     2787  17.6  ...               8.52   
4                7.097109         68.0     3013  17.2  ...               7.87   

   diphtheria  hiv/aids         gdp  population  t

In [15]:
# remove the country column from the dataset
# errors='ignore' keeps this cell re-runnable: `dataset` is rebound here, so on a
# second run 'country' is already gone and a plain drop would raise KeyError
dataset = dataset.drop(columns=['country'], errors='ignore')

# split the data into labels and features
labels = dataset.iloc[:, -1]     # the last column is used as the regression target
features = dataset.iloc[:, :-1]  # every other column is a predictor

# apply one-hot-encoding to the categorical columns
features = pd.get_dummies(features)

# split the data into a training set (80%) and a test set (20%);
# random_state pins the shuffle so the split is the same on every run
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.20, random_state=42
)

# standardize the numerical features; columns of any other dtype
# are passed through unscaled (remainder='passthrough')
numerical_features = features.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_features.columns
ct = ColumnTransformer([('numeric', StandardScaler(), numerical_columns)], remainder='passthrough')

# fit the scaler on the training split only, then reuse those statistics
# on the test split so no test-set information leaks into training
features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.transform(features_test)

In [16]:
# build the model
num_features = features.shape[1]
my_model = Sequential()
# input_shape must be a tuple: (num_features) is just a parenthesized int,
# the trailing comma makes it a one-element shape tuple
my_model.add(InputLayer(input_shape=(num_features,)))
my_model.add(Dense(64, activation='relu'))  # hidden layer
my_model.add(Dense(1))  # output layer: a single unit with no activation
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output
my_model.summary()

# initialize the gradient descent optimizer
opt = Adam(learning_rate=0.01)

# compile the model
# using mean squared error as the loss function and mean absolute error as the metric
my_model.compile(loss='mse', metrics=['mae'], optimizer=opt)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 64)                1408      
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 1473 (5.75 KB)
Trainable params: 1473 (5.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [17]:
# fit the network on the standardized training split
# (50 passes over the data, one sample per gradient step)
my_model.fit(
    x=features_train_scaled,
    y=labels_train,
    batch_size=1,
    epochs=50,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f661c6a6ad0>

In [19]:
# score the fitted model on the held-out test split;
# evaluate() returns the compiled loss (MSE) followed by the compiled metric (MAE)
val_mse, val_mae = my_model.evaluate(
    x=features_test_scaled,
    y=labels_test,
    verbose=1,
)
print('MAE: ', val_mae)

MAE:  1.7526745796203613
