In [13]:
#Import libs
import pandas
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [14]:
#Import Data
dataset = pandas.read_csv('lifeExpectancy_data.csv')

In [15]:
#Label and feature selection, train test split

labels = dataset.iloc[:, -1]
features = dataset.iloc[:, 0:-1]
features = pandas.get_dummies(dataset)
features_train, features_test, labels_train,labels_test = train_test_split(features, labels, test_size = 0.2, random_state = 23)

#Selecting numerical features and columns
numerical_features = features.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_features.columns

In [16]:
#Using a columntransformer to apply a standard scaler to only numerical columns, so we can keep the categorical values unchanged

ct = ColumnTransformer([("only numeric", StandardScaler(), numerical_columns)], remainder='passthrough')

features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.fit_transform(features_test)

In [17]:
# Initialize a Sequential model
model = Sequential()

# Add an input layer matching the number of features in the dataset
input = InputLayer(input_shape=(features_train.shape[1],))
model.add(input)


# Add a dense hidden layer with 64 neurons and ReLU activation
model.add(Dense(64, activation='relu'))

# Add a dense output layer with 1 neuron (for regression)
model.add(Dense(1))

# Display a summary of the model architecture
print(model.summary())

# Compile the model with Adam optimizer and mean squared error loss
opt = Adam(learning_rate=0.01)
model.compile(loss='mse', metrics=['mae'], optimizer=opt)


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 64)                13824     
                                                                 
 dense_5 (Dense)             (None, 1)                 65        


                                                                 
Total params: 13,889
Trainable params: 13,889
Non-trainable params: 0
_________________________________________________________________
None


In [18]:
# Train the model on the training data for 50 epochs with a batch size of 1
model.fit(features_train_scaled, labels_train, epochs=50, batch_size=1, verbose=0)

# Evaluate the model on the test data and store the MSE and MAE results
res_mse, res_mae = model.evaluate(features_test_scaled, labels_test, verbose=0)

print(res_mse, res_mae)


0.06821408867835999 0.20222051441669464


In [None]:
## The model's performance is quite good for predicting life expectancy.
# An MSE of 0.068 and an MAE of 0.2022 indicate that, on average, the predictions 
# are off by about 0.2 years (or roughly 2.4 months). Given that life expectancy 
# typically ranges between 50 to 85 years, these errors are small and suggest 
# the model is making accurate predictions with minimal large deviations.