In [1]:
# ======================================================================= #
# Course: Deep Learning Complete Course (CS-501)
# Author: Dr. Saad Laouadi
# Lesson: Linear Regression: Introductory Example
#
# Description: Training Linear Regression with Keras 3 API
#
#
# =======================================================================
#.          Copyright © Dr. Saad Laouadi
# =======================================================================

In [2]:
import os  
import sys 
from pathlib import Path

# Disable Metal API Validation
os.environ["METAL_DEVICE_WRAPPER_TYPE"] = "0"

import numpy as np 
import pandas as pd

import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
print("="*72)

%reload_ext watermark
%watermark -a "Dr. Saad Laouadi" -u -d -m

print("="*72)
print("Imported Packages and Their Versions:")
print("="*72)

%watermark -iv
print("="*72)

Author: Dr. Saad Laouadi

Last updated: 2024-11-18

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 23.5.0
Machine     : arm64
Processor   : arm
CPU cores   : 16
Architecture: 64bit

Imported Packages and Their Versions:
pandas    : 2.2.2
keras     : 3.6.0
numpy     : 1.26.4
tensorflow: 2.16.2
sklearn   : 1.5.1
sys       : 3.11.10 (main, Oct  3 2024, 02:26:51) [Clang 14.0.6 ]



In [3]:
# Global Configuration
DATA_ROOT = Path(r"../../../datasets/regression/")
DATASET = DATA_ROOT.joinpath("wages.csv")

In [4]:
# Load the data
wages = pd.read_csv(DATASET)

In [5]:
# Check the data info
wages.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 534 entries, 0 to 533
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   wage_per_hour   534 non-null    float64
 1   union           534 non-null    int64  
 2   education_yrs   534 non-null    int64  
 3   experience_yrs  534 non-null    int64  
 4   age             534 non-null    int64  
 5   female          534 non-null    int64  
 6   marr            534 non-null    int64  
 7   south           534 non-null    int64  
 8   manufacturing   534 non-null    int64  
 9   construction    534 non-null    int64  
dtypes: float64(1), int64(9)
memory usage: 41.8 KB


In [6]:
# Check the data description
wages.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
wage_per_hour,534.0,9.024064,5.139097,1.0,5.25,7.78,11.25,44.5
union,534.0,0.179775,0.38436,0.0,0.0,0.0,0.0,1.0
education_yrs,534.0,13.018727,2.615373,2.0,12.0,12.0,15.0,18.0
experience_yrs,534.0,17.822097,12.37971,0.0,8.0,15.0,26.0,55.0
age,534.0,36.833333,11.726573,18.0,28.0,35.0,44.0,64.0
female,534.0,0.458801,0.498767,0.0,0.0,0.0,1.0,1.0
marr,534.0,0.655431,0.475673,0.0,0.0,1.0,1.0,1.0
south,534.0,0.292135,0.45517,0.0,0.0,0.0,1.0,1.0
manufacturing,534.0,0.185393,0.388981,0.0,0.0,0.0,0.0,1.0
construction,534.0,0.044944,0.207375,0.0,0.0,0.0,0.0,1.0


In [7]:
# Check the data shape
print(wages.shape)

(534, 10)


In [8]:
# Split the data into features and target
X = wages.drop('wage_per_hour', axis=1)
y = wages['wage_per_hour']

In [9]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# define the model
model = keras.Sequential()

In [11]:
# Define the model
model = keras.Sequential([
    keras.layers.Input(shape=(X.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)
])

In [12]:
# Check the model summary
print(model.summary())

None


In [13]:
# Compile the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

In [14]:
# Train the model
history = model.fit(X_train, y_train, 
                   validation_data=(X_test, y_test),
                   epochs=100, 
                   batch_size=32,
                   verbose=1)

Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 29.4595 - mean_absolute_error: 3.7462 - val_loss: 30.9202 - val_mean_absolute_error: 4.0935
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 22.6147 - mean_absolute_error: 3.1608 - val_loss: 26.8283 - val_mean_absolute_error: 4.2133
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 24.8784 - mean_absolute_error: 3.6140 - val_loss: 25.0986 - val_mean_absolute_error: 3.6710
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 22.9278 - mean_absolute_error: 3.3218 - val_loss: 24.2688 - val_mean_absolute_error: 3.8395
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 19.2577 - mean_absolute_error: 3.1396 - val_loss: 23.9643 - val_mean_absolute_error: 3.8279
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

In [15]:
# Evaluate the model
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print('Test Mean Absolute Error:', mae)

Test Mean Absolute Error: 3.5291662216186523
