<a href="https://colab.research.google.com/github/hansglick/book_errata/blob/main/p004_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd

In [15]:
# Demo of the Keras Normalization layer, which standardizes batches of
# continuous features (zero mean, standard deviation of 1).
# Using it takes three steps:
#   1. have the "adapt" data at hand, i.e. the samples on which the mean and
#      variance are computed,
#   2. instantiate the layer,
#   3. adapt the layer so that it evaluates those basic statistics.
adapt_data = np.array(
    [
        [0., 7., 4.],
        [2., 9., 6.],
        [0., 7., 4.],
        [2., 9., 6.],
    ],
    dtype='float32',
)  # step 1
layer = tf.keras.layers.Normalization(axis=-1)  # step 2
layer.adapt(adapt_data)  # step 3

In [16]:
# Apply the adapted layer to a NumPy array; the resulting tensor is the
# cell's displayed value.
input_data = np.array(
    [[0., 7., 4.]],
    dtype='float32',
)
layer(input_data)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1., -1., -1.]], dtype=float32)>

In [17]:
# Download the Auto MPG data straight into a DataFrame with pandas' read_csv().
# Column names are supplied explicitly, '?' entries are read as missing values,
# and anything following a tab character is treated as a comment.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
raw_dataset = pd.read_csv(
    url,
    names=column_names,
    sep=' ',
    skipinitialspace=True,
    na_values='?',
    comment='\t',
)

In [18]:
# Drop the rows containing missing values, then one-hot encode the categorical
# Origin column so the neural network only receives numeric inputs.
# dropna() already returns a new DataFrame, so raw_dataset is left untouched.
dataset = raw_dataset.dropna()
dataset["Origin"] = dataset["Origin"].map({1: "USA", 2: "Europe", 3: "Japan"})
# dtype=float is required: since pandas 2.0, get_dummies() produces bool columns
# by default. Mixed bool/float columns make np.array(train_features) an object
# array, which the Normalization layer cannot adapt on.
dataset = pd.get_dummies(
    dataset,
    columns=["Origin"],
    prefix="",
    prefix_sep="",
    dtype=float,
)

In [19]:
# Split the data into train and test sets: 80% of the rows are sampled (with a
# fixed seed) for training and the remaining rows form the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Separate the target (MPG) from the features. pop() removes the column from
# the copied frame and returns it as the labels.
train_features = train_dataset.copy()
train_labels = train_features.pop('MPG')

test_features = test_dataset.copy()
test_labels = test_features.pop('MPG')

In [20]:
# Build the linear model: a Normalization layer adapted on the training
# features, followed by a single-unit Dense layer.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))

linear_model = tf.keras.Sequential()
linear_model.add(normalizer)
linear_model.add(tf.keras.layers.Dense(units=1))

In [21]:
# Sanity check: run the still-untrained linear model on the first 10 training
# rows to confirm that prediction works and yields one value per row.
linear_model.predict(train_features.iloc[:10])

array([[ 0.7508768 ],
       [-0.86499375],
       [-1.4448656 ],
       [ 0.6345582 ],
       [ 1.1647086 ],
       [-1.2364887 ],
       [ 1.1328601 ],
       [-1.308997  ],
       [-0.7821326 ],
       [ 0.7988956 ]], dtype=float32)

In [22]:
# Compile the linear model: mean absolute error as the loss, minimized with
# the Adam optimizer at a learning rate of 0.1.
linear_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
)

In [23]:
# Train the linear model for 100 epochs with logging silenced, holding out 20%
# of the training data for validation. The returned History object is kept.
history = linear_model.fit(
    x=train_features,
    y=train_labels,
    validation_split=0.2,
    epochs=100,
    verbose=0,
)

In [28]:
# Function that defines an architecture and returns the compiled model
def build_and_compile_model(norm, hidden_units=(64, 64), learning_rate=0.001):
  """Build and compile a fully connected regression network.

  The model stacks `norm`, one ReLU Dense layer per entry of `hidden_units`,
  and a final single-unit Dense layer. It is compiled with the mean absolute
  error loss and the Adam optimizer.

  Args:
    norm: Layer placed first in the model (the notebook passes an already
      adapted Normalization layer).
    hidden_units: Iterable with the number of units of each hidden Dense
      layer. The default (64, 64) reproduces the original architecture.
    learning_rate: Learning rate handed to the Adam optimizer. The default
      0.001 matches the original hard-coded value.

  Returns:
    The compiled tf.keras.Sequential model.
  """
  layers = [norm]
  # One ReLU hidden layer per requested size, created in order.
  layers.extend(
      tf.keras.layers.Dense(units, activation='relu') for units in hidden_units
  )
  # Single linear output unit for the regression target.
  layers.append(tf.keras.layers.Dense(1))
  model = tf.keras.Sequential(layers)

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(learning_rate))
  return model

In [29]:
# Train the neural-network model: build it on top of the shared normalizer,
# then fit for 100 epochs with logging silenced and 20% of the training data
# held out for validation.
nn_model = build_and_compile_model(normalizer)
nn_model.fit(
    x=train_features,
    y=train_labels,
    epochs=100,
    validation_split=0.2,
    verbose=0,
)

<keras.callbacks.History at 0x7f8675e07510>

In [30]:
# Evaluate the neural-network model on the held-out test set; the displayed
# value is its compiled loss (mean absolute error).
nn_model.evaluate(x=test_features, y=test_labels, verbose=0)

1.7175453901290894

In [31]:
# Evaluate the linear model (not the neural network) on the held-out test set;
# the displayed value is its compiled loss (mean absolute error).
linear_model.evaluate(x=test_features, y=test_labels, verbose=0)

2.4735820293426514