# NEURAL NETWORK TRAINING

## REFERENCE 
https://www.kaggle.com/code/cdeotte/nn-mlp-starter-cv-0-0608

In [1]:
import pandas as pd 
import numpy as np 
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Embedding
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.layers import Activation
import tensorflow.keras.backend as K

VER = 1

2025-05-26 16:08:30.096674: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-26 16:08:30.151679: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748268510.188443  195022 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748268510.198950  195022 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748268510.244727  195022 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
X = pd.read_csv('../data/X.csv')
y = pd.read_csv('../data/y.csv')
X_test = pd.read_csv('../data/X_test.csv')

FEATURES = X.columns.tolist()
TARGET = 'Calories'

In [3]:
# SIMPLE MLP
def build_model(size=len(FEATURES)):
    x_in = Input(shape=(size,))
    x = Dense(32)(x_in)
    x = BatchNormalization()(x)
    x = Activation('swish')(x)

    x = Dense(64)(x)
    x = BatchNormalization()(x)
    x = Activation('swish')(x)

    x = Dense(32)(x)
    x = BatchNormalization()(x)
    x = Activation('swish')(x)

    x = Dense(1, activation='linear')(x)
    model = Model(inputs=x_in, outputs=x)
    return model

In [4]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping

def make_callbacks(): 
    lr_callback = ReduceLROnPlateau(
        monitor='val_loss',
        factor = 0.5,
        patience = 3,
        verbose= 1,
        min_lr=1e-6
    )
    early_stop_cb = EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True,
        mode="min",
        verbose=1
    )
    return [lr_callback, early_stop_cb]

EPOCHS = 100

In [5]:
y.head()

Unnamed: 0,Calories
0,5.01728
1,3.555348
2,3.401197
3,4.94876
4,4.990433


In [6]:
import time
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

FOLDS = 5
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Arrays pour stocker les prédictions en échelle ORIGINALE
test_predictions = np.zeros(len(X_test))
oof_predictions = np.zeros(len(X))

for fold, (train_idx, valid_idx) in enumerate(kf.split(X, y)):
    
    print(f"\n{'#'*28}")
    print(f"{'#'*10} Fold {fold+1} {'#'*10}")
    print(f"{'#'*28}")
    
    # TRAIN
    X_train = X.loc[train_idx, FEATURES].copy()
    y_train = np.log1p(y.loc[train_idx, TARGET])
    
    # EXTRA DATA
    # for k in range(4):
    #     X_train = pd.concat([X_train,orig[FEATURES]],axis=0)
    #     y_train = pd.concat([y_train,np.log1p( orig[TARGET] )],axis=0)
    
    # VALID
    X_valid = X.loc[valid_idx, FEATURES].copy()
    y_valid_log = np.log1p(y.loc[valid_idx, TARGET])        # Pour l'entraînement
    y_valid_original = y.loc[valid_idx, TARGET]             # Pour le calcul RMSE
    
    # TEST
    X_test_data = X_test[FEATURES].copy()
    
    # NORMALIZE FOR NN
    print("Normalizing...", end='')
    # Spécifier explicitement les colonnes numériques
    norm_cols = [c for c in FEATURES if X_train[c].dtype in ['float64', 'int64', 'float32', 'int32']]
    
    means = X_train[norm_cols].mean()
    stds = X_train[norm_cols].std()
    stds = stds.replace(0, 1)
    X_train[norm_cols] = (X_train[norm_cols] - means) / stds
    X_valid[norm_cols] = (X_valid[norm_cols] - means) / stds
    X_test_data[norm_cols] = (X_test_data[norm_cols] - means) / stds
    print("done")
    
    start = time.time()
    
    K.clear_session()
    model = build_model(X_train.shape[1])
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss="mse",
                  metrics=[tf.keras.metrics.RootMeanSquaredError()],
                  )
    model.fit(X_train, y_train,
              validation_data=(X_valid, y_valid_log),  # Utiliser version log pour validation
              callbacks=make_callbacks(),
              batch_size=256, epochs=EPOCHS, verbose=2)
    
    # Prédictions en échelle LOG
    oof_pred_log = model.predict(X_valid, batch_size=512, verbose=2).flatten()
    test_pred_log = model.predict(X_test_data, batch_size=512, verbose=2).flatten()
    
    # RETRANSFORMATION en échelle originale avant stockage
    oof_pred_original = np.expm1(oof_pred_log)
    test_pred_original = np.expm1(test_pred_log)
    
    # Stockage des prédictions en échelle originale
    oof_predictions[valid_idx] = oof_pred_original
    test_predictions += test_pred_original
    
    # RMSE calculé en échelle originale (plus interprétable)
    rmse = np.sqrt(mean_squared_error(y_valid_original, oof_pred_original))
    print(f"Fold {fold+1} RMSE: {rmse:.4f}")
    print(f"Feature engineering & training time: {time.time() - start:.1f} sec")

# Moyenne des prédictions test (en échelle originale)
test_predictions /= FOLDS

# Calcul du RMSE global OOF
overall_rmse = np.sqrt(mean_squared_error(y[TARGET], oof_predictions))
print(f"\nOverall OOF RMSE: {overall_rmse:.4f}")

# Vérifications de cohérence
print(f"\nPredictions Summary:")
print(f"OOF range: {oof_predictions.min():.2f} - {oof_predictions.max():.2f}")
print(f"Test range: {test_predictions.min():.2f} - {test_predictions.max():.2f}")
print(f"Target range: {y[TARGET].min():.2f} - {y[TARGET].max():.2f}")


############################
########## Fold 1 ##########
############################
Normalizing...done


I0000 00:00:1748268519.173369  195022 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5566 MB memory:  -> device: 0, name: NVIDIA RTX 2000 Ada Generation Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/100


I0000 00:00:1748268521.328622  195191 service.cc:152] XLA service 0x7f63400101d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748268521.328699  195191 service.cc:160]   StreamExecutor device (0): NVIDIA RTX 2000 Ada Generation Laptop GPU, Compute Capability 8.9
2025-05-26 16:08:41.365796: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1748268521.585718  195191 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1748268522.222029  195191 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


2344/2344 - 12s - 5ms/step - loss: 0.0356 - root_mean_squared_error: 0.1887 - val_loss: 8.6489e-04 - val_root_mean_squared_error: 0.0294 - learning_rate: 1.0000e-03
Epoch 2/100
2344/2344 - 14s - 6ms/step - loss: 0.0014 - root_mean_squared_error: 0.0371 - val_loss: 8.9699e-04 - val_root_mean_squared_error: 0.0299 - learning_rate: 1.0000e-03
Epoch 3/100
2344/2344 - 14s - 6ms/step - loss: 0.0011 - root_mean_squared_error: 0.0336 - val_loss: 5.9441e-04 - val_root_mean_squared_error: 0.0244 - learning_rate: 1.0000e-03
Epoch 4/100
2344/2344 - 14s - 6ms/step - loss: 9.9634e-04 - root_mean_squared_error: 0.0316 - val_loss: 5.5774e-04 - val_root_mean_squared_error: 0.0236 - learning_rate: 1.0000e-03
Epoch 5/100
2344/2344 - 14s - 6ms/step - loss: 8.8912e-04 - root_mean_squared_error: 0.0298 - val_loss: 4.8979e-04 - val_root_mean_squared_error: 0.0221 - learning_rate: 1.0000e-03
Epoch 6/100
2344/2344 - 14s - 6ms/step - loss: 8.1714e-04 - root_mean_squared_error: 0.0286 - val_loss: 9.0093e-04 - va

In [7]:
# import time
# from sklearn.model_selection import KFold
# from sklearn.metrics import mean_squared_error

# FOLDS = 5
# kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)


# test_predictions = np.zeros(len(X_test))
# oof_predictions = np.zeros(len(X)) 

# for fold, (train_idx, valid_idx) in enumerate(kf.split(X, y)):
    
#     print(f"\n{'#'*28}")
#     print(f"{'#'*10} Fold {fold+1} {'#'*10}")
#     print(f"{'#'*28}")

#     # TRAIN
#     X_train = X.loc[train_idx,FEATURES].copy()
#     y_train = np.log1p( y.loc[train_idx,TARGET] )

#     # EXTRA DATA
#     # for k in range(4):
#     #     X_train = pd.concat([X_train,orig[FEATURES]],axis=0)
#     #     y_train = pd.concat([y_train,np.log1p( orig[TARGET] )],axis=0)

#     # VALID
#     X_valid = X.loc[valid_idx,FEATURES].copy()
#     y_valid = np.log1p( y.loc[valid_idx,TARGET] )

#     # TEST
#     X_test_data = X_test[FEATURES].copy()

#     # NORMALIZE FOR NN
#     print("Normalizing...", end='')
#     norm_cols = [c for c in FEATURES if c not in []]
#     means = X_train[norm_cols].mean()
#     stds = X_train[norm_cols].std()
#     stds = stds.replace(0, 1)
#     X_train[norm_cols] = (X_train[norm_cols] - means) / stds
#     X_valid[norm_cols] = (X_valid[norm_cols] - means) / stds
#     X_test_data[norm_cols] = (X_test_data[norm_cols] - means) / stds
#     print("done")
    
#     start = time.time()

#     K.clear_session()
#     model = build_model( X_train.shape[1] )
#     model.compile(optimizer=tf.keras.optimizers.Adam(0.001), 
#                     loss="mse", 
#                     metrics=[tf.keras.metrics.RootMeanSquaredError()],
#                  )
#     model.fit(X_train, y_train, 
#               validation_data = (X_valid, y_valid),
#               callbacks = make_callbacks(),
#               batch_size=256, epochs=EPOCHS, verbose=2)

#     oof_predictions[valid_idx] = model.predict(X_valid,batch_size=512,verbose=2).flatten()
#     test_predictions += model.predict(X_test_data,batch_size=512,verbose=2).flatten()

#     rmse = np.sqrt(mean_squared_error(y_valid, oof_predictions[valid_idx]))
#     print(f"Fold {fold+1} RMSE: {rmse:.4f}")
#     print(f"Feature engineering & training time: {time.time() - start:.1f} sec")

# test_predictions /= FOLDS

In [14]:
full_rmse = np.sqrt(mean_squared_error(np.log1p(y[TARGET]), oof_predictions))
print(f"Overall CV RMSE: {full_rmse:.5f}")
# np.save(f"oof_v{VER}",oof_predictions)

Overall CV RMSE: 2.63365


In [17]:
test_predictions

array([3.3510078 , 4.68666677, 4.45688219, ..., 4.30832596, 5.12852554,
       4.37116652])

In [18]:
mn = y.Calories.min()
mx = y.Calories.max()
X_test['Calories'] = np.clip( np.expm1( test_predictions ),mn,mx )
test_predictions

array([3.3510078 , 4.68666677, 4.45688219, ..., 4.30832596, 5.12852554,
       4.37116652])

## Submission

In [19]:
import os 


mn = y.Calories.min()
mx = y.Calories.max()

submission = pd.read_csv('../../dataset/sample_submission.csv')
test_predictions = np.expm1(test_predictions)

i = 1 
while os.path.exists(f"../predictions/submissions/nn_submission_{i}.csv"):
    i+=1

submission["Calories"] = test_predictions

csv_filename = f"../predictions/submissions/nn_submission_{i}.csv"
submission.to_csv(csv_filename, index=False)
print(f"Fichier CSV enregistré : {csv_filename}")

npy_filename = f"../predictions/oof/nn_oof_predictions_{i}.npy"
np.save(npy_filename, oof_predictions)
print(f"Fichier NumPy enregistré : {npy_filename}")

Fichier CSV enregistré : ../predictions/submissions/nn_submission_1.csv
Fichier NumPy enregistré : ../predictions/oof/nn_oof_predictions_1.npy
