# Neural Networks with Keras

This notebook mirrors the PyTorch notebook using `tf.keras`: an MLP trained on the two-moons dataset with `model.fit`, plots of common activation functions, a comparison of loss functions, and a learning-rate sweep.

In [None]:
    # If needed, install: !pip install tensorflow matplotlib scikit-learn    import tensorflow as tf    from tensorflow import keras    from tensorflow.keras import layers    import numpy as np    import matplotlib.pyplot as plt    from sklearn.datasets import make_moons    from sklearn.model_selection import train_test_split    from sklearn.preprocessing import StandardScaler    tf.keras.utils.set_random_seed(42)    

## Build a small ANN on moons

In [None]:
    # Build and train a small MLP classifier on the two-moons toy dataset.
    #
    # Names left in the notebook namespace for later cells:
    #   X, y                            - full standardized features and labels
    #   X_train, X_val, y_train, y_val  - 80/20 split (random_state=0)
    #   model, history                  - trained Keras model and its fit history

    # 800 noisy two-moons points; labels are the integers 0/1.
    X_raw, y = make_moons(n_samples=800, noise=0.2, random_state=0)

    # Split BEFORE scaling. Fitting the scaler on the full dataset would let
    # validation-set statistics leak into preprocessing and bias the reported
    # validation accuracy.
    X_train_raw, X_val_raw, y_train, y_val = train_test_split(
        X_raw, y, test_size=0.2, random_state=0
    )

    # Standardization statistics come from the training split only and are then
    # applied unchanged to every other array.
    scaler = StandardScaler().fit(X_train_raw)
    X_train = scaler.transform(X_train_raw)
    X_val = scaler.transform(X_val_raw)
    X = scaler.transform(X_raw)  # kept so the name `X` is still available

    # Two hidden ReLU layers of 16 units and a 2-way softmax output.
    model = keras.Sequential([
        layers.Input(shape=(2,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(16, activation='relu'),
        layers.Dense(2, activation='softmax'),
    ])

    # The sparse loss takes the integer labels directly; the model already
    # outputs probabilities, so the loss is used in its default (non-logit) mode.
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=0.1, momentum=0.9),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    history = model.fit(
        X_train,
        y_train,
        batch_size=64,
        epochs=30,
        validation_data=(X_val, y_val),
        verbose=0,
    )
    print("Final val acc:", history.history['val_accuracy'][-1])

## Activation Functions

In [None]:
    # Plot four common activation functions over the interval [-4, 4].

    # 200 evenly spaced inputs give visually smooth curves.
    xs = np.linspace(-4, 4, 200)

    # Closed-form NumPy versions, keyed by the label shown in the legend.
    # Insertion order fixes the plotting (and therefore colour) order.
    activations = {}
    activations["sigmoid"] = 1 / (1 + np.exp(-xs))
    activations["tanh"] = np.tanh(xs)
    activations["relu"] = np.maximum(0, xs)
    activations["leaky_relu"] = np.where(xs > 0, xs, 0.1 * xs)  # slope 0.1 for x <= 0

    # Draw every curve on a single set of axes.
    fig, ax = plt.subplots(figsize=(6, 4))
    for label, curve in activations.items():
        ax.plot(xs, curve, label=label)
    ax.legend()
    ax.grid(True)
    ax.set_xlabel('input')
    ax.set_ylabel('activation')
    ax.set_title('Activations')
    plt.show()

## Loss Functions (Keras API)

In [None]:
    # Compare cross-entropy with MSE on a tiny hand-written batch.

    # Two samples, three classes: raw scores plus integer class labels.
    logits = tf.constant([[2.0, 0.5, -1.0], [0.1, 1.0, 0.2]])
    targets = tf.constant([0, 2])

    # Cross-entropy computed directly from the raw scores (from_logits=True).
    sparse_ce = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    ce = sparse_ce(targets, logits).numpy()

    # MSE between the softmax probabilities and the one-hot encoded labels,
    # averaged over every entry of the (2, 3) difference.
    probs = tf.nn.softmax(logits)
    one_hot = tf.one_hot(targets, depth=3)
    residual = probs - one_hot
    mse = tf.reduce_mean(residual * residual).numpy()

    print("Cross Entropy:", ce)
    print("MSE on probs:", mse)

## Hyperparameter: learning rate sweep

In [None]:
    def run_lr(lr, epochs=40, batch_size=128, seed=42):
        """Train a one-hidden-layer MLP with plain SGD and return its final training loss.

        The model is fitted on the notebook-level `X_train` / `y_train` arrays.

        Args:
            lr: Learning rate passed to `keras.optimizers.SGD`.
            epochs: Number of training epochs (default 40).
            batch_size: Mini-batch size (default 128).
            seed: When not None, the global Python/NumPy/TensorFlow seeds are reset
                to this value before the model is built, so that runs with different
                learning rates are not additionally confounded by different random
                initial weights. Note this resets global RNG state as a side effect;
                pass None to leave the global RNG state untouched.

        Returns:
            The training loss recorded for the last epoch.
        """
        if seed is not None:
            # Same seeding helper the notebook's setup cell uses.
            tf.keras.utils.set_random_seed(seed)

        m = keras.Sequential([
            layers.Input(shape=(2,)),
            layers.Dense(8, activation='relu'),
            layers.Dense(2, activation='softmax'),
        ])
        m.compile(
            optimizer=keras.optimizers.SGD(learning_rate=lr),
            loss='sparse_categorical_crossentropy',
        )
        hist = m.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)
        return hist.history['loss'][-1]

    # Sweep three learning rates an order of magnitude apart.
    for lr in [0.001, 0.01, 0.1]:
        print(f"LR={lr}: final loss {run_lr(lr):.3f}")