In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time, sys

# Model hyperparameters
DENSE1_SIZE = 15
DENSE2_SIZE = 10
NUM_OF_EPOCHS = 50
BATCH_SIZE = 8

def load_and_preprocess_data():
    """Load and preprocess the Australian Credit dataset."""
    # Load dataset
    data = pd.read_csv("/content/australian.dat", delim_whitespace=True, header=None)

    # Split features and target
    X = data.iloc[:, :-1].astype(float)
    y = data.iloc[:, -1].astype(float)

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize features to the range [0, 1]
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def create_model(input_shape):
    """Create and compile the neural network model."""
    model = Sequential([
        Dense(DENSE1_SIZE, activation='relu', input_shape=input_shape),
        Dense(DENSE2_SIZE, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
    return model

def convert_to_tflite(model, X_test):
    """Convert Keras model to TFLite format without optimizations."""
    def representative_dataset():
        for i in range(len(X_test)):
            yield [np.array(X_test[i], dtype=np.float32).reshape(1, -1)]

    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    # Note: optimization line is commented out to avoid NaN issues on ESP32
    # converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    return converter.convert()

def evaluate_tflite_model(tflite_model, X_test, y_test):
    """Evaluate the TFLite model accuracy."""
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    tflite_predictions = []
    for i in range(len(X_test)):
        input_data = np.array(X_test[i], dtype=np.float32).reshape(1, -1)
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        tflite_predictions.append(int(output_data[0] > 0.5))

    tflite_accuracy = np.mean(np.array(tflite_predictions) == y_test)
    return tflite_accuracy

def generate_c_header(hex_data, var_name):
    """Generate C header file content from TFLite model."""
    c_str = f'#ifndef {var_name.upper()}_H\n#define {var_name.upper()}_H\n\n'
    c_str += "/*\n Auto-generated model data.\n"
    localtime = time.asctime(time.localtime(time.time()))
    c_str += f" Generated on: {localtime}\n*/\n\n"

    c_str += f'const unsigned int {var_name}_len = {len(hex_data)};\n'
    c_str += f'alignas(8) const unsigned char {var_name}[] = {{\n'

    hex_array = []
    for i, val in enumerate(hex_data):
        hex_str = format(val, '#04x')
        if (i + 1) < len(hex_data):
            hex_str += ','
        if (i + 1) % 12 == 0:
            hex_str += '\n'
        hex_array.append(hex_str)

    c_str += ''.join(hex_array) + '\n};\n\n'
    c_str += f'#endif // {var_name.upper()}_H\n'
    return c_str

def main():
    # Load and preprocess data
    X_train, X_test, y_train, y_test = load_and_preprocess_data()

    # Create and train model
    model = create_model((X_train.shape[1],))
    history = model.fit(X_train, y_train,
                       epochs=NUM_OF_EPOCHS,
                       batch_size=BATCH_SIZE,
                       validation_split=0.2)

    # Evaluate original model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Accuracy of original model: {accuracy}")

    # Convert to TFLite
    tflite_model = convert_to_tflite(model, X_test)

    # Save TFLite model
    with open("AustralianCreditModel.tflite", "wb") as f:
        f.write(tflite_model)

    # Evaluate TFLite model
    tflite_accuracy = evaluate_tflite_model(tflite_model, X_test, y_test)
    print(f"Test Accuracy of TFLite model: {tflite_accuracy}")

    # Generate and save C header file
    with open("australian_credit_model_esp32.h", "w") as f:
        f.write(generate_c_header(tflite_model, "australian_credit_model_esp32"))

if __name__ == "__main__":
    main()

  data = pd.read_csv("/content/australian.dat", delim_whitespace=True, header=None)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.4606 - loss: 0.7003 - val_accuracy: 0.6126 - val_loss: 0.6629
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6136 - loss: 0.6610 - val_accuracy: 0.7477 - val_loss: 0.6278
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7144 - loss: 0.6406 - val_accuracy: 0.7748 - val_loss: 0.5929
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8041 - loss: 0.5935 - val_accuracy: 0.8559 - val_loss: 0.5395
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8696 - loss: 0.5200 - val_accuracy: 0.8559 - val_loss: 0.4873
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8573 - loss: 0.4719 - val_accuracy: 0.8559 - val_loss: 0.4428
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━

  tflite_predictions.append(int(output_data[0] > 0.5))
