In [1]:
import numpy as np
import pandas as pd

np.random.seed(42)  # for reproducibility
num_samples = 1000

# Input features
temp_C = np.random.uniform(15, 40, num_samples)          # 15°C to 40°C
humidity_pct = np.random.uniform(20, 90, num_samples)    # 20% to 90%
pressure_hPa = np.random.uniform(950, 1050, num_samples) # 950 hPa to 1050 hPa
gas_res_ohm = np.random.uniform(100, 1000, num_samples)  # 100Ω to 1000Ω

# Gas concentrations (synthetic)
co_ppm = 0.5 * temp_C + 0.3 * humidity_pct + np.random.normal(0, 5, num_samples)
co2_ppm = 10 * np.log1p(temp_C) + 0.2 * pressure_hPa + np.random.normal(0, 10, num_samples)
so2_ppm = 0.05 * gas_res_ohm + np.random.normal(0, 2, num_samples)
no2_ppm = 0.1 * humidity_pct + 0.02 * pressure_hPa + np.random.normal(0, 3, num_samples)
ch4_ppm = 0.01 * gas_res_ohm + 0.5 * temp_C + np.random.normal(0, 1, num_samples)

# Create DataFrame
df = pd.DataFrame({
    'temp_C': temp_C,
    'humidity_pct': humidity_pct,
    'pressure_hPa': pressure_hPa,
    'gas_res_ohm': gas_res_ohm,
    'co_ppm': co_ppm,
    'co2_ppm': co2_ppm,
    'so2_ppm': so2_ppm,
    'no2_ppm': no2_ppm,
    'ch4_ppm': ch4_ppm
})

# Save to CSV in the same folder as notebook
df.to_csv('train.csv', index=False)
print("train.csv generated with 1000 samples!")


train.csv generated with 1000 samples!


In [10]:
import os
import logging
import warnings

# -----------------------------
# Suppress TF, absl, and warnings
# -----------------------------
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # hide TF info/warnings
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore", category=UserWarning)

# -----------------------------
# Imports
# -----------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# -----------------------------
# 1. Load dataset
# -----------------------------
df = pd.read_csv("train.csv")  # ensure this is in the same folder

feature_cols = ['temp_C','humidity_pct','pressure_hPa','gas_res_ohm']
target_cols = ['co_ppm','co2_ppm','so2_ppm','no2_ppm','ch4_ppm']

X = df[feature_cols].values.astype(np.float32)
y = df[target_cols].values.astype(np.float32)

# -----------------------------
# 2. Split dataset
# -----------------------------
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)

# -----------------------------
# 3. Scale inputs
# -----------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Save scaler params for MCU
np.savez("scaler_params.npz", mean=scaler.mean_, scale=scaler.scale_)
print("Scaler parameters saved.")

# -----------------------------
# 4. Build model
# -----------------------------
inputs = tf.keras.Input(shape=(X_train_scaled.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(len(target_cols), activation='linear')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Hide model.summary() output for clean console
model.summary(print_fn=lambda x: None)

# -----------------------------
# 5. Train model
# -----------------------------
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
    ]
)

# -----------------------------
# 6. Save Keras model
# -----------------------------
model.save("gas_model.keras")  # use native Keras format to avoid HDF5 warnings
print("Keras model saved: gas_model.keras")

# -----------------------------
# 7. Convert to TFLite (integer quantization)
# -----------------------------
def representative_data_gen():
    for i in range(min(1000, X_train_scaled.shape[0])):
        sample = X_train_scaled[i:i+1].astype(np.float32)
        yield [sample]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_model = converter.convert()
open("gas_model_int8.tflite","wb").write(tflite_model)
print("TFLite model saved: gas_model_int8.tflite")


Scaler parameters saved.


Epoch 1/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 11366.9678 - mae: 66.6682 - val_loss: 11207.8320 - val_mae: 66.3985
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 11072.6533 - mae: 65.5728 - val_loss: 10750.3525 - val_mae: 64.4282
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 10425.9082 - mae: 62.4267 - val_loss: 9810.2188 - val_mae: 59.3825
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 9228.3955 - mae: 55.6372 - val_loss: 8258.3994 - val_mae: 50.0990
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 7484.8228 - mae: 46.2482 - val_loss: 6304.8994 - val_mae: 42.2028
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5464.0137 - mae: 39.4032 - val_loss: 4215.5918 - val_mae: 35.4259
Epoch 7/100
[1m27/27[0m [32m━━━━━━━━━━

In [13]:
import numpy as np
import tensorflow as tf

# Load TFLite model
interpreter = tf.lite.Interpreter(model_path="gas_model_int8.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load scaler parameters
scaler_params = np.load("scaler_params.npz")
mean = scaler_params["mean"]
scale = scaler_params["scale"]

# Sample inputs
sample_inputs = np.array([
    [25.0, 50.0, 1000.0, 500.0],
    [26.0, 45.0, 1005.0, 520.0],
    [24.5, 55.0, 995.0, 510.0],
    [27.0, 60.0, 1002.0, 530.0],
    [23.0, 40.0, 998.0, 480.0],
    [28.0, 65.0, 1008.0, 540.0],
    [22.0, 50.0, 1001.0, 495.0],
    [29.0, 55.0, 1003.0, 550.0],
    [21.0, 45.0, 997.0, 470.0],
    [30.0, 60.0, 1010.0, 560.0]
], dtype=np.float32)

# Run inference for each sample
input_scale, input_zero_point = input_details[0]["quantization"]
output_scale, output_zero_point = output_details[0]["quantization"]

for i, sample_raw in enumerate(sample_inputs):
    # Scale
    sample_scaled = (sample_raw - mean) / scale
    # Quantize
    sample_int8 = (sample_scaled / input_scale + input_zero_point).astype(np.int8)
    sample_int8 = sample_int8.reshape(1, -1)  # batch size 1

    # Set input and invoke
    interpreter.set_tensor(input_details[0]["index"], sample_int8)
    interpreter.invoke()

    # Get output and dequantize
    output_int8 = interpreter.get_tensor(output_details[0]["index"])
    output_float = (output_int8.astype(np.float32) - output_zero_point) * output_scale

    print(f"Sample {i+1}: Predicted gas values: {output_float[0]}")


Sample 1: Predicted gas values: [ 27.195318 228.64212   25.18085   24.173615  17.122978]
Sample 2: Predicted gas values: [ 26.188084 230.65659   26.188084  24.173615  17.122978]
Sample 3: Predicted gas values: [ 28.20255  227.63487   25.18085   25.18085   16.115744]
Sample 4: Predicted gas values: [ 31.224253 229.64935   26.188084  26.188084  18.13021 ]
Sample 5: Predicted gas values: [ 23.166382 227.63487   24.173615  23.166382  15.10851 ]
Sample 6: Predicted gas values: [ 33.23872  230.65659   27.195318  26.188084  19.137445]
Sample 7: Predicted gas values: [ 26.188084 227.63487   24.173615  24.173615  15.10851 ]
Sample 8: Predicted gas values: [ 31.224253 230.65659   27.195318  25.18085   19.137445]
Sample 9: Predicted gas values: [ 24.173615 226.62764   23.166382  23.166382  15.10851 ]
Sample 10: Predicted gas values: [ 33.23872  232.67105   28.20255   26.188084  20.14468 ]


In [15]:
import pathlib

tflite_model_file = "gas_model_int8.tflite"
c_array_file = "gas_model_int8.cc"

tflite_model = pathlib.Path(tflite_model_file).read_bytes()
with open(c_array_file, "w") as f:
    f.write("const unsigned char g_gas_model[] = {")
    for i, byte in enumerate(tflite_model):
        if i % 12 == 0:
            f.write("\n  ")
        f.write(f"0x{byte:02x}, ")
    f.write("\n};\n")
    f.write(f"const int g_gas_model_len = {len(tflite_model)};\n")

print(f"C array file generated successfully: {c_array_file}")
print(f"File size: {len(tflite_model)} bytes")


C array file generated successfully: gas_model_int8.cc
File size: 10536 bytes
