In [3]:
pip install numpy pandas scikit-learn tensorflow




In [4]:
import numpy as np
import pandas as pd

np.random.seed(42)

def generate_sample(n=1000):
    temps = np.random.uniform(18, 32, size=n)       # 18–32 °C
    hums  = np.random.uniform(30, 80, size=n)       # 30–80 %
    co2   = np.random.uniform(400, 2000, size=n)    # 400–2000 ppm
    airflow = np.random.uniform(0.0, 1.0, size=n)   # normalized 0–1

    labels = []
    for t, h, c, a in zip(temps, hums, co2, airflow):
        if (c < 800 and 0.7 <= a <= 1.0 and 22 <= t <= 26 and 40 <= h <= 60):
            labels.append(2)  # Good
        elif (800 <= c <= 1200 and 0.4 <= a < 0.7):
            labels.append(1)  # Moderate
        else:
            labels.append(0)  # Poor

    df = pd.DataFrame({
        "temp": temps,
        "humidity": hums,
        "co2": co2,
        "airflow": airflow,
        "kpi_label": labels
    })
    return df

if __name__ == "__main__":
    df = generate_sample(1500)
    print(df["kpi_label"].value_counts())
    df.to_csv("ventilation_kpi_synthetic.csv", index=False)
    print("Saved to ventilation_kpi_synthetic.csv")


kpi_label
0    1385
1     104
2      11
Name: count, dtype: int64
Saved to ventilation_kpi_synthetic.csv


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

df = pd.read_csv("ventilation_kpi_synthetic.csv")
print(df.head())
print(df["kpi_label"].value_counts())


        temp   humidity          co2   airflow  kpi_label
0  23.243562  55.954089  1476.324791  0.599299          0
1  31.310000  53.959094  1674.690236  0.513708          0
2  28.247915  31.282103   800.748638  0.288185          0
3  26.381219  47.062391  1399.798559  0.006464          0
4  20.184261  49.009781  1314.793573  0.496239          0
kpi_label
0    1385
1     104
2      11
Name: count, dtype: int64


In [6]:
X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values  # 0,1,2


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# Save mean and std to use later in ESP32 / C code
print("Means:", scaler.mean_)
print("Scales:", scaler.scale_)


Means: [2.50408589e+01 5.51482202e+01 1.18068923e+03 5.00866032e-01]
Scales: [4.12939513e+00 1.44739619e+01 4.52279413e+02 2.88622975e-01]


In [9]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [10]:
num_features = 4
num_classes = 3

model = keras.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(16, activation="relu"),
    layers.Dense(8, activation="relu"),
    layers.Dense(num_classes, activation="softmax")
])

model.summary()


In [11]:
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)


In [12]:
history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
    verbose=1
)


Epoch 1/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.0162 - loss: 1.3146 - val_accuracy: 0.1292 - val_loss: 1.1528
Epoch 2/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.2821 - loss: 1.0940 - val_accuracy: 0.7750 - val_loss: 0.9514
Epoch 3/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.8801 - loss: 0.8902 - val_accuracy: 0.8917 - val_loss: 0.7831
Epoch 4/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.9179 - loss: 0.7218 - val_accuracy: 0.8917 - val_loss: 0.6589
Epoch 5/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.9325 - loss: 0.5695 - val_accuracy: 0.8917 - val_loss: 0.5740
Epoch 6/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9238 - loss: 0.4898 - val_accuracy: 0.8917 - val_loss: 0.5133
Epoch 7/30
[1m30/30[0m [32m━━━━

In [13]:
test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test accuracy: {test_acc:.3f}")


Test accuracy: 0.923


In [14]:
model = keras.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(8, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(num_classes, activation="softmax")
])


In [15]:
model.save("vent_kpi_ann.h5")




In [16]:
import tensorflow as tf

model = tf.keras.models.load_model("vent_kpi_ann.h5")

# Standard float model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("vent_kpi_ann_float.tflite", "wb") as f:
    f.write(tflite_model)

print("Saved vent_kpi_ann_float.tflite")




Saved artifact at '/tmp/tmpxmt1fghm'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 4), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  134169238473232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238474960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238474576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238478608: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238476496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238480336: TensorSpec(shape=(), dtype=tf.resource, name=None)
Saved vent_kpi_ann_float.tflite


In [17]:
import numpy as np

# Use training data sample for calibration:
import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("ventilation_kpi_synthetic.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values

# Use same scaler:
# Ideally reuse the saved mean/scale from training
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

def rep_data():
    for i in range(100):
        yield [X_scaled[i].astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = rep_data
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_quant = converter.convert()
with open("vent_kpi_ann_int8.tflite", "wb") as f:
    f.write(tflite_quant)

print("Saved vent_kpi_ann_int8.tflite")


Saved artifact at '/tmp/tmp80l_bh46'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 4), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  134169238473232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238474960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238474576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238478608: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238476496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134169238480336: TensorSpec(shape=(), dtype=tf.resource, name=None)
Saved vent_kpi_ann_int8.tflite




In [18]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load sample
df = pd.read_csv("ventilation_kpi_synthetic.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values[:10]
y = df["kpi_label"].values[:10]

# Use same scaler (here refit; later you'll hardcode)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Load float TFLite
interpreter = tf.lite.Interpreter(model_path="vent_kpi_ann_float.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

for i in range(10):
    sample = X_scaled[i:i+1].astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output, axis=1)[0]
    print(f"True: {y[i]}, Pred: {pred_class}, Probs: {output}")


True: 0, Pred: 2, Probs: [[0.2417114  0.17461458 0.5836741 ]]
True: 0, Pred: 0, Probs: [[0.5520965  0.1833754  0.26452807]]
True: 0, Pred: 0, Probs: [[0.6739437  0.16681258 0.15924372]]
True: 0, Pred: 0, Probs: [[0.6106684  0.24756096 0.14177063]]
True: 0, Pred: 2, Probs: [[0.17347205 0.1141322  0.7123958 ]]
True: 0, Pred: 2, Probs: [[0.1851582  0.12217507 0.69266677]]
True: 0, Pred: 2, Probs: [[0.08656234 0.03928419 0.87415344]]
True: 0, Pred: 0, Probs: [[0.46408165 0.2514218  0.2844966 ]]
True: 0, Pred: 2, Probs: [[0.3500033  0.23874286 0.41125384]]
True: 0, Pred: 0, Probs: [[0.42958003 0.19706061 0.37335935]]


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [19]:
# After training, in train_ann_model.py
print("Means:", scaler.mean_)
print("Scales:", scaler.scale_)

# Optional: save scaler using joblib
import joblib
joblib.dump(scaler, "vent_kpi_scaler.pkl")


Means: [2.52819143e+01 5.31536761e+01 1.26294437e+03 4.47437785e-01]
Scales: [4.19519017e+00 9.37193782e+00 4.87982897e+02 2.82948308e-01]


['vent_kpi_scaler.pkl']

In [20]:
import joblib
scaler = joblib.load("vent_kpi_scaler.pkl")

df = pd.read_csv("ventilation_kpi_synthetic.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values[:100]  # use 100 samples now
y = df["kpi_label"].values[:100]

X_scaled = scaler.transform(X)


In [21]:
correct = 0
total = len(X_scaled)

for i in range(total):
    sample = X_scaled[i:i+1].astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output, axis=1)[0]

    if pred_class == y[i]:
        correct += 1

tflite_acc = correct / total
print(f"TFLite accuracy over {total} samples: {tflite_acc:.3f}")


TFLite accuracy over 100 samples: 0.320


In [22]:
import joblib
import pandas as pd
import numpy as np
import tensorflow as tf

# 1) Load data
df = pd.read_csv("ventilation_kpi_synthetic.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

# To make it fair, randomly choose 100 samples:
np.random.seed(42)
indices = np.random.choice(len(X), size=100, replace=False)
X_sample = X[indices]
y_sample = y[indices]

# 2) Load scaler used during training
scaler = joblib.load("vent_kpi_scaler.pkl")
X_scaled = scaler.transform(X_sample)

# 3) Evaluate original Keras model
keras_model = tf.keras.models.load_model("vent_kpi_ann.h5")
keras_probs = keras_model.predict(X_scaled)
keras_preds = np.argmax(keras_probs, axis=1)
keras_acc = np.mean(keras_preds == y_sample)
print(f"Keras accuracy on 100-sample subset: {keras_acc:.3f}")




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step
Keras accuracy on 100-sample subset: 0.320


In [23]:
print(df["kpi_label"].value_counts(normalize=True))


kpi_label
0    0.923333
1    0.069333
2    0.007333
Name: proportion, dtype: float64


In [24]:
import numpy as np
import pandas as pd

np.random.seed(42)

def generate_poor(n=500):
    temps = np.random.uniform(18, 32, size=n)
    hums  = np.random.uniform(30, 80, size=n)
    co2   = np.random.uniform(1200, 2000, size=n)      # high CO2
    airflow = np.random.uniform(0.0, 0.4, size=n)      # low airflow

    labels = np.zeros(n, dtype=int)  # 0 = Poor
    return temps, hums, co2, airflow, labels

def generate_moderate(n=500):
    temps = np.random.uniform(20, 30, size=n)
    hums  = np.random.uniform(35, 75, size=n)
    co2   = np.random.uniform(800, 1400, size=n)       # mid CO2
    airflow = np.random.uniform(0.3, 0.7, size=n)      # medium airflow

    labels = np.ones(n, dtype=int)   # 1 = Moderate
    return temps, hums, co2, airflow, labels

def generate_good(n=500):
    temps = np.random.uniform(22, 26, size=n)
    hums  = np.random.uniform(40, 60, size=n)
    co2   = np.random.uniform(400, 800, size=n)        # low CO2
    airflow = np.random.uniform(0.7, 1.0, size=n)      # high airflow

    labels = np.full(n, 2, dtype=int)  # 2 = Good
    return temps, hums, co2, airflow, labels

def make_balanced_dataset(n_per_class=500):
    t0, h0, c0, a0, y0 = generate_poor(n_per_class)
    t1, h1, c1, a1, y1 = generate_moderate(n_per_class)
    t2, h2, c2, a2, y2 = generate_good(n_per_class)

    temps = np.concatenate([t0, t1, t2])
    hums  = np.concatenate([h0, h1, h2])
    co2   = np.concatenate([c0, c1, c2])
    airflow = np.concatenate([a0, a1, a2])
    labels  = np.concatenate([y0, y1, y2])

    df = pd.DataFrame({
        "temp": temps,
        "humidity": hums,
        "co2": co2,
        "airflow": airflow,
        "kpi_label": labels
    })
    return df

if __name__ == "__main__":
    df = make_balanced_dataset(500)
    print(df["kpi_label"].value_counts(normalize=True))
    df.to_csv("ventilation_kpi_balanced.csv", index=False)
    print("Saved to ventilation_kpi_balanced.csv")


kpi_label
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64
Saved to ventilation_kpi_balanced.csv


In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 1) Load balanced dataset
df = pd.read_csv("ventilation_kpi_balanced.csv")
print(df["kpi_label"].value_counts(normalize=True))

X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

# 2) Train/Test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3) Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

joblib.dump(scaler, "vent_kpi_scaler.pkl")
print("Saved scaler to vent_kpi_scaler.pkl")

# 4) Model
num_features = 4
num_classes = 3

model = keras.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(8, activation="relu"),
    layers.Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

callback = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[callback],
    verbose=1
)

test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test accuracy: {test_acc:.3f}")

model.save("vent_kpi_ann.h5")
print("Saved model to vent_kpi_ann.h5")


kpi_label
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64
Saved scaler to vent_kpi_scaler.pkl
Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 54ms/step - accuracy: 0.3016 - loss: 1.2125 - val_accuracy: 0.4667 - val_loss: 1.0792
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5192 - loss: 1.0461 - val_accuracy: 0.7792 - val_loss: 0.9062
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8075 - loss: 0.8596 - val_accuracy: 0.7875 - val_loss: 0.6813
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.8056 - loss: 0.6593 - val_accuracy: 0.8625 - val_loss: 0.4911
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.8603 - loss: 0.4766 - val_accuracy: 0.9125 - val_loss: 0.3634
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m



Test accuracy: 0.973
Saved model to vent_kpi_ann.h5


In [26]:
import joblib
import pandas as pd
import numpy as np
import tensorflow as tf

df = pd.read_csv("ventilation_kpi_balanced.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

np.random.seed(42)
indices = np.random.choice(len(X), size=300, replace=False)
X_sample = X[indices]
y_sample = y[indices]

scaler = joblib.load("vent_kpi_scaler.pkl")
X_scaled = scaler.transform(X_sample)

# Keras model
keras_model = tf.keras.models.load_model("vent_kpi_ann.h5")
keras_probs = keras_model.predict(X_scaled)
keras_preds = np.argmax(keras_probs, axis=1)
keras_acc = np.mean(keras_preds == y_sample)
print(f"Keras accuracy on 300-sample subset: {keras_acc:.3f}")

# TFLite float model
interpreter = tf.lite.Interpreter(model_path="vent_kpi_ann_float.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input details:", input_details)
print("Output details:", output_details)


correct = 0
for i in range(len(X_scaled)):
    sample = X_scaled[i:i+1].astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output, axis=1)[0]
    if pred_class == y_sample[i]:
        correct += 1

tflite_acc = correct / len(X_scaled)
print(f"TFLite accuracy on 300-sample subset: {tflite_acc:.3f}")




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Keras accuracy on 300-sample subset: 0.967
Input details: [{'name': 'serving_default_input_layer_1:0', 'index': 0, 'shape': array([1, 4], dtype=int32), 'shape_signature': array([-1,  4], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
Output details: [{'name': 'StatefulPartitionedCall_1:0', 'index': 7, 'shape': array([1, 3], dtype=int32), 'shape_signature': array([-1,  3], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
TFLite accuracy on 300-sample subset: 0.470


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [27]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf

# 1) Load balanced data
df = pd.read_csv("ventilation_kpi_balanced.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

# pick a fixed subset
np.random.seed(42)
indices = np.random.choice(len(X), size=10, replace=False)
X_sample = X[indices]
y_sample = y[indices]

# 2) Load scaler (same used in training)
scaler = joblib.load("vent_kpi_scaler.pkl")
X_scaled = scaler.transform(X_sample)

# 3) Load Keras model
keras_model = tf.keras.models.load_model("vent_kpi_ann.h5")
keras_probs = keras_model.predict(X_scaled)
keras_preds = np.argmax(keras_probs, axis=1)

print("=== Keras predictions ===")
for i in range(10):
    print(f"True: {y_sample[i]}, Pred: {keras_preds[i]}, Probs: {keras_probs[i]}")

# 4) Load TFLite model (FLOAT version)
interpreter = tf.lite.Interpreter(model_path="vent_kpi_ann_float.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("\nInput details:", input_details)
print("Output details:", output_details)

print("\n=== TFLite predictions ===")
tflite_preds = []
for i in range(10):
    sample = X_scaled[i:i+1].astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output, axis=1)[0]
    tflite_preds.append(pred_class)
    print(f"True: {y_sample[i]}, Pred: {pred_class}, Probs: {output[0]}")

# 5) Quick accuracies on this subset
keras_acc = np.mean(keras_preds == y_sample)
tflite_acc = np.mean(np.array(tflite_preds) == y_sample)
print(f"\nKeras acc on 10-sample subset: {keras_acc:.3f}")
print(f"TFLite acc on 10-sample subset: {tflite_acc:.3f}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
=== Keras predictions ===
True: 2, Pred: 2, Probs: [4.8588108e-06 2.6566868e-03 9.9733847e-01]
True: 2, Pred: 2, Probs: [1.2085118e-05 3.8274087e-03 9.9616045e-01]
True: 0, Pred: 1, Probs: [0.36798218 0.6256703  0.00634748]
True: 0, Pred: 0, Probs: [0.9184234  0.07556049 0.00601604]
True: 0, Pred: 0, Probs: [0.9966864  0.00225001 0.00106363]
True: 1, Pred: 1, Probs: [0.00329261 0.94542724 0.05128022]
True: 2, Pred: 2, Probs: [1.0596546e-05 8.1666773e-03 9.9182278e-01]
True: 1, Pred: 1, Probs: [0.0028445 0.9779547 0.0192008]
True: 2, Pred: 2, Probs: [9.4617817e-06 5.5967807e-03 9.9439371e-01]
True: 0, Pred: 0, Probs: [0.98515815 0.01373827 0.00110361]

Input details: [{'name': 'serving_default_input_layer_1:0', 'index': 0, 'shape': array([1, 4], dtype=int32), 'shape_signature': array([-1,  4], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], d

    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [28]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 1) Load balanced dataset
df = pd.read_csv("ventilation_kpi_balanced.csv")
print("Class distribution:")
print(df["kpi_label"].value_counts(normalize=True))

X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

# 2) Train/Test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3) Standardize
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

joblib.dump(scaler, "vent_kpi_scaler.pkl")
print("Saved scaler to vent_kpi_scaler.pkl")

# 4) Define model
num_features = 4
num_classes = 3

model = keras.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(8, activation="relu"),
    layers.Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

callback = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[callback],
    verbose=1
)

test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Final Keras test accuracy: {test_acc:.3f}")

# 5) Save Keras model
model.save("vent_kpi_ann.h5")
print("Saved model to vent_kpi_ann.h5")

# 6) Convert THIS SAME MODEL instance to TFLite (float)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open("vent_kpi_ann_float.tflite", "wb") as f:
    f.write(tflite_model)

print("Saved vent_kpi_ann_float.tflite")


Class distribution:
kpi_label
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64
Saved scaler to vent_kpi_scaler.pkl
Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 53ms/step - accuracy: 0.3482 - loss: 1.0869 - val_accuracy: 0.6042 - val_loss: 0.9315
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.6187 - loss: 0.9098 - val_accuracy: 0.6958 - val_loss: 0.7516
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.6896 - loss: 0.7189 - val_accuracy: 0.7000 - val_loss: 0.5950
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.6899 - loss: 0.5829 - val_accuracy: 0.7125 - val_loss: 0.4902
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.7391 - loss: 0.4872 - val_accuracy: 0.7875 - val_loss: 0.4146
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Final Keras test accuracy: 0.957
Saved model to vent_kpi_ann.h5
Saved artifact at '/tmp/tmp3cl2y29r'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 4), dtype=tf.float32, name='keras_tensor_45')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  134168275264784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275259792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275258448: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275260752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275256912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275260368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275261904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134168275262480: TensorSpec(shape=(), dtype=tf.resource, name=None)
Saved vent_kpi_ann_float.tflite


In [30]:
print("Using TFLite model: vent_kpi_ann_float.tflite")
print("Using scaler: vent_kpi_scaler.pkl")


Using TFLite model: vent_kpi_ann_float.tflite
Using scaler: vent_kpi_scaler.pkl


In [31]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf

print("Using TFLite model: vent_kpi_ann_float.tflite")
print("Using scaler: vent_kpi_scaler.pkl")

# 1) Load data
df = pd.read_csv("ventilation_kpi_balanced.csv")
X = df[["temp", "humidity", "co2", "airflow"]].values
y = df["kpi_label"].values

# choose a subset for debugging
np.random.seed(42)
indices = np.random.choice(len(X), size=10, replace=False)
X_sample = X[indices]
y_sample = y[indices]

# 2) Load scaler
scaler = joblib.load("vent_kpi_scaler.pkl")
X_scaled = scaler.transform(X_sample)

# 3) Keras model
keras_model = tf.keras.models.load_model("vent_kpi_ann.h5")
keras_probs = keras_model.predict(X_scaled)
keras_preds = np.argmax(keras_probs, axis=1)

print("=== Keras predictions ===")
for i in range(10):
    print(f"True: {y_sample[i]}, Pred: {keras_preds[i]}, Probs: {keras_probs[i]}")

# 4) TFLite model (FLOAT)
interpreter = tf.lite.Interpreter(model_path="vent_kpi_ann_float.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("\nInput details:", input_details)
print("Output details:", output_details)

print("\n=== TFLite predictions ===")
tflite_preds = []
for i in range(10):
    sample = X_scaled[i:i+1].astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    pred_class = np.argmax(output, axis=1)[0]
    tflite_preds.append(pred_class)
    print(f"True: {y_sample[i]}, Pred: {pred_class}, Probs: {output[0]}")

keras_acc = np.mean(keras_preds == y_sample)
tflite_acc = np.mean(np.array(tflite_preds) == y_sample)
print(f"\nKeras acc on 10-sample subset: {keras_acc:.3f}")
print(f"TFLite acc on 10-sample subset: {tflite_acc:.3f}")




Using TFLite model: vent_kpi_ann_float.tflite
Using scaler: vent_kpi_scaler.pkl
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
=== Keras predictions ===
True: 2, Pred: 2, Probs: [2.8285987e-05 1.1964604e-02 9.8800713e-01]
True: 2, Pred: 2, Probs: [5.8625450e-05 1.6538963e-02 9.8340243e-01]
True: 0, Pred: 1, Probs: [0.3265195  0.65673494 0.01674562]
True: 0, Pred: 0, Probs: [0.91585314 0.07898689 0.00515999]
True: 0, Pred: 0, Probs: [9.8690325e-01 1.2804362e-02 2.9233139e-04]
True: 1, Pred: 1, Probs: [0.0206587 0.7586871 0.2206542]
True: 2, Pred: 2, Probs: [1.6046451e-04 4.0786415e-02 9.5905322e-01]
True: 1, Pred: 1, Probs: [0.00372432 0.93849725 0.05777847]
True: 2, Pred: 2, Probs: [1.5004676e-04 3.0441247e-02 9.6940875e-01]
True: 0, Pred: 0, Probs: [9.5589375e-01 4.3462984e-02 6.4328336e-04]

Input details: [{'name': 'serving_default_keras_tensor_45:0', 'index': 0, 'shape': array([1, 4], dtype=int32), 'shape_signature': array([-1,  4], dtype=int32), 'dtype': 

    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [32]:
from google.colab import files

files.download("vent_kpi_ann_float.tflite")
files.download("vent_kpi_scaler.pkl")
files.download("ventilation_kpi_balanced.csv")
files.download("vent_kpi_ann.h5")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive
