# AI Training
## Edge model training

In [53]:
!python3 -m pip install tensorflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler


python(16955) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.




In [25]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Input, Model, layers
from sklearn.model_selection import train_test_split

# Load and preprocess data
RAIN_THRESHHOLD = 0.5
data = pd.read_csv('weather_data.csv', low_memory=False)
data = data.sample(frac=1, random_state=2).reset_index(drop=True)
data = data.replace('', float('nan')).dropna()
data = data.replace(' ', float('nan'))
data = data.dropna(subset=['wetb', 'vappr', 'rhum', 'vis'])
X = data.drop(columns=['rain','date'])
y = data['rain']
print(data.isna().sum())

# Ensure all data is numeric
X = X.astype(float)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape y to (num_samples, 1) for regression
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

# Define the model
inputs = Input(shape=(X_train.shape[1],))
x = layers.Dense(64, activation='relu', name='e1')(inputs)
x_edge = layers.Dense(32, activation='relu', name='e2')(x)
edge_outputs = layers.Dense(1, activation='linear', name='edge_output')(x_edge)
x = layers.Dense(64, activation='relu', name='controlmodel')(x_edge)
x = layers.Dense(128, activation='relu', name='l1')(x)
x = layers.Dense(256, activation='relu', name='l2')(x)
x = layers.Dense(128, activation='relu', name='l3')(x)
x = layers.Dense(64, activation='relu', name='l4')(x)
control_outputs = layers.Dense(1, activation='linear', name='control_output')(x)

# Create and compile the combined model
combined_model = Model(inputs=inputs, outputs=[edge_outputs, control_outputs])
combined_model.compile(optimizer='adam',
                       loss={'edge_output': 'mean_squared_error',
                             'control_output': 'mean_squared_error'},
                       metrics={'edge_output': 'mae',
                                'control_output': 'mae'})

# Train the combined model
combined_model.fit(X_train, [y_train, y_train], 
                   epochs=20, batch_size=32, 
                   validation_data=(X_test, [y_test, y_test]))

# Define the edge_model
edge_model = Model(inputs=inputs, outputs=[combined_model.get_layer('e2').output, edge_outputs])

# x§This needs training 
# Set last as untrainable, then train last ones, this is not needed for contorl model becasue its last output is trained when entire model 
# gets trained
for layer in edge_model.layers:
    layer.trainable = False

edge_model.get_layer('edge_output').trainable = True

edge_model.compile(optimizer='adam',
                   loss={'e2': None, 'edge_output': 'mean_squared_error'},  # Ignore loss for 'e2', we only care about the output of the inference
                   metrics={'edge_output': 'mae'})

history = edge_model.fit(X_train, [y_train, y_train], 
               epochs=10, batch_size=32, 
               validation_data=(X_test, [y_test, y_test]))

# Define the control_model
control_model_input = Input(shape=(32,))  # Shape of the intermediate layer output
z = combined_model.get_layer('controlmodel')(control_model_input)
z = combined_model.get_layer('l1')(z)
z = combined_model.get_layer('l2')(z)
z = combined_model.get_layer('l3')(z)
z = combined_model.get_layer('l4')(z)
control_model_outputs = combined_model.get_layer('control_output')(z)
control_model = Model(inputs=control_model_input, outputs=control_model_outputs)

# Inspect the models
edge_model.summary()
control_model.summary()


date     0
ind      0
rain     0
ind1     0
temp     0
ind2     0
wetb     0
dewpt    0
vappr    0
rhum     0
msl      0
ind3     0
wdsp     0
ind4     0
wddir    0
ww       0
w        0
sun      0
vis      0
clht     0
clamt    0
dtype: int64
Epoch 1/20
[1m3184/3184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - control_output_loss: 1437.9135 - control_output_mae: 6.2386 - edge_output_loss: 34606.1289 - edge_output_mae: 35.6869 - loss: 36044.0508 - val_control_output_loss: 0.3612 - val_control_output_mae: 0.3811 - val_edge_output_loss: 7.4296 - val_edge_output_mae: 2.2446 - val_loss: 7.7915
Epoch 2/20
[1m3184/3184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - control_output_loss: 2.2256 - control_output_mae: 0.8053 - edge_output_loss: 748.0011 - edge_output_mae: 10.0709 - loss: 750.2266 - val_control_output_loss: 0.3366 - val_control_output_mae: 0.3604 - val_edge_output_loss: 7.0296 - val_edge_output_mae: 2.0016 - val_loss: 7.3665
Epoch 3/20
[1m

In [15]:

results = combined_model.evaluate(X_test, [y_test, y_test])
print("Test Loss:", results[0])
print("Edge Output - MAE:", results[1])
print("Edge Output - MSE - How much its off by on average:", results[2])
print("Control Output - MAE:", results[3])
print("Control Output - MSE:", results[4])

[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 479us/step - control_output_loss: 0.2200 - control_output_mae: 0.1751 - edge_output_loss: 3.1340 - edge_output_mae: 1.5192 - loss: 3.3541
Test Loss: 3.3811850547790527
Edge Output - MAE: 3.1479437351226807
Edge Output - MSE - How much its off by on average: 0.2329651266336441
Control Output - MAE: 0.1760137677192688
Control Output - MSE: 1.51903235912323


In [21]:

edge_model.export("expt")
converter = tf.lite.TFLiteConverter.from_saved_model('./expt')
tflite_model = converter.convert()
with open('../deployment/models/edge_model.tflite', 'wb') as f:
  f.write(tflite_model)

edge_model.save('../deployment/models/edge_model.keras')
    
control_model.save('../deployment/models/control_model.keras')


INFO:tensorflow:Assets written to: expt/assets


INFO:tensorflow:Assets written to: expt/assets


Saved artifact at 'expt'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 19), dtype=tf.float32, name='keras_tensor_54')
Output Type:
  List[TensorSpec(shape=(None, 32), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)]
Captures:
  4531610320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4531613584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4531617424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4531613392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4531616272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4531615888: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1741185165.733189   73505 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1741185165.735712   73505 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-03-05 14:32:45.738432: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: ./expt
2025-03-05 14:32:45.738945: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-03-05 14:32:45.738952: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: ./expt
2025-03-05 14:32:45.749383: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-03-05 14:32:45.825329: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: ./expt
2025-03-05 14:32:45.830744: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 92315 microseconds.


In [24]:
# Check combined model's training loss and validation loss
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

print("Training Loss:", training_loss)
print("Validation Loss:", validation_loss)

raw = [2, 0, 13.4, 0, 13.4, 13.4, 15.4, 100, 1022.1, 2, 8, 2, 220, 50, 64, 0.0, 4000, 3, 8]
datapoint = np.array([raw], dtype=np.float32)

# Predict using the combined model
output_data = combined_model.predict(datapoint)

# output_data will be a list of two arrays, one for each output (edge_output, control_output)
edge_output = output_data[0][0]  # First output: edge_output (this is the first array in the list)
control_output = output_data[1][0]  # Second output: control_output (this is the second array in the list)

print("Edge Output:", edge_output)  # This should give you the edge prediction
print("Control Output:", control_output)  # This should give you the control prediction


Training Loss: [0.28839436173439026, 0.2710084021091461, 0.27692022919654846, 0.2741170823574066, 0.28040051460266113, 0.28103864192962646, 0.284660667181015, 0.2706926465034485, 0.2759763300418854, 0.28502851724624634]
Validation Loss: [0.21689669787883759, 0.5002687573432922, 0.2974936366081238, 0.4211593270301819, 1.8763623237609863, 1.7760847806930542, 0.32529234886169434, 0.3460192382335663, 1.0404915809631348, 0.2263903170824051]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Edge Output: [0.2518861]
Control Output: [0.14281805]


# On the AI Edge Server

In [8]:
from ai_edge_litert.interpreter import Interpreter
interpreter = Interpreter(model_path='../deployment/models/edge_model.tflite')
signatures = interpreter.get_signature_list()
interpreter.allocate_tensors()

# FEED IN THE INPUT DATA
input_details = interpreter.get_input_details()
input_data = np.array([[0,0,8.7,0,8.6,8.5,11.1,99,1003.1,2,12,2,350,61,66,0.0,15000,45,8]], dtype=np.float32)
# Set input tensor
input_index = input_details[0]['index']
interpreter.set_tensor(input_index, input_data)
# GET THE OUTPUT DATA
interpreter.invoke()
output_details = interpreter.get_output_details()
rain_data = interpreter.get_tensor(output_details[0]['index'])
node_data = interpreter.get_tensor(output_details[1]['index'])
node_data = node_data[0] # Send this to control
prob_rain = rain_data[0]


# Determine if it's raining
RAIN_THRESHOLD = 0.5
prob_rain
node_data

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


array([   0.      ,    0.      , 1736.833   ,    0.      ,    0.      ,
        122.79834 ,    0.      ,    0.      ,    0.      ,  498.57092 ,
        308.47418 ,    0.      ,    0.      ,    0.      , 2261.8982  ,
       1048.9276  ,    0.      ,    0.      ,  886.5067  ,    0.      ,
          0.      , 1980.9333  ,  846.6481  ,    0.      , 1223.5721  ,
          0.      ,   91.276794, 1690.1853  , 1646.5259  ,  399.7732  ,
          0.      ,  788.00134 ], dtype=float32)

# Control Model
## On control model

In [78]:
import numpy as np
import tensorflow as tf
control_model = tf.keras.models.load_model('../deployment/models/control_model.keras')

## Vars that need to be defined for the function 
# Recieve node_data, data_result
BATCH_SIZE = 1 
X_data = []  
Y_data = []  
data_result = 0.4
data_result = np.array([data_result])

edge_model_result = node_data
edge_model_result = np.reshape(edge_model_result, (-1, 32)) 

control_predictions = control_model.predict(edge_model_result)
print("Control predictions:", control_predictions)

# IF data result exists, add it and intermediate layer to batches
if data_result.size > 0: 
    # Add data_result to Y and beginning nodes to X
    X_data.append(edge_model_result) 
    Y_data.append(data_result) 
    if len(X_data) >= BATCH_SIZE:
        numpy_X_data = np.vstack(X_data)
        numpy_Y_data = np.vstack(Y_data)  

        # Clear lists for the next batch
        X_data.clear()
        Y_data.clear()
        
        dataset = tf.data.Dataset.from_tensor_slices((numpy_X_data, numpy_Y_data))
        dataset = dataset.batch(BATCH_SIZE)
        # Compile and train the control_model
        control_model.compile(optimizer='adam',
                              loss='mean_squared_error',
                              metrics=['mae'])
        
        print("Training Model")
        # Below is how models are usually trained, we need to step down from the high level api of fit() and get slightly lower down in
        # tensorflow mush
        # history = control_model.fit(numpy_X_data, numpy_Y_data,
        #                             epochs=1, batch_size=BATCH_SIZE)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

        for epoch in range(5):
            with tf.GradientTape() as tape: # This will track computations performed on tensors inside the block 
                for batch_X, batch_Y in dataset:
                    predictions = control_model(batch_X, training=True) # This is the forward pass 
                    loss = tf.keras.losses.MeanSquaredError()(batch_Y, predictions)
                gradients = tape.gradient(loss, control_model.trainable_variables) # gradients get calculated via calling tape.gradient 
                optimizer.apply_gradients(zip(gradients, control_model.trainable_variables))
            print(f"Epoch {epoch+1}, Loss: {loss.numpy()}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
Control predictions: [[-0.01699629]]
Training Model
Epoch 1, Loss: 0.17388591170310974
Epoch 2, Loss: 0.12094246596097946
Epoch 3, Loss: 0.012411841191351414
Epoch 4, Loss: 0.011553991585969925
Epoch 5, Loss: 0.010628522373735905


In [7]:
!pip show tensorflow

Name: tensorflow
Version: 2.18.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, requests, setuptools, six, tensorboard, termcolor, typing-extensions, wrapt
Required-by: 


## Split Learning on Edge

In [107]:
# Load edge model
edge_learning_model = tf.keras.models.load_model("../deployment/models/edge_model.keras")

# Reinitialize the optimizer
optimizer = tf.keras.optimizers.Adam()
edge_learning_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

for layer in edge_learning_model.layers:
    layer.trainable=True
edge_learning_model.get_layer("edge_output").trainable = False
# Apply the received gradients
grad_updates = [(gradients[10], edge_learning_model.trainable_variables[-2])]
optimizer.apply_gradients(grad_updates)

<Variable path=adam/iteration, shape=(), dtype=int64, value=1>

In [106]:
control_model.summary()
edge_model.summary()