# AI Training
## Edge model training

In [1]:
!python3 -m pip install tensorflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler




In [120]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Input, Model, layers
from sklearn.model_selection import train_test_split

# Load and preprocess data
RAIN_THRESHHOLD = 0.5
data = pd.read_csv('weather_data.csv', low_memory=False)
data = data.sample(frac=1, random_state=2).reset_index(drop=True)
data = data.replace('', float('nan')).dropna()
data = data.replace(' ', float('nan'))
data = data.dropna(subset=['wetb', 'vappr', 'rhum', 'vis'])
X = data.drop(columns=['rain','date'])
y = data['rain']
print(data.isna().sum())

# Ensure all data is numeric
X = X.astype(float)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape y to (num_samples, 1) for regression
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

# Define the model
inputs = Input(shape=(X_train.shape[1],))
x = layers.Dense(64, activation='relu', name='e1')(inputs)
x_edge = layers.Dense(32, activation='relu', name='e2')(x)
edge_outputs = layers.Dense(1, activation='linear', name='edge_output')(x_edge)
x = layers.Dense(64, activation='relu', name='controlmodel')(x_edge)
x = layers.Dense(128, activation='relu', name='l1')(x)
x = layers.Dense(256, activation='relu', name='l2')(x)
x = layers.Dense(128, activation='relu', name='l3')(x)
x = layers.Dense(64, activation='relu', name='l4')(x)
control_outputs = layers.Dense(1, activation='linear', name='control_output')(x)

# Create and compile the combined model
combined_model = Model(inputs=inputs, outputs=[edge_outputs, control_outputs])
combined_model.compile(optimizer='adam',
                       loss={'edge_output': 'mean_squared_error',
                             'control_output': 'mean_squared_error'},
                       metrics={'edge_output': 'mae',
                                'control_output': 'mae'})

# Train the combined model
combined_model.fit(X_train, [y_train, y_train], 
                   epochs=20, batch_size=32, 
                   validation_data=(X_test, [y_test, y_test]))

# Define the edge_model
edge_model = Model(inputs=inputs, outputs=[combined_model.get_layer('e2').output, edge_outputs])

# x§This needs training 
# Set last as untrainable, then train last ones, this is not needed for contorl model becasue its last output is trained when entire model 
# gets trained
for layer in edge_model.layers:
    layer.trainable = False

edge_model.get_layer('edge_output').trainable= True

edge_model.compile(optimizer='adam',
                   loss={'e2': None, 'edge_output': 'mean_squared_error'},
                   metrics={'edge_output': 'mae'})

history = edge_model.fit(X_train, [y_train, y_train], 
               epochs=10, batch_size=32, 
               validation_data=(X_test, [y_test, y_test]))

# Define the control_model
control_model_input = Input(shape=(32,))  # Shape of the intermediate layer output
z = combined_model.get_layer('controlmodel')(control_model_input)
z = combined_model.get_layer('l1')(z)
z = combined_model.get_layer('l2')(z)
z = combined_model.get_layer('l3')(z)
z = combined_model.get_layer('l4')(z)
control_model_outputs = combined_model.get_layer('control_output')(z)
control_model = Model(inputs=control_model_input, outputs=control_model_outputs)

# Inspect the models
edge_model.summary()
control_model.summary()


date     0
ind      0
rain     0
ind1     0
temp     0
ind2     0
wetb     0
dewpt    0
vappr    0
rhum     0
msl      0
ind3     0
wdsp     0
ind4     0
wddir    0
ww       0
w        0
sun      0
vis      0
clht     0
clamt    0
dtype: int64
Epoch 1/20
[1m3184/3184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - control_output_loss: 2703.8560 - control_output_mae: 7.8365 - edge_output_loss: 46748.5000 - edge_output_mae: 45.3577 - loss: 49452.3828 - val_control_output_loss: 1.1386 - val_control_output_mae: 0.9208 - val_edge_output_loss: 16.6874 - val_edge_output_mae: 3.5229 - val_loss: 17.8266
Epoch 2/20
[1m3184/3184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - control_output_loss: 4.4766 - control_output_mae: 0.8798 - edge_output_loss: 521.4469 - edge_output_mae: 8.8528 - loss: 525.9230 - val_control_output_loss: 0.5292 - val_control_output_mae: 0.5534 - val_edge_output_loss: 1.7321 - val_edge_output_mae: 0.7892 - val_loss: 2.2616
Epoch 3/20
[

In [121]:

results = combined_model.evaluate(X_test, [y_test, y_test])
print("Test Loss:", results[0])
print("Edge Output - MAE:", results[1])
print("Edge Output - MSE - How much its off by on average:", results[2])
print("Control Output - MAE:", results[3])
print("Control Output - MSE:", results[4])

[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 520us/step - control_output_loss: 0.2220 - control_output_mae: 0.1973 - edge_output_loss: 1.1925 - edge_output_mae: 0.9446 - loss: 1.4145
Test Loss: 1.43659245967865
Edge Output - MAE: 1.2024098634719849
Edge Output - MSE - How much its off by on average: 0.2340036779642105
Control Output - MAE: 0.1968470811843872
Control Output - MSE: 0.9444083571434021


In [122]:

edge_model.export("expt")
converter = tf.lite.TFLiteConverter.from_saved_model('./expt')
tflite_model = converter.convert()
with open('../deployment/models/edge_model.tflite', 'wb') as f:
  f.write(tflite_model)

edge_model.save('../deployment/models/edge_model.keras')
    
control_model.save('../deployment/models/control_model.keras')


INFO:tensorflow:Assets written to: expt/assets


INFO:tensorflow:Assets written to: expt/assets


Saved artifact at 'expt'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 19), dtype=tf.float32, name='keras_tensor_559')
Output Type:
  List[TensorSpec(shape=(None, 32), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)]
Captures:
  6281939472: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6280865232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6280870032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6280869456: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6280867152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6280869840: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1742325378.279301 7326384 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1742325378.280193 7326384 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-03-18 19:16:18.283135: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: ./expt
2025-03-18 19:16:18.284457: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-03-18 19:16:18.284465: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: ./expt
2025-03-18 19:16:18.291681: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-03-18 19:16:18.326698: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: ./expt
2025-03-18 19:16:18.331200: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 48103 microseconds.


In [123]:
# Check combined model's training loss and validation loss
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

print("Training Loss:", training_loss)
print("Validation Loss:", validation_loss)

raw = [2, 0, 13.4, 0, 13.4, 13.4, 15.4, 100, 1022.1, 2, 8, 2, 220, 50, 64, 0.0, 4000, 3, 8]
datapoint = np.array([raw], dtype=np.float32)

# Predict using the combined model
output_data = combined_model.predict(datapoint)

# output_data will be a list of two arrays, one for each output (edge_output, control_output)
edge_output = output_data[0][0]  # First output: edge_output (this is the first array in the list)
control_output = output_data[1][0]  # Second output: control_output (this is the second array in the list)

print("Edge Output:", edge_output)  # This should give you the edge prediction
print("Control Output:", control_output)  # This should give you the control prediction


Training Loss: [5.789871692657471, 0.5210365653038025, 0.6209264993667603, 0.5918828845024109, 0.645732581615448, 0.6143204569816589, 0.5428948998451233, 0.651405930519104, 0.5996027588844299, 0.563533365726471]
Validation Loss: [0.3759338855743408, 0.6198548674583435, 0.42595940828323364, 0.6275033354759216, 0.3161601126194, 0.3311826288700104, 0.9518794417381287, 0.5015499591827393, 0.3200078308582306, 1.2025336027145386]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Edge Output: [0.99606246]
Control Output: [1.2060171]


# On the AI Edge Server

In [124]:
from ai_edge_litert.interpreter import Interpreter
interpreter = Interpreter(model_path='../deployment/models/edge_model.tflite')
signatures = interpreter.get_signature_list()
interpreter.allocate_tensors()

# FEED IN THE INPUT DATA
input_details = interpreter.get_input_details()
input_data = np.array([[0,0,8.7,0,8.6,8.5,11.1,99,1003.1,2,12,2,350,61,66,0.0,15000,45,8]], dtype=np.float32)
# Set input tensor
input_index = input_details[0]['index']
interpreter.set_tensor(input_index, input_data)
# GET THE OUTPUT DATA
interpreter.invoke()
output_details = interpreter.get_output_details()
rain_data = interpreter.get_tensor(output_details[0]['index'])
node_data = interpreter.get_tensor(output_details[1]['index'])
node_data = node_data[0] # Send this to control
prob_rain = rain_data[0]


# Determine if it's raining
RAIN_THRESHOLD = 0.5
prob_rain
node_data

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


array([ 370.9547 ,  103.4874 ,    0.     ,  644.8587 ,    0.     ,
          0.     ,    0.     ,    0.     ,    0.     ,    0.     ,
        904.62415,    0.     ,    0.     ,    0.     ,    0.     ,
          0.     ,  856.35223,    0.     ,    0.     ,    0.     ,
          0.     ,    0.     ,    0.     ,    0.     ,    0.     ,
        407.45276,    0.     ,    0.     ,    0.     ,    0.     ,
       1863.0437 ,    0.     ], dtype=float32)

# Control Model
## On control model

In [125]:
import numpy as np
import tensorflow as tf
control_model = tf.keras.models.load_model('../deployment/models/control_model.keras')

## Vars that need to be defined for the function 
# Recieve node_data, data_result
BATCH_SIZE = 1 
X_data = []  
Y_data = []  
data_result = 0.4
data_result = np.array([data_result])

edge_model_result = node_data
edge_model_result = np.reshape(edge_model_result, (-1, 32)) 

control_predictions = control_model.predict(edge_model_result)
print("Control predictions:", control_predictions)

# IF data result exists, add it and intermediate layer to batches
if data_result.size > 0: 
    # Add data_result to Y and beginning nodes to X
    X_data.append(edge_model_result) 
    Y_data.append(data_result) 
    if len(X_data) >= BATCH_SIZE:
        numpy_X_data = np.vstack(X_data)
        numpy_Y_data = np.vstack(Y_data)  

        # Clear lists for the next batch
        X_data.clear()
        Y_data.clear()
        
        dataset = tf.data.Dataset.from_tensor_slices((numpy_X_data, numpy_Y_data))
        dataset = dataset.batch(BATCH_SIZE)
        # Compile and train the control_model
        edge_model_result = tf.convert_to_tensor(edge_model_result, dtype=tf.float32)  # Convert to Tensor
        control_model.compile(optimizer='adam',
                              loss='mean_squared_error',
                              metrics=['mae'])

        #Get edge gradients
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(edge_model_result)  # Track edge model output
            control_predictions = control_model(edge_model_result, training=True)
            loss = tf.keras.losses.MeanSquaredError()(data_result, control_predictions)
        edge_output_grad = tape.gradient(loss, edge_model_result)
        
        print("Training Model")
        # Below is how models are usually trained, we need to step down from the high level api of fit() and get slightly lower down in
        # tensorflow mush
        # history = control_model.fit(numpy_X_data, numpy_Y_data,
        #                             epochs=1, batch_size=BATCH_SIZE)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

        for epoch in range(5):
            with tf.GradientTape() as tape: # This will track computations performed on tensors inside the block 
                for batch_X, batch_Y in dataset:
                    predictions = control_model(batch_X, training=True) # This is the forward pass 
                    loss = tf.keras.losses.MeanSquaredError()(batch_Y, predictions)
                control_gradients = tape.gradient(loss, control_model.trainable_variables) # gradients get calculated via calling tape.gradient 
                optimizer.apply_gradients(zip(control_gradients, control_model.trainable_variables))
            print(f"Epoch {epoch+1}, Loss: {loss.numpy()}")
            


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Control predictions: [[0.27650845]]
Training Model
Epoch 1, Loss: 0.015250164084136486
Epoch 2, Loss: 0.01060198713093996
Epoch 3, Loss: 0.007098429836332798
Epoch 4, Loss: 0.004537483677268028
Epoch 5, Loss: 0.0006160945049487054


In [126]:
!pip show tensorflow

Name: tensorflow
Version: 2.18.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, requests, setuptools, six, tensorboard, termcolor, typing-extensions, wrapt
Required-by: 


## Split Learning on Edge v1

In [127]:
# Load edge model
edge_learning_model = tf.keras.models.load_model("../deployment/models/edge_model.keras")

# Reinitialize the optimizer
optimizer = tf.keras.optimizers.Adam()
edge_learning_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

for layer in edge_learning_model.layers:
    layer.trainable=True
edge_learning_model.get_layer("edge_output").trainable = False
# Apply the received gradients
grad_updates = [(gradients[10], edge_learning_model.trainable_variables[-2])]
optimizer.apply_gradients(grad_updates)

<Variable path=adam/iteration, shape=(), dtype=int64, value=1>

In [128]:
control_model.summary()
edge_model.summary()

In [147]:
edge_learning_model = tf.keras.models.load_model("../deployment/models/edge_model.keras")
for layer in edge_learning_model.layers:
    layer.trainable = True
edge_learning_model.get_layer("edge_output").trainable=False

In [166]:
import tensorflow as tf
import numpy as np

# Load edge model
edge_learning_model = tf.keras.models.load_model("../deployment/models/edge_model.keras")
input_data = np.array([[0,0,8.7,0,8.6,8.5,11.1,99,1003.1,2,12,2,350,61,66,0.0,15000,45,8]], dtype=np.float32)
print(input_data)
edge_learning_model = tf.keras.models.load_model("../deployment/models/edge_model.keras")
for layer in edge_learning_model.layers:
    layer.trainable = True
edge_learning_model.get_layer("edge_output").trainable=False

# Reinitialize the optimizer
print(edge_learning_model.trainable_variables)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
edge_learning_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

# Assume edge_output_grad is received from the control machine
edge_output_grad = tf.convert_to_tensor(edge_output_grad, dtype=tf.float32)
edge_output_grad_temp=edge_output_grad
# Perform backpropagation on edge model
with tf.GradientTape() as tape:
    edge_output = edge_learning_model(input_data, training=True)  # Forward pass
tape.watch(edge_output)

# Compute edge model gradients
edge_model_grads = tape.gradient(edge_output, edge_learning_model.trainable_variables, output_gradients=edge_output_grad)

# Apply gradients to update edge model
optimizer.apply_gradients(zip(edge_model_grads, edge_learning_model.trainable_variables))

#Output layer for inference now needs to be retrained so just unfreeze it and refreeze other layers 


[[0.0000e+00 0.0000e+00 8.7000e+00 0.0000e+00 8.6000e+00 8.5000e+00
  1.1100e+01 9.9000e+01 1.0031e+03 2.0000e+00 1.2000e+01 2.0000e+00
  3.5000e+02 6.1000e+01 6.6000e+01 0.0000e+00 1.5000e+04 4.5000e+01
  8.0000e+00]]
[<Variable path=e1/kernel, shape=(19, 64), dtype=float32, value=[[-0.17289169  0.07072189 -0.05398406 ...  1.0727086  -0.02976517
  -0.01256253]
 [ 0.01366728  0.21567327  0.29743728 ...  0.30747536  0.09442616
   0.21558851]
 [ 0.14047492  0.16423541 -0.3698767  ... -0.18042594 -0.18231882
   0.14491336]
 ...
 [-0.16987199  0.00195603  0.0373666  ... -0.01674793  0.0634799
   0.23821852]
 [-0.16831516  0.12744576 -0.19917376 ... -0.08832585  0.01801423
  -0.18682417]
 [ 0.07383933 -0.06046855 -0.27734149 ... -0.18795775 -0.2964336
  -0.08152622]]>, <Variable path=e1/bias, shape=(64,), dtype=float32, value=[-0.0209678  -0.02042335 -0.10733514 -0.03591165 -0.15299055 -0.01618449
  0.          0.         -0.09352917 -0.04611762 -0.09896345 -0.09997325
 -0.00125823 -0.05624

In [2]:
# S+1

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Input, Model, layers
from sklearn.model_selection import train_test_split

# Load and preprocess data
RAIN_THRESHHOLD = 0.5
data = pd.read_csv('weather_data.csv', low_memory=False)
data = data.sample(frac=1, random_state=2).reset_index(drop=True)
data = data.replace('', float('nan')).dropna()
data = data.replace(' ', float('nan'))
data = data.dropna(subset=['wetb', 'vappr', 'rhum', 'vis'])
X = data.drop(columns=['rain','date'])
y = data['rain']
print(data.isna().sum())

# Ensure all data is numeric
X = X.astype(float)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape y to (num_samples, 1) for regression
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

# Define the model
inputs = Input(shape=(X_train.shape[1],))
x = layers.Dense(64, activation='relu', name='e1')(inputs)
edge_outputs = layers.Dense(1, activation='linear', name='edge_output')(x)
x = layers.Dense(64, activation='relu', name='controlmodel')(x)
x = layers.Dense(64, activation='relu', name='l1')(x)
x = layers.Dense(128, activation='relu', name='l2')(x)
x = layers.Dense(256, activation='relu', name='l3')(x)
x = layers.Dense(128, activation='relu', name='l4')(x)
x = layers.Dense(64, activation='relu', name='l5')(x)
control_outputs = layers.Dense(1, activation='linear', name='control_output')(x)

# Create and compile the combined model
combined_model = Model(inputs=inputs, outputs=[edge_outputs, control_outputs])
combined_model.compile(optimizer='adam',
                       loss={'edge_output': 'mean_squared_error',
                             'control_output': 'mean_squared_error'},
                       metrics={'edge_output': 'mae',
                                'control_output': 'mae'})

# Train the combined model
combined_model.fit(X_train, [y_train, y_train], 
                   epochs=20, batch_size=32, 
                   validation_data=(X_test, [y_test, y_test]))

# Define the edge_model
edge_model = Model(inputs=inputs, outputs=[combined_model.get_layer('e1').output, edge_outputs])

# x§This needs training 
# Set last as untrainable, then train last ones, this is not needed for contorl model becasue its last output is trained when entire model 
# gets trained
for layer in edge_model.layers:
    layer.trainable = False

edge_model.get_layer('edge_output').trainable= True

edge_model.compile(optimizer='adam',
                   loss={'e2': None, 'edge_output': 'mean_squared_error'},
                   metrics={'edge_output': 'mae'})

history = edge_model.fit(X_train, [y_train, y_train], 
               epochs=10, batch_size=32, 
               validation_data=(X_test, [y_test, y_test]))

# Define the control_model
control_model_input = Input(shape=(32,))  # Shape of the intermediate layer output
z = combined_model.get_layer('controlmodel')(control_model_input)
z = combined_model.get_layer('l1')(z)
z = combined_model.get_layer('l2')(z)
z = combined_model.get_layer('l3')(z)
z = combined_model.get_layer('l4')(z)
z = combined_model.get_layer('l5')(z)

control_model_outputs = combined_model.get_layer('control_output')(z)
control_model = Model(inputs=control_model_input, outputs=control_model_outputs)

# Inspect the models
edge_model.summary()
control_model.summary()

edge_model.export("expt")
converter = tf.lite.TFLiteConverter.from_saved_model('./expt')
tflite_model = converter.convert()
with open('../deployment/models/edge_modelS1.tflite', 'wb') as f:
  f.write(tflite_model)

edge_model.save('../deployment/models/edge_modelS1.keras')
    
control_model.save('../deployment/models/control_modelS1.keras')



date     0
ind      0
rain     0
ind1     0
temp     0
ind2     0
wetb     0
dewpt    0
vappr    0
rhum     0
msl      0
ind3     0
wdsp     0
ind4     0
wddir    0
ww       0
w        0
sun      0
vis      0
clht     0
clamt    0
dtype: int64
Epoch 1/20


ValueError: Expected keys ListWrapper(['controlmodel', 'control_output']) in loss dict, but found loss.keys()=['edge_output', 'control_output']