In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
import numpy as np

# Define a simple sequence-to-sequence model for text-to-text transformation
def create_simple_llm_model(input_vocab_size, output_vocab_size, input_seq_length, output_seq_length):
    # Encoder
    encoder_input = layers.Input(shape=(input_seq_length,))
    encoder_embedding = layers.Embedding(input_vocab_size, 32)(encoder_input)
    encoder_lstm = layers.LSTM(64, return_state=True)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_input = layers.Input(shape=(output_seq_length,))
    decoder_embedding = layers.Embedding(output_vocab_size, 32)(decoder_input)
    decoder_lstm = layers.LSTM(64, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
    decoder_dense = layers.Dense(output_vocab_size, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model
    model = keras.Model([encoder_input, decoder_input], decoder_outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Example: Temperature input mapped to recommendations
temperature_data = [
    ("The temperature is 10°C", "Cooling recommended."),
    ("The temperature is 20°C", "Cooling recommended."),
    ("The temperature is 25°C", "Temperature is acceptable."),
    ("The temperature is 26°C", "Temperature is acceptable."),
    ("The temperature is 27°C", "Temperature is acceptable."),
    ("The temperature is 30°C", "Heating recommended."),
    ("The temperature is 35°C", "Heating recommended."),
    ("The temperature is 40°C", "Heating recommended.")
]

# Preprocess the data into a form suitable for the model
def preprocess_data(temperature_data):
    input_texts = []
    output_texts = []
    for temp, rec in temperature_data:
        input_texts.append(temp)
        output_texts.append(rec)
    return input_texts, output_texts

input_texts, output_texts = preprocess_data(temperature_data)

# Tokenize the text data (convert characters to indices)
tokenizer_input = tf.keras.preprocessing.text.Tokenizer(char_level=False)
tokenizer_input.fit_on_texts(input_texts)
input_seq = tokenizer_input.texts_to_sequences(input_texts)

tokenizer_output = tf.keras.preprocessing.text.Tokenizer(char_level=False)
tokenizer_output.fit_on_texts(output_texts)
output_seq = tokenizer_output.texts_to_sequences(output_texts)

# Pad sequences to ensure uniform length
max_input_length = max(len(seq) for seq in input_seq)
max_output_length = max(len(seq) for seq in output_seq)

input_seq_padded = tf.keras.preprocessing.sequence.pad_sequences(input_seq, maxlen=max_input_length, padding='post')
output_seq_padded = tf.keras.preprocessing.sequence.pad_sequences(output_seq, maxlen=max_output_length, padding='post')

# Convert to one-hot encoded vectors for the decoder output
output_seq_one_hot = tf.keras.utils.to_categorical(output_seq_padded, num_classes=len(tokenizer_output.word_index)+1)

# Model input parameters
input_vocab_size = len(tokenizer_input.word_index) + 1
output_vocab_size = len(tokenizer_output.word_index) + 1

# Create the model
model = create_simple_llm_model(input_vocab_size, output_vocab_size, max_input_length, max_output_length)

# Train the model
model.fit([input_seq_padded, output_seq_padded], output_seq_one_hot, epochs=100, batch_size=8)

# Save the model
model.save('simple_temperature_llm.h5')



Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.2083 - loss: 1.9476
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9431
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9386
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9341
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.2083 - loss: 1.9294
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9246
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9195
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2083 - loss: 1.9142
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences  # Import pad_sequences

# Load the trained model
model = tf.keras.models.load_model('simple_temperature_llm.h5')
# For simplicity, we'll assume you have the tokenizer already available. Here's an example:


# Define a simple preprocessing function (use your tokenizer here)
def preprocess_input(input_text):
    # Tokenize the input text (temperature info)
    input_seq = tokenizer_input.texts_to_sequences([input_text])
    # Pad sequences to the same length as used during training
    # Get the actual max_input_length from the model's input shape
    max_input_length = model.inputs[0].shape[1]
    input_seq_padded = pad_sequences(input_seq, maxlen=max_input_length, padding='post')
    return input_seq_padded

# Define a function to decode the output sequence from indices back to text
def decode_output(output_seq):
    return tokenizer_output.sequences_to_texts(output_seq)[0]

# Test input (e.g., temperature info)
input_text = "The temperature is 30°C"

# Preprocess input
input_seq_padded = preprocess_input(input_text)

# Prepare a dummy output sequence to pass (just for prediction)
# Get the actual max_output_length from the model's input shape
max_output_length = model.inputs[1].shape[1]
dummy_output = np.zeros((1, max_output_length))  # Simple 1-step output (length of 1)



# Make a prediction
predicted_output = model.predict([input_seq_padded, dummy_output])

# Decode the output
predicted_text = decode_output(np.argmax(predicted_output, axis=-1))

print("Input Text: ", input_text)
print("Predicted Recommendation: ", predicted_text)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step
Input Text:  The temperature is 30°C
Predicted Recommendation:  heating recommended


In [None]:
# Convert Keras model to a tflite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Enable resource variables
converter.experimental_enable_resource_variables = True
# Set the supported ops
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS,
  tf.lite.OpsSet.SELECT_TF_OPS
]
# Disable experimental lowering of tensor list ops
converter._experimental_lower_tensor_list_ops = False
tflite_model = converter.convert()

open('simple_temperature_llm.tflite', 'wb').write(tflite_model)

Saved artifact at '/tmp/tmpte0x3w8q'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 4), dtype=tf.float32, name='input_layer_2'), TensorSpec(shape=(None, 3), dtype=tf.float32, name='input_layer_3')]
Output Type:
  TensorSpec(shape=(None, 3, 7), dtype=tf.float32, name=None)
Captures:
  136747922054784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922060944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922444480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922060064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922446416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922445536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922447120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922448880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922447648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136747922440960

87256

In [1]:
# Function: Convert some hex value into an array for C programming
def hex_to_c_array(hex_data, var_name):

  c_str = ''

  # Create header guard
  c_str += '#ifndef ' + var_name.upper() + '_H\n'
  c_str += '#define ' + var_name.upper() + '_H\n\n'

  # Add array length at top of file
  c_str += '\nunsigned int ' + var_name + '_len = ' + str(len(hex_data)) + ';\n'

  # Declare C variable
  c_str += 'unsigned char ' + var_name + '[] = {'
  hex_array = []
  for i, val in enumerate(hex_data) :

    # Construct string from hex
    hex_str = format(val, '#04x')

    # Add formatting so each line stays within 80 characters
    if (i + 1) < len(hex_data):
      hex_str += ','
    if (i + 1) % 12 == 0:
      hex_str += '\n '
    hex_array.append(hex_str)

  # Add closing brace
  c_str += '\n ' + format(' '.join(hex_array)) + '\n};\n\n'

  # Close out header guard
  c_str += '#endif //' + var_name.upper() + '_H'

  return c_str

In [6]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving simple_temperature_llm.tflite to simple_temperature_llm.tflite
User uploaded file "simple_temperature_llm.tflite" with length 87256 bytes


In [7]:


# Convert the tflite model to a C array
with open('simple_temperature_llm.tflite', 'rb') as f:
  tflite_model_data = f.read()

c_array_code = hex_to_c_array(tflite_model_data, 'tflite_model')

# Save the C array code to a file
with open('tflite_model.h', 'w') as f:
  f.write(c_array_code)

In [8]:
from google.colab import files

files.download('tflite_model.h')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>