## CNN Training (Model Type: `ana-cont`)

Train a CNN network to extract the needle position of an analog dial based on **sin/cos coding and angle calculation**

### Basic Parameter

IMPORTANT: Do not rename any variables in this section — they are externally referenced in the GitHub action `Train Model`.

* `TFlite_MainType`: Model type name
* `TFlite_Version`: Model version identifier
* `TFlite_Size`: Model architecture defined by size
* `Input_Dir`: Input directory path containing training images
* `Output_Dir`: Output directory path where results (models, logs, etc.) will be saved
* `Input_Shape`: Image dimensions (width, height, channels)

In [None]:
# Model type (No need to adapt)
# Used as prefix of the generated model and CSV file names.
TFlite_MainType: str = 'ana-cont'

# Define model version (e.g. 1700 -> v17.00)
# Digit-only values shorter than 4 characters are zero-padded after the imports (e.g. 100 -> 0100).
TFlite_Version: str  = 'xxxx'

# Choose model size
TFlite_Size: str     = 's0'
# Model size pool (Available models: `./src/models/ana_cont.py`)
# `TFlite_Size` is validated against this set before any image is loaded.
Available_Model_Sizes = {'s0', 's1', 's2', 's3'}

# Input folder
# Images will be resized automatically
# Exception: Using folder name: `data_resize_all`
Input_Dir: str  = 'data_raw_all'

# Output folder
# Created (including parents) if it does not exist yet.
Output_Dir: str = 'models/ana-cont'

# Input image size [width, height, channels]
# NOTE(review): the loading cell uses index 0 as the first image axis and passes
# [Input_Shape[0], Input_Shape[1]] to `tf.image.resize`, which expects [height, width].
# This is irrelevant for square inputs - confirm the order before using non-square shapes.
Input_Shape = (32, 32, 3)


### Load Libraries

In [None]:
import os
import sys
import glob
from pathlib import Path
import random
import math
import numpy as np
import pandas as pd

import tensorflow as tf

from src.models.ana_cont import *
from src.utils.augmentation import random_invert_image, random_white_balance

from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from src.utils.plot_functions import (plot_loss, plot_dataset_distribution, plot_dataset_analog, 
                                      plot_dataset_analog_result, plot_divergence)
from src.models.evaluation import predict_and_evaluate, get_false_predictions


# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Print numpy arrays with 4 decimals and without scientific notation
np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)


# Make sure version is at least 4 characters long if defined with digits only (e.g. papermill parameter 100 -> 0100)
# Digit-only versions are always normalized to `str`, because papermill may inject them as integers.
if str(TFlite_Version).isdigit():
    TFlite_Version = str(TFlite_Version).zfill(4)


# Validate model size input (sizes are sorted to get a stable, readable error message)
if TFlite_Size not in Available_Model_Sizes:
    raise ValueError(f"Invalid TFlite_Size '{TFlite_Size}'. Must be one of: {', '.join(sorted(Available_Model_Sizes))}")


# Prepare folders
if not Path(Input_Dir).is_dir():  # `is_dir()` is also False if the path does not exist at all
    sys.exit(f"Folder '{Input_Dir}' does not exist.")

Path(Output_Dir).mkdir(parents=True, exist_ok=True)  # Create output folder if it doesn't exist


# Disable GPUs (best effort: training continues even if the GPUs cannot be hidden)
try:
    tf.config.set_visible_devices([], 'GPU')
except (ValueError, RuntimeError) as err:
    # Invalid device or cannot modify virtual devices once initialized.
    print(f"Warning: Could not disable GPUs: {err}")
else:
    # Report (instead of silently ignoring) a GPU that is still visible
    if any(device.device_type == 'GPU' for device in tf.config.get_visible_devices()):
        print("Warning: GPU devices are still visible to TensorFlow.")


### Load images

* The images are expected in the "Input_Dir"
* The target image size is 32 x 32 with 3 color channels (RGB); images of a different size are resized automatically while loading
* The first 3 characters of the image filename must contain the real value representation of the image:
  * Generic: `x.y_zzzz.jpg`
  * Example: `4.6_main_ana1_2019-06-02T050011.jpg`

| Filename Part | Description                  | Usage                    |
|---------------|------------------------------|--------------------------|
| x.y           | Represented value (e.g. 4.6) | **Value to be learned**  |
| _zzzz         | Further file description     | Not required / processed |

* The images are stored in `x_data`
* The expected output for each image is stored in the corresponding entry of `y_data`
  * The periodic nature is reflected in a **sin/cos coding**, which allows the angle/counter value to be restored with an arctan later on.
* The last step is a shuffle (from sklearn.utils), as the filenames are in order due to the encoding of the expected analog readout in the filename
* Split dataset into training and validation parts

In [None]:
# Collect all JPEG images of the input folder
files = glob.glob(f"{Input_Dir}/*.jpg")
num_files = len(files)

# Fail early with a clear message instead of running into an obscure error in a later cell
if num_files == 0:
    raise FileNotFoundError(f"No '*.jpg' images found in folder '{Input_Dir}'.")

# Prepare data containers
f_data = np.empty(num_files, dtype="<U250")  # File paths
x_data = np.empty((num_files, Input_Shape[0], Input_Shape[1], Input_Shape[2]), dtype="float32")  # Images
y_data = np.empty((num_files, 2), dtype="float32")  # [sin, cos] encoded targets
y_data_target = np.empty(num_files)  # Plain target values (label / 10)

# Process files
for i, file in enumerate(files):
    # Read image file (`expand_animations=False` keeps the decoded tensor 3-dimensional)
    image_bytes = tf.io.read_file(file)
    image = tf.image.decode_image(image_bytes, channels=Input_Shape[2], expand_animations=False)

    # Resize image (if required)
    if image.shape[0] != Input_Shape[0] or image.shape[1] != Input_Shape[1]:
        image = tf.image.resize(image, [Input_Shape[0], Input_Shape[1]], method=tf.image.ResizeMethod.MITCHELLCUBIC)
        # Cubic interpolation may overshoot the valid pixel range, therefore clip the result
        image = tf.clip_by_value(tf.cast(image, tf.float32), 0.0, 255.0)
    else:
        image = tf.cast(image, tf.float32)

    # Extract truth value from filename and calculate respective sin/cos values
    base = Path(file).name
    try:
        target_number = float(base[:3]) / 10
    except ValueError as err:
        raise ValueError(
            f"Cannot extract the label from filename '{base}'. Expected syntax: 'x.y_zzzz.jpg' (e.g. '4.6_main.jpg')."
        ) from err
    target_sin = math.sin(target_number * math.pi * 2)
    target_cos = math.cos(target_number * math.pi * 2)

    # Save data
    f_data[i] = file
    x_data[i] = image.numpy()
    y_data[i] = [target_sin, target_cos]
    y_data_target[i] = target_number

    if i % 500 == 0:
        print(f"{i} files processed...")


print("Data count: ", len(y_data))
print(x_data.shape)
print(y_data.shape)


### Dataset Distribution

Uneven distribution of data can lead to poorer results.

In [None]:
# Visualize the distribution of the target values; the return value is assigned to `_`
# so that the notebook does not echo it.
_ = plot_dataset_distribution(y_data_target)


### Model Definition

**Important**
* Shape of the input layer: (32, 32, 3)
* Shape of the output layer: (2) - sin and cos

In [None]:
# Select the model builder matching the requested size ...
if TFlite_Size == "s0":
    build_model = model_ana_cont_s0
elif TFlite_Size == "s1":
    build_model = model_ana_cont_s1
elif TFlite_Size == "s2":
    build_model = model_ana_cont_s2
elif TFlite_Size == "s3":
    build_model = model_ana_cont_s3
else:
    raise ValueError(f"TFlite_Size: '{TFlite_Size}' is not supported.")

# ... and create the model (all sizes share the same input shape and learning rate)
model = build_model(
    input_shape=(Input_Shape[0], Input_Shape[1], Input_Shape[2]),
    learning_rate=1e-3,
)


### Prepare Datasets / Data Augmentation

In [None]:
# Parameter
###############################
# Share of the images which is used for validation only
Validation_Percentage = 0.2 # (0.0 == 0%: Use all images for training, none for validation)
# Batch size of the training and the validation iterator
Batch_Size = 8
# Passed as [-Shift_Range, Shift_Range] to `width_shift_range` and `height_shift_range`
Shift_Range = 1
# Passed as [1 - Brightness_Range, 1 + Brightness_Range] to `brightness_range`
Brightness_Range = 0.2
# Passed as [1 - Zoom_Range, 1 + Zoom_Range] to `zoom_range`
Zoom_Range = 0.05
# Passed unchanged to `channel_shift_range`
Channel_Shift_Range = 5
# Passed unchanged to `shear_range`
Shear_Range = 1
###############################


# Augmentation helper function
def preprocessing(x):
    """Augment a single image and return it as float32 array clipped to [0, 255].

    Applies `random_white_balance` followed by `random_invert_image`. This function
    is handed to the training `ImageDataGenerator` as `preprocessing_function`.
    """
    augmented = random_invert_image(random_white_balance(x))
    return np.clip(augmented, 0.0, 255.0).astype(np.float32)


# Shuffle dataset (images, targets and file names are shuffled consistently)
x_data, y_data, y_data_target, f_data = shuffle(x_data, y_data, y_data_target, f_data)

# Split data into training and validation dataset
# `train_test_split` rejects `test_size=0.0`, therefore the documented case
# "0.0 == use all images for training" is handled explicitly without splitting.
if Validation_Percentage > 0:
    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=Validation_Percentage)
else:
    x_train, y_train = x_data, y_data
    x_test, y_test = x_data[:0], y_data[:0]  # Empty validation set

# Prepare training dataset (with augmentation)
# NOTE(review): Keras documents list values of `width_shift_range` / `height_shift_range` as
# "random elements from the array" and not as an interval - confirm that this is intended.
print("Training data")
datagen = ImageDataGenerator(
    width_shift_range=[-Shift_Range, Shift_Range], 
    height_shift_range=[-Shift_Range, Shift_Range],
    brightness_range=[1 - Brightness_Range, 1 + Brightness_Range],
    zoom_range=[1 - Zoom_Range, 1 + Zoom_Range],
    channel_shift_range=Channel_Shift_Range,
    shear_range=Shear_Range,
    preprocessing_function=preprocessing
)

train_iterator = datagen.flow(x_train, y_train, batch_size=Batch_Size)
plot_dataset_analog(train_iterator)

# Prepare validation dataset (without augmentation)
# `validation_iterator` is only defined if validation is enabled
if (Validation_Percentage > 0):
    datagen_val = ImageDataGenerator() # No augmentation for validation
    validation_iterator = datagen_val.flow(x_test, y_test, batch_size=Batch_Size)
    print("  ")
    print("Validation data")
    plot_dataset_analog(validation_iterator, rows=3)


### Training
 
* Train the model using augmented training data and unaltered validation data
* Visualize training and validation performance over epochs (e.g., loss and accuracy curves)
* Avoid significant overfitting: The validation performance should not deviate heavily from training due to augmentation being applied only to the training data
* The best-performing model is selected automatically; it is not necessarily the final one. The optimal model is often found within the last ~30-40 epochs

In [None]:
# Parameter
###############################
Training_Epochs = 600
###############################

# Without validation data no `val_loss` is available. Monitoring it anyway would silently
# disable both callbacks, therefore the training loss is monitored in that case.
use_validation = Validation_Percentage > 0
monitor_metric = 'val_loss' if use_validation else 'loss'

# Learning Rate Scheduler: Reduce the learning rate by 10% after 5 epochs without improvement
lr_scheduler = ReduceLROnPlateau(
    monitor=monitor_metric, factor=0.9, patience=5, min_lr=1e-5, verbose=0
)

# Early Stopping: Stop after 40 epochs without improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor=monitor_metric, mode='min', patience=40, restore_best_weights=True, verbose=0
)

# Train model (`validation_iterator` only exists if validation is enabled)
history = model.fit(
    train_iterator,
    validation_data=validation_iterator if use_validation else None,
    epochs=Training_Epochs,
    callbacks=[lr_scheduler, early_stopping],
    verbose=0,
)

# Plot model loss graph
_ = plot_loss(history, validation=use_validation)


### Model verification

* The following code uses the trained model to check the deviation for each picture (train + validation).
* The accepted_deviation can be used to get the accuracy with allowed differences (for instance +/- 0.1)
* The first (max.) 49 falsely predicted images will be shown
* A CSV file with all falsely predicted images will be created. It can be used for relabeling with this tool: https://github.com/haverland/collectmeteranalog

In [None]:
# Parameters
###############################
Accepted_Deviation = 0.1
###############################

# Run the trained model on the complete dataset (training + validation images)
predicted_val, expected_val, deviation_val = predict_and_evaluate(
    model=model,
    x_data=x_data,
    y_data=y_data,
    y_data_target=y_data_target,
)

# Collect the false predictions
# (presumably all images deviating by more than `Accepted_Deviation` - see `src/models/evaluation.py`)
false_predictions_result = get_false_predictions(
    expected_val=expected_val,
    predicted_val=predicted_val,
    deviation_val=deviation_val,
    x_data=x_data,
    f_data=f_data,
    accepted_deviation=Accepted_Deviation,
)


In [None]:
# Plot the divergence of the false predictions
num_images = len(y_data)
num_false_predicted = len(false_predictions_result['dev'])
accuracy_percent = (1 - num_false_predicted / num_images) * 100.0

print(f"Accuracy: {accuracy_percent:.2f} % (Images: {num_images} | False Predicted: {num_false_predicted}) | Accepted Deviation: {Accepted_Deviation}")

title = f"False Predicted Divergation  |  Images: {num_images}\nAccuracy: {accuracy_percent:.2f} % (False Predicted: {num_false_predicted}) With Accepted Deviation: {Accepted_Deviation}"

# Histogram of the absolute deviations in steps of 0.1 (deviation * 10, modulo 51, rounded)
deviation_steps = np.array(np.round((np.abs(false_predictions_result['dev']) * 10) % 51).astype(int))
deviation_histogram = np.bincount(deviation_steps, minlength=51)
_ = plot_divergence(deviation_histogram, title)


In [None]:
# Show the false predictions as image grid (7 x 7 -> first 49 entries)
print("False Predictions (Sorted by highest deviation, max. 49 images)")
plot_dataset_analog_result(
    false_predictions_result["img"],
    false_predictions_result["lbl"],
    columns=7,
    rows=7,
    figsize=(18,18),
)


In [None]:
# Save false predicted image list to CSV
# The csv file can be further processed with the tool [collectmeteranalog](https://github.com/haverland/collectmeteranalog) to evaluate or adjust labels

csv_dir = f"{Output_Dir}/training_details/{TFlite_MainType}_{TFlite_Version}_{TFlite_Size}/"
Path(csv_dir).mkdir(parents=True, exist_ok=True)  # Create csv folder if it doesn't exist

# `csv_dir` already ends with a separator: join via pathlib to avoid a doubled "//" in the file path
csv_file = Path(csv_dir) / f"{TFlite_MainType}_{TFlite_Version}_{TFlite_Size}_false_predicted.csv"

# One row per false prediction: file path, predicted value, expected value and deviation
false_predicted_df = pd.DataFrame(
    list(zip(false_predictions_result["file"], false_predictions_result["pred"],
             false_predictions_result["exp"], false_predictions_result["dev"])),
    columns=["File", "Predicted", "Expected", "Deviation"],
)
false_predicted_df.to_csv(csv_file, index=True, index_label="Index")


###############################################################################################################################################################################
# Save false predicted image list to CSV (--> LEGACY File Syntax)
# The csv file can be further processed with the tool [collectmeteranalog](https://github.com/haverland/collectmeteranalog) to evaluate or adjust labels

#false_predicted_df = pd.DataFrame(false_predicted_files)
#false_predicted_df.to_csv(f"{csv_dir}/{TFlite_MainType}_{TFlite_Version}_{TFlite_Size}_false_predicted.csv", index=True)


### Save the model

* Save the model to the file with the "tflite" file format
* Quantize the model and store it as `_q.tflite`

In [None]:
# Target file of the (non-quantized) TensorFlow Lite model
FileName = f"{Output_Dir}/{TFlite_MainType}_{TFlite_Version}_{TFlite_Size}.tflite"

# Convert the trained Keras model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk
model_file = Path(FileName)
model_file.write_bytes(tflite_model)

print(f"Model saved successfully. File: {FileName}")
print(f"File size: {model_file.stat().st_size} bytes")


In [None]:
# Target file of the quantized model (same name as the float model, suffixed with `_q`)
FileName = f"{Output_Dir}/{TFlite_MainType}_{TFlite_Version}_{TFlite_Size}_q.tflite"

# Representative dataset function
def representative_dataset():
    """Yield every image of `x_data` as single-sample float32 batch.

    Each yielded item is a one-element list holding an array with a leading
    batch axis of size 1. The generator is assigned to
    `converter.representative_dataset` for the quantized conversion.
    """
    for sample in x_data:
        yield [sample[np.newaxis, ...].astype(np.float32)]


# Convert the trained Keras model to a quantized TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# The representative dataset provides the sample images for the quantization
converter.representative_dataset = representative_dataset
# Restrict the converter to the INT8 builtin operator set
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Experimental converter flag: No per-channel quantization for dense layers
converter._experimental_disable_per_channel_quantization_for_dense_layers = True

tflite_quant_model = converter.convert()

# Write the quantized model to the specified file
quantized_model_file = Path(FileName)
quantized_model_file.write_bytes(tflite_quant_model)

print(f"Model saved successfully. File: {FileName}")
print(f"File size: {quantized_model_file.stat().st_size} bytes")
