In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before running each cell.
%autoreload 2

In [2]:
import pandas as pd
from signlens.preprocessing.data import *
from signlens.preprocessing.preprocess import *
from sklearn.model_selection import train_test_split
from signlens.model.model import *
from tensorflow.keras.optimizers import Adam
from signlens.params import *
from utils.model_utils import *

2024-03-26 11:21:47.485430: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-26 11:21:47.525406: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def preprocess(random_state=None):
    """
    Load the training subset, split it into train/validation parts and
    preprocess the files of each part.

    Args:
        random_state: Seed forwarded to both ``load_data_subset_csv`` and
            ``train_test_split``. ``None`` leaves both unseeded.

    Returns:
        tuple: ``(X_train, X_val, y_train, y_val)`` where ``X_train`` / ``X_val``
        are the outputs of ``preprocess_and_pad_sequences_from_pq_list`` for the
        respective file lists and ``y_train`` / ``y_val`` are the matching
        slices of the encoded labels.
    """
    banner = Fore.MAGENTA + Style.BRIGHT + "\n⭐️ Use case: preprocess" + Style.RESET_ALL
    print(banner)

    # Load the subset (requested with balanced=True) and derive inputs / targets.
    subset = load_data_subset_csv(balanced=True, random_state=random_state)
    file_paths = subset.file_path
    labels = encode_labels(subset.sign)

    # 80/20 split, stratified on the encoded labels.
    train_files, val_files, y_train, y_val = train_test_split(
        file_paths,
        labels,
        test_size=0.2,
        stratify=labels,
        random_state=random_state,
    )

    # Run the same preprocessing on each split, training files first.
    processed = []
    for split_name, split_files in (("training", train_files), ("validation", val_files)):
        print(Fore.BLUE + f"\nPreprocessing {len(split_files)} {split_name} files..." + Style.RESET_ALL)
        processed.append(preprocess_and_pad_sequences_from_pq_list(split_files))
    X_train, X_val = processed

    return X_train, X_val, y_train, y_val


In [4]:
def train(X_train, y_train, epochs=EPOCHS, patience=20, verbose=1, batch_size=32, validation_data=None, shuffle=True):
    """
    Interactively train a fresh or previously saved model and persist the outcome.

    The user is asked whether to start from scratch. On "y" a new model folder
    is created and a model is initialised with ``NUM_CLASSES`` classes; on "n"
    the user is asked for a (partial) model name and the most recent matching
    model is loaded. The model is then compiled, trained, and its params,
    metrics and weights are saved under the resolved ``paths``.

    Args:
        X_train: Training inputs, forwarded to ``train_model``.
        y_train: Training targets, forwarded to ``train_model``.
        epochs: Forwarded to ``train_model`` (defaults to ``EPOCHS``).
        patience: Forwarded to ``train_model``.
        verbose: Forwarded to ``train_model``.
        batch_size: Forwarded to ``train_model``.
        validation_data: Forwarded to ``train_model``; ``None`` means no
            validation data is supplied.
        shuffle: Forwarded to ``train_model``.

    Returns:
        tuple: ``(model, paths)`` - the trained model and the mapping of output
        locations (keys used here: ``'iter'``, ``'params'``, ``'metrics'``,
        ``'model'``).
    """
    print(Fore.MAGENTA + Style.BRIGHT + "\n⭐️ Use case: train" + Style.RESET_ALL)

    # Normalise the answer once (trim + lowercase) so inputs such as " Y " are
    # accepted instead of being re-prompted.
    new_model_required = ''
    while new_model_required not in ('y', 'n'):
        new_model_required = input("Do you want to train a new model from scratch? (y/n): ").strip().lower()

    if new_model_required == 'y':
        paths = create_model_folder()
        model = initialize_model(num_classes=NUM_CLASSES)
    else:
        model_base_dir_pattern = input("Enter the name (or a part of the name) of the model you want to load: ").strip()
        model, paths = load_model(mode='most_recent', model_base_dir_pattern=model_base_dir_pattern, return_paths=True)

    model = compile_model(model)
    model, history = train_model(model, X_train, y_train,
                                 patience=patience,
                                 epochs=epochs,
                                 verbose=verbose,
                                 batch_size=batch_size,
                                 validation_data=validation_data,
                                 model_save_epoch_path=paths['iter'],
                                 shuffle=shuffle
                                 )

    # Validation metrics are only present in the history when training actually
    # ran a validation pass. With the default validation_data=None the
    # 'val_accuracy' key is expected to be missing, so look it up defensively
    # rather than crashing before the trained model has been saved.
    val_accuracy_per_epoch = history.history.get('val_accuracy')
    if val_accuracy_per_epoch:
        val_accuracy = np.max(val_accuracy_per_epoch)
    else:
        # NOTE(review): confirm save_results tolerates a None metric value.
        val_accuracy = None
        print("No 'val_accuracy' recorded in the training history; saving val_accuracy=None.")

    params = dict(
        context="train",
        training_frac=DATA_FRAC,
        row_count=len(X_train),
        num_classes=NUM_CLASSES,
    )

    save_results(params=params,
                 metrics=dict(val_accuracy=val_accuracy),
                 params_path=paths['params'],
                 metrics_path=paths['metrics'],
                 mode='train'
                 )

    save_model(model=model, model_path=paths['model'])

    return model, paths


In [None]:
# Seed forwarded to preprocess(), which passes it to the data loading and the split.
random_state = 42
unique_train_test_split()
X_train, X_val, y_train, y_val = preprocess(random_state=random_state)
# Only let fit shuffle the training data when the run is unseeded.
shuffle = random_state is None


In [None]:
# Train with the held-out split as validation data; prompts for new vs. existing model.
model, paths = train(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    shuffle=shuffle,
)