# Prepare Environment (default operations)

In [None]:
import os, tensorflow as tf, json, matplotlib.pyplot as plt
from data_loader.hgf_export import HGFresource
from dotenv import load_dotenv

In [None]:
# Run load_dotenv() first (python-dotenv; expected to populate os.environ from
# a local .env file), then read every required setting in one go.
# Indexing os.environ raises KeyError when a variable is missing.
load_dotenv()
(
    HGF_TOKEN,
    HGF_DATA_REPO,
    HGF_BASELINE_MODEL_REPO,
    HGF_BASELINE_MODEL_CONFIG,
    HGF_BASELINE_MODEL_WEIGHTS,
) = (
    os.environ[env_key]
    for env_key in (
        'HUGGINGFACE_TOKEN',
        'HUGGINGFACE_DATASET_V2_REPO',
        'HUGGINGFACE_BASELINE_CNN_REPO',
        'HUGGINGFACE_BASELINE_CNN_CONFIG_FILE',
        'HUGGINGFACE_BASELINE_CNN_WEIGHTS_FILE',
    )
)

In [None]:
# Project helper for Hugging Face access, constructed with HGF_TOKEN.
# The cells below use it to fetch the baseline model config/weights
# (load_file, load_model), to load the dataset (load_data_tfds) and to
# upload the results (commit_to_hub).
hgf = HGFresource(token=HGF_TOKEN)

In [None]:
# load config of baseline model which we need to optimize
# (the value returned by hgf.load_file is used as a local path for open())
model_config_file = hgf.load_file(repo=HGF_BASELINE_MODEL_REPO, filename=HGF_BASELINE_MODEL_CONFIG)
# Parse directly from the file handle so that model_config is only ever a dict
# (it is never bound to the intermediate raw JSON string).
with open(model_config_file, 'r', encoding='utf-8') as f:
    model_config = json.load(f)
# extract number of classes from the last layer
N_CLASSES = model_config['layers'][-1]['config']['units']

# Optional: load the full baseline model with its pre-defined weights.
# You may want it for the sake of performance comparison, but note that it
# occupies additional RAM space - comment this call out if you do not need it.
baseline_model = hgf.load_model(
    repo=HGF_BASELINE_MODEL_REPO,
    filename={
        'model_weights': HGF_BASELINE_MODEL_WEIGHTS,
        'model_config': HGF_BASELINE_MODEL_CONFIG
    }
)
# # alternative way to extract number of classes (from the loaded model's last layer)
# N_CLASSES = baseline_model.layers[-1].get_config()['units']

In [None]:
# Load the train/test splits from the dataset repo; N_CLASSES comes from the
# baseline model config and batch_size is forwarded to the loader as-is.
train_data, test_data = hgf.load_data_tfds(
    repo=HGF_DATA_REPO,
    n_classes=N_CLASSES,
    batch_size=32,
)

Downloading readme:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/244M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/81.5M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/40989 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/13663 [00:00<?, ? examples/s]

Map:   0%|          | 0/40989 [00:00<?, ? examples/s]

Map:   0%|          | 0/13663 [00:00<?, ? examples/s]

In [None]:
# Training settings.
# The learning rate may require fine-tuning, but keep the optimizer given here:
# uncomment exactly one of the two OPTIMIZER lines below.
# OPTIMIZER = tf.keras.optimizers.legacy.Adam(learning_rate=0.001) # use this if you have M1/M2 Mac, otherwise use the next line
# OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.001)
INPUT_SHAPE = (256, 219, 3)
EPOCHS = 20
LOSS = 'categorical_crossentropy'
# Weighted-average F1; reported under the 'f1_score' / 'val_f1_score' history keys
# that the plotting and reporting cells read.
METRICS = [tf.keras.metrics.F1Score(average='weighted')]

In [None]:
# initialize the baseline model architecture
# Only model_config (the parsed JSON config) is used here; no weights file is
# loaded in this cell.
model = tf.keras.Sequential.from_config(model_config)
# view the model architecture
model.summary()

In [None]:
def history_plot(history, metric='f1_score'):
    """Plot a per-epoch training metric, plus its validation curve if present.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists of values (e.g. the value returned by
            ``model.fit``).
        metric: Key of the training metric inside ``history.history``. The
            validation curve is looked up under ``'val_' + metric`` and is
            skipped when that key is absent (training without validation
            data). Defaults to ``'f1_score'``.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        KeyError: If ``metric`` is not a key of ``history.history``.
    """
    records = history.history
    train_scores = records[metric]
    # .get() instead of indexing: a run without validation data has no 'val_*' key
    val_scores = records.get('val_' + metric)

    # Human-readable metric name for labels, e.g. 'f1_score' -> 'F1 Score'
    pretty_name = metric.replace('_', ' ').title()

    epochs = range(1, len(train_scores) + 1)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(epochs, train_scores, 'bo-', label=f'Training {pretty_name}')
    if val_scores is not None:
        ax.plot(epochs, val_scores, 'ro-', label=f'Validation {pretty_name}')
        title = f'Training and Validation {pretty_name}'
    else:
        title = f'Training {pretty_name}'
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(pretty_name)
    ax.legend()
    ax.grid(True)
    plt.show()

# Your Code Starts Here

In [None]:
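# your optimization code goes here (e.g. architecture changes, data augmentation, tuning)
# NOTE: the model must be compiled before the training cell below can run, e.g.
# model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRICS)
# (OPTIMIZER exists only after you uncomment one of its two definitions above)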

In [None]:
# Train for EPOCHS epochs and keep the returned history for plotting/reporting.
# test_data is passed as validation_data, so the 'val_*' entries of
# history.history are computed on the test split.
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=EPOCHS
)

# Commit Your Results to Hugging Face Once Done

In [None]:
# NOTE: replace the <your_name> placeholder before running this cell.
# When naming your weights file, indicate the type of model,
# e.g.:
## 'tuned_optimizer_model_weights.h5'
## 'data_augmentation_model_weights.h5'
WEIGHTS_TO_COMMIT = '<your_name>_model_weights.h5'
# save model weights to a local file (it is uploaded and then removed by the cells below)
model.save_weights(WEIGHTS_TO_COMMIT)

In [None]:
# Replace the placeholder with the same name you used for the weights file.
PERFORMANCE_TO_COMMIT = '<your_name_same_as_for_weights>_model_performance.json'
# Final-epoch weighted F1 scores. The 'test' value is read from the
# 'val_f1_score' history key (the validation metric recorded during training).
# float() guards against a non-builtin float type (e.g. a NumPy scalar), which
# the json module cannot serialize.
performance_dict = {
    'train': {
        'weighted_f1': float(history.history['f1_score'][-1])
    },
    'test': {
        'weighted_f1': float(history.history['val_f1_score'][-1])
    }
}
# Serialize straight into the file; performance_dict stays a dict instead of
# being rebound to its JSON string.
with open(PERFORMANCE_TO_COMMIT, 'w', encoding='utf-8') as f:
    json.dump(performance_dict, f)

In [None]:
# Upload the weights file and the performance report to the baseline model
# repo; each file keeps its local name as its path in the repo.
hgf.commit_to_hub(
    repo=HGF_BASELINE_MODEL_REPO,
    path_on_local=[WEIGHTS_TO_COMMIT, PERFORMANCE_TO_COMMIT],
    path_in_repo=[WEIGHTS_TO_COMMIT, PERFORMANCE_TO_COMMIT],
    # do not forget to change your commit message
    # to add even more clarity (e.g. what kind of optimization was applied)
    commit_message='your commit message'
)

In [None]:
# remove files from local = clean up
# A file that is already gone is skipped, so re-running this cell does not fail.
for path in [WEIGHTS_TO_COMMIT, PERFORMANCE_TO_COMMIT]:
    try:
        os.remove(path)
    except FileNotFoundError:
        pass