Isabelle Niu Solar Flare EfficientNet Model TPU Training

NOTES:

efficientnetv2b3:
- better than v2b2

dropout layer:
- 0.5 

even label dataset:
- better for accuracy

image size:
- changed to 300 x 300

batch size:
- changed from 32 to 16 

image processing:
- using random rotation with fill_mode = nearest

trainable layers:
- training all three ending layers

class weights:
- 1 vs 3


Training resumes from epoch 4.

In [None]:
# Notebook shell commands: install astropy (its FITS reader is imported in the
# next cell) and upgrade pandas before either package is imported.
!pip install astropy
!pip install --upgrade pandas


In [None]:
import tensorflow.keras as keras
import pandas as pd
import numpy as np
from astropy.io import fits
import skimage
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.saving import register_keras_serializable

In [None]:
# detect and init the TPU
# No address is passed to the resolver, so it relies on the runtime's own
# TPU discovery.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# instantiate a distribution strategy
# The TPU system is initialized before the strategy is built; later cells
# enter `tpu_strategy.scope()` when creating, loading and fitting the model.
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
#ARs: 2284-2731
# (presumably the range of active-region numbers covered - TODO confirm)
# Load the train / test / validation file listings.  dtype=str keeps every
# column, including the class label, as text; later cells read the
# 'filename' and 'class' columns from these frames.
magnetogramTrainDf = pd.read_csv('/kaggle/input/datasetsmagnetogram/trainmagnetograms_even',dtype=str)
magnetogramTestDf = pd.read_csv('/kaggle/input/datasetsmagnetogram/testmagnetograms_even',dtype=str)
magnetogramValDf = pd.read_csv('/kaggle/input/datasetsmagnetogram/valmagnetograms_even',dtype=str)



In [None]:
# Side length (pixels) of the square network input, and samples per batch.
imagesize = 300
batchsize = 16


In [None]:
#Boucheron 2023
class FitsDataGen(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of FITS images listed in a dataframe.

    Each row of ``df`` names one FITS file (column ``X_col``, relative to
    ``directory``) and its class label (column ``y_col``).  A batch is a pair
    ``(X, y)`` where ``X`` stacks the loaded, resized and rescaled images and
    ``y`` stacks the one-hot encoded labels.

    Args:
        df: dataframe listing the samples; it is copied, so the caller's frame
            is never reordered by shuffling.
        X_col: name of the column holding the file names.
        y_col: name of the column holding the class labels.
        directory: base directory that is prefixed (by plain string
            concatenation) to every file name.
        batch_size: number of samples per batch.
        input_size: ``(height, width, channels)`` expected by the network;
            only height and width are used when resizing.
        shuffle: if true, the rows are reshuffled at the end of every epoch
            (the first epoch uses the dataframe's original order).
        augment: if true (the default, matching the previous behaviour),
            every image gets a random horizontal flip, a random vertical flip
            and a random rotation.  Pass ``False`` for validation or test data
            that should be evaluated without augmentation.
    """

    # The input to the data generator will be the dataframe and which columns to use
    def __init__(self, df, X_col, y_col,
                 directory,
                 batch_size,
                 input_size=(imagesize, imagesize, 3),
                 shuffle=True,
                 augment=True):
        # Let the Keras base class set up its own state.
        super().__init__()

        self.df = df.copy() # dataframe
        self.X_col = X_col # column for X data (filename)
        self.y_col = y_col # column for y data (class label)
        self.directory = directory # base directory for data
        self.batch_size = batch_size # batch size
        self.input_size = input_size # size expected by network, (height, width, channels)
        self.shuffle = shuffle # whether to shuffle rows between epochs
        self.augment = augment # whether to apply random flips / rotation

        self.n = len(self.df) # number of data points
        self.nclasses = df[y_col].nunique() # number of classes

    def on_epoch_end(self):
        """Reshuffle the rows after each epoch when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __get_input(self, path, directory, input_size):
        """Load one FITS file and return it as an (H, W, 3) image array."""
        with fits.open(directory+path) as hdul: # read in fits image
            hdul.verify('silentfix')
            img = hdul[1].data # image data is read from HDU index 1

        # copy single channel to three to create rgb dimensioned image
        img = np.expand_dims(img,axis=2)
        img = np.tile(img,(1,1,3))

        # scale to input_size (expected dimensions for input to network)
        img = skimage.transform.resize(img, (input_size[0],input_size[1]), order=1, mode='reflect',\
                                       clip=True, preserve_range=True, anti_aliasing=True)

        # scale intensities to range [0,255]
        # can cheat a bit here and treat each channel the same since these are grayscale images
        img = img + 5978.7 # -5978.7 is minimum of entire magnetogram dataset
        img = img/(2*5978.7)*255 # +5978.7 is maximum of entire magnetogram dataset

        # NOTE(review): the EfficientNet preprocess_input is documented as a
        # pass-through (scaling lives inside the model) - confirm for the
        # installed Keras version.
        img = keras.applications.efficientnet.preprocess_input(img)

        if self.augment:
            img = tf.image.random_flip_left_right(img)
            img = tf.image.random_flip_up_down(img)
            # The flips return a TensorFlow tensor; random_rotation works on
            # NumPy arrays, so convert back before rotating.
            img = np.asarray(img)
            # random_rotation defaults to channel-first axes
            # (row_axis=1, col_axis=2, channel_axis=0).  The image here is
            # (height, width, channels), so the axes must be given explicitly
            # or the rotation is applied in the wrong plane.
            img = tf.keras.preprocessing.image.random_rotation(
                img, 10, row_axis=0, col_axis=1, channel_axis=2,
                fill_mode = "nearest")
        return img

    def __get_output(self, label, num_classes):
        """One-hot encode a single class label."""
        return keras.utils.to_categorical(label, num_classes=num_classes)

    def __get_data(self, batches):
        """Build the (X, y) arrays for the dataframe slice ``batches``."""
        # Generates data containing batch_size samples

        path_batch = batches[self.X_col]

        label_batch = batches[self.y_col]

        X_batch = np.asarray([self.__get_input(x, self.directory, self.input_size) for x in path_batch])

        y_batch = np.asarray([self.__get_output(y, self.nclasses) for y in label_batch])

        return X_batch, y_batch

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        batches = self.df[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self.__get_data(batches)
        return X, y

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.n // self.batch_size

In [None]:
# custom metrics since these will not average batch to batch
class TNR(keras.metrics.Metric):
    """True-negative rate, TN / (TN + FP), accumulated over every batch seen.

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the counts are only meaningful for a two-class
    problem in which index 0 is the negative class and index 1 the positive
    class.  ``sample_weight`` is accepted for API compatibility but ignored.
    """

    def __init__(self, name='TNR', **kwargs):
        super(TNR, self).__init__(name=name, **kwargs)
        self.TN = self.add_weight(name='TN', initializer='zeros')
        self.FP = self.add_weight(name='FP', initializer='zeros')
        # Cached copy of the latest rate.  Kept so the metric's variables and
        # the ``TNR`` attribute stay as before; result() no longer reads it.
        self.TNR = self.add_weight(name='TNR', initializer='zeros')

    def _rate(self):
        """Return TN / (TN + FP) from the running counts (epsilon avoids 0/0)."""
        return self.TN / (self.TN + self.FP + keras.backend.epsilon())

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's true-negative and false-positive counts."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_true = 1 - y_true
        neg_y_pred = 1 - y_pred
        fp = keras.backend.cast(keras.backend.sum(neg_y_true * y_pred),'float32')
        tn = keras.backend.cast(keras.backend.sum(neg_y_true * neg_y_pred),'float32')

        self.TN.assign_add(tn)
        self.FP.assign_add(fp)

        self.TNR.assign(self._rate())

    def result(self):
        """Return the rate computed from the accumulated counts."""
        # Derive the ratio from the counts here instead of returning a ratio
        # stored in a variable: counts can be summed across batches (and
        # across replicas when metric variables are aggregated), ratios cannot.
        return self._rate()

    def reset_state(self):
        """Zero all accumulators."""
        self.TN.assign(0)
        self.FP.assign(0)
        self.TNR.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class TPR(keras.metrics.Metric):
    """True-positive rate, TP / (TP + FN), accumulated over every batch seen.

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the counts are only meaningful for a two-class
    problem in which index 1 is the positive class.  ``sample_weight`` is
    accepted for API compatibility but ignored.
    """

    def __init__(self, name='TPR', **kwargs):
        super(TPR, self).__init__(name=name, **kwargs)
        self.TP = self.add_weight(name='TP', initializer='zeros')
        self.FN = self.add_weight(name='FN', initializer='zeros')
        # Cached copy of the latest rate.  Kept so the metric's variables and
        # the ``TPR`` attribute stay as before; result() no longer reads it.
        self.TPR = self.add_weight(name='TPR', initializer='zeros')

    def _rate(self):
        """Return TP / (TP + FN) from the running counts (epsilon avoids 0/0)."""
        return self.TP / (self.TP + self.FN + keras.backend.epsilon())

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's true-positive and false-negative counts."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_pred = 1 - y_pred
        fn = keras.backend.cast(keras.backend.sum(y_true * neg_y_pred),'float32')
        tp = keras.backend.cast(keras.backend.sum(y_true * y_pred),'float32')

        self.TP.assign_add(tp)
        self.FN.assign_add(fn)

        self.TPR.assign(self._rate())

    def result(self):
        """Return the rate computed from the accumulated counts."""
        # Derive the ratio from the counts here instead of returning a ratio
        # stored in a variable: counts can be summed across batches (and
        # across replicas when metric variables are aggregated), ratios cannot.
        return self._rate()

    def reset_state(self):
        """Zero all accumulators."""
        self.TP.assign(0)
        self.FN.assign(0)
        self.TPR.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class TSS(keras.metrics.Metric):
    """True skill statistic, TPR + TNR - 1, accumulated over every batch seen.

    The four confusion-matrix counts are kept as running totals.  Labels and
    predictions are reduced with ``argmax`` and negatives are formed as
    ``1 - label``, so the counts are only meaningful for a two-class problem
    in which index 1 is the positive class.  ``sample_weight`` is accepted for
    API compatibility but ignored.
    """

    def __init__(self, name='TSS', **kwargs):
        super(TSS, self).__init__(name=name, **kwargs)
        self.TP = self.add_weight(name='TP', initializer='zeros')
        self.TN = self.add_weight(name='TN', initializer='zeros')
        self.FP = self.add_weight(name='FP', initializer='zeros')
        self.FN = self.add_weight(name='FN', initializer='zeros')
        # Cached copy of the latest score.  Kept so the metric's variables and
        # the ``TSS`` attribute stay as before; result() no longer reads it.
        self.TSS = self.add_weight(name='TSS', initializer='zeros')

    def _score(self):
        """Return TPR + TNR - 1 from the running counts (epsilon avoids 0/0)."""
        tnr = self.TN / (self.TN + self.FP + keras.backend.epsilon())
        tpr = self.TP / (self.TP + self.FN + keras.backend.epsilon())
        return tpr + tnr - 1

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's confusion-matrix counts to the running totals."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_true = 1 - y_true
        neg_y_pred = 1 - y_pred
        fp = keras.backend.cast(keras.backend.sum(neg_y_true * y_pred),'float32')
        tn = keras.backend.cast(keras.backend.sum(neg_y_true * neg_y_pred),'float32')
        fn = keras.backend.cast(keras.backend.sum(y_true * neg_y_pred),'float32')
        tp = keras.backend.cast(keras.backend.sum(y_true * y_pred),'float32')

        self.TP.assign_add(tp)
        self.TN.assign_add(tn)
        self.FP.assign_add(fp)
        self.FN.assign_add(fn)

        self.TSS.assign(self._score())

    def result(self):
        """Return the score computed from the accumulated counts, as float32."""
        # Derive the score from the counts here instead of returning a score
        # stored in a variable: counts can be summed across batches (and
        # across replicas when metric variables are aggregated), scores cannot.
        return keras.backend.cast(self._score(), 'float32')

    def reset_state(self):
        """Zero all accumulators."""
        self.TP.assign(0)
        self.TN.assign(0)
        self.FP.assign(0)
        self.FN.assign(0)
        self.TSS.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class HSS(keras.metrics.Metric):
    """Heidke skill score accumulated over every batch seen.

    Computed from the running confusion-matrix counts as::

        2 * (TP*TN - FN*FP) / ((TP+FN)*(FN+TN) + (TP+FP)*(FP+TN))

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the counts are only meaningful for a two-class
    problem in which index 1 is the positive class.  ``sample_weight`` is
    accepted for API compatibility but ignored.
    """

    def __init__(self, name='HSS', **kwargs):
        super(HSS, self).__init__(name=name, **kwargs)
        self.TP = self.add_weight(name='TP', initializer='zeros')
        self.TN = self.add_weight(name='TN', initializer='zeros')
        self.FP = self.add_weight(name='FP', initializer='zeros')
        self.FN = self.add_weight(name='FN', initializer='zeros')
        # Cached copy of the latest score.  Kept so the metric's variables and
        # the ``HSS`` attribute stay as before; result() no longer reads it.
        self.HSS = self.add_weight(name='HSS', initializer='zeros')

    def _score(self):
        """Return the Heidke skill score from the running counts."""
        numerator = 2*(self.TP*self.TN-self.FN*self.FP)
        denominator = (self.TP+self.FN)*(self.FN+self.TN)+(self.TP+self.FP)*(self.FP+self.TN)
        # The denominator is zero when, e.g., only true negatives have been
        # seen so far; epsilon turns the resulting 0/0 (NaN) into 0, matching
        # how the other rate metrics in this file guard their divisions.
        return numerator/(denominator + keras.backend.epsilon())

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's confusion-matrix counts to the running totals."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_true = 1 - y_true
        neg_y_pred = 1 - y_pred
        fp = keras.backend.cast(keras.backend.sum(neg_y_true * y_pred),'float32')
        tn = keras.backend.cast(keras.backend.sum(neg_y_true * neg_y_pred),'float32')
        fn = keras.backend.cast(keras.backend.sum(y_true * neg_y_pred),'float32')
        tp = keras.backend.cast(keras.backend.sum(y_true * y_pred),'float32')

        self.TP.assign_add(tp)
        self.TN.assign_add(tn)
        self.FP.assign_add(fp)
        self.FN.assign_add(fn)

        self.HSS.assign(self._score())

    def result(self):
        """Return the score computed from the accumulated counts."""
        # Derive the score from the counts here instead of returning a score
        # stored in a variable: counts can be summed across batches (and
        # across replicas when metric variables are aggregated), scores cannot.
        return self._score()

    def reset_state(self):
        """Zero all accumulators."""
        self.TP.assign(0)
        self.TN.assign(0)
        self.FP.assign(0)
        self.FN.assign(0)
        self.HSS.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class TN(keras.metrics.Metric):
    """Running count of true negatives (label 0 predicted as 0).

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the count is only meaningful for two classes.
    ``sample_weight`` is accepted for API compatibility but ignored.
    """

    def __init__(self, name='TN', **kwargs):
        super(TN, self).__init__(name=name, **kwargs)
        self.TN = self.add_weight(name='TN', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's true-negative count to the running total."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_true = 1 - y_true
        neg_y_pred = 1 - y_pred
        tn = keras.backend.cast(keras.backend.sum(neg_y_true * neg_y_pred),'float32')

        self.TN.assign_add(tn)

    def result(self):
        """Return the accumulated count."""
        return self.TN

    def reset_state(self):
        """Zero the accumulator."""
        self.TN.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class TP(keras.metrics.Metric):
    """Running count of true positives (label 1 predicted as 1).

    Labels and predictions are reduced with ``argmax`` and multiplied, so the
    count is only meaningful for two classes with index 1 as the positive
    class.  ``sample_weight`` is accepted for API compatibility but ignored.
    """

    def __init__(self, name='TP', **kwargs):
        super(TP, self).__init__(name=name, **kwargs)
        # The variable was previously created under the name 'FP' (copy-paste
        # slip); it holds the true-positive count, so it is named 'TP'.
        self.TP = self.add_weight(name='TP', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's true-positive count to the running total."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        tp = keras.backend.cast(keras.backend.sum(y_true * y_pred),'float32')

        self.TP.assign_add(tp)

    def result(self):
        """Return the accumulated count."""
        return self.TP

    def reset_state(self):
        """Zero the accumulator."""
        self.TP.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class FN(keras.metrics.Metric):
    """Running count of false negatives (label 1 predicted as 0).

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the count is only meaningful for two classes.
    ``sample_weight`` is accepted for API compatibility but ignored.
    """

    def __init__(self, name='FN', **kwargs):
        super(FN, self).__init__(name=name, **kwargs)
        self.FN = self.add_weight(name='FN', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's false-negative count to the running total."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_pred = 1 - y_pred
        fn = keras.backend.cast(keras.backend.sum(y_true * neg_y_pred),'float32')

        self.FN.assign_add(fn)

    def result(self):
        """Return the accumulated count."""
        return self.FN

    def reset_state(self):
        """Zero the accumulator."""
        self.FN.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state
        
class FP(keras.metrics.Metric):
    """Running count of false positives (label 0 predicted as 1).

    Labels and predictions are reduced with ``argmax`` and negatives are
    formed as ``1 - label``, so the count is only meaningful for two classes.
    ``sample_weight`` is accepted for API compatibility but ignored.
    """

    def __init__(self, name='FP', **kwargs):
        super(FP, self).__init__(name=name, **kwargs)
        self.FP = self.add_weight(name='FP', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add this batch's false-positive count to the running total."""
        y_true = keras.backend.argmax(y_true)
        y_pred = keras.backend.argmax(y_pred)
        neg_y_true = 1 - y_true
        fp = keras.backend.cast(keras.backend.sum(neg_y_true * y_pred),'float32')

        self.FP.assign_add(fp)

    def result(self):
        """Return the accumulated count."""
        return self.FP

    def reset_state(self):
        """Zero the accumulator."""
        self.FP.assign(0)

    # Older call sites use the deprecated name.
    reset_states = reset_state

@register_keras_serializable()
class F1Score(tf.keras.metrics.Metric):
    """F1 score built from a Keras ``Precision`` and a ``Recall`` metric.

    Both sub-metrics receive every ``update_state`` call unchanged; the
    result is the harmonic mean ``2 * P * R / (P + R)`` with an epsilon in
    the denominator so an all-zero state yields 0 rather than NaN.

    NOTE(review): ``Precision`` and ``Recall`` are created with their default
    arguments, so they are not restricted to the positive (index 1) output
    that the other custom metrics in this file score - confirm this is the
    intended definition of F1 for the one-hot, two-output model.
    """

    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Forward the batch to both the precision and the recall metric."""
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        """Return the harmonic mean of the current precision and recall."""
        precision = self.precision.result()
        recall = self.recall.result()
        return 2 * (precision * recall) / (precision + recall + tf.keras.backend.epsilon())

    def reset_state(self):
        """Reset both sub-metrics."""
        # Use the sub-metrics' current reset_state() API rather than the
        # deprecated reset_states() name.
        self.precision.reset_state()
        self.recall.reset_state()

    # Older call sites use the deprecated name.
    reset_states = reset_state

In [None]:
def _make_generator(listing_df):
    """Wrap one file-listing dataframe in a FitsDataGen with the shared settings.

    All three splits read the 'filename' and 'class' columns, load images from
    the same directory, use the global batch and image size, and shuffle
    between epochs.
    """
    return FitsDataGen(
        listing_df,
        X_col='filename',
        y_col='class',
        directory='/kaggle/input/solar-flare-medium-dataset/Lat60_Lon60_Nans0/',
        batch_size=batchsize,
        input_size=(imagesize, imagesize, 3),
        shuffle=True,
    )


train_generator = _make_generator(magnetogramTrainDf)
test_generator = _make_generator(magnetogramTestDf)
val_generator = _make_generator(magnetogramValDf)


In [None]:
# Fixed per-class loss weights handed to model.fit: class 1 counts 1.5x as
# much as class 0.  The values are hard-coded, not derived from the data.
class_weights = {
    0: 1.0,
    1: 1.5,
}

In [None]:
with tpu_strategy.scope():
    # Checkpoint after every epoch (save_best_only=False).  The file name
    # embeds the epoch number and the validation TSS, so a metric reported
    # as 'val_TSS' must be compiled into the model.
    results_dir = "png"
    filepath = 'models/'+results_dir+'/model.{epoch:02d}_{val_TSS:.2f}.hdf5.keras'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_TSS', verbose=1, save_best_only=False, mode='max')
    callbacks_list = [checkpoint]

    # Ask each generator how many batches it yields per epoch.  The previous
    # ceil(n / batch_size) counted a trailing partial batch that
    # FitsDataGen.__len__ (n // batch_size) never produces, so fit() was
    # told to run one more step than the data provides whenever n is not a
    # multiple of the batch size.
    step_size_train = len(train_generator)
    step_size_val = len(val_generator)



In [None]:
# Build a fresh classifier: ImageNet-pretrained EfficientNetV2B3 backbone
# followed by a small two-class head.
with tpu_strategy.scope():
    # include_top=False drops the stock classification head
    # (pool + dropout + dense) so a custom one can be attached.
    backbone = keras.applications.EfficientNetV2B3(
        include_top=False,
        weights="imagenet",
        input_shape=(imagesize, imagesize, 3),
    )

    # Head: global average pooling -> dropout(0.5) -> 2-way softmax.
    pooled = keras.layers.GlobalAveragePooling2D()(backbone.output)
    regularized = keras.layers.Dropout(0.5)(pooled)
    new_output = keras.layers.Dense(2, activation='softmax')(regularized)

    model = keras.models.Model(inputs=backbone.input, outputs=new_output)

    adam_opt = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=adam_opt,
        metrics=[TNR(), TPR(), TSS(), "accuracy"],
    )

In [None]:
#running an existing model
# Reload a previously saved checkpoint and recompile it so training can
# continue.  The recompile installs a fresh Adam optimizer, so any optimizer
# state stored in the checkpoint is not carried over.
with tpu_strategy.scope():
    # custom_objects maps each serialized name to the class itself (not to an
    # instance), so Keras can construct the metrics from their saved config.
    model = tf.keras.models.load_model(
        "/kaggle/input/efficientnetv2b3_epochs4-6_11.5_300x300/keras/default/1/models/png/model.01_0.47.hdf5.keras",
        custom_objects={'TNR': TNR, 'TPR': TPR, 'TSS': TSS, 'F1Score' : F1Score})
    adam_opt = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=adam_opt,\
               metrics=[TNR(), TPR(), TSS(), "accuracy", F1Score()])

In [None]:
# Train for three epochs, validating after every epoch and applying the
# per-class loss weights; the checkpoint callback runs at each epoch end.
with tpu_strategy.scope():
    history = model.fit(
        train_generator,
        steps_per_epoch=step_size_train,
        epochs=3,
        verbose=1,
        callbacks=callbacks_list,
        validation_data=val_generator,
        validation_steps=step_size_val,
        validation_freq=1,
        class_weight=class_weights,
    )