In [None]:
import tensorflow as tf
import numpy as np
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, GlobalAveragePooling2D, Dropout
from keras.models import Model
from keras import backend as K
from keras import regularizers
from keras.applications import EfficientNetB0
from kerastuner.tuners import RandomSearch
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback

In [None]:
#NUMERICAL VALUES(DIMENSIONS) WILL CHANGE
#Connection of Input is required, connect to database and do train test validation splits
#hp(hyperparameter) tuning is applied(hp.something(..)), but depending on performance, could do less or more parameters?
#Possibly computationally expensive, could cut a layer from autoencoder, and be used later for feature extraction(type1,2...)
#Decoder part of the autoencoder is commented out
#CNN model used: efficientnet, could use simpler model for less use of computation resources, need to try
#Use of transfer learning in CNN by unfreezing only the top 20 layers, could also go for the route of training the whole CNN with our data
#Note that datasets will also change in the end: 1)data from one instrument, 2)data from another instrument, 3)hybrid dataset of these two => hp tuning for each one? too expensive?
#No cross validation, probably inefficient
#Use of a fine tuned(hp tuning), stacked(2 layers), sparse(encoder_l1 for regularization which leads to sparsity) autoencoder
#Use of transformer would require design decisions regarding ML pipeline: Autoencoder,Transformer,CNN: Which one(s) to use and in which order? In any case, implementation of a transformer would be at later stages 
    #AE -> TF -> Classifier
    #AE -> TF -> CNN -> Classifier
    #AE -> CNN -> TF -> Classifier
    #AE -> CNN -> Classifier
#testing and analysis at the end
#for hp tuning technique: instead of random search using hp, Bayesian hyperparameter search using wandb is used
#the maxpooling layer before "encoder" is reached is optional, try with and without: do hp tuning for both and compare their bests? NEED TO TRY.
#for the second conv2d, should we use l1 regularization for sparsity again? Would this lead to overfitting and a more complex model or would it make it better? NEED TO TRY. In any case, do not tune that one, just give it a constant value
#Couldn't figure out how to save the best model???????????????

In [None]:
#Access the data from database and preprocessing
#.
#.
#.
#.
#.
#.
#.
#.

In [None]:
#train, test, val split the data, maybe some preprocessing as well
#.
#.
#.
#.
#.
#.
#.
#.
#.
#.
#.
#.

In [None]:
#Seed both NumPy and TensorFlow with the same value so repeated runs are easier to compare
np.random.seed(67)
tf.random.set_seed(67)

In [None]:
#Authorize wandb
#Uses the Python API instead of the `!wandb login` shell command: a `!` subprocess cannot reliably
#receive the interactive API-key prompt inside a notebook, whereas wandb.login() prompts in the cell
#itself (or picks up an already stored key / the WANDB_API_KEY environment variable)
wandb.login()

In [None]:
#Search strategy used for the sweep; "grid" and "random" are the alternatives
method = "bayes"

In [None]:
#Main function to compile, build and train the model
def train(): #give the data we pull from database to train function as parameter
    """Build, compile and fit the classifier for one wandb run.

    The function takes no arguments so that it can be handed to `wandb.agent`
    as the sweep function. It therefore reads the following names from the
    notebook scope, which must be defined by earlier cells before it is called:
    `x_train`, `y_train`, `x_val`, `y_val` (data) and `method` (search strategy,
    stored in the run config).

    Returns:
        The fitted Keras model of this run. During a sweep only the model of
        the last run would be kept by a caller, so this is of limited use.
    """

    #Default values for hyper-parameters (used for every key the sweep does not set)
    configs = {
        'encoder_filters': 32,
        'encoder_kernel_size': 3,
        'encoder_l1': 0.00001,
        'units': 32,
        'dropout': 0.0,
        'weight_initialization': 'glorot_uniform',
        'batch_size': 32,
        'learning_rate': 0.00001,
        'optimizer': 'adam', #read when compiling, so it needs a default for runs outside a sweep
        'epochs': 10, #read by fit() but not part of the sweep; adjust to the available compute budget
        'method': method
    }

    #Initialize a new wandb run
    wandb.init(project='automated radio wave spectrogram classifier', config=configs)

    #IMPORTANT: always read hyper-parameters through wandb.config so the values picked by the sweep are used
    cnf = wandb.config

    #Optimizer names accepted in the config, mapped to their classes.
    #The learning rate has to be given to the optimizer itself; compile() has no learning_rate argument
    optimizer_classes = {'adam': Adam, 'sgd': SGD, 'rmsprop': RMSprop}

    #Class to build the model.
    #It is deliberately NOT named `Model`: that would shadow the Keras `Model` that build() needs
    class SpectrogramClassifier:
        def __init__(self, input_shape, num_classes):
            self.input_shape = input_shape
            self.num_classes = num_classes

        def build(self):
            """Create and compile the encoder -> EfficientNetB0 -> softmax classifier."""

            input_img = Input(shape=self.input_shape)

            # AUTOENCODER: how stacked should it be/How many layers?
            #1st layer
            x = Conv2D(
                filters=cnf.encoder_filters,
                kernel_size=cnf.encoder_kernel_size, #could be changed manually
                activation='relu',
                padding='same',
                activity_regularizer=regularizers.l1(cnf.encoder_l1), #l1 on activations pushes the encoder towards sparsity
                kernel_initializer=cnf.weight_initialization
            )(input_img)
            x = MaxPooling2D((2, 2), padding='same')(x) #max pooling layer

            #2nd layer
            x = Conv2D(cnf.encoder_filters, (3, 3), activation='relu', padding='same')(x)
            x = Conv2D(
                filters=cnf.encoder_filters,
                kernel_size=cnf.encoder_kernel_size,
                activation='relu',
                padding='same',
                #activity_regularizer=regularizers.l1(cnf.encoder_l1),
            )(x)

            x = MaxPooling2D((2, 2), padding='same')(x) #may not add, try both
            #result of the encoding process: latent space
            encoded = Conv2D(32, (3, 3), activation='relu', padding='same')(x)


            #start of decoding, remove comment for decoding
            #x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
            #x = UpSampling2D((2, 2))(x)
            #x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
            #x = UpSampling2D((2, 2))(x)
            #decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)


            #PRETRAINED EFFICIENTNET, could possibly use smaller but less computationally expensive models: try,
            # note that use of LSTM is computationally more expensive and is not sure to bring better accuracy but still worth trying
            #also note the use of transfer learning here, but could also go with the route of training the whole model: after HP tuning with validation data ofc

            #The ImageNet weights can only be loaded into a network with a 3-channel input, so the
            #latent feature maps are projected to 3 channels with a 1x1 convolution first
            cnn_input = Conv2D(3, (1, 1), padding='same')(encoded)

            #EfficientNet must be built for the shape it actually receives (the projected latent space),
            #not for the shape of the raw spectrogram.
            #NOTE: the two poolings divide height/width by 4 and EfficientNetB0 rejects inputs smaller than 32x32
            efficientnet = EfficientNetB0(
                weights='imagenet',
                include_top=False,
                input_shape=tuple(cnn_input.shape[1:])
            )

            for layer in efficientnet.layers[:-20]:  #freezing everything except top 20 layers, again could also go for layer.trainable=true for all layers: do we have enough computational resources?
                layer.trainable = False

            x = efficientnet(cnn_input)
            x = GlobalAveragePooling2D()(x) #helps reduce overfitting by reducing the total number of parameters in the model
            x = Dense(cnf.units, kernel_initializer=cnf.weight_initialization)(x) #A fully connected dense layer, possibly for feature extraction, could think of adding more layers here for feature classifications in the future
            x = Dropout(cnf.dropout)(x) #for regularization, again use of tuning: maybe too much?
            #fully connected dense layer with softmax activation producing the class probabilities;
            #this works as the classifier in our ML pipeline
            output = Dense(self.num_classes, activation='softmax')(x)

            model = Model(inputs=input_img, outputs=output)

            optimizer = optimizer_classes[cnf.optimizer](learning_rate=cnf.learning_rate)
            model.compile(optimizer=optimizer,
                        loss='categorical_crossentropy', #expects one-hot encoded labels
                        metrics=['accuracy'])

            return model

    s_model = SpectrogramClassifier(input_shape=(128, 128, 1), num_classes=2) #this part can be replaced with correct input shape
    s_model = s_model.build()

    #Train the model
    labels = ["Yes", "No"]
    s_model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=cnf.epochs,
        batch_size=cnf.batch_size,
        #The callback logs example predictions from the validation split; the test split stays untouched during tuning
        callbacks=[WandbCallback(data_type="image",
                validation_data=(x_val, y_val), labels=labels)]
    )

    return s_model #this return is not very useful, when sweep operation takes place the last try will be the one kept, but still

In [None]:
#A function to specify the tuning configuration, it would also return a sweep id (required for running the sweep)
def get_sweep_id(method):
    """Register a hyper-parameter sweep with wandb and return its id.

    Args:
        method: search strategy stored under the "method" key of the sweep
            configuration (the notebook uses 'bayes'; 'grid' and 'random' are
            the alternatives it mentions).

    Returns:
        The sweep id returned by `wandb.sweep`, to be passed to `wandb.agent`.
    """
    sweep_config = {
        "name": "sweep",
        "method": method,
        #Tune on the validation metric: maximizing the training accuracy would favour over-fitted runs
        "metric": {
            "name": "val_accuracy",
            "goal": "maximize"
        },
        "parameters": {
            "encoder_filters": {
                "values": [32, 64, 96, 128]
            },
            "encoder_kernel_size": {
                "values": [3, 5]
            },
            "encoder_l1": {
                "min": 0.00001,
                "max": 0.01
            },
            "weight_initialization": {
                "values": ['glorot_uniform', 'he_normal']
            },
            "units": {
                "min": 32,
                "max": 512
            },
            "dropout": {
                "min": 0.0,
                "max": 0.5
            },
            "learning_rate": {
                "min": 0.00001,
                "max": 0.01
            },
            "batch_size": {
                "min": 32,
                "max": 512
            },
            "optimizer": {
                "values": ['adam', 'sgd', 'rmsprop']
            },
        }
    }

    sweep_id = wandb.sweep(sweep_config, project="automated radio wave spectrogram classifier")
    return sweep_id

In [None]:
#Create a sweep with the search strategy chosen in `method` (the same value train() stores in each run's config),
#so that changing `method` in one place is enough
sweep_id = get_sweep_id(method)

In [None]:
#Run the sweep
#Bayesian and random searches over continuous ranges never stop on their own, so the number of runs
#is capped here; otherwise the cells below would never be reached. Adjust to the compute budget.
MAX_SWEEP_RUNS = 20
wandb.agent(sweep_id, function=train, count=MAX_SWEEP_RUNS)

In [None]:
#Testing and Analysis: ROC curves, confusion matrices, accuracy scores... MAY NOT BE REQUIRED WITH WANDB
#.
#.
#.
#.
#.
#.
#.
#.
#.
#.

In [None]:
#A requirement when using notebook
#No `run` variable is ever created in this notebook (wandb.init is called inside train() and its result
#is not kept), so close whichever run is still active through the module-level helper instead
wandb.finish()