# About this notebook
This is a copy of [Sensory_GRU_Sweep_condition_labeled.ipynb](https://github.com/chhyyi/aiffelthon/blob/main/lms/Sensory_GRU_Sweep_condition_labeled.ipynb).

## Changelog:
- The train-test split is omitted; instead, the notebook loads the pre-split train and test datasets separately.

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import random
import os
import wandb
from wandb.keras import WandbCallback

In [9]:
# Label column and (train CSV, test CSV) paths for each supported `target_type`.
_DATASETS = {
    '실측': ('적조발생(실측)',
           "sample_data/class1_obs_train_refined.csv",
           "sample_data/class1_obs_test_refined.csv"),
    '조건': ('적조발생(조건)',
           "sample_data/class1_condition_train_refined.csv",
           "sample_data/class1_condition_test_refined.csv"),
}


def _binarize_occurrence(x):
    """Return `x` unchanged if it is exactly 0 or 1, else threshold it at 0.5.

    Values that are neither 0 nor 1 become 1 when `x >= 0.5` and 0 otherwise
    (so NaN, for which the comparison is False, becomes 0).
    """
    if x == 1 or x == 0:
        return x
    return 1 if x >= 0.5 else 0


def _load_split(csv_path, label_col):
    """Read one CSV split, binarize its label column and drop a leading index column.

    Args:
        csv_path: path of the CSV file to read.
        label_col: name of the label column to pass through `_binarize_occurrence`.

    Returns:
        A DataFrame with a fresh 0..n-1 index.
    """
    frame = pd.read_csv(csv_path)
    frame[label_col] = frame[label_col].apply(_binarize_occurrence)
    # A CSV written with its index produces a leading "Unnamed: 0" column.
    if frame.columns[0] == 'Unnamed: 0':
        frame = frame.iloc[:, 1:]
        print('drop "Unnamed: 0" column')
    return frame.reset_index(drop=True)


def _build_sequences(frame, seq_field, stride_inside_seq, stride_between_seqs):
    """Cut a row-ordered frame into strided windows.

    Window k starts at row `k * stride_between_seqs` and takes the rows
    `start, start + stride_inside_seq, ...` below `start + seq_field`.
    Only windows that fit entirely inside the frame are produced.

    Args:
        frame: DataFrame whose columns are all numeric.
        seq_field: number of consecutive rows spanned by one window.
        stride_inside_seq: step between rows sampled inside a window.
        stride_between_seqs: step between the starts of consecutive windows.

    Returns:
        float ndarray of shape (n_windows, n_steps, n_columns).

    Raises:
        ValueError: if a stride or `seq_field` is not positive, or the frame
            has fewer than `seq_field` rows (no window fits).
    """
    if seq_field <= 0 or stride_inside_seq <= 0 or stride_between_seqs <= 0:
        raise ValueError('seq_field and both strides must be positive integers')
    values = frame.to_numpy(dtype=float)
    n_rows = len(values)
    if n_rows < seq_field:
        raise ValueError(
            'dataset has {} rows, fewer than seq_field={}'.format(n_rows, seq_field))
    starts = np.arange(0, n_rows - seq_field + 1, stride_between_seqs)
    offsets = np.arange(0, seq_field, stride_inside_seq)
    # Broadcasting gives an (n_windows, n_steps) matrix of row numbers; indexing
    # with it gathers every window in one vectorized step.
    return values[starts[:, None] + offsets[None, :]]


def _seq_acc(y_true, y_pred):
    """Fraction of individual labels predicted correctly after thresholding at 0.5.

    Args:
        y_true: array-like of 0/1 labels.
        y_pred: array-like of scores with the same shape as `y_true`.

    Returns:
        The element-wise accuracy as a Python float.

    Raises:
        ValueError: if the two arrays differ in shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            'shape mismatch: y_true {} vs y_pred {}'.format(y_true.shape, y_pred.shape))
    y_bin = (y_pred >= 0.5).astype(y_true.dtype)
    return float(np.mean(y_true == y_bin))


def _build_model(rnn_type, units, seq_length, n_features):
    """Build the recurrent classifier: Input -> GRU/LSTM -> Dense(seq_length, sigmoid).

    Raises:
        ValueError: if `rnn_type` is neither 'GRU' nor 'LSTM'.
    """
    rnn_layers = {'GRU': keras.layers.GRU, 'LSTM': keras.layers.LSTM}
    if rnn_type not in rnn_layers:
        raise ValueError('unsupported RNN_type: {!r}'.format(rnn_type))
    return keras.Sequential([
        keras.Input(shape=(seq_length, n_features)),
        rnn_layers[rnn_type](units),
        keras.layers.Dense(seq_length, activation="sigmoid"),
    ])


def train():
    """Run one W&B training run: load data, build sequences, fit and log accuracy.

    Takes no arguments so it can be handed to `wandb.agent`; hyperparameters are
    read from `wandb.config`, initialised from `default_config` below. Relies on
    the notebook's import cell for `np`, `pd`, `keras`, `wandb` and
    `WandbCallback`.

    The last column of each CSV is used as the label and all preceding columns
    as features. The test split doubles as the validation set during fitting.

    Raises:
        ValueError: if `target_type` or `RNN_type` holds an unsupported value,
            or a split is too short to yield a single sequence.
    """
    default_config={
        'target_type':'실측',
        'RNN_type':'GRU',
        'seq_field':72,
        'stride_inside_seq':9,
        'stride_between_seqs':2,
        'learning_rate':0.01,
        'epochs':12,
        'batch_size':64,
        'unit_RNN0':64}
    ######### Wandb.init() ##########
    wandb.init(config = default_config)
    config = wandb.config

    if config.target_type not in _DATASETS:
        raise ValueError('unsupported target_type: {!r}'.format(config.target_type))
    label_col, train_path, test_path = _DATASETS[config.target_type]

    frames = {
        'train': _load_split(train_path, label_col),
        'test': _load_split(test_path, label_col),
    }

    print('dataset loaded. Generating sequences')
    sequences = {}
    for split, frame in frames.items():
        # any().any() is True as soon as a single cell is NaN.
        print('Any missing values exist:', frame.isna().any().any())
        print('{} dataset columns:'.format(split), frame.columns)
        sequences[split] = _build_sequences(
            frame,
            config.seq_field,
            config.stride_inside_seq,
            config.stride_between_seqs)

    # Last column is the label; everything before it is a feature.
    X_train, y_train = sequences['train'][:, :, :-1], sequences['train'][:, :, -1]
    X_test, y_test = sequences['test'][:, :, :-1], sequences['test'][:, :, -1]

    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape,'\n\n')

    # Size the network from the data rather than from hard-coded numbers so it
    # stays valid when the window length or the number of columns changes.
    _, seq_length, n_features = X_train.shape
    model = _build_model(config.RNN_type, config.unit_RNN0, seq_length, n_features)
    optimizer=keras.optimizers.Adam(learning_rate=config.learning_rate)
    model.compile(optimizer=optimizer, loss="binary_crossentropy")

    class MySeqAccCallback(keras.callbacks.Callback):
        """Print the thresholded accuracy on the test split after every epoch."""

        def on_epoch_end(self, epoch, logs=None):
            y_pred=self.model.predict(X_test)
            print('sequence accuracy is {}'.format(_seq_acc(y_test, y_pred)))

    ######### WandbCallback ##########
    model.fit(X_train, y_train,
              batch_size=config.batch_size,
              epochs=config.epochs,
              validation_data=(X_test, y_test),
              callbacks=[WandbCallback(training_data = (X_train, y_train),
                                       validation_data = (X_test, y_test)),
                         MySeqAccCallback()])
    y_pred=model.predict(X_test)

    ######### Wandb.log() ##########
    wandb.log({"ValidationAcc":_seq_acc(y_test, y_pred)})

In [10]:
# Log this session in to Weights & Biases before creating the sweep below.
wandb.login()

True

In [11]:
# Sweep definition: random search over the hyperparameters listed under
# "parameters". Keys not listed here (epochs, batch size, ...) are not part of
# the sweep. The declared objective is to minimize "best_val_loss".
sweep_config = dict(
    name="sweep",
    metric=dict(goal="minimize", name="best_val_loss"),
    method="random",
    parameters=dict(
        RNN_type=dict(values=['GRU', 'LSTM']),
        target_type=dict(values=['실측', '조건']),
        learning_rate=dict(values=[0.0002, 0.001, 0.0032, 0.01]),
        unit_RNN0=dict(values=[32, 64, 128]),
        stride_inside_seq=dict(values=[3, 9]),
    ),
)

In [12]:
# Register the sweep definition with W&B and keep the id it returns.
sweep_id = wandb.sweep(sweep_config)

# Start an agent that calls `train` once per sampled configuration.
# NOTE(review): no `count` is passed, so the agent presumably keeps launching
# runs until it is interrupted manually — confirm and consider setting a limit.
wandb.agent(
    sweep_id,
    function=train,
    entity='chhyyi',
    project='redzone_RNN_class1_sweep',
)

Create sweep with ID: ltknb2cu
Sweep URL: https://wandb.ai/chhyyi/redzone_RNN_class1_sweep/sweeps/ltknb2cu


[34m[1mwandb[0m: Agent Starting Run: x1l2sufi with config:
[34m[1mwandb[0m: 	RNN_type: GRU
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	stride_inside_seq: 9
[34m[1mwandb[0m: 	target_type: 조건
[34m[1mwandb[0m: 	unit_RNN0: 128


drop "Unnamed: 0" column
drop "Unnamed: 0" column
dataset loaded. Generating sequences
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
(25131, 8, 25) (27940, 8, 25) (25131, 8) (27940, 8) 




Epoch 1/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best)... Done. 0.0s


sequence accuracy is 0.9988144237652111
Epoch 2/20
sequence accuracy is 0.9989620615604867
Epoch 3/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best)... Done. 0.0s


sequence accuracy is 0.9988681102362205
Epoch 4/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989128489620616
Epoch 5/20
sequence accuracy is 0.9986488904795991
Epoch 6/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185359-x1l2sufi/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989620615604867
Epoch 7/20
sequence accuracy is 0.9989620615604867
Epoch 8/20
sequence accuracy is 0.9989352183249821
Epoch 9/20
sequence accuracy is 0.9988994273443093
Epoch 10/20
sequence accuracy is 0.9989173228346456
Epoch 11/20
sequence accuracy is 0.9987473156764496
Epoch 12/20
sequence accuracy is 0.9989441660701504
Epoch 13/20
sequence accuracy is 0.9987428418038654
Epoch 14/20
sequence accuracy is 0.9985370436649964
Epoch 15/20
sequence accuracy is 0.9987338940586972
Epoch 16/20
sequence accuracy is 0.9986802075876879
Epoch 17/20
sequence accuracy is 0.9985728346456693
Epoch 18/20
sequence accuracy is 0.9985370436649964
Epoch 19/20
sequence accuracy is 0.9987786327845383
Epoch 20/20
sequence accuracy is 0.9985817823908375


0,1
ValidationAcc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▅▆▅▂▄▁▁▃▄▃▄▃▆▆▆▆█▆▅▇

0,1
ValidationAcc,0.99858
best_epoch,5.0
best_val_loss,0.00715
epoch,19.0
loss,0.00413
val_loss,0.00916


[34m[1mwandb[0m: Agent Starting Run: yqsr34ob with config:
[34m[1mwandb[0m: 	RNN_type: GRU
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	stride_inside_seq: 9
[34m[1mwandb[0m: 	target_type: 조건
[34m[1mwandb[0m: 	unit_RNN0: 128


drop "Unnamed: 0" column
drop "Unnamed: 0" column
dataset loaded. Generating sequences
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
(25131, 8, 25) (27940, 8, 25) (25131, 8) (27940, 8) 




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best)... Done. 0.0s


sequence accuracy is 0.998313350035791
Epoch 2/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989620615604867
Epoch 3/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989620615604867
Epoch 4/20
sequence accuracy is 0.9985952040085898
Epoch 5/20
sequence accuracy is 0.9987428418038654
Epoch 6/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185557-yqsr34ob/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989575876879027
Epoch 7/20
sequence accuracy is 0.9988636363636364
Epoch 8/20
sequence accuracy is 0.9988770579813887
Epoch 9/20
sequence accuracy is 0.998563886900501
Epoch 10/20
sequence accuracy is 0.9986623120973515
Epoch 11/20
sequence accuracy is 0.996832498210451
Epoch 12/20
sequence accuracy is 0.9987831066571224
Epoch 13/20
sequence accuracy is 0.9987562634216177
Epoch 14/20
sequence accuracy is 0.998604151753758
Epoch 15/20
sequence accuracy is 0.9985236220472441
Epoch 16/20
sequence accuracy is 0.998188081603436
Epoch 17/20
sequence accuracy is 0.9982417680744452
Epoch 18/20
sequence accuracy is 0.9986354688618468
Epoch 19/20
sequence accuracy is 0.998398353614889
Epoch 20/20
sequence accuracy is 0.9982641374373658


0,1
ValidationAcc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
val_loss,▄▂▁▅▃▁▁▂▂▂█▂▃▄▃▄▄▃▄▄

0,1
ValidationAcc,0.99826
best_epoch,5.0
best_val_loss,0.00712
epoch,19.0
loss,0.00526
val_loss,0.00942


[34m[1mwandb[0m: Agent Starting Run: ed48mwam with config:
[34m[1mwandb[0m: 	RNN_type: LSTM
[34m[1mwandb[0m: 	learning_rate: 0.0032
[34m[1mwandb[0m: 	stride_inside_seq: 3
[34m[1mwandb[0m: 	target_type: 조건
[34m[1mwandb[0m: 	unit_RNN0: 128


drop "Unnamed: 0" column
drop "Unnamed: 0" column
dataset loaded. Generating sequences
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
Any missing values exist: False
train dataset columns: Index(['풍속(m/s)', '풍향(deg)', '기온(°C)', '수온(°C)', '강수량(mm)', '풍속(m/s).1',
       '풍향(deg).1', '기온(°C).1', '수온(°C).1', '강수량(mm).1', '풍속(m/s).2',
       '풍향(deg).2', '기온(°C).2', '수온(°C).2', '강수량(mm).2', '풍속(m/s).3',
       '풍향(deg).3', '기온(°C).3', '수온(°C).3', '강수량(mm).3', '풍속(m/s).4',
       '풍향(deg).4', '기온(°C).4', '수온(°C).4', '강수량(mm).4', '적조발생(조건)'],
      dtype='object')
(25131, 24, 25) (27940, 24, 25) (25131, 24) (27940, 2



INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989560963970413
Epoch 2/20
sequence accuracy is 0.9989620615604867
Epoch 3/20
sequence accuracy is 0.9989620615604867
Epoch 4/20
sequence accuracy is 0.9989620615604867
Epoch 5/20
sequence accuracy is 0.9989620615604867
Epoch 6/20
sequence accuracy is 0.9989620615604867
Epoch 7/20
sequence accuracy is 0.9989620615604867
Epoch 8/20
sequence accuracy is 0.998959078978764
Epoch 9/20




INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best/assets


INFO:tensorflow:Assets written to: /aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/aiffelthon/wandb/run-20221120_185752-ed48mwam/files/model-best)... Done. 0.0s


sequence accuracy is 0.9989620615604867
Epoch 10/20
sequence accuracy is 0.9989620615604867
Epoch 11/20
sequence accuracy is 0.9989322357432594
Epoch 12/20


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
