<a href="https://colab.research.google.com/github/fabriziobasso/Colab_backup/blob/main/brist1d_ricopue_conv2d_gru.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import GroupKFold
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error


import tensorflow as tf
from keras.utils import plot_model
import tensorflow.keras.backend as K

import gc,os,random
import warnings
warnings.filterwarnings("ignore")

In [None]:
def set_seeds(seed):
    """Seed the random number generators used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable and then seeds, in
    order, the stdlib ``random`` module, TensorFlow's global generator and
    NumPy's global generator.

    Args:
        seed: Seed value handed to every generator.
    """
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    for seeder in (random.seed, tf.random.set_seed, np.random.seed):
        seeder(seed)


set_seeds(seed=2024)

In [None]:
# Load the competition data.
train = pd.read_csv('/kaggle/input/brist1d/train.csv')
test = pd.read_csv('/kaggle/input/brist1d/test.csv')

# Normalise column names: ':' and '-' both become '_'.
for frame in (train, test):
    frame.columns = frame.columns.str.replace(':', '_').str.replace('-', '_')

In [None]:
train_cols = train.columns.tolist()

# For each signal family keep the last 24 matching columns (in file column order).
bg_cols   = [col for col in train_cols if "bg_" in col][-24:]
insu_cols = [col for col in train_cols if "insulin_" in col][-24:]
carb_cols = [col for col in train_cols if "carbs_" in col][-24:]
hr_cols   = [col for col in train_cols if "hr_" in col][-24:]
step_cols = [col for col in train_cols if "steps_" in col][-24:]
cals_cols = [col for col in train_cols if "cals_" in col][-24:]

# Activity columns are dropped entirely.
actv_cols = [col for col in train_cols if "activity_" in col]  # --> 98% of nan values
train.drop(columns=actv_cols, inplace=True)
test.drop(columns=actv_cols, inplace=True)

print(len(bg_cols),len(insu_cols),len(carb_cols),len(hr_cols),len(step_cols),len(cals_cols), len(actv_cols))

# The order of this list fixes the row order of the 6 x 24 grid that is built
# later by reshaping the feature vector.
feature_cols = carb_cols + hr_cols + bg_cols + insu_cols + step_cols + cals_cols

# Target: rename the label column.
# regex=False is required: 'bg+1_00' contains '+', which is a quantifier when
# the pattern is treated as a regular expression (the default before
# pandas 2.0), in which case the column would silently not be renamed.
train.columns = train.columns.str.replace('bg+1_00', 'target', regex=False)
test['target'] = 0  # placeholder so train and test share the same columns

# Feature used to create validation groups.
print(train.p_num.unique())
print(test.p_num.unique())

# Time feature is not used.
train.drop(columns=['id', 'time'], inplace=True)
test.drop(columns=['id', 'time'], inplace=True)

#feature_cols=[col for col in feature_cols if '_0_' in col]

In [None]:
#impute missing --> from https://www.kaggle.com/code/greysky/brist1d-blood-glucose-prediction-tabnet
# Step 1: interpolate along each row (axis=1), one signal family at a time.
signal_groups = (bg_cols, insu_cols, carb_cols, hr_cols, step_cols, cals_cols)
for frame in (train, test):
    for group in signal_groups:
        frame[group] = frame[group].interpolate(axis=1)

# Step 2: fill whatever is still missing with SimpleImputer (default
# settings), fitted on train and then applied to test.
imputer = SimpleImputer()
train[feature_cols] = imputer.fit_transform(train[feature_cols])
test[feature_cols] = imputer.transform(test[feature_cols])

In [None]:
# Scale features to [0, 1]; the scaler is fitted on train and test stacked together.
n_train_rows = len(train)
stacked = pd.concat([train, test])
scaler = MinMaxScaler(feature_range=(0, 1))
stacked[feature_cols] = scaler.fit_transform(stacked[feature_cols])

# Split the stacked frame back into its two parts.
train, test = stacked[:n_train_rows], stacked[n_train_rows:]
del stacked
gc.collect()


In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving DataFrame rows as batches of 2-D feature grids.

    Every batch is a tuple ``(X, y)`` where ``X`` has shape
    ``(batch, N_ROWS, len(train_features) // N_ROWS)`` and ``y`` holds the
    values of the target column for the same rows.

    Args:
        data: DataFrame containing both the feature and the target columns.
        target: Name of the target column.
        data_index: Positional row indices (they are used with ``iloc``)
            that this generator iterates over.
        train_features: Ordered list of feature column names. Its length must
            be divisible by ``N_ROWS`` for the reshape to succeed.
        batch_size: Maximum number of rows per batch.
        shuffle: If true, the row order is reshuffled at construction time and
            whenever ``on_epoch_end`` is called.
        mode: Stored on the instance; not otherwise used by this class.
        *args, **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    # Number of rows of the feature grid produced for each sample.
    N_ROWS = 6

    def __init__(self,data,target,data_index,train_features,batch_size=1024*10,shuffle=False,mode='train',*args, **kwargs):

        super().__init__(*args, **kwargs)
        self.data = data
        self.target = target
        self.features = train_features
        self.data_index = data_index
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.mode = mode
        self.n = 0  # cursor used by __next__
        self.max = self.__len__()
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.data_index) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(indexes)

    def __next__(self):
        """Return the next batch, wrapping around after the last one."""
        if self.n >= self.max:
            self.n = 0
        result = self.__getitem__(self.n)
        self.n += 1
        return result

    def on_epoch_end(self):
        """Rebuild the iteration order, reshuffling it when ``shuffle`` is set."""
        # Work on a private copy: np.random.shuffle operates in place, so
        # shuffling ``data_index`` directly would reorder the caller's array
        # (and fails outright on an immutable pandas Index).
        self.indexes = np.array(self.data_index)
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Build ``(X, y)`` for the given positional row indices."""
        # Select the rows once and reuse them for both features and target.
        batch = self.data.iloc[indexes]
        # Use the generator's own feature list (not a module-level global) so
        # the grid width always matches the columns actually selected.
        n_cols = len(self.features) // self.N_ROWS
        X = batch[self.features].values.reshape(len(indexes), self.N_ROWS, n_cols)
        y = batch[self.target].values

        return X,y

In [None]:
from matplotlib import pyplot as plt
# Sanity check: pull a single batch from the generator and display a few samples.
gen = DataGenerator(
    train,
    'target',
    train.head(1024).index,
    feature_cols,
    shuffle=False,
    batch_size=1024,
    mode='train',
)
X, y = next(gen)

for i in range(0, 1024, 300):
    sample = X[i]
    print(sample.shape, y[i])
    print(X.shape)
    fig = plt.figure()
    plt.imshow(sample)
    plt.show()

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root of the mean squared difference between predictions and targets.

    Computed with Keras backend ops; the mean is taken over all elements.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared = K.mean(squared_error)
    return K.sqrt(mean_squared)

def make_model(img_rows,img_cols,channnels):
    """Build and compile the Conv2D -> GRU -> Dense regression network.

    Architecture:
        1. ``Conv2D(32, (6, 3))`` with 'valid' padding, followed by
           ``Conv2D(32, (1, 3))`` with 'same' padding.
        2. The convolution output is reshaped to a sequence of 32-dim vectors
           and fed to a ``GRU(32)`` that returns the full sequence.
        3. The flattened GRU output goes through Dense(128) / Dropout(0.4) /
           Dense(16) / Dropout(0.2) and a single linear output unit.

    The model is compiled with Adam (learning rate 1e-3) and MSE loss.

    Args:
        img_rows: Height of the input grid; must be at least 6.
        img_cols: Width of the input grid; must be at least 3.
        channnels: Number of input channels.

    Returns:
        The compiled ``tf.keras.Model``.

    Raises:
        ValueError: If the input grid is smaller than the first kernel.
    """
    n_filters = 32
    first_kernel = (6, 3)

    # Spatial size after the first ('valid') convolution. The second
    # convolution uses 'same' padding and therefore keeps this size.
    conv_rows = img_rows - first_kernel[0] + 1
    conv_cols = img_cols - first_kernel[1] + 1
    if conv_rows < 1 or conv_cols < 1:
        raise ValueError(
            f"Input grid ({img_rows}x{img_cols}) is smaller than the first "
            f"convolution kernel {first_kernel}."
        )

    input_model = tf.keras.layers.Input(shape=(img_rows,img_cols,channnels))

    ########################################################

    conv1 = tf.keras.layers.Conv2D(n_filters, kernel_size=first_kernel, activation='relu')(input_model)
    conv1 = tf.keras.layers.Conv2D(n_filters, kernel_size=(1,3),padding='same', activation='relu')(conv1)
    # Sequence length derived from the input shape instead of a hard-coded 22
    # (which is only correct for a 6 x 24 input).
    conv1 = tf.keras.layers.Reshape((conv_rows * conv_cols, n_filters))(conv1)
    GRU1 = tf.keras.layers.GRU(32, return_sequences=True)(conv1)
    GRU1 = tf.keras.layers.Flatten()(GRU1)

    x = tf.keras.layers.Dense(128, activation='relu')(GRU1)
    x = tf.keras.layers.Dropout(0.4)(x)
    x = tf.keras.layers.Dense(16, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    output = tf.keras.layers.Dense(1)(x)

    model = tf.keras.Model(inputs=input_model, outputs=output)

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss = 'mse')

    return model


# Input grid geometry: 6 rows, with the feature columns split evenly across them.
channnels = 1
img_rows = 6
img_cols = len(feature_cols) // 6

# Build one model up front just to print its layer summary.
model = make_model(img_rows, img_cols, channnels)
model.summary()
#plot_model(model, show_shapes=True, show_layer_names=True, to_file='model.png')
#from IPython.display import Image
#Image(retina=True, filename='model.png')

In [None]:
# Grouped cross-validation: folds are split on p_num, so the rows of one
# p_num value never appear in both the training and the validation part.
n_splits = 5  # single source of truth for the fold count and the test averaging
cv = GroupKFold(n_splits)
K.clear_session()

# Generators used for prediction must not shuffle, so that the predictions
# line up with the row order of the underlying index.
test_gen = DataGenerator(test,'target',test.index,feature_cols,shuffle=False, batch_size=256,mode='test')
train_oof = np.zeros(len(train))    # out-of-fold predictions for the train rows
test_preds = np.zeros(len(test))    # test predictions averaged over the folds


for train_index, valid_index in cv.split(train, train.target, groups=train['p_num']):

    # Stop after 20 epochs without val_loss improvement and restore the best weights.
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, verbose=0, mode='min', baseline=None, restore_best_weights=True)
    # Multiply the learning rate by 0.9 after 5 epochs without val_loss improvement.
    rlr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=5,  mode='min', verbose=0)

    print('train_p_num:',train.iloc[train_index]['p_num'].unique(),'*********  valid_p_num:',train.iloc[valid_index]['p_num'].unique())

    train_gen = DataGenerator(train,'target',train_index,feature_cols,shuffle=False, batch_size=256,mode='train')
    valid_gen = DataGenerator(train,'target',valid_index,feature_cols,shuffle=False, batch_size=256,mode='train')
    print(len(train_index),len(valid_index))

    # A fresh model is trained for every fold.
    model = make_model(img_rows,img_cols,channnels)
    model.fit(train_gen,validation_data =valid_gen, verbose=1, epochs=50,callbacks = [rlr,es] )

    # Each fold contributes an equal share to the averaged test prediction.
    test_preds += model.predict(test_gen,verbose=0).ravel() / n_splits

    oof_preds = model.predict(valid_gen,verbose=0).ravel()
    train_oof[valid_index] = oof_preds
    rmse = np.sqrt(mean_squared_error(train.iloc[valid_index].target, oof_preds))
    print(f"RMSE oof Score: {rmse}")

    # Drop the model reference first, then reset Keras' global state.
    del model
    K.clear_session()


# Overall score on the out-of-fold predictions.
rmse = np.sqrt(mean_squared_error(train.target, train_oof))
print(f"RMSE Score: {rmse}")

In [None]:
# Fill the sample submission with the fold-averaged test predictions and save it.
sample_path = '/kaggle/input/brist1d/sample_submission.csv'
submission = pd.read_csv(sample_path).assign(**{'bg+1:00': test_preds})
submission.to_csv('submission.csv', index=False)
submission.head()