In [1]:
! pip install tensorflow-addons



In [2]:
! pip install tslearn

Collecting tslearn
  Downloading tslearn-0.6.3-py3-none-any.whl (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tslearn
Successfully installed tslearn-0.6.3


In [3]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import tensorflow_addons as tfa
from math import pi, ceil

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.models import Model
from tslearn.preprocessing import TimeSeriesScalerMeanVariance


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [4]:
ndim=20

In [5]:
class AEBase(keras.Model):
    def __init__(self, **kwargs):
        super(AEBase, self).__init__(**kwargs)
        self.encoder = self.construct_encoder()
        self.decoder = self.construct_decoder()
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.mean_squared_error(data, reconstruction), axis=(0, 1)
                )
            )
        grads = tape.gradient(reconstruction_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(reconstruction_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
        }

In [6]:
class LSTMAE(AEBase):
    def __init__(self, latent_dim=12, encoder_hiddens=[256, 128, 64], decoder_hiddens=[64, 128, 256], series_len=None):
        self.latent_dim = latent_dim
        self.encoder_hiddens = encoder_hiddens
        self.decoder_hiddens = decoder_hiddens
        self.series_len = series_len
        super(LSTMAE, self).__init__()

    def construct_encoder(self):
        encoder_inputs = keras.Input((None,self.series_len))
        #masked_seq = keras.layers.Masking(mask_value=0.0)(encoder_inputs)  # Mask any time step with value 0.0
        x = keras.layers.LSTM(self.encoder_hiddens[0], return_sequences=True)(encoder_inputs)
        for i in range(1, len(self.encoder_hiddens)-1):
            x = keras.layers.LSTM(self.encoder_hiddens[i], return_sequences=True)(x)
        x = keras.layers.LSTM(self.encoder_hiddens[-1])(x)
        x = layers.Dense(self.latent_dim)(x)
        encoder = keras.Model(encoder_inputs,  x, name="encoder")
        return encoder

    def construct_decoder(self):
        latent_inputs = keras.Input(shape=(self.latent_dim,))
        x = keras.layers.RepeatVector(1)(latent_inputs)
        # stacking LSTM layers
        for h in self.decoder_hiddens:
            x = keras.layers.LSTM(h, return_sequences=True)(x)

        decoder_outputs = keras.layers.TimeDistributed(keras.layers.Dense(self.series_len))(x)
        decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
        return decoder

In [7]:
nn5 = pd.read_csv("/kaggle/input/nn5-dataset/nn51.csv", header=None)
nn5.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,781,782,783,784,785,786,787,788,789,790
0,13.407029,14.725057,20.564059,34.70805,26.629819,16.609977,15.320295,11.607143,19.883787,23.767007,...,50.141723,33.701814,28.656463,26.417234,27.253401,44.373583,65.206916,49.744898,34.481293,32.667234
1,11.550454,13.59127,15.036848,21.570295,19.444444,0.0,9.722222,12.244898,15.504535,18.93424,...,34.665533,13.988095,13.874717,16.326531,17.488662,20.663265,27.097506,32.171202,13.676304,16.369048
2,5.64059,14.399093,24.418934,28.784014,20.620748,13.803855,11.536281,10.74263,14.824263,25.212585,...,38.364512,17.928005,18.041383,16.978458,22.293084,36.522109,42.786281,39.271542,17.446145,17.148526
3,13.180272,8.446712,19.515306,28.88322,19.472789,0.0,7.355442,10.827664,15.617914,21.159297,...,31.420068,17.276077,15.674603,12.471655,17.928005,25.028345,45.308957,32.242063,15.943878,16.638322
4,9.778912,10.813492,21.612812,38.520408,24.744898,12.329932,12.996032,11.040249,7.95068,19.515306,...,33.758503,18.99093,16.865079,16.000567,15.079365,20.833333,40.646259,33.304989,16.666667,14.101474


In [8]:
from datetime import datetime, timedelta
start_date = datetime(1996, 3, 18)

date_range = [start_date + timedelta(days=i) for i in range(nn5.shape[1])]

# Convert the date range to days of the week
days_of_week = [date.strftime('%A') for date in date_range]
nn5.columns = days_of_week

In [9]:
import pandas as pd
import numpy as np

# Assuming df is your DataFrame with columns as days of the week

# Function to replace zeros with the median of non-zero values for each day of the week
def replace_zeros_with_median(series):
    # Iterate over unique days of the week
    for day in nn5.columns:
        # Extract non-zero values for the specific day
        non_zero_values = series[series.index == day].replace(0, np.nan).dropna()
        
        # Calculate the median of non-zero values
        median_value = non_zero_values.median()
        
        # Replace zeros with the median for the specific day
        series.loc[series.index == day] = series.loc[series.index == day].replace(0, median_value)

# Apply the function to each row of the DataFrame
nn5.apply(replace_zeros_with_median, axis=1)

# Now, df has zeros replaced with the median of non-zero values for each day of the week in each series

0      None
1      None
2      None
3      None
4      None
       ... 
106    None
107    None
108    None
109    None
110    None
Length: 111, dtype: object

In [10]:
nn5.to_csv("nn5.csv", header=None, index=False)