*DISEASE CLASSIFICATION TIME-SERIES* MODEL

In [None]:
# imports
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline

import plotly.offline as pyo
import plotly.graph_objs as go
from plotly.offline import iplot

import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 


cf.go_offline()

Uploading CSV file

In [None]:
# Upload the dataset CSV through google.colab's files.upload().
# The loading cell further down looks the file up under the key
# 'dataset_min.csv', so the uploaded file must carry exactly that name.
# NOTE: Please "Allow 3rd Party Cookies" in Chrome Options

from google.colab import files
uploaded = files.upload()

In [None]:
# Show what files.upload() returned (it is indexed by file name when the CSV is loaded)
print(uploaded)

In [None]:
import pandas as pd
import io

# Name the uploaded file is expected to have (key in the `uploaded` mapping).
dataset_name = 'dataset_min.csv'
if dataset_name not in uploaded:
  # Same exception type as the plain lookup would raise, but with a usable hint.
  raise KeyError(
      f"Expected an uploaded file named {dataset_name!r}, got: {list(uploaded)}")

df = pd.read_csv(io.BytesIO(uploaded[dataset_name]))
df_size = len(df.index)

# Running 0..N-1 row counter; used as the x axis by the plotly cells.
df["Index"] = np.arange(df_size, dtype=int)

In [None]:
# Quick textual look at the loaded frame

# first rows, rendered at full width
print(df.head().to_string())

# two of the columns side by side
print(df[["RF", "MaxT"]])

# one cell value, then the column's container type and its NumPy counterpart
rf_column = df['RF']
print(rf_column[0])
print(type(rf_column))
print(type(rf_column.to_numpy()))

Visualizing Data

In [None]:
# Creating Simple Dynamic Graph to see all data
%matplotlib notebook
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')

fig = plt.figure(figsize=(23, 6), dpi=80)
ax = plt.axes()

# NOTE : Chane the value of var for any other header to get different results
var = "RF"
plt.plot(df['Date'].to_numpy(), df[var].to_numpy());

In [None]:
# Set up plotly's offline notebook mode (connected=False is passed explicitly)
init_notebook_mode(connected=False)

In [None]:
# Create function for Colab
def configure_plotly_browser_state():
  """Display the RequireJS configuration block used for plotly output.

  The snippet loads require.js and registers the `base` and `plotly` module
  paths. The plotly cells in this notebook call this helper right before
  drawing a figure.
  """
  import IPython

  requirejs_snippet = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
  '''
  html_block = IPython.core.display.HTML(requirejs_snippet)
  display(html_block)

In [None]:
configure_plotly_browser_state()

# Marker plot of the RF column against the running row index
rainfall_plot_options = dict(
    kind='line',
    x='Index',
    y=['RF'],
    color=['white'],
    theme='solar',
    mode='markers',
    title='Annual Rainfall in the city Peachtree City, GA',
)
df.iplot(**rainfall_plot_options)
plt.show()

In [None]:
configure_plotly_browser_state()

# Marker plot of the MinT and MaxT columns against the running row index.
# The title names the temperature columns that are actually drawn.
df.iplot(kind='line', x='Index', y=['MinT', "MaxT"], color=['white', 'gold'],
         theme='solar', mode='markers',
         title='Minimum and Maximum Temperature in the city Peachtree City, GA')
plt.show()

Preprocessing Data

In [None]:
# preprocessing data
def one_hot_prob_dist(val):
  """Return the 4-class one-hot vector for a class label.

  Args:
    val: class label; must compare equal to 0, 1, 2 or 3.

  Returns:
    A new list of four ints with a 1 at position `val` and 0 elsewhere.

  Raises:
    ValueError: if `val` is not one of the four known labels. The offending
      value is part of the message instead of being printed to stdout.
  """
  n_classes = 4
  for label in range(n_classes):
    if val == label:
      # Build a fresh list each time so callers can mutate the result safely.
      return [1 if position == label else 0 for position in range(n_classes)]
  raise ValueError(
      f"Unknown class label {val!r}; expected one of {list(range(n_classes))}")

def create_timesteps(X, y, length, step, n_features):
  """Turn a 2-D feature table into overlapping windows of `step` rows.

  Args:
    X: array that can be reshaped to (length, 1, n_features).
    y: targets, one per row of X.
    length: number of rows in X.
    step: window size (number of consecutive rows per sample).
    n_features: number of feature columns.

  Returns:
    (X_windows, y_aligned). For step > 1, X_windows has shape
    (length - step + 1, step, n_features) and y_aligned is y[step-1:], i.e.
    each window is paired with the target of its last row. Otherwise X is
    returned reshaped to (length, 1, n_features) together with y unchanged.
  """
  # One single-timestep sample per row.
  X = X.reshape(length, 1, n_features)

  # A window of one row is already what we have.
  if not step > 1:
    return X, y

  n_windows = length - step + 1
  aligned_targets = y[step - 1:]

  windows = np.empty(shape=[n_windows, step, n_features])
  for start in range(n_windows):
    # Rows start .. start+step-1, with the singleton timestep axis dropped.
    windows[start] = X[start:start + step, 0, :]
  return windows, aligned_targets

def prepare_data(data, length, step, n_features):
  """Build the windowed model inputs and one-hot targets from the raw frame.

  Args:
    data: DataFrame holding the ten feature columns listed below and a
      "Disease" column with class labels accepted by one_hot_prob_dist.
    length: number of rows in `data`.
    step: window size passed on to create_timesteps.
    n_features: number of feature columns (ten columns are selected here).

  Returns:
    (X, y) as returned by create_timesteps: min-max scaled feature windows
    and the matching one-hot targets.
  """
  # Dividing X and y
  feature_columns = ["RF", "MaxT", "MinT", "RH-I", "RH-II", "C1", "C2", "SS", "WD1", "WD2"]
  X = data[feature_columns].to_numpy()
  labels = data["Disease"].to_numpy()

  # One-hot encode every label.
  y = np.array([one_hot_prob_dist(label) for label in labels])

  # Min-max normalise each column to [0, 1].
  # np.ptp is used instead of the ndarray.ptp method, which NumPy 2.0 removed.
  col_min = X.min(0)
  col_range = np.ptp(X, axis=0)
  # A constant column has range 0; divide by 1 instead so it maps to 0 rather
  # than producing NaN.
  col_range = np.where(col_range == 0, 1, col_range)
  X = (X - col_min) / col_range

  # reshaping data into 3D structure [example, timesteps, features]
  X, y = create_timesteps(X, y, length, step, n_features)

  return X, y

def split_data(X, y, ratio=0.98):
  """Split samples chronologically into a leading train part and a trailing test part.

  Args:
    X: 3-D array of samples (first axis indexes the samples).
    y: targets aligned with the first axis of X.
    ratio: fraction of samples that goes into the training part; must lie
      in [0, 1].

  Returns:
    X_train, y_train, X_test, y_test

  Raises:
    ValueError: if `ratio` is outside [0, 1].
  """
  if not 0 <= ratio <= 1:
    raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

  total = X.shape[0]
  div = round(total * ratio)  # index of the first test sample

  # splitting
  X_train, X_test = X[:div, :, :], X[div:, :, :]
  y_train, y_test = y[:div], y[div:]

  return X_train, y_train, X_test, y_test

In [None]:
# Windowing parameters and dataset construction
step, n_features = 15, 10
length = df.shape[0]  # rows in the raw frame

X, y = prepare_data(df, length, step, n_features)
length = len(y)  # from here on: number of windowed samples

print("X : \n", X.shape, "\n\ny : ", y.shape)


In [None]:
# Limit NumPy's printed float precision to 3, then preview the first six samples and targets
np.set_printoptions(precision=3)
print("X : \n", X[:6], "\n\ny : ", y[:6])

In [None]:
# Split with ratio=0.8 (this overrides split_data's default of 0.98)
X_train, y_train, X_test, y_test = split_data(X, y, ratio=0.8)

In [None]:
# Preview the first ten training samples together with their targets; the
# label now names the array that is printed (y_train) and the slice matches
# the ten samples shown for X_train.
print("X_train : ", X_train[:10], "\n\ny_train : ", y_train[:10])
print("\n\nX_train and y_test (shape) : ", X_train.shape, ", ", y_test.shape)

Training Models

In [None]:
# imports
import tensorflow as tf
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import RNN
from tensorflow.keras.layers import Dense

from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten

from tensorflow.keras.optimizers import Adam

In [None]:
# Low-level TensorFlow modules for the custom optimizer class (SGOptimizer) defined below.
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
import tensorflow as tf

Optimizer Class

In [None]:
# basic Optimizer (Adam). Note: `opti` is rebound to an SGOptimizer instance in a later cell.
opti = Adam(learning_rate=0.00005)

In [None]:
class SGOptimizer(optimizer.Optimizer):
    """Gradient-descent variant that reacts to gradient sign flips.

    For every element of a variable the update is

    * the plain step ``var - learning_rate * grad`` while the gradient has the
      same sign as on the previous step (``grad * previous_grad >= 0``), and
    * the mean of the current and the previous value of the variable when the
      sign has flipped.

    Two slots are kept per variable: "pv" (previous value of the variable) and
    "pg" (previous gradient).
    """

    def __init__(self, learning_rate=0.01, name="SGOptimizer", use_locking=False, **kwargs):
        """Store the learning rate and initialise the base optimizer.

        Args:
            learning_rate: step size of the plain gradient step.
            name: optimizer name, also used when creating the slots.
            use_locking: forwarded to the base class.
            **kwargs: accepted for call compatibility; not used.
        """
        super(SGOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate

        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None

    def _prepare(self):
        """Convert the stored learning rate to a tensor."""
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")

    def _create_slots(self, var_list):
        """Create the zero-initialised "pv" and "pg" slots for every model variable."""
        for var in var_list:
            self._zeros_slot(var, "pv", self._name)  # previous variable i.e. weight or bias
        for var in var_list:
            self._zeros_slot(var, "pg", self._name)  # previous gradient

    @tf.function
    def _resource_apply_dense(self, grad, var):
        """Update the slots and perform one optimization step for one model variable.

        Both slots start at zero, so on the very first step ``grad * pg`` is 0
        and the plain gradient step is selected for every element. No
        Python-side "first call" flag is needed; such a flag would only be
        evaluated while the function is traced, not on every step.
        """
        var_dtype = var.dtype.base_dtype
        lr_t = math_ops.cast(self._lr_t, var_dtype)
        pv_var = self.get_slot(var, "pv")
        pg_var = self.get_slot(var, "pg")

        sgd_step = var - grad * lr_t
        averaged = (pv_var + var) / 2.0
        same_direction = grad * pg_var >= 0
        new_var = tf.where(same_direction, sgd_step, averaged)

        # Remember the pre-update value and this gradient for the next step.
        pv_var.assign(var)
        pg_var.assign(grad)
        var.assign(new_var)

    def _resource_apply_sparse(self, grad, var):
        """Sparse gradients are not supported."""
        raise NotImplementedError

    def get_config(self):
        """Return the optimizer configuration as a plain dict.

        Only the learning rate is reported: it is the only hyperparameter this
        class stores. A base-class configuration is merged in when the parent
        class provides `get_config`.
        """
        parent_get_config = getattr(super(), "get_config", None)
        base_config = parent_get_config() if callable(parent_get_config) else {}
        return {
            **base_config,
            "learning_rate": self._lr,
        }

In [None]:
# Use the custom SGOptimizer; this rebinds `opti`, replacing the Adam instance created earlier
opti = SGOptimizer(learning_rate=0.00005)

New TensorFlow Session

In [None]:
# Clear the Keras backend session and fix the NumPy / TensorFlow seeds (42) before building the model
tf.keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

Vanilla LSTM

In [None]:
# defining model
model = Sequential()

# model structure: one LSTM layer over (step, n_features) windows, dropout,
# and a 4-way softmax over the disease classes
model.add(LSTM(30, activation='relu', input_shape=(step, n_features)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dropout(0.4))
model.add(Dense(4, activation='softmax'))

# compile model
model.compile(optimizer=opti, loss='categorical_crossentropy', metrics=['accuracy'])

# model summary
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model.summary()

# train model
epochs = 100
history = model.fit(X_train, y_train, epochs=epochs, verbose=1)

# validation_data=(X_test, y_test), batch_size=20,

Predicting Data and Viewing Results Using the Test Set

In [None]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=32)
print("test loss, test acc:", results)

# Generate predictions (probabilities -- the output of the last layer)
# for the first `num_tests` test samples using `predict`
num_tests = 10
# Report the number of samples actually requested instead of a fixed "3".
print(f"Generate predictions for {num_tests} samples")
predictions = model.predict(X_test[:num_tests])
print("predictions shape:", predictions.shape)

In [None]:
# Print each expected one-hot target next to the predicted class probabilities.
# Iterating over the pairs (instead of range(num_tests)) stops at the number of
# predictions that exist, so a test set smaller than num_tests does not raise
# an IndexError.
for expected, predicted in zip(y_test, predictions):
  print("Test Value :", expected)
  print("Predicted Value :", predicted)
  print("\n")

Visualizing Output

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Add data
# check for overfitting and underfitting
loss = history.history['loss']
acc = history.history['accuracy']
epoch = np.arange(epochs) + 1

# Note that even in the OO-style, we use `.pyplot.figure` to create the figure.
fig, ax = plt.subplots()  # Create a figure and an axes.
ax.plot(loss, epoch, label='loss')  # Plot some data on the axes.
ax.plot(acc, epoch, label='accuracy')  # Plot more data on the axes...

ax.set_xlabel('Epochs')  # Add an x-label to the axes.
ax.set_ylabel('Score')  # Add a y-label to the axes.
ax.set_title("Simple Plot")  # Add a title to the axes.
ax.legend()  # Add a legend.

