*DISEASE CLASSIFICATION TIME-SERIES* MODEL

In [None]:
# imports
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline

import plotly.offline as pyo
import plotly.graph_objs as go
from plotly.offline import iplot

import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 


cf.go_offline()

Uploading CSV file

In [None]:
# Upload the input CSV through the Colab file picker.
# The loading cell further down looks the file up under the key 'expanded.csv',
# so the uploaded file has to carry that name.
# NOTE: Please "Allow 3rd Party Cookies" in Chrome Options

from google.colab import files
# `uploaded` is later indexed by file name and wrapped in io.BytesIO.
uploaded = files.upload()

In [None]:
# Show only the uploaded file names and their sizes; printing the mapping
# itself would dump the complete raw file content into the cell output.
for file_name, payload in uploaded.items():
  print(file_name, ":", len(payload), "bytes")

In [None]:
# NOTE(review): leftover scratch cell - the only live statement prints an empty line.
# print(np.linspace(0, 1, df_size))
print()

In [None]:
import io
import pandas as pd

# Parse the uploaded bytes as CSV and remember how many rows were read.
df = pd.read_csv(io.BytesIO(uploaded['expanded.csv']))
df_size = df.shape[0]

# Explicit integer column 0 .. df_size-1; the plotly cells use it as x axis.
df["Index"] = np.arange(df_size, dtype=int)

In [None]:
# visualizing
# Quick textual inspection of the loaded frame.

# the whole frame
print(df)

# two of the feature columns side by side
print(df[["RF", "MaxT"]])

# the RF value stored under index label 0
print(df["RF"][0])

# type of a single column as selected from the frame
print(type(df['RF']))

# type of the same column after conversion with to_numpy()
print(type(df['RF'].to_numpy()))

Visualizing Data

In [None]:
# Creating Simple Dynamic Graph to see all data
%matplotlib notebook
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')

fig = plt.figure(figsize=(23, 6), dpi=80)
ax = plt.axes()

# NOTE : Chane the value of var for any other header to get different results
var = "RF"
plt.plot(df['Date'].to_numpy(), df[var].to_numpy());

In [None]:
# Set up plotly's notebook rendering.
# NOTE(review): connected=False presumably embeds plotly.js instead of loading it from a CDN - confirm against the plotly docs.
init_notebook_mode(connected=False)

In [None]:
# Create function for Colab
def configure_plotly_browser_state():
  """Emit the RequireJS configuration used by the plotly cells.

  Displays an HTML snippet that loads require.js and registers the `base`
  and `plotly` module paths. It is called at the top of every cell that
  draws a plotly figure. Returns nothing.
  """
  # Use the public IPython.display module and import `display` explicitly
  # rather than relying on the name IPython injects into notebook sessions.
  from IPython.display import HTML, display
  display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
  '''))

In [None]:
configure_plotly_browser_state()
# Marker plot (mode='markers') of the RF column against the row index.
df.iplot(kind='line', x='Index', y=['RF'], color=['white'], 
theme='solar', mode='markers',title='Annual Rainfall in the city Peachtree City, GA')
# NOTE(review): plt.show() is a matplotlib call; presumably it has no effect on the plotly figure above - confirm.
plt.show()

In [None]:
configure_plotly_browser_state()
# Marker plot of MinT and MaxT against the row index. The title names the
# plotted columns (it is not a rainfall plot), and no matplotlib plt.show()
# is issued because the figure is rendered by plotly.
df.iplot(
    kind='line',
    x='Index',
    y=['MinT', 'MaxT'],
    color=['white', 'gold'],
    theme='solar',
    mode='markers',
    title='MinT and MaxT in the city Peachtree City, GA',
)

In [None]:
configure_plotly_browser_state()
# Marker plot of RH-I and RH-II against the row index. The title names the
# plotted columns (it is not a rainfall plot), and no matplotlib plt.show()
# is issued because the figure is rendered by plotly.
df.iplot(
    kind='line',
    x='Index',
    y=['RH-I', 'RH-II'],
    color=['white', 'gold'],
    theme='solar',
    mode='markers',
    title='RH-I and RH-II in the city Peachtree City, GA',
)

In [None]:
configure_plotly_browser_state()
# Marker plot of C2 and SS against the row index. The title names the
# plotted columns (it is not a rainfall plot), and no matplotlib plt.show()
# is issued because the figure is rendered by plotly.
df.iplot(
    kind='line',
    x='Index',
    y=['C2', 'SS'],
    color=['white', 'gold'],
    theme='solar',
    mode='markers',
    title='C2 and SS in the city Peachtree City, GA',
)

In [None]:
configure_plotly_browser_state()
# Marker plot of WD1, WD2 and WS against the row index. The title names the
# plotted columns (it is not a rainfall plot), and no matplotlib plt.show()
# is issued because the figure is rendered by plotly.
df.iplot(
    kind='line',
    x='Index',
    y=['WD1', 'WD2', 'WS'],
    color=['white', 'gold', 'red'],
    theme='solar',
    mode='markers',
    title='WD1, WD2 and WS in the city Peachtree City, GA',
)

Pre Processing Data

In [None]:
# preproceessing data
def one_hot_prob_dist(val):
  """Return the one-hot encoding of a disease class label.

  Args:
    val: Class label; must compare equal to 0, 1, 2 or 3.

  Returns:
    A new 4-element list holding 1 at position `val` and 0 elsewhere.

  Raises:
    ValueError: If `val` is none of the four known classes. The offending
      value is carried in the exception message.
  """
  for label in range(4):
    if val == label:
      encoding = [0, 0, 0, 0]
      encoding[label] = 1
      return encoding
  # Put the bad value into the exception itself instead of printing it separately.
  raise ValueError("Unknown disease class {!r}; expected 0, 1, 2 or 3".format(val))

def create_timesteps(X, y, length, step, n_features):
  """Turn a row-per-observation matrix into overlapping sliding windows.

  When `step` is not greater than 1 the inputs are handed back untouched.
  Otherwise window i stacks rows i .. i+step-1 of `X`, and the targets are
  trimmed to start at index `step - 1`, so each window lines up with the
  target of its last row.

  Args:
    X: Array holding `length * n_features` values, one row per observation.
    y: Targets, one entry per observation.
    length: Number of observations in `X`.
    step: Number of consecutive rows per window.
    n_features: Number of feature columns in `X`.

  Returns:
    (windows, targets). For `step` > 1, `windows` has shape
    (length - step + 1, step, n_features) and `targets` is `y[step-1:]`;
    otherwise the original `X` and `y`.
  """
  if step <= 1:
    return X, y

  rows = X.reshape(length, n_features)
  n_windows = length - step + 1
  targets = y[step - 1:]

  windows = np.empty(shape=[n_windows, step, n_features])
  # Index matrix: entry (i, j) is i + j, i.e. the j-th row of window i.
  row_ids = np.arange(n_windows)[:, None] + np.arange(step)[None, :]
  windows[...] = rows[row_ids]
  return windows, targets

def prepare_data(data, length, step, n_features):
  """Build the normalised feature array and one-hot targets from the raw frame.

  Args:
    data: DataFrame containing the ten feature columns listed below and a
      "Disease" column with class labels accepted by one_hot_prob_dist().
    length: Number of rows in `data`; forwarded to create_timesteps().
    step: Window length; forwarded to create_timesteps().
    n_features: Number of feature columns; forwarded to create_timesteps().

  Returns:
    (X, y) as returned by create_timesteps(): the min-max normalised features
    and the one-hot encoded targets.

  Raises:
    ValueError: Propagated from one_hot_prob_dist() for an unknown label.
  """
  feature_columns = ["RF", "MaxT", "MinT", "RH-I", "RH-II", "C2", "SS", "WD1", "WD2", "WS"]

  # Split into features and one-hot targets, both as numpy arrays
  X = data[feature_columns].to_numpy()
  y = np.array([one_hot_prob_dist(label) for label in data["Disease"].to_numpy()])

  # Min-max normalise every column. np.ptp() is used because the
  # ndarray.ptp() method no longer exists in NumPy 2.0.
  col_range = np.ptp(X, axis=0)
  # A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
  col_range = np.where(col_range == 0, 1, col_range)
  X = (X - X.min(0)) / col_range

  # reshaping data into 3D structure [example, timesteps, features]
  X, y = create_timesteps(X, y, length, step, n_features)

  return X, y

def split_data(X, y, ratio=0.98):
  """Split samples and targets into a leading training part and a trailing test part.

  Args:
    X: Sample array; only its first axis is sliced, so any rank works.
    y: Target array aligned with `X` along the first axis.
    ratio: Fraction of the samples assigned to the training part (0..1).

  Returns:
    (X_train, y_train, X_test, y_test), where the training part holds the
    first round(len(X) * ratio) entries and the test part the remainder.

  Raises:
    ValueError: If `ratio` lies outside the range 0..1.
  """
  if not 0 <= ratio <= 1:
    raise ValueError("ratio must be between 0 and 1, got {!r}".format(ratio))

  div = round(X.shape[0] * ratio)

  # Slicing the first axis alone covers 2-D and 3-D inputs alike, so the
  # function does not need to look at any notebook-level variable.
  X_train, X_test = X[:div], X[div:]
  y_train, y_test = y[:div], y[div:]

  return X_train, y_train, X_test, y_test

In [None]:
# defining data
step = 1  # not greater than 1, so create_timesteps() hands X and y back without windowing
length = len(df.index)
n_features = 10  # must match the number of feature columns selected in prepare_data()

X, y = prepare_data(df, length, step, n_features)
length = y.shape[0]  # refresh length to the number of targets actually returned

print("X : \n", X.shape, "\n\ny : ", y.shape)


In [None]:
# Limit printed floats to 3 decimals, then preview the first six samples and targets.
np.set_printoptions(precision=3)
print("X : \n", X[:6], "\n\ny : ", y[:6])

Shuffling Data

In [None]:
# Shuffle X and y with one shared permutation so every sample keeps its label.
# A fixed seed makes the shuffle, and with it the later train/test split,
# repeatable from a fresh kernel.
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(y.shape[0])

X = X[indices]
y = y[indices]

Split Data


In [None]:
# The first 80% of the rows become the training set, the remaining rows the test set.
X_train, y_train, X_test, y_test = split_data(X, y, ratio=0.8)

In [None]:
# Preview the first ten training samples next to their own labels, then
# report the shape of every part of the split under its real name.
print("X_train : ", X_train[:10], "\n\ny_train : ", y_train[:10])
print("\n\nShapes -> X_train : ", X_train.shape, ", y_train : ", y_train.shape,
      ", X_test : ", X_test.shape, ", y_test : ", y_test.shape)

Training Models

In [None]:
# imports
import tensorflow as tf
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import RNN
from tensorflow.keras.layers import Dense

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import normalize, to_categorical

from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten

from tensorflow.keras.optimizers import Adam

Fully Connected

In [None]:
# defining model
model = Sequential()

# model structure
# The input shape is read from the training data so it always matches what
# model.fit() receives (with step = 1 each sample is a flat feature vector,
# not a (step, n_features) matrix).
model.add(Dense(10, activation='relu', input_shape=X_train.shape[1:]))
model.add(Dropout(0.7))
model.add(Dense(4, activation='softmax'))

# compile model
model.compile(optimizer=Adam(learning_rate=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])

# model summary (summary() prints the table itself and returns None,
# so wrapping it in print() only adds a stray "None" line)
model.summary()

# train model
epochs = 200
history = model.fit(X_train, y_train, epochs=epochs, verbose=1)

# validation_data=(X_test, y_test), batch_size=20,

In [None]:
# defining model
# NOTE(review): this rebinds `model` and `history`; only the learning rate
# differs from the model trained in the previous cell.
model = Sequential()

# model structure
# The input shape is read from the training data so it always matches what
# model.fit() receives (with step = 1 each sample is a flat feature vector,
# not a (step, n_features) matrix).
model.add(Dense(10, activation='relu', input_shape=X_train.shape[1:]))
model.add(Dropout(0.7))
model.add(Dense(4, activation='softmax'))

# compile model
model.compile(optimizer=Adam(learning_rate=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

# model summary (summary() prints the table itself and returns None,
# so wrapping it in print() only adds a stray "None" line)
model.summary()

# train model
epochs = 200
history = model.fit(X_train, y_train, epochs=epochs, verbose=1)

# validation_data=(X_test, y_test), batch_size=20,

Predicting Data and Seeing results using Test Set 

In [None]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=32)
print("test loss, test acc:", results)

# Generate predictions (probabilities -- the output of the last layer)
# for the first few test samples using `predict`.
# The count is capped at the test-set size so num_tests always equals the
# number of predictions actually produced, and the message reports that count.
num_tests = min(10, len(X_test))
print("Generate predictions for", num_tests, "samples")
predictions = model.predict(X_test[:num_tests])
print("predictions shape:", predictions.shape)

In [None]:
# Pair every true label with its prediction. zip() stops at the shorter
# sequence, so a small test set cannot cause an out-of-range index.
for actual, predicted in zip(y_test[:num_tests], predictions):
  print("Test Value :", actual)
  print("Predicted Value :", predicted)
  print("\n")

Visualizing Output

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Add data
# check for overfitting and underfitting
loss = history.history['loss']
acc = history.history['accuracy']
epoch = np.arange(epochs) + 1

# Note that even in the OO-style, we use `.pyplot.figure` to create the figure.
fig, ax = plt.subplots()  # Create a figure and an axes.
ax.plot(loss, epoch, label='loss')  # Plot some data on the axes.
ax.plot(acc, epoch, label='accuracy')  # Plot more data on the axes...

ax.set_xlabel('Epochs')  # Add an x-label to the axes.
ax.set_ylabel('Score')  # Add a y-label to the axes.
ax.set_title("Simple Plot")  # Add a title to the axes.
ax.legend()  # Add a legend.



**HANDLING Bi-DIRECTIONAL DATA**

Pre Processing Bi-Directional Data

In [None]:
def get_disease_count(values):
  """Print how many entries of `values` fall into each disease class.

  Entries equal to 0, 1 or 2 are tallied under that class; every other
  entry is tallied under class 3. Nothing is returned.
  """
  tally = [0, 0, 0, 0]
  for val in values:
    # first of 0, 1, 2 that matches, otherwise the catch-all slot 3
    slot = next((k for k in (0, 1, 2) if val == k), 3)
    tally[slot] += 1

  print("Number of 0s : ", tally[0], "\nNumber of 1s : ", tally[1], "\nNumber of 2s : ", tally[2], "\nNumber of 3s : ", tally[3])

def get_disease_count_one_hot(values):
  """Print the class tallies of a sequence of one-hot encoded labels.

  Each entry is assigned to the first of positions 0, 1, 2 whose value
  equals 1; entries with no such position are tallied under class 3.
  Nothing is returned.
  """
  tally = [0, 0, 0, 0]
  for val in values:
    # positions are probed in order and probing stops at the first hit
    slot = next((k for k in range(3) if val[k] == 1), 3)
    tally[slot] += 1

  print("Number of 0s : ", tally[0], "\nNumber of 1s : ", tally[1], "\nNumber of 2s : ", tally[2], "\nNumber of 3s : ", tally[3])

In [None]:
# Sort the rows by class label, highest label first (ascending=False).
# NOTE(review): this rebinds df for every later cell, and the split further down
# slices by position, so its train and test parts will presumably follow this
# class ordering - confirm this is intended.
df = df.sort_values(by='Disease', ascending=False)
print(df)
# Print how many rows belong to each class.
get_disease_count(df['Disease'].to_numpy())

In [None]:
# preproceessing data
def one_hot_prob_dist(val):
  """Return the one-hot encoding of a disease class label.

  NOTE(review): a function with this name is also defined in the earlier
  "Pre Processing Data" cell; this definition shadows it once the cell runs.

  Args:
    val: Class label; must compare equal to 0, 1, 2 or 3.

  Returns:
    A new 4-element list holding 1 at position `val` and 0 elsewhere.

  Raises:
    ValueError: If `val` is none of the four known classes. The offending
      value is carried in the exception message.
  """
  for label in range(4):
    if val == label:
      encoding = [0, 0, 0, 0]
      encoding[label] = 1
      return encoding
  # Put the bad value into the exception itself instead of printing it separately.
  raise ValueError("Unknown disease class {!r}; expected 0, 1, 2 or 3".format(val))

def create_timesteps_bi(X, y, length, step, n_features):
  """Window both the features and the one-hot targets with the same sliding window.

  When `step` is not greater than 1 the inputs are handed back untouched.
  Otherwise window i stacks rows i .. i+step-1 of `X` and the matching
  rows of `y`, so every time step of a window keeps its own target.

  Args:
    X: Array holding `length * n_features` values, one row per observation.
    y: Array of 4-element targets, one row per observation.
    length: Number of observations in `X`.
    step: Number of consecutive rows per window.
    n_features: Number of feature columns in `X`.

  Returns:
    (temp_x, temp_y). For `step` > 1 their shapes are
    (length - step + 1, step, n_features) and (length - step + 1, step, 4);
    otherwise the original `X` and `y`.
  """
  if step <= 1:
    return X, y

  rows = X.reshape(length, n_features)
  n_windows = length - step + 1

  temp_x = np.empty(shape=[n_windows, step, n_features])
  temp_y = np.empty(shape=[n_windows, step, 4])

  # Index matrix: entry (i, j) is i + j, i.e. the j-th row of window i.
  row_ids = np.arange(n_windows)[:, None] + np.arange(step)[None, :]
  temp_x[...] = rows[row_ids]
  temp_y[...] = y[row_ids].reshape(n_windows, step, 4)
  return temp_x, temp_y

def prepare_data_bi(data, length, step, n_features):
  """Build the normalised feature array and one-hot targets for the bi-directional section.

  Args:
    data: DataFrame containing the nine feature columns listed below and a
      "Disease" column with class labels accepted by one_hot_prob_dist().
    length: Number of rows in `data`; forwarded to create_timesteps_bi().
    step: Window length; forwarded to create_timesteps_bi().
    n_features: Number of feature columns; forwarded to create_timesteps_bi().

  Returns:
    (X, y) as returned by create_timesteps_bi(): the min-max normalised
    features and the one-hot encoded targets.

  Raises:
    ValueError: Propagated from one_hot_prob_dist() for an unknown label.
  """
  # NOTE(review): "MinT" is not part of this list although prepare_data() uses it;
  # presumably intentional, since the calling cell sets n_features = 9 - confirm.
  feature_columns = ["RF", "MaxT", "RH-I", "RH-II", "C2", "SS", "WD1", "WD2", "WS"]

  # Split into features and one-hot targets, both as numpy arrays
  X = data[feature_columns].to_numpy()
  y = np.array([one_hot_prob_dist(label) for label in data["Disease"].to_numpy()])

  # Min-max normalise every column. np.ptp() is used because the
  # ndarray.ptp() method no longer exists in NumPy 2.0.
  col_range = np.ptp(X, axis=0)
  # A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
  col_range = np.where(col_range == 0, 1, col_range)
  X = (X - X.min(0)) / col_range

  # reshaping data into 3D structure [example, timesteps, features]
  X, y = create_timesteps_bi(X, y, length, step, n_features)

  return X, y

def split_data_bi(X, y, ratio=0.98):
  """Split samples and targets into a leading training part and a trailing test part.

  Args:
    X: Sample array; only its first axis is sliced, so any rank works.
    y: Target array aligned with `X` along the first axis.
    ratio: Fraction of the samples assigned to the training part (0..1).

  Returns:
    (X_train, y_train, X_test, y_test), where the training part holds the
    first round(len(X) * ratio) entries and the test part the remainder.

  Raises:
    ValueError: If `ratio` lies outside the range 0..1.
  """
  if not 0 <= ratio <= 1:
    raise ValueError("ratio must be between 0 and 1, got {!r}".format(ratio))

  div = round(X.shape[0] * ratio)

  # Slicing the first axis alone covers 2-D and 3-D inputs alike, so the
  # function does not need to look at any notebook-level variable.
  X_train, X_test = X[:div], X[div:]
  y_train, y_test = y[:div], y[div:]

  return X_train, y_train, X_test, y_test

In [None]:
# defining data
step = 1
length = len(df.index)
n_features = 9

X_bi, y_bi = prepare_data_bi(df, length, step, n_features)
# Take the length from the targets built in this section (y_bi);
# `y` belongs to the earlier, non-bi section.
length = y_bi.shape[0]

# print("X : \n", X.shape, "\n\ny : ", y.shape)

In [None]:
# Limit printed floats to 3 decimals and report the shapes of the prepared arrays.
np.set_printoptions(precision=3)
print("X : \n", X_bi.shape, "\n\ny : ", y_bi.shape)

Split Data

In [None]:
# The first 62% of the rows become the training set, the remaining rows the test set.
X_train_bi, y_train_bi, X_test_bi, y_test_bi = split_data_bi(X_bi, y_bi, ratio=0.62)

In [None]:
# Preview the first ten training samples next to their own labels, then
# report the shape of every part of the split under its real name.
print("X_train : ", X_train_bi[:10], "\n\ny_train : ", y_train_bi[:10])
print("\n\nShapes -> X_train : ", X_train_bi.shape, ", y_train : ", y_train_bi.shape,
      ", X_test : ", X_test_bi.shape, ", y_test : ", y_test_bi.shape)

In [None]:
# Class tallies of the one-hot targets: training split first, then test split.
get_disease_count_one_hot(y_train_bi)
print()
get_disease_count_one_hot(y_test_bi)

Shuffle Data

In [None]:
# Shuffle only the training split, using one shared permutation so every
# sample keeps its label. A fixed seed makes the order repeatable from a
# fresh kernel.
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(y_train_bi.shape[0])

X_train_bi = X_train_bi[indices]
y_train_bi = y_train_bi[indices]

Training Bidirectional Models

Fully Connected Layer

In [None]:
# defining model
model = Sequential()

# model structure
# The input shape is read from the training data so it always matches what
# model.fit() receives (with step = 1 each sample is a flat feature vector,
# not a (step, n_features) matrix).
model.add(Dense(100, activation='relu', input_shape=X_train_bi.shape[1:]))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))

# compile model
model.compile(optimizer=Adam(learning_rate=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

# model summary (summary() prints the table itself and returns None,
# so wrapping it in print() only adds a stray "None" line)
model.summary()

# train model
epochs = 200
history = model.fit(X_train_bi, y_train_bi, epochs=epochs, verbose=1)

# validation_data=(X_test, y_test), batch_size=20,

Predicting Data

In [None]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results_bi = model.evaluate(X_test_bi, y_test_bi, batch_size=32)
print("test loss, test acc:", results_bi)

# Generate predictions (probabilities -- the output of the last layer)
# using `predict`.
# NOTE(review): the predictions are made on X_train_bi, i.e. on training
# rows rather than on the test split - confirm this is intended.
# The count is capped at the number of available rows so num_tests_bi always
# equals the number of predictions produced, and the message reports that count.
num_tests_bi = min(30, len(X_train_bi))
print("Generate predictions for", num_tests_bi, "samples")
predictions_bi = model.predict(X_train_bi[:num_tests_bi])
print("predictions shape:", predictions_bi.shape)

In [None]:
# Pair every label with its prediction. zip() stops at the shorter sequence,
# so fewer available rows cannot cause an out-of-range index.
# NOTE(review): the values printed as "Test Value" are taken from y_train_bi.
for actual, predicted in zip(y_train_bi[:num_tests_bi], predictions_bi):
  print("Test Value :\n", actual)
  print("Predicted Value :\n", predicted)
  print("\n")

Visualizing Output

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Add data
# check for overfitting and underfitting
loss = history.history['loss']
acc = history.history['accuracy']
epoch = np.arange(epochs) + 1

# Note that even in the OO-style, we use `.pyplot.figure` to create the figure.
fig, ax = plt.subplots()  # Create a figure and an axes.
ax.plot(loss, epoch, label='loss')  # Plot some data on the axes.
ax.plot(acc, epoch, label='accuracy')  # Plot more data on the axes...

ax.set_xlabel('Epochs')  # Add an x-label to the axes.
ax.set_ylabel('Score')  # Add a y-label to the axes.
ax.set_title("Simple Plot")  # Add a title to the axes.
ax.legend()  # Add a legend.
