## import modules

In [None]:
import numpy as np
import os

# to read data
import requests

# for neural network
import torch
from torch.utils.data import DataLoader, Dataset
# run on the first CUDA GPU when torch reports one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# to save parameters
import copy

# to plot the data
import matplotlib.pyplot as plt

# for splitting data and evaluating results
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# for filtering and normalization
from scipy.ndimage import gaussian_filter
from sklearn.preprocessing import MinMaxScaler

# to track progress
from tqdm.notebook import tqdm

## function definitions

### download file from url

In [None]:
def download_file(url,saveAs):
    """Download ``url`` to the local path ``saveAs`` unless that file already exists.

    Args:
        url: address fetched with an HTTP GET (redirects are followed).
        saveAs: destination path; it is also the path checked to decide
            whether the download can be skipped.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never stored as if it were the data.
    """
    # test the requested target itself, not a hard-coded file name
    if os.path.exists(saveAs):
        print('file already exists')
        return

    r = requests.get(url, allow_redirects=True)
    r.raise_for_status()
    # the context manager guarantees the handle is flushed and closed
    with open(saveAs, 'wb') as f:
        f.write(r.content)
    print('file downloaded')

### create patches

In [None]:
def interpolate(arr,newSize):
  """Resample ``arr`` to ``newSize`` points by linear interpolation.

  The new sample positions are evenly spaced between the first and the last
  index of ``arr`` (both included), so the end values are preserved.

  Args:
    arr: sequence of values located at positions 0 .. len(arr)-1.
    newSize: number of points in the result.

  Returns:
    A list with ``newSize`` interpolated values.
  """
  lastIndex = len(arr) - 1
  knownPositions = np.arange(lastIndex + 1)
  samplePositions = np.linspace(0, lastIndex, newSize)
  # one vectorised call instead of one np.interp call per output point, which
  # re-converted `arr` to an array every time
  return list(np.interp(samplePositions, knownPositions, arr))

In [None]:
# function to return list of patches for a given dataset
def dataToPatches(data, window_size, stride, resizeTo, smooth=False, normalize=False):
  """Cut every sample of ``data`` into sliding-window patches.

  Each element of ``data`` is read as ``sample[0]`` (an iterable of channels)
  and ``sample[1]`` (its label). A channel is taken to end at its first NaN,
  and the shortest channel fixes the usable length of the sample. For every
  window start ``0, stride, 2*stride, ...`` whose ``window_size`` values fit
  inside that length, each channel is zeroed outside the window, resampled to
  ``resizeTo`` points with ``interpolate`` and optionally smoothed and
  min-max scaled. A 0/1 indicator of the window position, resampled the same
  way, is appended as the last channel of the patch.

  Returns:
    Two numpy arrays: the patches, and for each patch the label of the
    sample it was cut from (converted to int).
  """
  patches = []
  patchLabels = []

  for sample in tqdm(data):
    signal, label = sample[0], sample[1]

    # usable length of a channel: index of its first NaN, else its full length
    usableLengths = []
    for channel in signal:
      channelValues = list(channel)
      nanMask = np.isnan(channelValues)
      if any(nanMask):
        usableLengths.append(np.where(nanMask)[0][0])
      else:
        usableLengths.append(len(channelValues))
    inputLen = np.min(usableLengths)

    for start in range(0, inputLen, stride):
      stop = start + window_size
      # skip window starts whose window would run past the usable length
      if stop not in range(inputLen + 1):
        continue

      tailLen = inputLen - start - window_size
      channels = []
      for channel in signal:
        # keep the window in place and zero everything around it
        values = [0] * start
        values.extend(list(channel)[start:stop])
        values.extend([0] * tailLen)
        values = interpolate(values, resizeTo)

        # apply gaussian filter for smoothing and reducing noise
        if smooth:
          values = gaussian_filter(values, sigma=1)
        channels.append(values)

        # Normalize between 0 and 1
        # NOTE(review): this runs inside the channel loop, so channels that
        # were collected (and already scaled) earlier are rescaled again,
        # jointly with the newly added one, on every iteration. Confirm that
        # this is intended rather than a single scaling pass.
        if normalize:
          stacked = np.array(channels)
          scaled = MinMaxScaler().fit_transform(stacked.reshape(-1, 1))
          channels = list(scaled.reshape(stacked.shape))

      # mask marking which positions belong to the window
      indicator = [0] * start
      indicator.extend([1] * window_size)
      indicator.extend([0] * tailLen)
      channels.append(interpolate(indicator, resizeTo))

      patches.append(channels)
      patchLabels.append(label)

  return np.array(patches), np.array(patchLabels, dtype=int)

### create dataloaders

In [None]:
class mydataset(Dataset):
  """Dataset that pairs each input with the label stored at the same index."""

  def __init__(self, inputs, labels):
    # both sequences are kept as given and indexed in parallel
    self.inputs, self.labels = inputs, labels

  def __len__(self):
    """Number of items, taken from ``inputs``."""
    return len(self.inputs)

  def __getitem__(self, index):
    """Return the ``(input, label)`` pair stored at ``index``."""
    return self.inputs[index], self.labels[index]

In [None]:
# function to create train, val and test loaders
def createLoaders(train_inputs, train_labels, test_inputs, test_labels, batch_size, val_percent=.25):
    """Build the train, validation and test ``DataLoader`` objects.

    A fraction ``val_percent`` of the training data is split off as the
    validation set (fixed ``random_state=0``, so the split is repeatable).
    Every loader uses ``batch_size``; only the training loader shuffles.

    Returns:
        The tuple ``(train_loader, val_loader, test_loader)``.
    """
    fit_inputs, held_inputs, fit_labels, held_labels = train_test_split(
        train_inputs, train_labels, test_size=val_percent, random_state=0)

    def make_loader(inputs, labels, shuffle):
        # wrap one (inputs, labels) pair in a dataset and a loader
        return DataLoader(mydataset(inputs, labels), batch_size=batch_size, shuffle=shuffle)

    train_loader = make_loader(fit_inputs, fit_labels, True)
    val_loader = make_loader(held_inputs, held_labels, False)
    test_loader = make_loader(test_inputs, test_labels, False)

    return train_loader, val_loader, test_loader

### train network

In [None]:
def trainNet(net,criterion,optimizer,train_loader,val_loader,epochs,print_every=None):
    """Train ``net`` and return the parameters of the epoch with the lowest validation loss.

    Every epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``. The mean losses of both passes
    are recorded and plotted once training ends, together with a marker on
    the best epoch.

    Args:
        net: model to train; it is updated in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the parameters of ``net``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        epochs: number of passes over the training data.
        print_every: print the losses every this many epochs; when falsy it
            defaults to about a sixth of ``epochs`` (at least 1).

    Returns:
        A deep copy of ``net.state_dict()`` taken at the best epoch. If no
        epoch produced a usable validation loss (e.g. ``epochs == 0``), a
        copy of the current parameters is returned instead.

    Raises:
        ZeroDivisionError: if a loader yields no batches.
    """
    if not print_every:
        # never 0: `epoch % print_every` below would divide by zero for epochs < 6
        print_every = max(1, epochs // 6)

    avg_trainLosses = []
    avg_valLosses = []

    # best epoch so far; starting from +inf lets the first epoch qualify too
    best_params = None
    best_epoch, best_loss = None, float('inf')

    for epoch in tqdm(range(epochs)):  # loop over the dataset multiple times

        train_loss = []
        val_loss = []

        net.train()
        for inputBatch, labelBatch in train_loader:

            inputBatch, labelBatch = inputBatch.to(device), labelBatch.to(device)
            inputBatch = inputBatch.float()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputBatch = net(inputBatch)
            loss = criterion(outputBatch, labelBatch)
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        net.eval()
        with torch.no_grad():
            for inputBatch, labelBatch in val_loader:

                inputBatch, labelBatch = inputBatch.to(device), labelBatch.to(device)
                inputBatch = inputBatch.float()

                # forward pass only: validation must not update the weights
                outputBatch = net(inputBatch)
                loss = criterion(outputBatch, labelBatch)
                val_loss.append(loss.item())

        avg_trainLoss = sum(train_loss) / len(train_loss)
        avg_valLoss = sum(val_loss) / len(val_loss)
        avg_trainLosses.append(avg_trainLoss)
        avg_valLosses.append(avg_valLoss)

        # snapshot the weights whenever the validation loss reaches a new minimum
        if avg_valLoss < best_loss:
            best_params = copy.deepcopy(net.state_dict())
            best_epoch, best_loss = epoch, avg_valLoss

        # print statistics
        if epoch % print_every == print_every - 1:
            print('epoch: %d, train loss: %.3f, val loss: %.3f' % (epoch + 1, avg_trainLoss, avg_valLoss))

    print('Finished Training')
    plt.plot(avg_trainLosses, label='train loss')
    plt.plot(avg_valLosses, label='val loss')
    if best_epoch is not None:
        plt.plot([best_loss]*epochs, linestyle='dashed')
        plt.plot(best_epoch, best_loss, 'o')
    plt.legend()

    if best_params is None:
        # nothing was selected (no epochs, or only NaN losses): return the current weights
        best_params = copy.deepcopy(net.state_dict())

    return best_params

### evaluate and print

In [None]:
def evaluate(net,data_loader,classes):
  """Print a classification report for ``net`` on the batches of ``data_loader``.

  The predicted class of a sample is the index of the largest value in the
  model output for that sample.

  Args:
    net: model to evaluate; it is switched to eval mode.
    data_loader: iterable of ``(inputs, labels)`` batches.
    classes: class names, ordered so that position ``k`` names label ``k``.

  Returns:
    None; the report is printed with 4 digits of precision.
  """
  y_true = []
  y_pred = []
  net.eval()

  with torch.no_grad():
    for inputBatch, labelBatch in tqdm(data_loader):
      outputBatch = net(inputBatch.to(device).float())

      # flatten each sample's output, then take one argmax per sample for the
      # whole batch instead of a numpy call and a device transfer per sample
      predBatch = outputBatch.reshape(outputBatch.shape[0], -1).argmax(dim=1)
      y_pred.extend(predBatch.cpu().tolist())

      # labels are only compared on the host, so they never need to visit the device
      y_true.extend(labelBatch.cpu().tolist())

  print(classification_report(y_true, y_pred, target_names=classes, labels=range(len(classes)) ,digits=4))