In [None]:
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
import torch.utils.data as data

# SciKit
from sklearn.model_selection import train_test_split

# Python
import pandas as pd
import numpy as np
import time

# Graphing
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Pick the compute device: CUDA when PyTorch reports one is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Seed NumPy's global RNG and PyTorch's default generator with one shared
# value so that random draws repeat across fresh runs of the notebook.
RANDOM_SEED = 42
np.random.seed(seed=RANDOM_SEED)
torch.manual_seed(seed=RANDOM_SEED)

In [None]:
# --- Training configuration ---
# NOTE(review): meanings below are inferred from the names; confirm against
# the training loop that consumes them.
batch_size = 512   # presumably samples per mini-batch
epoch = 100        # presumably the number of training epochs
lr = 0.001         # presumably the optimiser learning rate

# --- Data / model dimensions ---
data_embedding_dim = 64   # presumably the size of the learned embedding
data_n_features = 1       # presumably values per time step
data_seq_len = 140        # presumably time steps per sequence

In [None]:
# Sensor identifiers; every data split ships one CSV file per sensor.
SENSOR_IDS = ("A", "B", "C", "D", "E")


def load_sensor_frames(split, sensor_ids=SENSOR_IDS, data_dir="data"):
    """Read one header-less CSV per sensor for the given data split.

    Parameters
    ----------
    split : str
        File-name suffix that selects the split ("normal", "public" or
        "private" in this notebook).
    sensor_ids : iterable of str, optional
        Sensor identifiers to load, in the order the frames are returned.
    data_dir : str, optional
        Directory that contains the ``sensor_<id>_<split>.csv`` files.

    Returns
    -------
    list of pandas.DataFrame
        One frame per sensor id, in the same order as ``sensor_ids``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when an expected file is missing.
    """
    return [
        pd.read_csv(f"{data_dir}/sensor_{sensor_id}_{split}.csv", header=None)
        for sensor_id in sensor_ids
    ]


# "normal" split: used as the training data (df_train).
df_train = load_sensor_frames("normal")
(df_sensorA_normal, df_sensorB_normal, df_sensorC_normal,
 df_sensorD_normal, df_sensorE_normal) = df_train

# "public" split: referred to as the test data (df_test) in this notebook.
df_test = load_sensor_frames("public")
(df_sensorA_public, df_sensorB_public, df_sensorC_public,
 df_sensorD_public, df_sensorE_public) = df_test

# "private" split: kept separately as df_private.
df_private = load_sensor_frames("private")
(df_sensorA_private, df_sensorB_private, df_sensorC_private,
 df_sensorD_private, df_sensorE_private) = df_private

In [None]:
# Remove the final row from every public (test) frame, exactly once.
#
# The drop is deliberately done in place: the individual df_sensorX_public
# variables refer to the same objects as the entries of df_test, so they must
# see the trimmed data too.
#
# An unguarded in-place drop is not idempotent: every re-run of this cell
# would delete one more row. A marker stored in DataFrame.attrs records that
# a frame has already been trimmed; reloading the CSVs creates fresh frames
# without the marker, so they are trimmed again as expected.
# NOTE(review): the reason the last row must be discarded is not visible in
# this notebook; confirm against the data files.
for df in df_test:
    # Skip frames that are empty (nothing to drop; df.index[-1] would raise)
    # or that were already trimmed by an earlier run of this cell.
    if df.empty or df.attrs.get("last_row_dropped", False):
        continue
    df.drop(df.index[-1], inplace=True)
    df.attrs["last_row_dropped"] = True

In [None]:
class SensorDataset(data.Dataset):
    """Map-style dataset that serves one sequence per row of a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows are the individual sequences. All values are cast
        to ``float64``.
    n_features : int, default 1
        Number of values per time step. Each row is reshaped to
        ``(-1, n_features)`` in row-major order, so with the default a row of
        140 values becomes an array of shape ``(140, 1)``.

    Raises
    ------
    ValueError
        If ``n_features`` is smaller than 1, or the row length is not a
        multiple of ``n_features``.

    Notes
    -----
    NOTE(review): items are ``float64`` NumPy arrays; confirm that downstream
    code casts them to the precision the model expects.
    """

    def __init__(self, df, n_features=1):
        if n_features < 1:
            raise ValueError(f"n_features must be >= 1, got {n_features}")

        self.sequences = df.astype(np.float64).to_numpy()
        self.n_features = n_features

        # Fail early instead of at the first __getitem__ call when rows
        # cannot be split evenly into n_features-wide time steps.
        if self.sequences.ndim == 2 and self.sequences.shape[1] % n_features != 0:
            raise ValueError(
                f"row length {self.sequences.shape[1]} is not a multiple of "
                f"n_features={n_features}"
            )

    def __len__(self):
        """Return the number of sequences (rows) in the dataset."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return sequence ``idx`` as an array of shape ``(seq_len, n_features)``."""
        out = self.sequences[idx]
        # (seq_len * n_features,) -> (seq_len, n_features)
        return out.reshape((-1, self.n_features))

In [None]:
# Wrap sensor A's frames as datasets: the "normal" frame becomes the training
# set and the "public" frame becomes the test set.
train_dataset = SensorDataset(df=df_sensorA_normal)
test_dataset = SensorDataset(df=df_sensorA_public)