<a href="https://colab.research.google.com/github/chenhao1umbc/WSCDL/blob/master/WSCDL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Make the user's Google Drive available inside the Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'  # directory at which Drive is mounted
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
# IPython line magic (not plain Python): change the working directory to the
# project folder on the mounted Drive so the relative `utils` imports and
# dataset paths used in the following cells resolve.
cd /content/drive/My\ Drive/Streaming_AE

In [0]:
import torch
# Report whether PyTorch can see a CUDA device in this runtime.
print(torch.cuda.is_available())
# Project-local helpers are pulled in with star imports.
# NOTE(review): names used in later cells without a visible definition
# (e.g. early_stop, classify_features, plt) presumably come from these or the
# other `utils` star imports -- confirm.
from utils.training_tools import *
from utils.custom_layers import *

In [0]:
"This is a pytorch version of Ensemble Auto-encoder "
##
import numpy
import torch
import torch.nn as nn
from utils.data_loader import *
from utils.validation_tools import *
import os
import pdb
import warnings
import time

##
class Att(nn.Module):
    """Attention pooling that collapses the time axis of a sequence.

    Every time step is scored by a small network (``Linear -> tanh -> Linear``).
    The scores are turned into weights with a softmax taken across time, and
    the inputs are summed with those weights.

    Args:
        attention_size: Width of the hidden scoring layer.
        hidden_size: Feature dimension ``D`` of the incoming sequence. When
            omitted it is read from the module-level ``config['hidden_size']``
            so that existing ``Att(attention_size)`` calls keep working.
    """

    def __init__(self, attention_size, hidden_size=None):
        super(Att, self).__init__()
        if hidden_size is None:
            # Backward-compatible fallback for callers that pass only
            # attention_size; requires the module-level config to exist.
            hidden_size = config['hidden_size']
        self.fc0 = nn.Linear(hidden_size, attention_size, bias=True)
        self.fc1 = nn.Linear(attention_size, 1, bias=False)

    def forward(self, inputs):
        """Pool a time-major sequence into one vector per batch item.

        Args:
            inputs: Tensor of shape (T, B, D) = (time, batch, hidden_size).

        Returns:
            Tensor of shape (B, D): the attention-weighted sum over time.
        """
        inputs = inputs.permute(1, 0, 2)  # to (B,T,D)
        v = torch.tanh(self.fc0(inputs))  # (B,T,D)*(D,A) = (B,T,A)
        vu = self.fc1(v)  # (B,T,1): one score per time step
        # Normalise across time (dim=1). Without an explicit dim, softmax on a
        # 3-D tensor falls back to dim 0 and would normalise across the batch.
        alphas = torch.nn.functional.softmax(vu, dim=1)  # (B,T,1)
        output = torch.sum(inputs * alphas, dim=1)  # (B, D)
        return output


class Lstm(nn.Module):
    """Wrapper around ``nn.LSTM`` that exposes only the output sequence.

    ``nn.LSTM`` returns ``(output, (h_n, c_n))``; dropping the state tuple
    lets this layer be placed inside an ``nn.Sequential``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        # Expects input laid out as (time_size, bat_size, dim_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)

    def forward(self, inputs):
        """Run the LSTM on (time_size, bat_size, dim_size) input and return its output sequence."""
        return self.lstm(inputs)[0]


class Net(nn.Module):
    """Ensemble auto-encoder with fully connected, CNN and LSTM branches.

    Three encoders (FC, 1-D CNN, LSTM followed by attention) each map the
    input series to ``feature_size`` values. Their outputs are concatenated
    and projected by ``dense`` into a single latent vector. The FC and CNN
    decoders each reconstruct the series from that vector, and an LSTM decoder
    merges the two reconstructions into the final output.

    Args:
        config: Mapping providing ``seq_len``, ``feature_size``,
            ``hidden_size``, ``attention_size``, ``num_layers``,
            ``CNN_kernel_size`` and ``drop_rate``.
    """

    def __init__(self, config):
        super(Net, self).__init__()

        # Length bookkeeping for the CNN branch: each un-padded stride-1
        # convolution shortens the series by (kernel_size - 1), and each
        # transposed convolution lengthens it by the same amount.
        pooling_size = 1
        cnn_out_size0 = int((config['seq_len'] - 2 * (config['CNN_kernel_size'] - 1)) / pooling_size) \
                        - 2 * (config['CNN_kernel_size'] - 1)
        cnn_out_size1 = cnn_out_size0 + 4*(config['CNN_kernel_size'] - 1)  # after the 4 transposed convolutions

        # FC encoder: seq_len -> feature_size
        self.FC_encoder = nn.Sequential(
            nn.Linear(config['seq_len'], config['feature_size']),  # (input_size, output_size)
            nn.Dropout(config['drop_rate'])
        )

        # FC decoder: feature_size -> seq_len
        self.FC_decoder = nn.Sequential(
            nn.Linear(config['feature_size'], config['seq_len']),  # (input_size, output_size)
            nn.Dropout(config['drop_rate'])
        )

        # CNN encoder: four convolutions, then a linear map to the latent size
        kernel_size = config['CNN_kernel_size']
        self.CNN_encoder = nn.Sequential(
            nn.Conv1d(1, 128, kernel_size, stride=1, padding=0),  # channels_in, channels_out, kernel_size
            nn.BatchNorm1d(128, eps=0.001, momentum=0.01),  # input is N*C*L or N*C
            nn.Conv1d(128, 64, kernel_size),  # default stride=1, padding=0, dilation=1
            nn.BatchNorm1d(64, eps=0.001, momentum=0.01),
            nn.Conv1d(64, 32, kernel_size),
            nn.BatchNorm1d(32, eps=0.001, momentum=0.01),
            nn.Conv1d(32, 1, kernel_size),
            nn.Linear(cnn_out_size0, config['feature_size']),
            nn.ReLU(True),  # inplace=True saves memory by overwriting its input
        )

        # CNN decoder: mirrors the encoder with transposed convolutions
        self.CNN_decoder = nn.Sequential(
            nn.Linear(config['feature_size'], cnn_out_size0),  # refer to CNN_encoder for the parameters
            nn.ReLU(True),  # inplace=True saves memory by overwriting its input
            nn.ConvTranspose1d(1, 32, kernel_size),
            nn.BatchNorm1d(32, eps=0.001, momentum=0.01),
            nn.ConvTranspose1d(32, 64, kernel_size),
            nn.BatchNorm1d(64, eps=0.001, momentum=0.01),
            nn.ConvTranspose1d(64, 128, kernel_size),
            nn.BatchNorm1d(128, eps=0.001, momentum=0.01),
            nn.ConvTranspose1d(128, 1, kernel_size, stride=1, padding=0),
            nn.Linear(cnn_out_size1, config['seq_len'])
        )

        # LSTM encoder: LSTM -> dropout -> attention pooling -> latent size
        self.LSTM_encoder = nn.Sequential(
            Lstm(1, config['hidden_size'], config['num_layers']),  # input (time_size, bat_size, dim_size)
            nn.Dropout(config['drop_rate']),
            Att(config['attention_size']),
            nn.Linear(config['hidden_size'], config['feature_size'], bias=True)
        )

        # LSTM decoder: consumes the two stacked reconstructions (2 channels)
        self.LSTM_decoder = nn.LSTM(2, config['hidden_size'], config['num_layers'])

        # Projects the three concatenated encoder outputs back to feature_size
        self.dense = nn.Sequential(
            nn.Linear(3 * config['feature_size'], config['feature_size']),
            nn.Dropout(config['drop_rate'])
        )

        # fc1 turns the latent vector into the LSTM decoder's initial state;
        # fc2 maps each decoder hidden state to one output value.
        self.fc1 = nn.Linear(config['feature_size'], config['hidden_size'])
        self.fc2 = nn.Linear(config['hidden_size'], 1)

    def forward(self, x):
        """Encode ``x`` into latent features and reconstruct it.

        Args:
            x: Input tensor laid out as (N, C, L); the layers are built for
                a single channel (C = 1) and L = ``config['seq_len']``.

        Returns:
            A tuple ``(reconstruction, latent)``: the reconstruction permuted
            back to (N, C, L), and the latent features produced by ``dense``.
        """
        # FC_encoder
        x_part1 = self.FC_encoder(x)  # input x is (N,C,L)

        # CNN_encoder
        x_part2 = self.CNN_encoder(x)

        # LSTM_encoder
        x_part3 = self.LSTM_encoder(x.permute(2, 0, 1))  # input x is (L,N,C)

        # latent features for classification
        x_stack1 = self.dense(torch.cat((x_part1, x_part2, x_part3.unsqueeze(1)), dim=2))  # stack the encoder outputs

        # FC_decoder
        x_dc1 = self.FC_decoder(x_stack1)

        # CNN_decoder
        x_dc2 = self.CNN_decoder(x_stack1)

        # LSTM decoder
        x_stack2 = torch.cat((x_dc1, x_dc2), dim=1)  # stacked into 2 channels
        x_temp = self.fc1(x_stack1)  # (N, C, L=hidden_size)
        # nn.LSTM needs initial states of shape (num_layers, batch, hidden_size).
        # The permuted latent projection is (1, batch, hidden_size); it is
        # broadcast to every layer so that num_layers > 1 also works
        # (for num_layers == 1 this leaves the tensor unchanged).
        init_state = x_temp.permute(1, 0, 2).expand(
            self.LSTM_decoder.num_layers, -1, -1).contiguous()
        h_0 = init_state
        c_0 = init_state
        t_oupt, _ = self.LSTM_decoder(x_stack2.permute(2, 0, 1),
                                      (h_0, c_0))  # (L,N,C); latent features serve as the initial state
        output = self.fc2(t_oupt)

        return output.permute(1, 2, 0), x_stack1


##
# Experiment configuration
start_time = time.time()
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
config = {
    'attention_size': 64,
    'batch_size': 32,
    'crd': 1,
    'CNN_kernel_size': 4,
    'drop_rate': 0.2,
    'epochs': 300,
    'feature_size': 64,  # number of units in the latent space
    'hidden_size': 128,
    'learning_rate': 0.005,
    'log_dir': 'logs',
    'max_grad_norm': 5,  # cap for the gradient norm during training
    'num_layers': 1,
    'save_model': False,
    'sd': 1,  # input channel length, may need to be coordinated with the kernel specification
    'warning': False,  # False silences warnings
}
if not config['warning']:
    warnings.simplefilter("ignore")
# Controls the dtype of the network parameters; the data dtype is set where the data is loaded.
torch.set_default_dtype(torch.double)

# Datasets available for the experiment
dataset_list = [
    '50words', 'Adiac', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'Car', 'CBF',
    'ChlorineConcentration', 'CinC_ECG_torso', 'Coffee', 'Computers',
    'Cricket_X', 'Cricket_Y', 'Cricket_Z',
    'DiatomSizeReduction', 'DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect',
    'DistalPhalanxTW', 'Earthquakes', 'ECG200', 'ECG5000',
]


def _set_dropout_eval(net):
    """Put only the nn.Dropout sub-modules of *net* into evaluation mode."""
    for layer in net.modules():
        if isinstance(layer, nn.Dropout):
            layer.eval()


progress_fmt = 'Finished epochs: %6s / %6s : training loss = %5.5f, validation loss = %5.5f'
accuracies = {}
for dataset in dataset_list[0:1]:  # only the first dataset is used for testing
    # Colab layout: data lives under 'Dataset/'
    X_train, y_train, X_test, y_test = load_data('Dataset/', dataset)
    X_train = torch.from_numpy(X_train).permute(0, 2, 1)
    X_test = torch.from_numpy(X_test).permute(0, 2, 1)

    config['seq_len'] = X_train.shape[2]  # data shape is (N, C, L)
    loss_val_all = []
    model = Net(config)
    lossfunc = nn.MSELoss()
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        model = nn.DataParallel(model.cuda())  # .cuda() puts the model on GPU0 as controller
        X_train = X_train.cuda()
        X_test = X_test.cuda()
        print('\nGPU is available and training on GPU')
        if gpu_count > 1:
            print('Good, we are now using multiple GPUs')
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
    # Exponential decay, lr = lr * gamma; gamma=1 keeps the learning rate constant.
    ad_lr = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1)

    # Training stage: one full-batch update per epoch
    for i in range(config['epochs']):
        model.train()
        outputs, _ = model(X_train)
        loss = lossfunc(X_train, outputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ad_lr.step()  # advance the learning-rate schedule
        torch.cuda.empty_cache()  # save GPU memory

        # Validation loss, used by early_stop to guard against overfitting.
        # Only the Dropout layers are switched to eval here; every other
        # module keeps the mode set by model.train() above.
        _set_dropout_eval(model)
        with torch.no_grad():
            loss_val = lossfunc(X_test, model(X_test)[0])
            loss_val_all.append(loss_val)
        stop_now = early_stop(loss_val_all)
        print(progress_fmt % (i + 1, config['epochs'], loss.detach().cpu(), loss_val.cpu()))
        if stop_now:
            print('early stop')
            break

    # Validation stage: classify using the learned latent features
    _set_dropout_eval(model)
    with torch.no_grad():
        val, features_train = model(X_train)
        _, features_test = model(X_test)
        f_tr = features_train.detach().to('cpu').numpy().squeeze()
        f_te = features_test.detach().to('cpu').numpy().squeeze()
        clf_results = classify_features(f_tr, y_train, f_te, y_test)
        print(dataset, clf_results)
        accuracies[dataset] = clf_results
        torch.cuda.empty_cache()  # save GPU memory

print(accuracies)
print('complete')
print('The overall time is : ', time.time()-start_time)

In [0]:
# Show the training inputs and the model output for them as two images.
# NOTE(review): `plt` is not imported in the visible cells -- presumably it
# arrives through one of the `utils` star imports; confirm.
train_image = X_train.cpu().numpy().squeeze()
reconstructed_image = val.cpu().numpy().squeeze()
plt.figure()
plt.imshow(train_image)
plt.figure()
plt.imshow(reconstructed_image)