In [None]:
import numpy as np
import pandas as pd

import os
from os import path
from tqdm import tqdm

import matplotlib.pyplot as plt
from pylab import rcParams

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import copy


In [None]:
# Root directory of this experiment. The dataset folder, the pretrained weights
# and the detection-result files used below are all resolved relative to it.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
project_root = '/home/project/gas_anormaly_detection/restaurant/2testing/'

In [None]:
# Sub-directories of the project root: input data and saved model weights.
data_root, save_weight_root = (
    path.join(project_root, sub_dir)
    for sub_dir in ('Dataset/', 'pretrained_weights/')
)

In [None]:
# Load the abnormal restaurant IDs: whitespace-separated tokens on the first
# line of abnormal_restaurant_id.txt.
with open(os.path.join(project_root, 'abnormal_restaurant_id.txt'), "r") as f:
    # split() without an argument drops the trailing newline and any empty
    # tokens; split(" ") kept them (e.g. 'id\n'), so the last ID on the line
    # could never match an ID derived from a file name. An empty file now
    # yields an empty list instead of raising IndexError.
    abnormal_restaurant_id = f.readline().split()

In [None]:
detect_results_root = path.join(project_root, 'detect_results/')


def _read_first_line_ids(file_path):
    """Return the whitespace-separated tokens on the first line of ``file_path``.

    An empty file yields an empty list. ``split()`` without an argument is used
    so that the trailing newline and repeated/trailing spaces do not end up as
    part of (or as) an ID, which ``split(" ")`` would allow.
    """
    with open(file_path, "r") as f:
        return f.readline().split()


# IDs already flagged by each of the three result files in detect_results/.
zero_consumption_anomalies = _read_first_line_ids(
    os.path.join(detect_results_root, 'zero_consumption_anomalies.txt'))
negative_consumption_anomalies = _read_first_line_ids(
    os.path.join(detect_results_root, 'negative_consumption_anomalies.txt'))
burty_consumption_anomalies = _read_first_line_ids(
    os.path.join(detect_results_root, 'burty_consumption_anomalies.txt'))

# Concatenation of the three lists, in the same order as they are read.
abnorm_finded = []
for found_ids in (zero_consumption_anomalies,
                  negative_consumption_anomalies,
                  burty_consumption_anomalies):
    abnorm_finded.extend(found_ids)

print(len(abnorm_finded))

In [None]:
# Abnormal restaurant IDs that are not yet in abnorm_finded, i.e. the ones the
# autoencoder still has to find. The original referenced `abnorm_restaurant`,
# which is never defined; the list loaded from abnormal_restaurant_id.txt is
# `abnormal_restaurant_id`.
already_found = set(abnorm_finded)  # set membership instead of a list scan per ID
abnorm_wait_detect = [val for val in abnormal_restaurant_id if val not in already_found]
print(len(abnorm_wait_detect))

In [None]:
# Load the test set: one weekly data file per user

In [None]:
# Folder holding the weekly data files, and the names of the files inside it.
week_data_root = os.path.join(data_root, 'weekdata_affter_afterrules/')
week_data_list = os.listdir(week_data_root)

In [None]:
# Read every weekly file into memory, in directory-listing order.
week_sample_list = []
sample_index = 0  # ends up equal to the number of files read

for sample_index, f_name in enumerate(tqdm(week_data_list), start=1):
    id_ = os.path.splitext(f_name)[0]  # file name without its extension
    user_data = np.loadtxt(os.path.join(week_data_root, f_name))
    week_sample_list.append(user_data)

In [None]:
# Gather the per-file arrays into a single NumPy array.
week_sample_np = np.asarray(week_sample_list)

## CNN autoencoder (AE)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:

class cnn_Encoder(nn.Module):
    """1-D convolutional encoder: three dilated, strided Conv1d layers + a linear layer.

    Args:
        seq_len: number of time steps in each input sequence.
        n_features: number of channels (features) per time step.
        embedding_dim: size of the produced embedding (default 64).

    Input to ``forward`` is ``(batch, seq_len, n_features)``; the output is
    ``(batch, embedding_dim, 1)``.

    The width of the final linear layer is derived from ``seq_len``. For
    ``seq_len=168`` the three convolutions leave 8 time steps, so the layer is
    ``embedding_dim*8 -> embedding_dim`` exactly as when that factor was
    hard-coded, and existing checkpoints keep loading.

    Raises:
        ValueError: if ``seq_len`` is too short to survive the three convolutions.
    """

    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(cnn_Encoder, self).__init__()

        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim= embedding_dim
        self.hidden_dim = 32

        self.cnn1 = nn.Conv1d(
          in_channels= self.n_features,
          out_channels=self.hidden_dim,
          kernel_size=4,
          padding=0,
          dilation=2,
          stride=2
        )

        self.cnn2 = nn.Conv1d(
          in_channels=self.hidden_dim,
          out_channels=self.hidden_dim,
          kernel_size=4,
          padding=0,
          dilation=2,
          stride=2
        )

        self.cnn3 = nn.Conv1d(
          in_channels=self.hidden_dim,
          out_channels=self.embedding_dim,
          kernel_size=8,
          padding=0,
          dilation=2,
          stride=3
        )

        # Number of time steps left after cnn1..cnn3 (8 when seq_len == 168).
        flat_len = self._conv_output_length(seq_len)
        if flat_len < 1:
            raise ValueError(
                'seq_len={} is too short for the encoder convolutions'.format(seq_len))
        self.fc = nn.Linear(self.embedding_dim*flat_len, self.embedding_dim)

    def _conv_output_length(self, length):
        """Return the sequence length after cnn1, cnn2 and cnn3 for an input of ``length``."""
        for conv in (self.cnn1, self.cnn2, self.cnn3):
            # Standard Conv1d output-length formula.
            span = conv.dilation[0] * (conv.kernel_size[0] - 1) + 1
            length = (length + 2 * conv.padding[0] - span) // conv.stride[0] + 1
        return length

    def forward(self, x):
        """Encode ``x`` of shape (batch, seq_len, n_features) to (batch, embedding_dim, 1)."""
        # Conv1d expects channels first: (batch, n_features, seq_len).
        x = x.transpose(1,2)

        x = self.cnn1(x)
        x = F.relu(x)

        x = self.cnn2(x)
        x = F.relu(x)

        x = self.cnn3(x)
        x = F.relu(x)

        # Flatten channels x time, project to the embedding, then restore a
        # length-1 time axis so the result can feed a ConvTranspose1d decoder.
        x = x.reshape(x.size(0),-1)
        x = self.fc(x)
        x = x.reshape(x.size(0),self.embedding_dim,1)

        return x

In [None]:

class cnn_Decoder(nn.Module):
    """1-D transposed-convolution decoder mirroring the encoder.

    Args:
        seq_len: stored on the instance; the layer sizes do not depend on it.
        input_dim: number of channels of the incoming embedding (default 64).
        n_features: number of channels of the reconstructed sequence (default 1).

    ``forward`` takes ``(batch, input_dim, L)`` and returns
    ``(batch, L_out, n_features)``. With the kernel/stride/dilation settings
    below, a latent of length ``L=1`` grows to 7 -> 27 -> 74 -> 168 time steps.

    Two layer widths are tied to their neighbours instead of being coincidentally
    correct: ``cnn2`` consumes the 64 channels ``cnn1`` emits (it used
    ``input_dim``, which only worked when ``input_dim == 64``), and ``cnn4``
    emits ``n_features`` channels (it was fixed at 1). With the defaults all
    parameter shapes are unchanged.
    """

    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(cnn_Decoder, self).__init__()

        self.seq_len, self.n_features = seq_len, n_features
        self.input_dim= input_dim

        self.cnn1 = nn.ConvTranspose1d(
          in_channels= self.input_dim,
          out_channels=64,
          kernel_size=4,
          dilation=2,
          stride=2)

        self.cnn2 = nn.ConvTranspose1d(
          in_channels= 64,  # must equal cnn1.out_channels, not input_dim
          out_channels=32,
          kernel_size=8,
          dilation=2,
          stride=2)

        self.cnn3 = nn.ConvTranspose1d(
          in_channels= 32,
          out_channels=16,
          kernel_size=8,
          dilation=3,
          stride=2)

        self.cnn4 = nn.ConvTranspose1d(
          in_channels= 16,
          out_channels=self.n_features,
          kernel_size=8,
          dilation=3,
          stride=2)

    def forward(self, x):
        """Decode ``x`` of shape (batch, input_dim, L) to (batch, L_out, n_features)."""
        x = self.cnn1(x)
        x = F.relu(x)

        x = self.cnn2(x)
        x = F.relu(x)

        x = self.cnn3(x)
        x = F.relu(x)

        # No activation on the last layer.
        x = self.cnn4(x)

        # Back to (batch, time, features), the layout the encoder takes as input.
        x = x.transpose(1,2)
        return x

In [None]:

class CNNAutoencoder(nn.Module):
    """Encoder/decoder pair; both halves are moved to the global ``device`` on creation.

    ``forward`` returns a tuple ``(embedding, reconstruction)``.
    """

    def __init__(self, seq_len, n_features, embedding_dim=64):
        super().__init__()

        # Encoder first, then decoder, so parameters are created in that order.
        encoder = cnn_Encoder(seq_len, n_features, embedding_dim)
        decoder = cnn_Decoder(seq_len, embedding_dim, n_features)

        self.encoder = encoder.to(device)
        self.decoder = decoder.to(device)

    def forward(self, x):
        """Return the embedding of ``x`` and the sequence decoded from it."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction

In [None]:
seq_len = 168
n_features = 1
# `hidden_size` was used here (and inside anormaly_detect_addentropy) without
# ever being defined, so a fresh kernel stopped with a NameError. 64 is the
# default embedding_dim / input_dim of the encoder and decoder classes.
# NOTE(review): it must equal the width the pretrained checkpoints were trained
# with -- confirm against the training notebook.
hidden_size = 64
model = CNNAutoencoder(seq_len, n_features, hidden_size)

model = model.to(device)

In [None]:
# Pretrained models already loaded, keyed by user type, so that scoring many
# users does not re-read the checkpoint from disk for every sequence.
_CNN_AE_CACHE = {}


def _load_pretrained_cnn_ae(user_type):
    """Return the pretrained CNNAutoencoder for ``user_type``, in eval mode on ``device``."""
    if user_type not in _CNN_AE_CACHE:
        weight_files = {
            'restaurant': 'pretrained_weights/cnnAE_restaurant',
            'canteen': 'pretrained_weights/cnnAE_canteen',
        }
        # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
        checkpoint = torch.load(project_root+weight_files[user_type], map_location=device)
        net = CNNAutoencoder(seq_len=168, n_features=1, embedding_dim=hidden_size)
        net.load_state_dict(checkpoint['net_state_dict'])
        _CNN_AE_CACHE[user_type] = net.to(device).eval()
    return _CNN_AE_CACHE[user_type]


def anormaly_detect_addentropy(seq, type):
    """Score one user's week of data with the pretrained CNN autoencoder.

    Args:
        seq: one week of data for one user (NumPy array or list). The length
            should be 168 (1 week, Monday to Sunday); missing values (NaN) are
            allowed and are replaced by the mean of the remaining values.
        type: user type, ``'restaurant'`` or ``'canteen'``; selects which
            pretrained weights are used.

    Returns:
        A tuple ``(seq_embedding, seq_pred, anormaly_score)``:
        ``seq_embedding`` and ``seq_pred`` are NumPy arrays reshaped to
        ``(1, -1)`` holding the embedding and the reconstruction of the
        max-normalised sequence; ``anormaly_score`` is a float, the cosine
        embedding loss (target 1, i.e. ``1 - cosine similarity``) between the
        reconstruction and the normalised input.

    Raises:
        AssertionError: if ``type`` is not one of the two supported values.
    """
    assert type =='restaurant' or type =='canteen', 'user type cannot be recognized'

    model = _load_pretrained_cnn_ae(type)
    criterion = nn.CosineEmbeddingLoss(reduction='mean').to(device)

    # Work on a float copy so the caller's data is never modified.
    seq = np.array(seq, dtype=float)
    if np.isnan(seq).any():
        print('warnning: sequence with null values, fill with mean')
        seq[np.isnan(seq)]=np.nanmean(seq)

    # Normalisation: divide by the maximum (skipped when the maximum is 0).
    seq_max = seq.max()
    if seq_max!=0:
        seq = seq/seq_max

    # (batch=1, time, features=1); follow `device` instead of forcing CUDA so
    # the function also runs on CPU-only machines.
    seq = torch.tensor(seq.reshape(1,-1,1)).float().to(device)

    with torch.no_grad():
        seq_embedding, seq_pred = model(seq)

        y = torch.ones([1]).to(device)
        # CosineEmbeddingLoss is documented for (N, D) inputs with an (N,)
        # target, so compare the two sequences as flat (1, D) vectors.
        anormaly_score = criterion(seq_pred.reshape(1,-1), seq.reshape(1,-1), y).item()

        seq_pred = seq_pred.cpu().numpy().reshape(1,-1)
        seq_embedding = seq_embedding.cpu().numpy().reshape(1,-1)

    return seq_embedding, seq_pred, anormaly_score

In [None]:
# Score every weekly file and remember which position belongs to which user ID.
users_anormaly_score, test_index_id = {}, {}

for position, f_name in enumerate(tqdm(week_data_list)):
    id_ = os.path.splitext(f_name)[0]
    user_data = np.loadtxt(os.path.join(week_data_root, f_name))

    # Only the third element of the result (the anomaly score) is kept.
    users_anormaly_score[id_] = anormaly_detect_addentropy(user_data, 'restaurant')[2]
    test_index_id[position] = id_

index = len(test_index_id)  # number of files processed

In [None]:
# Number of highest-scoring users that are kept as anomalies below.
topk=50

In [None]:
# Scores in dict insertion order, then the positions of the `topk` largest ones
# (ascending argsort, so the last `topk` entries are the highest scores).
anormaly_score_np = np.array([score for score in users_anormaly_score.values()])
ascending_order = np.argsort(anormaly_score_np)
anormalies_index = ascending_order[-topk:]


In [None]:
# Translate the selected positions back into user IDs.
pattern_anormalies = [test_index_id[index] for index in anormalies_index]

In [None]:
# Flagged IDs that are also in the list of abnormal IDs still waiting to be detected.
find_right = []
for val in pattern_anormalies:
    if val in abnorm_wait_detect:
        find_right.append(val)


In [None]:
week1_path = data_root+'oneweek/'
plt.rcParams['figure.figsize']=(24.0,6.0)

# Map each user ID back to its real file name. The files were scored via
# np.loadtxt on these names; re-reading them with pd.read_csv(...).iloc[0,:]
# and a hard-coded '.csv' suffix cannot return the same vector, so use the
# same loader and the same file here.
id_to_file = {os.path.splitext(f_name)[0]: f_name for f_name in week_data_list}

for id_ in find_right:

    week_data = np.loadtxt(os.path.join(week_data_root, id_to_file[id_]))
    __,pred_week_data, anormaly_score= anormaly_detect_addentropy(week_data,'restaurant')

    # Observed week (raw values).
    plt.plot(week_data)
    plt.title('{} - observed week (anomaly score {:.4f})'.format(id_, anormaly_score))
    plt.show()

    # Autoencoder reconstruction; it lives on the max-normalised scale.
    plt.plot(pred_week_data[0],'r')
    plt.title('{} - autoencoder reconstruction (max-normalised scale)'.format(id_))
    plt.show()