In [21]:
import numpy as np
import pandas as pd
import joblib
import pickle
from scipy import linalg # covaraince sclaer

In [22]:
def bootstrap_limit(stat, alpha=0.05, bootstrap=100, upper = True):
    '''
        @Description
            Bootstrap sampling을 활용한 Control Limit 산출 기법

        @Parameter
            stat : 통계량 (정상상태의 데이터 입력)
            alpha : Control Limit을 정하기 위한 유의수준 (0~1)
            bootstrap : 샘플링 횟수
        @Return
            limit : 임계값 (CL : Control Limit)
    '''
    
    alpha = alpha * 100
    if(upper) : alpha = 100 - alpha
    samsize = max(100, len(stat))
    
    stat = stat.reshape(len(stat)) # 2차원 array를 1차원 array로 변환
    
    # bootstrap 수 만큼 다음 작업을 반복 : samsize(최소 10000번)만큼 정상상태 데이터를 유의수준 만큼 복원 추출 후 평균 값 사용 
    limit = np.mean(list(map(lambda x:np.percentile(np.random.choice(stat,samsize,replace=True),alpha), range(0,bootstrap))))
    
    return limit

In [23]:
def L2norm(stat):
    return(np.sqrt(stat**2))

def matrix_inv(matrix):
    return linalg.pinv(matrix,cond=1.490116e-08)

class covariance_scaler() :
    
    def __init__(self) :
        
        self.cov_inv_matrix = None
            
    def fit(self, trdat):
        
        cov_mat = np.cov(trdat.transpose())
        self.cov_inv_matrix = matrix_inv(cov_mat)
        
    def transform(self, tsdat) :
        if isinstance(tsdat, list) :
            tsdat = np.array(tsdat).transpose()
            
        scaled_residual = np.dot(np.array(tsdat), self.cov_inv_matrix)
        return scaled_residual
    

In [24]:
def covariance_scaler(trdat, resi) :
    
    if isinstance(resi, list) :
        resi = np.array(resi).transpose()
        
    cov_matrix = np.cov(trdat.transpose()) # trdat의 공분산행렬
    cov_inv_matrix = matrix_inv(cov_matrix) # 공분산 행렬의 역행렬

    scaled_residual = np.dot(np.array(resi), cov_inv_matrix) # 공분산 스케일된 결과
    
    return scaled_residual

In [25]:
# MSET Linear Regression
def mset_regress(trdat, tsdat, alpha=0.05):
    '''
        @Description
            MSET Linear Regression

        @Parameter
            trdat : 학습데이터
            tsdat : 평가데이터
            alpha : control limit의 유의수준

        @Return
            trScore : MSET Linear Regression의 Train 잔차 (이상감지 통계량)
            tsScore : MSET Linear Regression의 Test 잔차 (이상감지 통계량)
            varTrScore : 변수 별 이상감지 통계량
            varTsScore : 변수 별 이상감지 통계량
            UCL : control limit
            LCL : control limit
            varUCL : 변수 별 control limit
            varLCL : 변수 별 control limit
    '''

    train_intercept = np.ones((trdat.shape[0], 1))
    test_intercept = np.ones((tsdat.shape[0], 1))

    # 초기값 선언
    y_hat_tr = np.zeros((trdat.shape[0], trdat.shape[1]))
    y_hat_ts = np.zeros((tsdat.shape[0], tsdat.shape[1]))

    # Control limit
    UCL = []
    LCL = []
    varUCL = []
    varLCL = []

    # train data, test data 학습 학습
    for i in range(len(trdat.columns)):
        # MSET 기반으로 학습시키기 위한 학습데이터 변환
        trainX = np.concatenate((train_intercept, np.delete(trdat.values, i, axis=1)), axis=1)
        trainY = trdat.values[:, i]
        testX = np.concatenate((test_intercept, np.delete(tsdat.values, i, axis=1)), axis=1)

        y_hat_tr[:, i] = trainX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY
        y_hat_ts[:, i] = testX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY

        # control limit (each variable)
        varUCL.append(bootstrap_limit(trainY-y_hat_tr[:, i], alpha=alpha / 2))
        varLCL.append(bootstrap_limit(trainY-y_hat_tr[:, i], alpha=alpha / 2, upper=False))

    varTrScore = trdat.values - y_hat_tr
    varTsScore = tsdat.values - y_hat_ts

    # covariance scaled with anomaly sscore
    trScore = L2norm(covariance_scaler(trdat, varTrScore)).sum(axis=1)
    tsScore = L2norm(covariance_scaler(trdat, varTsScore)).sum(axis=1)

    # control limit (UCL=LCL)
    UCL = bootstrap_limit(trScore, alpha=alpha)
    LCL = UCL


    return {"Trscore": trScore, "Tsscore": tsScore, "Vartrscore": varTrScore, "Vartsscore": varTsScore, "Ucl": str(UCL),
            "Lcl": str(LCL), "Varucl": varUCL, "Varlcl": varLCL}

In [26]:
df = pd.read_csv('test_data.csv', encoding='euc-kr')
    
trdat = df.iloc[0:600,:]
tsdat = df.iloc[600:610,:]

d = mset_regress(trdat, tsdat)
d['tsScore']

{'Trscore': array([5.80704732e+02, 4.64891428e+02, 9.43723784e+02, 6.83912169e+02,
        3.28603742e+02, 5.67857124e+02, 5.26797928e+02, 1.63538114e+02,
        9.28473465e+02, 5.75304274e+02, 1.14931865e+03, 3.83019862e+02,
        3.59581613e+02, 9.02709786e+02, 5.14646319e+02, 4.09179002e+02,
        3.20373813e+02, 3.80213546e+02, 1.76136933e+03, 2.66623724e+02,
        2.76308994e+02, 1.05254793e+03, 4.36181217e+02, 1.03308816e+03,
        2.58344738e+02, 6.76446021e+02, 3.90513930e+02, 2.57372625e+02,
        2.69836026e+02, 3.20947917e+02, 4.87079937e+02, 5.93853268e+02,
        2.41608637e+02, 3.21347991e+02, 3.75780146e+02, 5.68374407e+02,
        5.01188408e+02, 7.66863747e+02, 1.10201007e+03, 4.25254851e+02,
        4.24720588e+02, 3.81932813e+02, 2.41700627e+02, 2.26771667e+02,
        4.43715380e+02, 1.06048749e+03, 3.82843917e+02, 2.86984053e+03,
        5.91856816e+02, 5.75964867e+02, 2.48012134e+02, 4.22829534e+02,
        5.76016090e+02, 4.66797640e+02, 5.79456982e+0

# MSET Linear Regression 양측검정
def mset_regress(trdat, tsdat, alpha=0.05):
    '''
        @Description
            MSET Linear Regression

        @Parameter
            trdat : 학습데이터
            tsdat : 평가데이터
            alpha : control limit의 유의수준

        @Return
            trScore : MSET Linear Regression의 Train 잔차 (이상감지 통계량)
            tsScore : MSET Linear Regression의 Test 잔차 (이상감지 통계량)
            CL : control limit (교수님과 회의 후 설정 예정)
    '''   
    
    train_intercept = np.ones((trdat.shape[0],1))
    test_intercept = np.ones((tsdat.shape[0],1))

    # 초기값 선언
    y_hat_tr = np.zeros((trdat.shape[0], trdat.shape[1]))
    y_hat_ts = np.zeros((tsdat.shape[0], tsdat.shape[1]))
         
    # control limit 초기값
    ucl = []
    lcl = []
    
    # train data, test data 학습 학습
    for i in range(len(trdat.columns)):
        
        # MSET 기반으로 학습시키기 위한 학습데이터 변환
        trainX = np.concatenate((train_intercept, np.delete(trdat.values,i,axis=1)), axis=1)
        trainY = trdat.values[:,i]
        testX = np.concatenate((test_intercept, np.delete(tsdat.values,i,axis=1)), axis=1)
        
        y_hat_tr[:,i] = trainX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY
        y_hat_ts[:,i] = testX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY
        
        res = trdat.values[:,i]-y_hat_tr[:,i]
        ucl.append(bootstrap_limit(res, alpha=alpha/2))
        lcl.append(bootstrap_limit(res, alpha=alpha/2, upper=False))
        
    residual_tr =  trdat.values - y_hat_tr
    residual_ts =  tsdat.values - y_hat_ts
                                                                                                   
    return {"trScore" : residual_tr, "tsScore" : residual_ts, "ucl" : ucl, "lcl" : lcl}

import numpy as np
from sklearn.ensemble import RandomForestRegressor # randomforest library
from adFunction import *
import pandas as pd
import matplotlib.pyplot as plt
import XNDatabaseLib as DbLib
from XNCmmLib import XNCmmUnit as Cmm
from sklearn import *
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import json

class MSET:
    def regressionexcute(trdataID, tsdataID, cols, alpha):
        # TrdataID를 dataframe 변수
        txt = DbLib.DatabaseUnit.GetDataSet(trdataID)
        df = pd.read_csv(Cmm.StrToStringIO(txt), sep=',', encoding='utf-8-sig')
        trdf = Cmm.DfToCustomDf(df, Cmm.StrToList(cols, ','))

        # TsdataID를 dataframe 변수
        txt = DbLib.DatabaseUnit.GetDataSet(tsdataID)
        df = pd.read_csv(Cmm.StrToStringIO(txt), sep=',', encoding='utf-8-sig')
        tsdf = Cmm.DfToCustomDf(df, Cmm.StrToList(cols, ','))

        a = MSET.mset_regress(trdf,tsdf,alpha)

        jstr = Cmm.ClassToJson(a)

        return jstr

    # MSET Linear Regression
    def mset_regress(trdat, tsdat, alpha=0.05):
        '''
            @Description
                MSET Linear Regression

            @Parameter
                trdat : 학습데이터
                tsdat : 평가데이터
                alpha : control limit의 유의수준

            @Return
                trScore : MSET Linear Regression의 Train 잔차 (이상감지 통계량)
                tsScore : MSET Linear Regression의 Test 잔차 (이상감지 통계량)
                varTrScore : 변수 별 이상감지 통계량
                varTsScore : 변수 별 이상감지 통계량
                UCL : control limit
                LCL : control limit
                varUCL : 변수 별 control limit
                varLCL : 변수 별 control limit
        '''

        train_intercept = np.ones((trdat.shape[0], 1))
        test_intercept = np.ones((tsdat.shape[0], 1))

        # 초기값 선언
        y_hat_tr = np.zeros((trdat.shape[0], trdat.shape[1]))
        y_hat_ts = np.zeros((tsdat.shape[0], tsdat.shape[1]))

        # Control limit
        UCL = []
        LCL = []
        varUCL = []
        varLCL = []

        # train data, test data 학습 학습
        for i in range(len(trdat.columns)):
            # MSET 기반으로 학습시키기 위한 학습데이터 변환
            trainX = np.concatenate((train_intercept, np.delete(trdat.values, i, axis=1)), axis=1)
            trainY = trdat.values[:, i]
            testX = np.concatenate((test_intercept, np.delete(tsdat.values, i, axis=1)), axis=1)

            y_hat_tr[:, i] = trainX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY
            y_hat_ts[:, i] = testX @ np.linalg.pinv(trainX.transpose() @ trainX) @ trainX.transpose() @ trainY

            # control limit (each variable)
            varUCL.append(bootstrap_limit(trainY-y_hat_tr[:, i], alpha=alpha / 2))
            varLCL.append(bootstrap_limit(trainY-y_hat_tr[:, i], alpha=alpha / 2, upper=False))

        varTrScore = trdat.values - y_hat_tr
        varTsScore = tsdat.values - y_hat_ts

        # covariance scaled with anomaly sscore
        trScore = L2norm(covariance_scaler(trdat, varTrScore)).sum(axis=1)
        tsScore = L2norm(covariance_scaler(trdat, varTsScore)).sum(axis=1)

        # control limit (UCL=LCL)
        UCL = bootstrap_limit(trScore, alpha=alpha)
        LCL = UCL

        lists = trScore.tolist()
        trScoreJson = Cmm.NumlistToOneValueArraystring(lists)
        lists = tsScore.tolist()
        tsScoreJson = Cmm.NumlistToOneValueArraystring(lists)
        lists = varTrScore.tolist()
        varTrScoreJson = Cmm.NumlistToArraystring(lists)
        lists = varTsScore.tolist()
        varTsScoreJson = Cmm.NumlistToArraystring(lists)

        return {"Trscore": trScoreJson, "Tsscore": tsScoreJson, "Vartrscore": varTrScoreJson, "Vartsscore": varTsScoreJson, "Ucl": str(UCL),
                "Lcl": str(LCL), "Varucl": varUCL, "Varlcl": varLCL}

a = MSET.regressionexcute("20220707-0003-CS","20220707-0003-CT","사출최대압력,보압절환압력",0.05)
print(a)


# print(type(np.transpose(resi)))
# print(type(ucl))
# print(type(lcl)
# ScoreJson = json.dumps(np.transpose(resi))
# uclJson = json.dumps(ucl)
# lclJson = json.dumps(lcl)

# # 예제
# import pandas as pd
# import numpy as np
# from test import *
# df = pd.read_csv("C:\\Users\\Administrator\\Desktop\\test_data.csv", encoding='euc-kr')
#
# # MSET Linear Regression
# trdat = df[0:500]
# tsdat = df[500:1000]
#
# model = mset_regress(trdat, tsdat, alpha=0.005)
# plt.figure(figsize=(12,4))
# plt.plot(model['varTsScore'][:,2], color='blue')
# plt.axhline(y=model['varUCL'][2], color='red')
# plt.axhline(y=model['varLCL'][2], color='red')
# plt.show()
#
# msetreg = mset_regress(trdat, tsdat, alpha=0.05)
# msetrf = mset_randomforest(trdat, tsdat, alpha=0.01, ntree=100)
#
# print(msetreg['tsScore'])
# print(msetrf['tsScore'])