In [116]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM

In [117]:
def bootstrap_limit(stat, alpha=0.05, bootstrap=100, upper=True, random_state=None):
    '''
        @Description
            Estimate a control limit by bootstrap resampling: draw `bootstrap`
            resamples (with replacement) of the in-control statistic, take the
            requested percentile of each resample and return the mean of those
            percentiles.

        @Parameter
            stat : in-control statistic values; any array-like (list, ndarray of
                   shape (n,) or (n, 1), pandas Series). It is flattened to 1-D.
            alpha : significance level used for the control limit, in [0, 1]
            bootstrap : number of bootstrap resamples (>= 1)
            upper : True  -> upper limit, the (100 - alpha * 100)-th percentile
                    False -> lower limit, the (alpha * 100)-th percentile
            random_state : optional integer seed for a private RNG. None (default)
                           draws from NumPy's global RNG, so np.random.seed()
                           still controls the result.
        @Return
            limit : control limit (CL)
        @Raise
            ValueError : empty stat, alpha outside [0, 1], or bootstrap < 1
    '''

    # np.asarray + ravel accepts lists / Series / column vectors, unlike ndarray.reshape
    values = np.asarray(stat, dtype=float).ravel()
    if values.size == 0:
        raise ValueError('stat must contain at least one value')
    if not 0 <= alpha <= 1:
        raise ValueError('alpha must be between 0 and 1')
    if bootstrap < 1:
        raise ValueError('bootstrap must be at least 1')

    # Convert the significance level to the percentile passed to np.percentile
    percentile = alpha * 100
    if upper:
        percentile = 100 - percentile

    # Each resample has as many draws as there are observations, but never fewer than 100
    samsize = max(100, values.size)

    # np.random (module) and RandomState both expose .choice, so one code path serves both
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    resampled_limits = [
        np.percentile(rng.choice(values, samsize, replace=True), percentile)
        for _ in range(bootstrap)
    ]

    return np.mean(resampled_limits)

In [118]:
# One Class SVM
class OCSVM():
    """Wrapper around sklearn's OneClassSVM that also keeps the training
    scores, the latest test scores and a bootstrap control limit."""

    def __init__(self):
        self.trScore = None             # score_samples() of the training data, set by fit()
        self.tsScore = None             # score_samples() of the latest predict() input
        self.CL = None                  # control limit from bootstrap_limit() on trScore
        self.scaler = StandardScaler()  # applied to inputs only when fit(..., scale=True)
        self.model = None               # fitted OneClassSVM, set by fit()
        self.use_scaler = False         # remembers the `scale` choice made in fit()

    def fit(self, trdat, nu, kernel, alpha=0.05, scale=False):
        """Fit the one-class SVM and derive the control limit.

        Parameters
        ----------
        trdat : training (in-control) data, passed to OneClassSVM.fit
        nu : forwarded to OneClassSVM(nu=...)
        kernel : forwarded to OneClassSVM(kernel=...)
        alpha : significance level forwarded to bootstrap_limit
        scale : if True, fit self.scaler on trdat and train/score on the
            transformed data; predict() then applies the same transform.
            Defaults to False, which trains on the data exactly as given.

        Returns
        -------
        dict with key 'trScore' holding the training scores.
        """
        self.use_scaler = bool(scale)
        if self.use_scaler:
            trdat = self.scaler.fit_transform(trdat)

        # One class SVM
        self.model = OneClassSVM(nu=nu, kernel=kernel, gamma='auto').fit(trdat)

        # Train score
        self.trScore = self.model.score_samples(trdat)

        # Control limit.
        # NOTE(review): bootstrap_limit is called with its default upper=True, so CL is
        # an upper percentile of the training scores. scikit-learn documents larger
        # score_samples values as more normal -- confirm an upper limit is intended.
        self.CL = bootstrap_limit(self.trScore, alpha=alpha)

        return {'trScore': self.trScore}

    def predict(self, tsdat):
        """Score new data with the fitted model.

        Returns a dict with key 'tsScore'; the scores are also stored on
        self.tsScore. Raises RuntimeError when called before fit().
        """
        if self.model is None:
            raise RuntimeError('OCSVM.predict() called before fit()')

        # getattr keeps instances that lack the attribute (for example ones unpickled
        # from a version of this class without it) on the unscaled path.
        if getattr(self, 'use_scaler', False):
            tsdat = self.scaler.transform(tsdat)

        # Test score
        self.tsScore = self.model.score_samples(tsdat)

        return {'tsScore': self.tsScore}

def ocSVM(trdat, tsdat, kernel='linear', alpha=0.05, model_path='ocSVM.pkl'):
    """Train a one-class SVM on trdat, score tsdat and persist the model.

    Parameters
    ----------
    trdat : training (in-control) data
    tsdat : data to score
    kernel : kernel name forwarded to the model
    alpha : used both as the SVM `nu` and as the significance level of the
        control limit
    model_path : file the fitted model is written to with joblib.dump.
        The default matches the file name ocSVM_loader reads.

    Returns
    -------
    dict with 'trScore', 'tsScore' and 'CL', each rounded to 3 decimals.
    """
    # When the data are normalized to unit-norm vectors, SVDD is equivalent to the 1-class SVM.

    model = OCSVM()
    # NOTE(review): the scaler is fitted here, but the unscaled trdat / tsdat are what
    # get passed to fit() / predict() below -- confirm whether scaling was intended.
    model.scaler.fit(trdat)

    # nu: tolerated error fraction, e.g. nu=0.05 assumes 5 out of 100 samples are
    # outliers when placing the hyperplane. The same alpha now also drives the
    # control limit instead of a hard-coded 0.05.
    fit = model.fit(trdat, nu=alpha, kernel=kernel, alpha=alpha)

    pred = model.predict(tsdat)

    # Persist the model after predict(), so the pickle also carries tsScore
    joblib.dump(model, model_path)

    return {
        'trScore': np.round(fit['trScore'], 3),
        'tsScore': np.round(pred['tsScore'], 3),
        'CL': np.round(model.CL, 3),
    }

def ocSVM_loader(wd, tsdat):
    """Load a persisted model and score tsdat with it.

    Parameters
    ----------
    wd : where to find the model. Either the path of the pickle file, or a
        directory that contains 'ocSVM.pkl'. A falsy value (None, '') falls
        back to 'ocSVM.pkl' in the current working directory.
    tsdat : data to score

    Returns
    -------
    dict with 'trScore', 'tsScore' and 'CL', each rounded to 3 decimals.
    """
    import os  # local import: only this function needs it

    default_name = 'ocSVM.pkl'
    if not wd:
        model_path = default_name
    elif os.path.isdir(wd):
        model_path = os.path.join(wd, default_name)
    else:
        model_path = wd

    # SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files from a trusted source.
    model = joblib.load(model_path)

    pred = model.predict(tsdat)

    return {
        'trScore': np.round(model.trScore, 3),
        'tsScore': np.round(pred['tsScore'], 3),
        'CL': np.round(model.CL, 3),
    }

In [119]:
from sklearn.datasets import load_iris

# Load iris with pandas containers; `data` is the returned bunch, `df` its feature frame.
data = load_iris(as_frame=True)
df = data.data

# Rows 0-49 are used for training, rows 50-149 for testing.
trdat, tsdat = df.iloc[:50], df.iloc[50:150]

In [121]:
# bootstrap_limit resamples with NumPy's global RNG; seed it so the control limit
# is the same on every Restart & Run All.
np.random.seed(0)

svdd = ocSVM(trdat, tsdat, kernel='linear', alpha=0.05)

# Training scores first, test scores after, so the x axis is the sample order
score = np.concatenate([svdd['trScore'], svdd['tsScore']])

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(score, color='blue', label='score')
ax.axhline(y=svdd['CL'], color='red', label='control limit (CL)')
# Dashed line marks where the training samples end and the test samples begin
ax.axvline(x=len(svdd['trScore']) - 0.5, color='gray', linestyle='--', label='train | test')
ax.set(title='One-class SVM scores', xlabel='sample index', ylabel='score')
ax.legend()
plt.show()