In [1]:
import os
import sys
import time
import threading
import pandas as pd

from ts_detector.common.tsd_errorcode import *
from ts_detector.common.tsd_common import *

from ts_detector.algorithm import xgboosting
from ts_detector.feature import feature_service
from ts_detector import detect

## 样本处理

In [2]:
def sample_process_test(df):
    """
    Convert a raw sample DataFrame into a list of sample dicts.

    Rows are read positionally, so the result does not depend on the frame's
    index (a filtered or re-indexed frame works the same as a fresh one).

    :param df: DataFrame with the string columns 'dataC', 'dataB', 'dataA'
               and the label column '正样本/负样本'.
    :return: One dict per row, in row order, with
             "data" - the three series strings joined with commas in the
                      order c,b,a;
             "flag" - 1 when the label equals 'positive', otherwise 0.
    """
    rows = zip(df['dataC'], df['dataB'], df['dataA'], df['正样本/负样本'])
    return [
        {
            # expected layout of the joined series: c,b,a (flag is kept separately)
            "data": data_c + ',' + data_b + ',' + data_a,
            "flag": 1 if label == 'positive' else 0
        }
        for data_c, data_b, data_a, label in rows
    ]


In [3]:
# Load the training CSV from ./data, resolved against the current working directory.
pdata = pd.read_csv(os.path.join(os.getcwd(), 'data/data_cba_train.csv'))
# Turn each row into a {"data": ..., "flag": ...} dict.
samples = sample_process_test(pdata)
# Show only a short preview: every sample carries a very long series string,
# so echoing the whole list floods the notebook output.
samples[:1]

[{'data': '59,63,62,56,76,67,65,55,69,61,60,56,68,65,56,50,62,55,69,65,55,54,52,60,53,52,62,58,62,67,55,56,59,65,54,50,59,56,58,58,50,68,50,72,59,56,67,62,55,51,64,53,53,59,58,66,60,76,56,59,64,59,75,74,65,52,71,55,50,78,55,58,50,68,71,69,67,50,68,54,50,51,58,69,54,53,53,52,68,65,68,55,68,53,71,63,69,61,66,79,67,55,51,70,57,51,68,68,52,66,52,63,53,55,53,51,55,59,51,64,63,63,52,77,64,59,77,52,75,63,57,60,55,64,50,59,60,54,69,68,58,64,62,54,50,69,61,64,52,64,67,50,66,67,74,54,61,52,64,56,61,66,67,76,67,69,64,64,52,62,62,56,68,54,56,69,50,66,52,60,67,63,68,58,77,60,59,65,61,56,66,55,64,68,62,63,69,66,58,57,73,66,69,65,50,54,61,61,65,61,50,58,65,51,56,54,50,75,60,67,54,53,65,57,69,79,65,50,63,67,62,69,76,77,63,63,65,79,56,58,51,62,59,59,69,64,66,54,56,62,54,51,62,52,56,69,60,51,55,64,58,69,51,63,51,62,72,66,59,62,66,62,51,61,52,69,64,65,69,57,67,52,73,76,57,53,57,51,50,50,50,77,55,64,64,69,52,52,63,59,63,54,58,53,64,67,66,54,53,67,56,58,74,53,60,52,64,51,78,57,63,61,52,56,66,58,73,63,71,66

## 生成XGB模型

In [4]:
def generate_model(data, task_id):
    """
    Train a model through ``xgboosting.XGBoosting`` and print a task summary.

    :param data: Training dataset, passed unchanged to ``xgb_train``.
    :param task_id: The id of the training task. It is concatenated with
                    "_model" to build the model name, so a string is expected.
    :return: None. The summary dict is printed, not returned.
    """
    trainer = xgboosting.XGBoosting()
    ret_code, ret_data = trainer.xgb_train(data, task_id)
    succeeded = ret_code == 0

    # Fields common to both outcomes; insertion order is the printed order.
    params = {
        "task_id": task_id,
        "end_time": int(time.time()),
        "status": "complete" if succeeded else "failed",
    }
    if succeeded:
        params["model_name"] = task_id + "_model"
    else:
        # On failure, report whatever xgb_train returned alongside the code.
        params["ret_data"] = ret_data
    print(params)

## 训练模型

In [5]:
def process_train(samples):
    """
    Validate the labelled samples and train a model from them.

    :param samples: Iterable of dicts holding "flag" (1 is counted as a
                    positive sample, any other value as negative) and "data"
                    (a comma-separated string of integers).
    :return: Dict with "code", "msg" and "data" keys. The code is
             TSD_LACK_SAMPLE when positive or negative samples are missing,
             otherwise TSD_OP_SUCCESS.
    """

    def _reply(code):
        # Every response shares this shape; only the code/message differ.
        return {"code": code, "msg": ERR_CODE[code], "data": ""}

    # map() is lazy in Python 3, so each series is materialised as a list of ints.
    samples_list = [
        {"flag": item["flag"], "data": [int(point) for point in item["data"].split(',')]}
        for item in samples
    ]
    positive_count = sum(1 for item in samples_list if item["flag"] == 1)
    negative_count = len(samples_list) - positive_count
    task_id = str(int(round(time.time() * 1000)))

    # Both classes must be present; otherwise report the lack of samples.
    if not (positive_count and negative_count):
        return _reply(TSD_LACK_SAMPLE)

    # Build the model from the training data, synchronously in the calling thread.
    try:
        print('TRAINING START...')
        print('==================================')
        generate_model(samples_list, task_id)
    except Exception as err:
        # NOTE(review): a training error is only printed; the function still
        # reports success below - confirm this is intended.
        print(err)
    return _reply(TSD_OP_SUCCESS)


In [7]:
# Train on the samples built earlier; the returned status dict is the cell output.
process_train(samples)

TRAINING START...
xgb training ...
{'task_id': '1694759288368', 'end_time': 1694759288, 'status': 'complete', 'model_name': '1694759288368_model'}


{'code': 0, 'msg': '操作成功', 'data': ''}

## 预测