In [6]:
import numpy as np

In [6]:
class FOD:
    """Optimal ordered segmentation of a sample sequence by dynamic programming.

    The samples keep their original order and are cut into ``n_class``
    contiguous segments such that the summed within-segment squared deviation
    from the segment mean is minimal.  (The class name presumably abbreviates
    "Fisher optimal division".)

    All sample positions used by the methods are 1-based and inclusive.
    """

    def __init__(self, n_class: int):
        """Store the number of segments to produce.

        n_class: number of contiguous segments (classes).
        """
        self.k = n_class  # number of segments to produce

    def mean(self, X: np.ndarray, low: int, high: int):
        """Return the per-feature mean of samples ``low..high``.

        X: n_sample x n_feature two-dimensional data.
        low: 1-based position of the first sample of the interval.
        high: 1-based position of the last sample of the interval (inclusive).
        """
        return X[low - 1: high].mean(axis=0)

    def varition(self, X: np.ndarray, low: int, high: int):
        """Return the sum of squared deviations of samples ``low..high`` from their mean.

        X: n_sample x n_feature two-dimensional data.
        low: 1-based position of the first sample of the interval.
        high: 1-based position of the last sample of the interval (inclusive).
        """
        segment = X[low - 1: high]
        deviation = segment - self.mean(X, low, high)
        return (deviation ** 2).sum()

    def fit_predict(self, X: np.ndarray):
        """Split ``X`` into ``self.k`` contiguous segments.

        X: n_sample x n_feature two-dimensional data.

        Returns a tuple ``(split_res, cut_points, table)``:
          split_res: list of ``[first, last]`` pairs (1-based, inclusive,
              plain Python ints), one per segment.
          cut_points: NumPy array with the 1-based start position of every
              segment except the first (empty when ``self.k == 1``).
          table: the dynamic-programming table built by ``split_k``.

        Raises ValueError (from ``split_k``) when ``self.k`` is not in
        ``1..n_sample``.
        """
        n = X.shape[0]
        table = self.split_k(X)  # derivation table
        cut_points = table[self.k, n][1]
        # Surround the cut points with the sentinels 1 and n + 1 so that every
        # pair of neighbours describes one segment.
        bounds = np.append(np.insert(cut_points, 0, 1), n + 1)
        # Convert to built-in ints so the result prints and serialises cleanly.
        split_res = [
            [int(bounds[i]), int(bounds[i + 1]) - 1]
            for i in range(len(bounds) - 1)
        ]
        return split_res, cut_points, table

    def split_k(self, X: np.ndarray):
        """Build the dynamic-programming table for 1..``self.k`` segments.

        X: n_sample x n_feature two-dimensional data.

        Returns an object array of shape ``(self.k + 1, n_sample + 1)``.
        Cell ``[i, j]`` (for ``1 <= i <= j``) holds ``(loss, cut_points)`` for
        the best split of the first ``j`` samples into ``i`` segments; cells
        that are never filled stay ``0``.

        Raises ValueError when ``self.k`` is not in ``1..n_sample``.
        """
        n = X.shape[0]
        if not 1 <= self.k <= n:
            raise ValueError(
                "n_class must be between 1 and the number of samples "
                "(got n_class=%r, n_sample=%r)" % (self.k, n)
            )
        table = np.zeros((self.k + 1, n + 1), dtype='object')
        # One segment: no cut point, the loss is the variation of the prefix.
        for j in range(1, n + 1):
            table[1, j] = (self.varition(X, 1, j), np.array([], dtype=int))
        if self.k >= 2:
            for j in range(2, n + 1):
                table[2, j] = self.split_2(X, j)
        # The cost of a trailing segment does not depend on the class count,
        # so remember it across iterations of the outer loop.
        seg_cost = {}
        for i in range(3, self.k + 1):
            for j in range(i, n + 1):
                best_loss, best_m = None, None
                # m is the last sample of the first i - 1 segments; the final
                # segment then covers samples m + 1 .. j.
                for m in range(i - 1, j):
                    key = (m + 1, j)
                    if key not in seg_cost:
                        seg_cost[key] = self.varition(X, m + 1, j)
                    loss = table[i - 1, m][0] + seg_cost[key]
                    # "<=" keeps the last of several equally good candidates.
                    if best_loss is None or loss <= best_loss:
                        best_loss, best_m = loss, m
                table[i, j] = (
                    best_loss,
                    np.append(table[i - 1, best_m][1], best_m + 1),
                )
        return table

    def split_2(self, X: np.ndarray, endp: int):
        """Find the best split of samples ``1..endp`` into two segments.

        X: n_sample x n_feature two-dimensional data.
        endp: 1-based position of the last sample to consider (inclusive).

        Returns ``(loss, cut_points)`` where ``cut_points`` is a one-element
        array holding the 1-based start position of the second segment.

        Raises ValueError when ``endp < 2`` (there is nothing to split).
        """
        if endp < 2:
            raise ValueError("need at least 2 samples to split into 2 classes")
        best_loss, best_cut = None, None
        for cut in range(2, endp + 1):
            loss = self.varition(X, 1, cut - 1) + self.varition(X, cut, endp)
            # "<=" keeps the last of several equally good candidates.
            if best_loss is None or loss <= best_loss:
                best_loss, best_cut = loss, cut
        return best_loss, np.array([best_cut])

In [7]:
# Smoke test: four constant runs of lengths 10/20/30/40 should be recovered exactly.
data = np.array([1] * 10 + [2] * 20 + [3] * 30 + [4] * 40).reshape(-1, 1)
fod = FOD(4)
# Index the result instead of unpacking into `_`: assigning to `_` at cell
# level stops IPython from updating it with the most recent output.
res = fod.fit_predict(data)[0]
assert res == [[1, 10], [11, 30], [31, 60], [61, 100]], res
res

[[1, 10], [11, 30], [31, 60], [61, 100]]