In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json

from statsmodels import api as sm
from scipy.stats import pearsonr
import scipy.signal as signal

In [None]:
def isPeriodic(tempNorm, corr_threshold=0.85):
    """Heuristically decide whether a series is periodic.

    A candidate period is read off the autocorrelation function (ACF): the
    strict local maxima of the ACF are collected, the first run of consecutive
    positive peaks is located, and the lag of the peak in the middle of that
    run is taken as the period. The series is then cut into consecutive
    segments of that length and each pair of adjacent segments is compared
    with the Pearson correlation coefficient.

    Args:
        tempNorm: Sliceable sequence of numeric samples (the caller in this
            notebook passes a plain list).
        corr_threshold: Minimum Pearson coefficient for two adjacent segments
            to count as similar. Defaults to 0.85.

    Returns:
        int: 1 if the series is judged periodic, otherwise 0. 0 is also
        returned, after printing the error, when any step raises.
    """
    try:
        n = len(tempNorm)
        if n == 0:
            # Nothing to analyse; avoid a noisy failure inside the ACF call.
            return 0

        acf = sm.tsa.acf(tempNorm, nlags=n)  # autocorrelation coefficients

        # Strict local maxima of the ACF. Computed once and reused for both
        # the peak lags and the peak heights.
        peak_ind = signal.argrelextrema(acf, np.greater)[0]
        fwbest = acf[peak_ind]
        fwbestlen = len(fwbest)
        if fwbestlen == 0:
            return 0

        # Position (within the peak list) of the first positive peak.
        first_pos = next((k for k, v in enumerate(fwbest) if v > 0), None)
        if first_pos is None:
            # No positive autocorrelation peak: there is no lag at which the
            # series resembles itself, so do not fall back to an arbitrary
            # peak as the period.
            return 0

        # Extend over the run of consecutive positive peaks.
        run_end = first_pos
        while run_end < fwbestlen and fwbest[run_end] > 0:
            run_end += 1
        if run_end >= fwbestlen:
            # The positive run reaches the last peak: treated as periodic.
            return 1

        index = (first_pos + run_end - 1) // 2  # middle peak of the run
        fd = int(peak_ind[index])  # lag of that peak, used as the period length
        Q = n // fd  # number of whole periods that fit in the series
        if Q < 2:
            # Fewer than two full segments: nothing to compare.
            return 0

        # Split the series into Q consecutive segments of length fd.
        numlist = [tempNorm[i * fd:(i + 1) * fd] for i in range(Q)]
        listlen = len(numlist)

        # Count adjacent segment pairs whose correlation is below threshold.
        low_pairs = 0
        for prev_seg, next_seg in zip(numlist, numlist[1:]):
            r = pearsonr(prev_seg, next_seg)[0]
            # `not (r >= t)` rather than `r < t`, so a NaN coefficient
            # (constant segment) is counted as a dissimilar pair instead of
            # silently passing.
            if not (r >= corr_threshold):
                low_pairs += 1

        # Periodic when at most a third (integer division) of the segment
        # count are dissimilar pairs.
        return 1 if low_pairs <= listlen // 3 else 0

    except Exception as e:
        # Best effort: report the problem and classify as non-periodic.
        print(f"处理数据时出现错误：{str(e)}")
        return 0

In [None]:
# Curve smoothing
def Smooth(ts):
    """Remove abrupt jumps from a series by interpolating over outlier steps.

    First differences are computed and a step is flagged when it lies more
    than 1.5 interquartile ranges above the upper quartile or below the lower
    quartile of all steps. Each run of consecutive flagged positions is then
    replaced by values evenly spaced strictly between the sample before the
    run and the sample after it. A run that extends to the final sample has
    no following sample, so it is filled with the sample before the run.

    Args:
        ts: pandas Series of numeric samples.

    Returns:
        pandas.Series: a float copy of ``ts`` with flagged runs replaced. The
        input Series is not modified.
    """
    # Work on a float copy so interpolated (fractional) values can be stored
    # regardless of the input dtype and the caller's data stays untouched.
    smoothed = ts.astype(float)

    dif = smoothed.diff().dropna()  # first differences without the leading NaN
    if dif.empty:
        return smoothed

    q1 = dif.quantile(0.25)
    q3 = dif.quantile(0.75)
    iqr = q3 - q1
    high = q3 + 1.5 * iqr  # upper threshold for a step
    low = q1 - 1.5 * iqr   # lower threshold for a step

    # Positions (not labels) of flagged steps. Position 0 holds NaN and NaN
    # comparisons are False, so it can never be flagged and `start - 1` below
    # is always a valid position.
    steps = smoothed.diff().to_numpy()
    with np.errstate(invalid='ignore'):
        flagged = np.flatnonzero((steps > high) | (steps < low))

    size = len(smoothed)
    i = 0
    while i < len(flagged):
        start = int(flagged[i])  # first position of this run
        n = 1  # run length; most runs are a single point
        while i + n < len(flagged) and flagged[i + n] == start + n:
            n += 1
        end = start + n - 1  # last position of this run

        left = smoothed.iloc[start - 1]
        if end + 1 < size:
            right = smoothed.iloc[end + 1]
            # n + 2 evenly spaced points including both anchors; keep only
            # the n interior ones.
            fill = np.linspace(left, right, n + 2)[1:-1]
        else:
            # Run touches the end of the series: hold the last good value.
            fill = np.full(n, left)
        smoothed.iloc[start:end + 1] = fill

        i += n

    return smoothed

In [None]:
def process_container_utilization(containerUtilDict, batch_size=100):
    """Classify every container's CPU and memory series as periodic or not.

    For each container the first element of every utilisation record is taken
    as the CPU sample and the second as the memory sample. Both series are
    passed through ``Smooth`` and then ``isPeriodic``; the container id is
    recorded under the matching category. A container whose processing raises
    is reported with ``print`` and skipped from that point on.

    Args:
        containerUtilDict: Mapping of container id to a list of records, each
            indexable with ``[0]`` (CPU) and ``[1]`` (memory).
        batch_size: Number of containers walked per batch. Batching only
            affects iteration, not the result.

    Returns:
        tuple: ``(nonperiodic_cpu, periodic_cpu, nonperiodic_mem,
        periodic_mem)``, each a DataFrame with a single ``id`` column.
    """
    # Collect ids in plain lists and build each DataFrame once at the end;
    # appending rows to a DataFrame one at a time is needlessly slow.
    nonperiodic_cpu_ids = []
    periodic_cpu_ids = []
    nonperiodic_mem_ids = []
    periodic_mem_ids = []

    # Materialise the key order once instead of once per batch.
    container_ids = list(containerUtilDict)

    for batch_start in range(0, len(container_ids), batch_size):
        for cid in container_ids[batch_start:batch_start + batch_size]:
            utilList = containerUtilDict[cid]
            try:
                cpu_list = [util[0] for util in utilList]
                mem_list = [util[1] for util in utilList]

                # Smoothing
                cpu_smoothed = Smooth(pd.Series(cpu_list))
                mem_smoothed = Smooth(pd.Series(mem_list))

                # Periodicity check; flags other than 0/1 are ignored.
                flag_cpu = isPeriodic(cpu_smoothed.values.tolist())
                if flag_cpu == 0:
                    nonperiodic_cpu_ids.append(cid)
                elif flag_cpu == 1:
                    periodic_cpu_ids.append(cid)

                flag_mem = isPeriodic(mem_smoothed.values.tolist())
                if flag_mem == 0:
                    nonperiodic_mem_ids.append(cid)
                elif flag_mem == 1:
                    periodic_mem_ids.append(cid)

            except Exception as e:
                # Best effort: one bad container must not abort the run.
                print(f"处理容器 {cid} 时出错：{str(e)}")

    def _as_frame(ids):
        # Single-column frame; object dtype keeps empty and non-empty results consistent.
        return pd.DataFrame({'id': pd.Series(ids, dtype=object)})

    return (
        _as_frame(nonperiodic_cpu_ids),
        _as_frame(periodic_cpu_ids),
        _as_frame(nonperiodic_mem_ids),
        _as_frame(periodic_mem_ids),
    )

In [None]:
if __name__ == '__main__':
    # Load the per-container utilisation dictionary, classify every container
    # and print the four resulting id tables. Failures are reported with a
    # message instead of a traceback.
    try:
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's default locale encoding.
        with open('../containerUtilDict.txt', 'r', encoding='utf-8') as file:
            containerUiltDict = json.load(file)
        print('加载容器资源消耗序列字典成功')

        # Classify the container utilisation series.
        nonperiodic_cpu, periodic_cpu, nonperiodic_mem, periodic_mem = process_container_utilization(containerUiltDict)

        # Print the results (or hand them to further processing).
        print("非周期性 CPU 容器:", nonperiodic_cpu)
        print("周期性 CPU 容器:", periodic_cpu)
        print("非周期性 内存 容器:", nonperiodic_mem)
        print("周期性 内存 容器:", periodic_mem)

    except FileNotFoundError:
        print("文件 containerUtilDict.txt 未找到。")
    except json.JSONDecodeError:
        print("解析 JSON 数据时出错。")
    except Exception as e:
        print(f"发生未知错误：{str(e)}")