In [106]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from skforecast.ForecasterAutoreg import ForecasterAutoreg

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

In [107]:
data = pd.read_csv('EOP.csv',encoding='utf-8').iloc[:, :4] 
data.columns = ['零件号','日期','需求数量','仓库']
data.head()

Unnamed: 0,零件号,日期,需求数量,仓库
0,KITPAB100,20221130,1,3002
1,KITPAB100,20211208,2,3002
2,KITPAB100,20220906,2,3002
3,KITPAB100,20211111,2,3002
4,KITPAB100,20230613,1,3002


In [108]:
data = data[~data['仓库'].isin(['8013','8021','8025','8031'])]
# data =data[data['零件号']!='6RD959801E']
data['日期'] = pd.to_datetime(data['日期'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
new_df = data.groupby(['零件号','日期','仓库'])['需求数量'].sum().reset_index()
new_df['日期'] = pd.to_datetime(new_df['日期'])

In [109]:
# new_df.loc[(new_df['零件号'] == '6RD959801E') & (new_df['日期'] == '2023/3/27')& 
#            (new_df['仓库'] == '1000-1'), '需求数量'] = 6

In [110]:
data_des = new_df.copy()
data_des["year"] = pd.to_datetime(data_des['日期']).dt.year.astype(int)
data_des["month"] = pd.to_datetime(data_des['日期']).dt.month.astype(int)
real_sum_counts = data_des.groupby(['零件号','仓库','year','month']).sum().reset_index()
real_sum_counts.sort_values(by=['零件号','仓库','year','month']).head(2)

Unnamed: 0,零件号,仓库,year,month,需求数量
0,02E300066L 00V,1000-1,2017,12,4
1,02E300066L 00V,1000-1,2018,1,8


In [111]:
# real_sum_counts[(real_sum_counts['零件号']=='3CC945208A')&(real_sum_counts['仓库']=='1000-1')].tail(10)

In [112]:
# 生成日期范围
date_range = pd.date_range(start='2022-01-01', end='2023-10-31', freq='D')
# 创建空的DataFrame，准备存储填充后的结果
filled_df = pd.DataFrame()

# 针对每个 SKU 进行填充操作
for sku, group in new_df.groupby(['零件号','仓库']):
    sku_group = group.set_index('日期').reindex(date_range, fill_value=0).reset_index()
    sku_group['零件号'] = sku[0]
    sku_group['仓库'] = sku[1]
    filled_df = filled_df.append(sku_group, ignore_index=True)

  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.

In [113]:
import calendar  
import datetime  
  

    
def feature_processing(data,end_time):
    data['合并列'] = data['零件号'] + '_' + data['仓库']
    # 去除多余的列名
    data = data.drop(columns=['零件号', '仓库'])
    data = data.set_index(['index', '合并列'])['需求数量'].unstack()
    data.columns.name = None
    data = data.reset_index()
    new_df = data.copy().rename(columns={'index': 'date'})
    new_df['date'] = pd.to_datetime(new_df['date'], format='%Y-%m-%d')
    new_df = new_df.set_index('date')
    new_df = new_df.asfreq('D')
    new_df = new_df.sort_index()
    data_train = new_df[new_df.index <= end_time].copy()
    return data_train


def demods_groby_month(data):
    """天级别聚合month"""
    data_prs = data.reset_index()
    data_prs = data_prs.rename(columns={'index': 'date'})
    data_prs["year"] = pd.to_datetime(data_prs['date']).dt.year.astype(int)
    data_prs["month"] = pd.to_datetime(data_prs['date']).dt.month.astype(int)
    data_prs_info = data_prs.groupby(['year', 'month']).sum().reset_index()
    data_prs_info = data_prs_info.set_index(['year', 'month']).stack()
    data_prs_info = data_prs_info.rename_axis(index=['year', 'month', '零件号'])
    data_prs_info = data_prs_info.reset_index()
    data_prs_info[['零件号', '仓库代码']] = data_prs_info['零件号'].str.split('_', expand=True).reset_index(drop=True)
    data_prs_info.columns =['year','month','零件号','pred_values','仓库']
    return data_prs_info


def find_outliers_3sigma(data):  
    
    mean = sum(data) / len(data)  
    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5  
    if mean<=150:
        thr_sig_num_high  = mean + 3 * std_dev
    else:
        thr_sig_num_high  = mean
        
    thr_sig_num_low  = mean - 1 * std_dev
#     q3 = lambda x: x.quantile(0.75)  
#     q1 = lambda x: x.quantile(0.25)  
#     thr_sig_num_high = q3(x) + 1.5 * (q3(x) - q1(x)) 
#     thr_sig_num_low = q1(x) - 1.5 * (q3(x) - q1(x)) 

    return thr_sig_num_high,thr_sig_num_low



# 生成一个日期对象，表示2023年1月1日  
start_date = datetime.datetime(2023, 1, 1)  
  
# 初始化一个空列表来保存每个月的最后一天  
month_end_dates = []  
  
# 使用 calendar.monthrange() 函数获取每个月的天数  
# 然后加1天，因为我们想要的是当月的最后一天  
for month in range(2, 8):  # 这里 13 是因为一年只有12个月  
    _, num_days = calendar.monthrange(2023, month)  # 获取2023年的日历  
    end_date = start_date.replace(month=month, day=num_days)  # 创建日期对象  
    month_end_dates.append(end_date)  # 添加到列表中  
pred_full_pp = pd.DataFrame()

for date in month_end_dates:  
    end_train = date.strftime("%Y-%m-%d")
    data_train = feature_processing(filled_df,date)
    sku_list = data_train.columns
    # 创建空的字典，用于存储每个时间序列的预测模型
    forecasters = {}
    # 循环遍历每批次SKU，分别训练预测模型
    for i in range(0, len(sku_list), 200):
        batch_skus = sku_list[i:i + 200]
        for sku in batch_skus:
            forecaster = ForecasterAutoreg(  
            regressor=Ridge(random_state=123),  
            lags=20,  
                    )
            # 拟合模型
            forecaster.fit(y=data_train[sku])
            forecasters[sku] = forecaster
        # print(f"Finished training batch {i // self.batch_size + 1}/{len(sku_list) // self.batch_size + 1}")
    # 进行未来预测
    predictions = pd.DataFrame()
    for sku, forecaster in forecasters.items():
        forecast = forecaster.predict(steps=30)
        predictions[sku] = forecast
    predictions[predictions < 0.1] = 0


    his_info = demods_groby_month(data_train)
    thr_std = his_info.groupby(['零件号','仓库'])['pred_values'].apply(find_outliers_3sigma).reset_index()
    thr_std['pred_values_high']= [x[0] for x in thr_std['pred_values']]
    thr_std['pred_values_lower']= [x[1] for x in thr_std['pred_values']]
    for i in ['pred_values_high','pred_values_lower']:
        thr_std[i] =[0 if x<0 else x for x in thr_std[i]]
    thr_std = thr_std.drop(['pred_values'],axis =1 )
    
    

    pred_info = demods_groby_month(predictions)
    pred_info['pred_values'] = pred_info['pred_values'].round(2)
    
    
    full_pred_info = pd.merge(pred_info,thr_std,on =['零件号','仓库'],how ='left')
    full_pred_info['pred_values'] = full_pred_info.apply(lambda row: row['pred_values_high'] 
                                if row['pred_values'] > row['pred_values_high'] 
                                 else (row['pred_values_lower'] if row['pred_values'] < row['pred_values_lower'] 
                                       else row['pred_values']), axis=1)  
#     full_pred_info = full_pred_info.drop(['pred_values_lower','pred_values_high'],axis =1 )
    

    pred_full_pp = pred_full_pp.append(full_pred_info)


pred_full_pp.head()



  pred_full_pp = pred_full_pp.append(full_pred_info)
  pred_full_pp = pred_full_pp.append(full_pred_info)
  pred_full_pp = pred_full_pp.append(full_pred_info)
  pred_full_pp = pred_full_pp.append(full_pred_info)
  pred_full_pp = pred_full_pp.append(full_pred_info)
  pred_full_pp = pred_full_pp.append(full_pred_info)


Unnamed: 0,year,month,零件号,pred_values,仓库,pred_values_high,pred_values_lower
0,2023,3,02E300066L 00V,0.602957,1000-1,12.191128,0.602957
1,2023,3,03C907660S,30.59,1000-1,225.78479,0.0
2,2023,3,09G300033L,0.0,1000-2,1.192638,0.0
3,2023,3,09G300055J,0.0,1000-1,1.192638,0.0
4,2023,3,09M300036S,0.0,1000-1,2.296671,0.0


In [114]:
compar_pred_real_info = pd.merge(pred_full_pp,real_sum_counts,on =['零件号','仓库','year','month'],how ='left')
compar_pred_real_info.fillna(0,inplace=True)

In [115]:
svg_pred =pd.read_csv('svg预测结果2.csv',encoding='gb18030')
svg_pred.columns = ['零件代码','仓库','日期','预测值']

In [116]:
svg_pred['日期'] = pd.to_datetime(svg_pred['日期'], format='%Y%m').dt.strftime('%Y-%m')
svg_pred["year"] = pd.to_datetime(svg_pred['日期']).dt.year.astype(int)
svg_pred["month"] = pd.to_datetime(svg_pred['日期']).dt.month.astype(int)
svg_pred = svg_pred.groupby(['零件代码','仓库','year','month'])['预测值'].sum().reset_index()
svg_pred = svg_pred.rename(columns ={"零件代码":"零件号"})

In [117]:
compar_pred_real_info.head(10)

Unnamed: 0,year,month,零件号,pred_values,仓库,pred_values_high,pred_values_lower,需求数量
0,2023,3,02E300066L 00V,0.602957,1000-1,12.191128,0.602957,0.0
1,2023,3,03C907660S,30.59,1000-1,225.78479,0.0,0.0
2,2023,3,09G300033L,0.0,1000-2,1.192638,0.0,0.0
3,2023,3,09G300055J,0.0,1000-1,1.192638,0.0,0.0
4,2023,3,09M300036S,0.0,1000-1,2.296671,0.0,0.0
5,2023,3,09S927158AC,0.0,1000-1,27.055585,0.0,0.0
6,2023,3,0AM300058N 007,0.0,1000-1,2.188001,0.0,0.0
7,2023,3,0AM300060T 012,0.0,1000-1,4.220976,0.0,0.0
8,2023,3,0AM300066C 009,0.0,1000-1,9.034499,0.0,0.0
9,2023,3,0B5300062G 001,0.0,1000-3,0.844047,0.0,0.0


In [118]:
# svg_pred[(svg_pred['零件号']=='03C907660S')&(svg_pred['year']==2023)&(svg_pred['month']==3)]

In [119]:
full_compart_info  = pd.merge(compar_pred_real_info,svg_pred,on = ['零件号','仓库','year','month'],how ='left')

full_compart_info.fillna(0,inplace =True)

full_compart_info['需求数量']= [1.01 if x ==0 else x for x in full_compart_info['需求数量']]

full_compart_info['pred_values']= [1 if x ==0 else x for x in full_compart_info['pred_values']]

full_compart_info['chumi_mape'] = (abs(full_compart_info['需求数量'] - full_compart_info['pred_values']) 
                                 / full_compart_info['需求数量']) 
full_compart_info['svg_mape'] = (abs(full_compart_info['需求数量'] - full_compart_info['预测值']) 
                                 / full_compart_info['需求数量']) 
full_compart_info.sort_values(['零件号','chumi_mape'])

Unnamed: 0,year,month,零件号,pred_values,仓库,pred_values_high,pred_values_lower,需求数量,预测值,chumi_mape,svg_mape
96,2023,7,02E300066L 00V,1.000000,1000-1,11.542968,0.000000,1.00,0.078368,0.000000,0.921632
72,2023,6,02E300066L 00V,1.000000,1000-1,11.727033,0.000000,1.01,0.130700,0.009901,0.870594
120,2023,8,02E300066L 00V,1.000000,1000-1,11.294232,0.000000,1.01,2.376667,0.009901,1.353136
0,2023,3,02E300066L 00V,0.602957,1000-1,12.191128,0.602957,1.01,0.605147,0.403013,0.400845
24,2023,4,02E300066L 00V,0.334849,1000-1,12.062120,0.334849,1.01,0.363097,0.668466,0.640498
...,...,...,...,...,...,...,...,...,...,...,...
18,2023,3,KITPAB100,5.930000,6000,25.398673,0.390919,1.01,2.000000,4.871287,0.980198
119,2023,7,KITPAB100,12.040000,8023,62.284600,2.868096,2.00,6.370968,5.020000,2.185484
140,2023,8,KITPAB100,12.040000,8012,61.644589,0.000000,1.00,1.628203,11.040000,0.628203
92,2023,6,KITPAB100,13.390000,8012,64.140012,0.000000,1.00,4.290323,12.390000,3.290323


In [120]:
test = feature_processing(filled_df,'2023-10-01')

output = demods_groby_month(test).sort_values(['零件号','仓库','year','month'])
output[(output['零件号']=='KITPAB100')
                      &(output['仓库']=='8012')]


Unnamed: 0,year,month,零件号,pred_values,仓库
20,2022,1,KITPAB100,47,8012
44,2022,2,KITPAB100,9,8012
68,2022,3,KITPAB100,50,8012
92,2022,4,KITPAB100,13,8012
116,2022,5,KITPAB100,1,8012
140,2022,6,KITPAB100,48,8012
164,2022,7,KITPAB100,8,8012
188,2022,8,KITPAB100,20,8012
212,2022,9,KITPAB100,4,8012
236,2022,10,KITPAB100,21,8012


In [121]:
# full_compart_info.sort_values(['零件号','month']).to_excel('预测效果_20231101.xlsx')

In [122]:
full_compart_info.chumi_mape.mean()

1.8884581347363383

In [123]:
full_compart_info.svg_mape.mean()
# 1.提高模型响应速度 最近月份的比重
# 2.对话机器人的流程 chatgpt 

0.8464468467840773

In [124]:
full_compart_info.groupby(['month'])['chumi_mape','svg_mape'].mean().reset_index()

  full_compart_info.groupby(['month'])['chumi_mape','svg_mape'].mean().reset_index()


Unnamed: 0,month,chumi_mape,svg_mape
0,3,1.888129,0.761103
1,4,1.911361,0.737364
2,5,2.071726,0.890008
3,6,2.029872,0.887022
4,7,1.665057,0.839089
5,8,1.764603,0.964095
