In [653]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from skforecast.ForecasterAutoreg import ForecasterAutoreg

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

In [673]:
# Load the raw demand export. The key columns are read as strings so that
# purely numeric warehouse codes are never inferred as integers, which would
# make the string-based exclusion filter below silently match nothing.
data = pd.read_csv('20231031.csv', encoding='utf-8', dtype={'零件号': str, '仓库': str})
# Drop the excluded warehouses; .copy() yields an independent frame so the
# column assignment below does not write into a slice of the original.
data = data[~data['仓库'].isin(['8013', '8021', '8025', '8031'])].copy()
# data =data[data['零件号']!='6RD959801E']
# Parse the YYYYMMDD dates once (no intermediate string round trip needed).
data['日期'] = pd.to_datetime(data['日期'], format='%Y%m%d')
# Total demand per part number / day / warehouse.
new_df = data.groupby(['零件号', '日期', '仓库'])['需求数量'].sum().reset_index()

In [680]:
# Export the aggregated demand rows of part number 6RD959801E to a CSV file.
new_df.loc[new_df['零件号'].eq('6RD959801E')].to_csv('111.csv', encoding='gb18030')

In [675]:
# Monthly actual demand per part number / warehouse.
data_des = new_df.copy()
data_des["year"] = pd.to_datetime(data_des['日期']).dt.year.astype(int)
data_des["month"] = pd.to_datetime(data_des['日期']).dt.month.astype(int)
# Sum only the demand column: summing every column would also try to add up
# the datetime column '日期', which raises TypeError on pandas >= 2.0.
real_sum_counts = (
    data_des.groupby(['零件号', '仓库', 'year', 'month'])['需求数量']
    .sum()
    .reset_index()
)
real_sum_counts.sort_values(by=['零件号', '仓库', 'year', 'month'])

Unnamed: 0,零件号,仓库,year,month,需求数量
0,11D941078C,1000-2,2023,1,1
1,11D941078C,1000-2,2023,2,3
2,11D941078C,1000-2,2023,3,2
3,11D941078C,1000-2,2023,4,1
4,11D941078C,1000-2,2023,5,2
...,...,...,...,...,...
168,6RD959801E,1000-1,2023,6,69
169,6RD959801E,1000-1,2023,7,4
170,6RD959801E,1000-1,2023,8,8
171,6RD959801E,1000-1,2023,9,4


In [657]:
# Full daily calendar every series is expanded to.
date_range = pd.date_range(start='2022-01-01', end='2023-10-31', freq='D')

# Zero-fill each (part number, warehouse) series over the full calendar.
# The per-series frames are collected in a list and concatenated once:
# DataFrame.append is deprecated (removed in pandas 2.0) and copies the whole
# accumulated frame on every iteration.
filled_frames = []
for (part_no, warehouse), group in new_df.groupby(['零件号', '仓库']):
    # Days without demand get 0; rows dated outside date_range are dropped by reindex.
    sku_group = group.set_index('日期').reindex(date_range, fill_value=0).reset_index()
    # reindex also wrote 0 into the key columns of the new rows, so restore them.
    sku_group['零件号'] = part_no
    sku_group['仓库'] = warehouse
    filled_frames.append(sku_group)

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
filled_df = pd.concat(filled_frames, ignore_index=True) if filled_frames else pd.DataFrame()
filled_df.head(1)

  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)
  filled_df = filled_df.append(sku_group, ignore_index=True)


Unnamed: 0,index,零件号,仓库,需求数量
0,2022-01-01,11D941078C,1000-2,0


In [658]:
def feature_processing(data, end_time='2023-03-01'):
    """Pivot long daily demand into one column per series and split it by date.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with the columns 'index' (date), '零件号', '仓库'
        and '需求数量'. The frame is not modified.
    end_time : str or datetime-like, default '2023-03-01'
        First date of the test period; rows strictly before it form the
        training set.

    Returns
    -------
    (data_train, data_test) : tuple of pandas.DataFrame
        Daily-frequency frames indexed by 'date', with one column per
        '<零件号>_<仓库>' combination.
    """
    # Build the combined series key on a copy so the caller's frame keeps its
    # original columns (the key used to be written into the input frame).
    keyed = data.assign(**{'合并列': data['零件号'] + '_' + data['仓库']})
    # Long -> wide: one row per date, one column per combined key.
    wide = keyed.set_index(['index', '合并列'])['需求数量'].unstack()
    wide.columns.name = None
    wide = wide.reset_index().rename(columns={'index': 'date'})
    wide['date'] = pd.to_datetime(wide['date'], format='%Y-%m-%d')
    # Give the index an explicit daily frequency (any missing days become NaN rows).
    wide = wide.set_index('date').asfreq('D').sort_index()
    data_train = wide[wide.index < end_time].copy()
    data_test = wide[wide.index >= end_time].copy()
    return data_train, data_test

# Pivot the zero-filled daily demand into one column per part/warehouse series
# and split it into the training and test frames.
data_train,data_test = feature_processing(filled_df)

In [659]:

# Model / forecast settings.
N_LAGS = 30            # number of daily lags used as autoregressive features
FORECAST_STEPS = 180   # number of daily steps to forecast
# NOTE(review): if the training data ends on 2023-02-28, 180 daily steps reach
# 2023-08-27, so the last forecast month is only partially covered when the
# forecast is later aggregated by month - confirm this is intended.
MIN_DAILY_FORECAST = 0.1   # daily forecasts below this value are set to 0

sku_list = data_train.columns

# One independent autoregressive Ridge model per series, keyed by column name.
# (The former 200-wide batching loop had no effect and has been removed.)
forecasters = {}
for sku in sku_list:
    forecaster = ForecasterAutoreg(
        regressor=Ridge(random_state=123),
        lags=N_LAGS,
    )
    forecaster.fit(y=data_train[sku])
    forecasters[sku] = forecaster

# Forecast every series and assemble the result in a single constructor call
# instead of inserting columns one by one into an initially empty frame.
predictions = pd.DataFrame(
    {sku: forecaster.predict(steps=FORECAST_STEPS) for sku, forecaster in forecasters.items()}
)
predictions[predictions < MIN_DAILY_FORECAST] = 0

In [660]:
def demods_groby_month(data):
    """Aggregate wide daily values to monthly totals in long format.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide frame indexed by date (index unnamed or named 'date') with one
        column per series, each named '<零件号>_<仓库>'.

    Returns
    -------
    pandas.DataFrame
        Columns ['year', 'month', '零件号', 'pred_values', '仓库'], one row
        per (year, month, series), where 'pred_values' is the monthly sum.
    """
    daily = data.reset_index()
    # An unnamed index comes out of reset_index as 'index'.
    daily = daily.rename(columns={'index': 'date'})
    dates = pd.to_datetime(daily['date'])
    daily["year"] = dates.dt.year.astype(int)
    daily["month"] = dates.dt.month.astype(int)
    # Drop the datetime column before summing: pandas >= 2.0 no longer skips
    # non-summable columns silently and raises TypeError instead.
    monthly = daily.drop(columns=['date']).groupby(['year', 'month']).sum()
    # Wide -> long: one row per (year, month, combined series key).
    long_values = monthly.stack().rename('pred_values')
    long_values = long_values.rename_axis(index=['year', 'month', '零件号'])
    data_prs_info = long_values.reset_index()
    # Split the combined key back into part number and warehouse. The key must
    # contain exactly one '_', otherwise this assignment raises.
    key_parts = data_prs_info['零件号'].str.split('_', expand=True)
    data_prs_info[['零件号', '仓库']] = key_parts
    return data_prs_info

# Monthly totals of the training-period demand for every series.
his_info = demods_groby_month(data_train)


def find_outliers_3sigma(data, upper_k=2, lower_k=1):
    """Return (upper, lower) bounds around the mean of ``data``.

    Despite the name, the default band is asymmetric and is not a 3-sigma
    band: ``mean + 2 * std`` above and ``mean - 1 * std`` below, where ``std``
    is the population standard deviation (divisor ``len(data)``).

    Parameters
    ----------
    data : iterable of numbers
        Observations; must contain at least one value.
    upper_k : float, default 2
        Number of standard deviations added to the mean for the upper bound.
    lower_k : float, default 1
        Number of standard deviations subtracted from the mean for the lower
        bound.

    Returns
    -------
    tuple
        ``(upper_bound, lower_bound)``.

    Raises
    ------
    ZeroDivisionError
        If ``data`` is empty.
    """
    # Materialise once so one-shot iterables (generators) are supported too.
    values = list(data)
    count = len(values)
    mean = sum(values) / count
    std_dev = (sum((x - mean) ** 2 for x in values) / count) ** 0.5
    thr_sig_num_high = mean + upper_k * std_dev
    thr_sig_num_low = mean - lower_k * std_dev

    return thr_sig_num_high, thr_sig_num_low


# Per (part number, warehouse): upper / lower bounds derived from the monthly
# history; apply() leaves a (high, low) tuple in the 'pred_values' column.
thr_std = (
    his_info
    .groupby(['零件号', '仓库'])['pred_values']
    .apply(find_outliers_3sigma)
    .reset_index()
)
# Unpack each tuple position into its own column, flooring negative bounds at 0.
for position, bound_column in enumerate(['pred_values_high', 'pred_values_lower']):
    thr_std[bound_column] = [
        0 if bounds[position] < 0 else bounds[position]
        for bounds in thr_std['pred_values']
    ]

thr_std = thr_std.drop(columns=['pred_values'])
thr_std
    

Unnamed: 0,零件号,仓库,pred_values_high,pred_values_lower
0,11D941078C,1000-2,1.876504,0.0
1,3CC945208A,1000-1,2.147138,0.0
2,3CC945208A,3001,0.586507,0.0
3,3CC945208A,5001,0.586507,0.0
4,3CC945208A,5002,0.0,0.0
5,3CC945208A,6000,1.330036,0.0
6,3CC945208A,8011,0.0,0.0
7,3CC945208A,8012,0.0,0.0
8,3CC945208A,8015,0.586507,0.0
9,3CC945208A,8022,0.586507,0.0


In [661]:
# Monthly forecast per series, rounded to two decimals.
pred_info = demods_groby_month(predictions).assign(
    pred_values=lambda frame: frame['pred_values'].round(2)
)
# Attach the historical bounds of each series and export the joined table.
full_pred_info = pred_info.merge(thr_std, how='left', on=['零件号', '仓库'])
full_pred_info.to_csv('111.csv', encoding='gb18030')

In [662]:
# Monthly forecast per series, rounded to two decimals, with the historical bounds.
pred_info = demods_groby_month(predictions).assign(
    pred_values=lambda frame: frame['pred_values'].round(2)
)
full_pred_info = pred_info.merge(thr_std, how='left', on=['零件号', '仓库'])

# Clamp every monthly forecast into [pred_values_lower, pred_values_high]:
# values above the upper bound take the upper bound, otherwise values below
# the lower bound take the lower bound, everything else is kept unchanged.
monthly_forecast = full_pred_info['pred_values']
upper_bound = full_pred_info['pred_values_high']
lower_bound = full_pred_info['pred_values_lower']
full_pred_info['pred_values'] = np.where(
    monthly_forecast > upper_bound,
    upper_bound,
    np.where(monthly_forecast < lower_bound, lower_bound, monthly_forecast),
)

full_pred_info = full_pred_info.drop(columns=['pred_values_lower', 'pred_values_high'])

full_pred_info.head()


Unnamed: 0,year,month,零件号,pred_values,仓库
0,2023,3,11D941078C,1.876504,1000-2
1,2023,3,3CC945208A,1.19,1000-1
2,2023,3,3CC945208A,0.0,3001
3,2023,3,3CC945208A,0.0,5001
4,2023,3,3CC945208A,0.0,5002


In [663]:
# Join the clamped forecasts with the monthly actuals; a month without any
# matching actual row is treated as zero demand.
compar_pred_real_info = (
    full_pred_info
    .merge(real_sum_counts, how='left', on=['零件号', '仓库', 'year', 'month'])
    .fillna(0)
)

In [664]:
# Load the baseline ("svg") forecast. The merge keys are read as strings so
# they keep the same type as the keys of the demand data regardless of what
# the CSV parser would infer.
svg_pred = pd.read_csv(
    'svg预测结果.csv',
    encoding='gb18030',
    header=1,
    dtype={'零件代码': str, '仓库': str},
)
# Parse the YYYYMM period once instead of formatting it to text and re-parsing it.
svg_period = pd.to_datetime(svg_pred['日期'], format='%Y%m')
svg_pred["year"] = svg_period.dt.year.astype(int)
svg_pred["month"] = svg_period.dt.month.astype(int)
# Total baseline forecast per part number / warehouse / month.
svg_pred = svg_pred.groupby(['零件代码', '仓库', 'year', 'month'])['预测值'].sum().reset_index()
# Align the key name with the other frames.
svg_pred = svg_pred.rename(columns={"零件代码": "零件号"})
svg_pred.head()

Unnamed: 0,零件号,仓库,year,month,预测值
0,11D941078C,1000-2,2023,2,1.0
1,11D941078C,1000-2,2023,3,2.333333
2,11D941078C,1000-2,2023,4,2.166667
3,11D941078C,1000-2,2023,5,1.7
4,11D941078C,1000-2,2023,6,1.8


In [665]:
# Add the baseline forecast ('预测值') next to our forecast and the actuals;
# series/months missing from the baseline stay NaN at this point.
full_compart_info = compar_pred_real_info.merge(
    svg_pred,
    how='left',
    on=['零件号', '仓库', 'year', 'month'],
)
full_compart_info.head()

Unnamed: 0,year,month,零件号,pred_values,仓库,需求数量,预测值
0,2023,3,11D941078C,1.876504,1000-2,2.0,2.333333
1,2023,3,3CC945208A,1.19,1000-1,7.0,8.833333
2,2023,3,3CC945208A,0.0,3001,1.0,
3,2023,3,3CC945208A,0.0,5001,1.0,
4,2023,3,3CC945208A,0.0,5002,0.0,


In [666]:
# Missing baseline forecasts count as 0.
full_compart_info = full_compart_info.fillna(0)

# NOTE(review): zero actuals are replaced by 1.01 and zero values of our own
# forecast by 1 before the error is computed, while zeros in the baseline
# column '预测值' are left untouched - the two error columns are therefore not
# computed on the same footing. Confirm this is intended.
full_compart_info['需求数量'] = np.where(
    full_compart_info['需求数量'] == 0, 1.01, full_compart_info['需求数量']
)
full_compart_info['pred_values'] = np.where(
    full_compart_info['pred_values'] == 0, 1, full_compart_info['pred_values']
)

# Absolute percentage error of each forecast relative to the (adjusted) actuals.
actual_demand = full_compart_info['需求数量']
for error_column, forecast_column in (('chumi_mape', 'pred_values'), ('svg_mape', '预测值')):
    full_compart_info[error_column] = (
        (actual_demand - full_compart_info[forecast_column]).abs() / actual_demand
    )
full_compart_info

Unnamed: 0,year,month,零件号,pred_values,仓库,需求数量,预测值,chumi_mape,svg_mape
0,2023,3,11D941078C,1.876504,1000-2,2.00,2.333333,0.061748,0.166667
1,2023,3,3CC945208A,1.190000,1000-1,7.00,8.833333,0.830000,0.261905
2,2023,3,3CC945208A,1.000000,3001,1.00,0.000000,0.000000,1.000000
3,2023,3,3CC945208A,1.000000,5001,1.00,0.000000,0.000000,1.000000
4,2023,3,3CC945208A,1.000000,5002,1.01,0.000000,0.009901,1.000000
...,...,...,...,...,...,...,...,...,...
61,2023,8,3CC945208A,1.000000,8011,4.00,1.300000,0.750000,0.675000
62,2023,8,3CC945208A,1.000000,8012,4.00,1.333333,0.750000,0.666667
63,2023,8,3CC945208A,1.000000,8015,4.00,1.904762,0.750000,0.523810
64,2023,8,3CC945208A,1.000000,8022,4.00,1.222222,0.750000,0.694445


In [667]:
# Show the 20 rows with the largest pred_values (ascending sort, last 20 rows).
full_compart_info.sort_values(by=['pred_values']).tail(20)

Unnamed: 0,year,month,零件号,pred_values,仓库,需求数量,预测值,chumi_mape,svg_mape
3,2023,3,3CC945208A,1.0,5001,1.0,0.0,0.0,1.0
2,2023,3,3CC945208A,1.0,3001,1.0,0.0,0.0,1.0
23,2023,5,3CC945208A,1.0,1000-1,6.0,10.666667,0.833333,0.777778
24,2023,5,3CC945208A,1.0,3001,4.0,0.0,0.75,1.0
25,2023,5,3CC945208A,1.0,5001,4.0,0.0,0.75,1.0
26,2023,5,3CC945208A,1.0,5002,1.01,0.0,0.009901,1.0
27,2023,5,3CC945208A,1.0,6000,2.0,0.0,0.5,1.0
28,2023,5,3CC945208A,1.0,8011,1.01,0.0,0.009901,1.0
29,2023,5,3CC945208A,1.0,8012,1.01,0.0,0.009901,1.0
30,2023,5,3CC945208A,1.0,8015,2.0,0.0,0.5,1.0


In [668]:
# full_compart_info.columns = ['year', 'month', '零件号', '仓库', 'algo_预测', '真实值', 'svg预测',
#        'algo_mape', 'svg_mape']
# full_compart_info

In [669]:
# Save the comparison table (forecast, actuals, baseline and both error columns) to Excel.
full_compart_info.to_excel('预测效果_20231031_补0.xlsx')

In [670]:
# Mean absolute percentage error of our forecast over all rows.
full_compart_info['chumi_mape'].mean()

0.42812341615669974

In [671]:
# Mean absolute percentage error of the baseline forecast over all rows.
full_compart_info['svg_mape'].mean()

0.9400379701703961

In [672]:
# Mean error of both forecasts per calendar month. Several columns must be
# selected with a list (double brackets): the bare 'a', 'b' form is a tuple
# key, which is deprecated and rejected by pandas >= 2.0.
full_compart_info.groupby(['month'])[['chumi_mape', 'svg_mape']].mean().reset_index()

  full_compart_info.groupby(['month'])['chumi_mape','svg_mape'].mean().reset_index()


Unnamed: 0,month,chumi_mape,svg_mape
0,3,0.165568,0.857143
1,4,0.376304,1.787879
2,5,0.399006,0.902525
3,6,0.258496,0.835931
4,7,0.629112,0.673651
5,8,0.740254,0.583099
