In [81]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

In [82]:
data = pd.read_csv('19G963553A.csv',encoding='utf-8')
data['日期'] = pd.to_datetime(data['日期'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
new_df = data.groupby(['零件号','日期'])['需求数量'].sum().reset_index()
new_df['日期'] = pd.to_datetime(new_df['日期'])

In [83]:
# 生成日期范围
date_range = pd.date_range(start='2021-11-15', end='2023-10-31', freq='D')
# 创建空的DataFrame，准备存储填充后的结果
filled_df = pd.DataFrame()

# 针对每个 SKU 进行填充操作
for sku, group in new_df.groupby('零件号'):
    sku_group = group.set_index('日期').reindex(date_range, fill_value=0).reset_index()
    sku_group['零件号'] = sku
    filled_df = filled_df.append(sku_group, ignore_index=True)

df=filled_df.set_index(['index','零件号'])['需求数量'].unstack()
df.columns.name=None
df=df.reset_index()

  filled_df = filled_df.append(sku_group, ignore_index=True)


In [84]:
data = df.copy()
data = data.rename(columns = {'index':'date'})
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data = data.set_index('date')
data = data.asfreq('D')
data = data.sort_index()
data.head()

Unnamed: 0_level_0,19G963553A
date,Unnamed: 1_level_1
2021-11-15,1
2021-11-16,0
2021-11-17,0
2021-11-18,0
2021-11-19,0


In [85]:
# Split data into train-val-test
# ==============================================================================
end_train = '2023-12-31'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

print(
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   "
    f"(n={len(data_train)})"
)
print(
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   "
    f"(n={len(data_test)})"
)



import numpy as np
import pandas as pd
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.linear_model import Ridge

sku_list = data_train.columns

# 每批次处理的SKU数量
batch_size = 200

# 创建空的字典，用于存储每个时间序列的预测模型
forecasters = {}

# 循环遍历每批次SKU，分别训练预测模型
for i in range(0, len(sku_list), batch_size):
    batch_skus = sku_list[i:i+batch_size]
    
    for sku in batch_skus:
        forecaster = ForecasterAutoreg(
                         regressor          = Ridge(random_state=123),
                         lags               = 20,
                     )
        
        # 拟合模型
        forecaster.fit(y=data_train[sku])
        
        forecasters[sku] = forecaster
    
    print(f"Finished training batch {i//batch_size + 1}/{len(sku_list)//batch_size + 1}")

# 进行未来预测
predictions = pd.DataFrame()

for sku, forecaster in forecasters.items():
    forecast = forecaster.predict(steps=365)
    predictions[sku] = forecast

# 打印前 3 个预测结果
predictions.head(3)

Train dates : 2021-11-15 00:00:00 --- 2022-12-31 00:00:00   (n=412)
Test dates  : 2022-12-31 00:00:00 --- 2023-10-31 00:00:00   (n=305)
Finished training batch 1/1


Unnamed: 0,19G963553A
2023-01-01,0.186363
2023-01-02,0.141748
2023-01-03,0.198389


In [100]:
day_info = predictions.reset_index()

filled_df

Unnamed: 0,index,零件号,需求数量
0,2021-11-15,19G963553A,1
1,2021-11-16,19G963553A,0
2,2021-11-17,19G963553A,0
3,2021-11-18,19G963553A,0
4,2021-11-19,19G963553A,0
...,...,...,...
711,2023-10-27,19G963553A,0
712,2023-10-28,19G963553A,0
713,2023-10-29,19G963553A,0
714,2023-10-30,19G963553A,0


In [104]:
info_day = pd.merge(day_info,filled_df,on = ['index'],how ='inner')
info_day.to_excel('19G963553A预测明细.xlsx')

In [86]:
# predictions[predictions < 0.1] = 0.1

In [87]:
test_pred = predictions.reset_index()
test_pred["year"] = pd.to_datetime(test_pred['index']).dt.year.astype(int)
test_pred["month"] = pd.to_datetime(test_pred['index']).dt.month.astype(int)
pred_info = test_pred.groupby(['year','month']).sum().reset_index()


pred_info=pred_info.set_index(['year','month']).stack()
pred_info=pred_info.rename_axis(index=['year','month','零件号'])
pred_info.name='pred_values'
pred_info = pred_info.reset_index()
pred_info.head()

Unnamed: 0,year,month,零件号,pred_values
0,2023,1,19G963553A,3.989259
1,2023,2,19G963553A,3.074915
2,2023,3,19G963553A,3.336838
3,2023,4,19G963553A,3.214065
4,2023,5,19G963553A,3.319549


In [88]:
data_test_real = data.reset_index()
data_test_real["year"] = pd.to_datetime(data_test_real['date']).dt.year.astype(int)
data_test_real["month"] = pd.to_datetime(data_test_real['date']).dt.month.astype(int)
real_info = data_test_real.groupby(['year','month']).sum().reset_index()
real_info=real_info.set_index(['year','month']).stack()
real_info=real_info.rename_axis(index=['year','month','零件号'])
real_info.name='real_values'
real_info = real_info.reset_index()
real_info.head()

Unnamed: 0,year,month,零件号,real_values
0,2021,11,19G963553A,1
1,2021,12,19G963553A,1
2,2022,1,19G963553A,1
3,2022,2,19G963553A,3
4,2022,3,19G963553A,1


In [89]:
output = pd.merge(real_info,pred_info,on =['year','month','零件号'],how ='inner')

for i in ['real_values','pred_values']:
    output[i]= [1.01 if x ==0 else x for x in output[i]]
output['chumi_mape'] = (abs(output['real_values'] - output['pred_values']) 
                                 / output['real_values']) 
output.head()

Unnamed: 0,year,month,零件号,real_values,pred_values,chumi_mape
0,2023,1,19G963553A,7,3.989259,0.430106
1,2023,2,19G963553A,2,3.074915,0.537458
2,2023,3,19G963553A,7,3.336838,0.523309
3,2023,4,19G963553A,4,3.214065,0.196484
4,2023,5,19G963553A,14,3.319549,0.762889


In [90]:
svg_pred =pd.read_csv('19G963553A预测.csv',encoding= 'utf-8')
svg_pred['日期'] = pd.to_datetime(svg_pred['日期'], format='%Y%m').dt.strftime('%Y-%m')
svg_pred["year"] = pd.to_datetime(svg_pred['日期']).dt.year.astype(int)
svg_pred["month"] = pd.to_datetime(svg_pred['日期']).dt.month.astype(int)

ll = pd.merge(output,svg_pred,on = ['year','month'],how ='inner')
ll.head()

Unnamed: 0,year,month,零件号,real_values,pred_values,chumi_mape,零件代码,仓库,日期,预测值
0,2023,1,19G963553A,7,3.989259,0.430106,19G963553A,1000-3,2023-01,4.028571
1,2023,2,19G963553A,2,3.074915,0.537458,19G963553A,1000-3,2023-02,6.849133
2,2023,3,19G963553A,7,3.336838,0.523309,19G963553A,1000-3,2023-03,4.887097
3,2023,4,19G963553A,4,3.214065,0.196484,19G963553A,1000-3,2023-04,6.265913
4,2023,5,19G963553A,14,3.319549,0.762889,19G963553A,1000-3,2023-05,4.903226


In [91]:
ll['svg_mape'] = (abs(ll['real_values'] - ll['预测值']) 
                                 / ll['real_values']) 
ll.head()

Unnamed: 0,year,month,零件号,real_values,pred_values,chumi_mape,零件代码,仓库,日期,预测值,svg_mape
0,2023,1,19G963553A,7,3.989259,0.430106,19G963553A,1000-3,2023-01,4.028571,0.42449
1,2023,2,19G963553A,2,3.074915,0.537458,19G963553A,1000-3,2023-02,6.849133,2.424567
2,2023,3,19G963553A,7,3.336838,0.523309,19G963553A,1000-3,2023-03,4.887097,0.301843
3,2023,4,19G963553A,4,3.214065,0.196484,19G963553A,1000-3,2023-04,6.265913,0.566478
4,2023,5,19G963553A,14,3.319549,0.762889,19G963553A,1000-3,2023-05,4.903226,0.64977


In [96]:
tests = ll[['month','chumi_mape','svg_mape','real_values','pred_values','预测值']]
tests

Unnamed: 0,month,chumi_mape,svg_mape,real_values,pred_values,预测值
0,1,0.430106,0.42449,7,3.989259,4.028571
1,2,0.537458,2.424567,2,3.074915,6.849133
2,3,0.523309,0.301843,7,3.336838,4.887097
3,4,0.196484,0.566478,4,3.214065,6.265913
4,5,0.762889,0.64977,14,3.319549,4.903226
5,6,0.770563,0.417278,14,3.212113,8.158113
6,7,0.446811,0.045699,6,3.319136,5.725806
7,8,0.792555,0.598979,16,3.319128,6.41634
8,9,0.830944,0.665354,19,3.212058,6.358269
9,10,0.585109,0.216464,8,3.319126,6.268291


In [93]:
tests.chumi_mape.mean()

0.5876227846752544

In [94]:
tests.svg_mape.mean()

0.6310920745300751

In [105]:
llll = new_df.copy()
llll['日期'] = pd.to_datetime(llll['日期'], format='%Y%m').dt.strftime('%Y-%m')
llll["year"] = pd.to_datetime(llll['日期']).dt.year.astype(int)
llll["month"] = pd.to_datetime(llll['日期']).dt.month.astype(int)
llll.groupby(['year','month']).sum().reset_index()

Unnamed: 0,year,month,需求数量
0,2021,11,1
1,2021,12,1
2,2022,1,1
3,2022,2,3
4,2022,3,1
5,2022,5,2
6,2022,6,1
7,2022,7,5
8,2022,8,7
9,2022,9,5
