In [84]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

In [85]:
data = pd.read_csv('19G963553A.csv',encoding='utf-8')
data['日期'] = pd.to_datetime(data['日期'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
new_df = data.groupby(['零件号','日期'])['需求数量'].sum().reset_index()
new_df['日期'] = pd.to_datetime(new_df['日期'])

In [86]:
# 生成日期范围
date_range = pd.date_range(start='2021-11-15', end='2023-10-31', freq='D')
# 创建空的DataFrame，准备存储填充后的结果
filled_df = pd.DataFrame()

# 针对每个 SKU 进行填充操作
for sku, group in new_df.groupby('零件号'):
    sku_group = group.set_index('日期').reindex(date_range, fill_value=0).reset_index()
    sku_group['零件号'] = sku
    filled_df = filled_df.append(sku_group, ignore_index=True)

df=filled_df.set_index(['index','零件号'])['需求数量'].unstack()
df.columns.name=None
df=df.reset_index()

  filled_df = filled_df.append(sku_group, ignore_index=True)


In [87]:
data = df.copy()
data = data.rename(columns = {'index':'date'})
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data = data.set_index('date')
data = data.asfreq('D')
data = data.sort_index()
data.head()

Unnamed: 0_level_0,19G963553A
date,Unnamed: 1_level_1
2021-11-15,1
2021-11-16,0
2021-11-17,0
2021-11-18,0
2021-11-19,0


In [88]:
# Split data into train-val-test
# ==============================================================================

import calendar  
import datetime  
  
# 生成一个日期对象，表示2023年1月1日  
start_date = datetime.datetime(2023, 1, 1)  
  
# 初始化一个空列表来保存每个月的最后一天  
month_end_dates = []  
  
# 使用 calendar.monthrange() 函数获取每个月的天数  
# 然后加1天，因为我们想要的是当月的最后一天  
for month in range(1, 13):  # 这里 13 是因为一年只有12个月  
    _, num_days = calendar.monthrange(2023, month)  # 获取2023年的日历  
    end_date = start_date.replace(month=month, day=num_days)  # 创建日期对象  
    month_end_dates.append(end_date)  # 添加到列表中  
llout = pd.DataFrame()
infout = pd.DataFrame()
for date in month_end_dates:  
    end_train = date.strftime("%Y-%m-%d")
    
    data_train = data.loc[:end_train, :].copy()
    data_test  = data.loc[end_train:, :].copy()



    import numpy as np
    import pandas as pd
    from skforecast.ForecasterAutoreg import ForecasterAutoreg
    from sklearn.linear_model import Ridge

    sku_list = data_train.columns

    # 每批次处理的SKU数量
    batch_size = 200

    # 创建空的字典，用于存储每个时间序列的预测模型
    forecasters = {}

    # 循环遍历每批次SKU，分别训练预测模型
    for i in range(0, len(sku_list), batch_size):
        batch_skus = sku_list[i:i+batch_size]

        for sku in batch_skus:
            forecaster = ForecasterAutoreg(
                             regressor          = Ridge(random_state=123),
                             lags               = 7,
                         )

            # 拟合模型
            forecaster.fit(y=data_train[sku])

            forecasters[sku] = forecaster


    # 进行未来预测
    predictions = pd.DataFrame()

    for sku, forecaster in forecasters.items():
        forecast = forecaster.predict(steps=30)
        predictions[sku] = forecast
#     predictions[predictions < 0.1] = 0
    infout=infout.append(predictions)
    test_pred = predictions.reset_index()
    test_pred["year"] = pd.to_datetime(test_pred['index']).dt.year.astype(int)
    test_pred["month"] = pd.to_datetime(test_pred['index']).dt.month.astype(int)
    pred_info = test_pred.groupby(['year','month']).sum().reset_index()


    pred_info=pred_info.set_index(['year','month']).stack()
    pred_info=pred_info.rename_axis(index=['year','month','零件号'])
    pred_info.name='pred_values'
    pred_info = pred_info.reset_index()
    llout = llout.append(pred_info)
llout

  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)
  infout=infout.append(predictions)
  llout = llout.append(pred_info)


Unnamed: 0,year,month,零件号,pred_values
0,2023,2,19G963553A,3.357659
1,2023,3,19G963553A,0.225649
0,2023,3,19G963553A,3.26283
0,2023,4,19G963553A,3.503912
0,2023,5,19G963553A,3.545671
0,2023,6,19G963553A,4.745159
0,2023,7,19G963553A,5.561352
0,2023,8,19G963553A,4.600319
0,2023,9,19G963553A,6.430759
0,2023,10,19G963553A,5.872592


In [93]:
infout.reset_index().groupby(['index']).sum().reset_index().to_csv('111.csv',encoding ='utf-8')

In [2]:
1100000*0.001

1100.0

In [None]:
# 内存空间
# ibm