In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import *
from statsmodels.graphics.tsaplots import *
from catboost import *
from sklearn.model_selection import *
from sklearn.metrics import *
from sklearn.datasets import load_diabetes
from sklearn.model_selection import *
from sklearn.linear_model import *
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import *
from sklearn.ensemble import *
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.svm import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.metrics import *
import pickle
import tensorflow as tf
import shap
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import *
import os

# tf.compat.v1.disable_v2_behavior()
forceCpu = False
# 使用CPU
if forceCpu:
    cpu = tf.config.list_physical_devices("CPU")
    tf.config.set_visible_devices(cpu)
    print(tf.config.list_logical_devices())

# 动态显存
if not forceCpu:
    physical_devices = tf.config.list_physical_devices('GPU')
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except:
        print('Invalid device or cannot modify virtual devices once initialized')

plt.rcParams['font.sans-serif'] = ['SimHei']  # 黑体
plt.rcParams['axes.unicode_minus'] = False  # 解决无法显示符号的问题
palette = 'deep'
sns.set(font='SimHei', font_scale=1.2, style='whitegrid', palette=palette)  # 解决Seaborn中文显示问题

rawDataPivot = pd.read_excel('../../preprocess/左侧填充-物流网络历史货量数据.xlsx')

# # 宽表转长表
rawData = pd.melt(rawDataPivot, id_vars=['场地1', '场地2'], var_name='日期', value_name='货量')

In [7]:
rawDataPivot.head()

Unnamed: 0,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,1,8,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
1,2,3,9,9,9,9,9,9,9,9,...,1,1,1,1,1,1,1,1,1,1
2,2,4,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
3,2,5,6,6,6,6,6,6,6,6,...,11,11,11,11,11,11,11,11,11,11
4,2,8,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [8]:
rawData.head()

Unnamed: 0,场地1,场地2,日期,货量
0,1,8,2021-01-01,3
1,2,3,2021-01-01,9
2,2,4,2021-01-01,2
3,2,5,2021-01-01,6
4,2,8,2021-01-01,2


In [9]:
def getxy():
    targetKey = '货量'
    # 对场地进行编码

    df = rawData.copy()
    # df = df[(df['场地1'] == 'DC14') & (df['场地2'] == 'DC10')].reset_index(drop=True)
    # df['场地1'] = df['场地1'].str.replace('DC', '')
    # df['场地1'] = df['场地1'].astype('int64')
    # df['场地2'] = df['场地2'].str.replace('DC', '')
    # df['场地2'] = df['场地2'].astype('int64')

    df['日期'] = pd.to_datetime(df['日期'])
    df['日期'] = df['日期'] - df['日期'].min()
    df['日期'] = df['日期'].apply(lambda x: x.days)

    df['货量'] = np.log(df['货量'])
    return df.drop(targetKey, axis=1), df[targetKey], df


def getPibotXy():
    # 对场地进行编码

    df = rawDataPivot.copy()
    # df['场地1'] = df['场地1'].str.replace('DC', '')
    # df['场地1'] = df['场地1'].astype('int64')
    # df['场地2'] = df['场地2'].str.replace('DC', '')
    # df['场地2'] = df['场地2'].astype('int64')

    df.iloc[:, 2:] = np.log(df.iloc[:, 2:])
    return df

In [10]:
_, _, data = getxy()


In [11]:
filterSize = 33

In [12]:
modelPath = '../算法评估/LSTM/LSTM.33.h5'


# 定义自定义指标函数
def r_square(y_true, y_pred):
    SS_res = tf.keras.backend.sum(tf.keras.backend.square(y_true - y_pred))
    SS_tot = tf.keras.backend.sum(tf.keras.backend.square(y_true - tf.keras.backend.mean(y_true)))
    return (1 - SS_res / (SS_tot + tf.keras.backend.epsilon()))


model = tf.keras.models.load_model(modelPath, custom_objects={'r_square': r_square})
metrics = r_square
metricsName = metrics.__name__
model.compile(loss='mse', optimizer='adam', metrics=[metrics])

# # 测试模型
# y_pred = model.predict(trainX, batch_size=5000)
# y_pred = np.round(np.exp(y_pred))

In [13]:
trainX = rawDataPivot.iloc[:, -4:]
trainX

Unnamed: 0,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,3,3,3,3
1,1,1,1,1
2,2,2,2,2
3,11,11,11,11
4,2,2,2,2
...,...,...,...,...
1044,4,4,4,4
1045,4,4,4,4
1046,1,1,1,1
1047,1,1,1,1


In [14]:
import datetime

rawDataPivotPredict = getPibotXy()
print(rawDataPivotPredict.dtypes)
rawDataPivotPredict.head()

场地1             int64
场地2             int64
2021-01-01    float64
2021-01-02    float64
2021-01-03    float64
               ...   
2022-12-27    float64
2022-12-28    float64
2022-12-29    float64
2022-12-30    float64
2022-12-31    float64
Length: 732, dtype: object


Unnamed: 0,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,1,8,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,...,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612
1,2,3,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,4,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147
3,2,5,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,...,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895
4,2,8,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147


In [15]:
# 假设rawDataPivotPredict是一个DataFrame对象
for col in rawDataPivotPredict.columns[2:]:
    # 将日期时间转换为日期，并将结果存储回原来的列名
    new_col_name = pd.to_datetime(col)
    rawDataPivotPredict = rawDataPivotPredict.rename(columns={col: new_col_name})

rawDataPivotPredict = rawDataPivotPredict.sort_values(['场地1', '场地2']).reset_index(drop=True)
rawDataPivotPredict.head()

Unnamed: 0,场地1,场地2,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,...,2022-12-22 00:00:00,2022-12-23 00:00:00,2022-12-24 00:00:00,2022-12-25 00:00:00,2022-12-26 00:00:00,2022-12-27 00:00:00,2022-12-28 00:00:00,2022-12-29 00:00:00,2022-12-30 00:00:00,2022-12-31 00:00:00
0,1,8,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,...,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612
1,2,3,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,4,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147
3,2,5,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,...,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895,2.397895
4,2,8,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147


In [16]:
预测长度 = 7
look_back = filterSize - 1
for i in range(0, 预测长度):
    # 测试变量形状
    temp = pd.merge(rawDataPivotPredict.iloc[:, :2], rawDataPivotPredict.iloc[:, 0 - look_back:], left_index=True,
                    right_index=True)
    temp = pd.melt(temp, id_vars=['场地1', '场地2'], var_name='日期', value_name='货量')

    temp['日期'] = pd.to_datetime(temp['日期'])
    temp['日期'] = temp['日期'] - pd.to_datetime(rawData['日期']).min()
    temp['日期'] = temp['日期'].apply(lambda x: x.days)

    # print(temp.dtypes)

    # ------------------------------

    # 准备数据

    # 按照时间排序
    # data = temp.drop('日期', axis=1)
    data = temp
    # print(data.head())

    # 将每条有向边转换成一个序列
    dataset = []
    for _, group in data.groupby(['场地1', '场地2']):
        dataset.append(group.values)

    # print(f'dataset数量: {len(dataset)}')
    # print(dataset[0].shape)
    # print(dataset[211])
    # break

    # ------------------------------

    trainX = np.reshape(np.asarray(dataset), (len(dataset), look_back, 4))
    trainX = trainX.astype('float64')
    # print(trainX.shape)
    # print(trainX[211])
    # break

    # ------------------------------

    # 先预测后一天
    y_pred = model.predict(trainX, batch_size=1000)
    nextCol = rawDataPivotPredict.columns[-1] + datetime.timedelta(days=1)
    rawDataPivotPredict[nextCol] = y_pred
    # print(rawDataPivotPredict)
    # break

    # 再自回归
    # model.fit(trainX, y_pred, epochs=1, batch_size=5000, verbose=0)




In [17]:
trainX[4]

array([[2.00000000e+00, 8.00000000e+00, 7.04000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.05000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.06000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.07000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.08000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.09000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.10000000e+02, 0.00000000e+00],
       [2.00000000e+00, 8.00000000e+00, 7.11000000e+02, 4.12713439e+00],
       [2.00000000e+00, 8.00000000e+00, 7.12000000e+02, 3.95124372e+00],
       [2.00000000e+00, 8.00000000e+00, 7.13000000e+02, 3.95124372e+00],
       [2.00000000e+00, 8.00000000e+00, 7.14000000e+02, 1.38629436e+00],
       [2.00000000e+00, 8.00000000e+00, 7.15000000e+02, 6.93147181e-01],
       [2.00000000e+00, 8.00000000e+00, 7.16000000e+02, 6.93147181e-01],
       [2.00000000e+00, 8.00000000e+00, 7.17000000e

In [18]:
rawDataPivotPredict.head()

Unnamed: 0,场地1,场地2,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,...,2022-12-29 00:00:00,2022-12-30 00:00:00,2022-12-31 00:00:00,2023-01-01 00:00:00,2023-01-02 00:00:00,2023-01-03 00:00:00,2023-01-04 00:00:00,2023-01-05 00:00:00,2023-01-06 00:00:00,2023-01-07 00:00:00
0,1,8,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,1.098612,...,1.098612,1.098612,1.098612,1.283937,1.407025,1.522386,1.630108,1.727087,1.809719,1.875506
1,2,3,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,2.197225,...,0.0,0.0,0.0,0.181937,0.317955,0.433632,0.533023,0.622084,0.704427,0.782376
2,2,4,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.885053,1.030437,1.166427,1.300791,1.444621,1.603264,1.768357
3,2,5,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,1.791759,...,2.397895,2.397895,2.397895,2.348067,2.330025,2.320559,2.316535,2.316118,2.318461,2.322928
4,2,8,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,...,0.693147,0.693147,0.693147,0.851415,0.953142,1.047746,1.142077,1.239264,1.341592,1.44789


In [19]:
rawDataPivotPredict.iloc[:, 2:] = np.round(np.exp(rawDataPivotPredict.iloc[:, 2:]))

In [20]:
rawDataPivotPredict

Unnamed: 0,场地1,场地2,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,...,2022-12-29 00:00:00,2022-12-30 00:00:00,2022-12-31 00:00:00,2023-01-01 00:00:00,2023-01-02 00:00:00,2023-01-03 00:00:00,2023-01-04 00:00:00,2023-01-05 00:00:00,2023-01-06 00:00:00,2023-01-07 00:00:00
0,1,8,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,4.0,4.0,5.0,5.0,6.0,6.0,7.0
1,2,3,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0
2,2,4,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,3.0,3.0,4.0,4.0,5.0,6.0
3,2,5,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
4,2,8,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1044,76,8,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,6.0,6.0
1045,76,10,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,4.0,4.0,4.0,4.0,5.0,5.0,5.0,6.0,6.0,6.0
1046,76,14,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0
1047,76,62,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0


In [21]:
print(rawDataPivotPredict.columns[4])
print(type(rawDataPivotPredict.columns[4]))
isinstance(rawDataPivotPredict.columns[4], datetime.datetime)

2021-01-03 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


True

In [22]:
for col in rawDataPivotPredict.columns:
    # 判断列是否为日期时间类型
    if isinstance(col, datetime.datetime):
        # 将日期时间转换为日期，并将结果存储回原来的列名
        new_col_name = col.strftime('%Y-%m-%d')
        print(new_col_name)
        rawDataPivotPredict = rawDataPivotPredict.rename(columns={col: new_col_name})
        rawDataPivotPredict[new_col_name] = rawDataPivotPredict[new_col_name].astype('int64')

rawDataPivotPredict.columns

2021-01-01
2021-01-02
2021-01-03
2021-01-04
2021-01-05
2021-01-06
2021-01-07
2021-01-08
2021-01-09
2021-01-10
2021-01-11
2021-01-12
2021-01-13
2021-01-14
2021-01-15
2021-01-16
2021-01-17
2021-01-18
2021-01-19
2021-01-20
2021-01-21
2021-01-22
2021-01-23
2021-01-24
2021-01-25
2021-01-26
2021-01-27
2021-01-28
2021-01-29
2021-01-30
2021-01-31
2021-02-01
2021-02-02
2021-02-03
2021-02-04
2021-02-05
2021-02-06
2021-02-07
2021-02-08
2021-02-09
2021-02-10
2021-02-11
2021-02-12
2021-02-13
2021-02-14
2021-02-15
2021-02-16
2021-02-17
2021-02-18
2021-02-19
2021-02-20
2021-02-21
2021-02-22
2021-02-23
2021-02-24
2021-02-25
2021-02-26
2021-02-27
2021-02-28
2021-03-01
2021-03-02
2021-03-03
2021-03-04
2021-03-05
2021-03-06
2021-03-07
2021-03-08
2021-03-09
2021-03-10
2021-03-11
2021-03-12
2021-03-13
2021-03-14
2021-03-15
2021-03-16
2021-03-17
2021-03-18
2021-03-19
2021-03-20
2021-03-21
2021-03-22
2021-03-23
2021-03-24
2021-03-25
2021-03-26
2021-03-27
2021-03-28
2021-03-29
2021-03-30
2021-03-31
2021-04-01

Index(['场地1', '场地2', '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
       '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
       ...
       '2022-12-29', '2022-12-30', '2022-12-31', '2023-01-01', '2023-01-02',
       '2023-01-03', '2023-01-04', '2023-01-05', '2023-01-06', '2023-01-07'],
      dtype='object', length=739)

In [23]:
rawDataPivotPredict

Unnamed: 0,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-29,2022-12-30,2022-12-31,2023-01-01,2023-01-02,2023-01-03,2023-01-04,2023-01-05,2023-01-06,2023-01-07
0,1,8,3,3,3,3,3,3,3,3,...,3,3,3,4,4,5,5,6,6,7
1,2,3,9,9,9,9,9,9,9,9,...,1,1,1,1,1,2,2,2,2,2
2,2,4,2,2,2,2,2,2,2,2,...,2,2,2,2,3,3,4,4,5,6
3,2,5,6,6,6,6,6,6,6,6,...,11,11,11,10,10,10,10,10,10,10
4,2,8,2,2,2,2,2,2,2,2,...,2,2,2,2,3,3,3,3,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1044,76,8,4,4,4,4,4,4,4,4,...,4,4,4,4,5,5,5,5,6,6
1045,76,10,3,3,3,3,3,3,3,3,...,4,4,4,4,5,5,5,6,6,6
1046,76,14,2,2,2,2,2,2,2,2,...,1,1,1,1,1,1,1,2,2,2
1047,76,62,2,2,2,2,2,2,2,2,...,1,1,1,1,1,1,2,2,2,2


In [24]:
rawDataPivotPredict.to_excel('第一问结果LTSM-物流网络历史货量数据.xlsx', index=False)