In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

plt.rcParams['font.sans-serif'] = ['SimHei']  # 黑体
plt.rcParams['axes.unicode_minus'] = False  # 解决无法显示符号的问题
palette = 'deep'
sns.set(font='SimHei', font_scale=1.2, style='whitegrid', palette=palette)  # 解决Seaborn中文显示问题

rawData = pd.read_excel('None空值填充-物流网络历史货量数据.xlsx')
# rawData['场地1'] = rawData['场地1'].str.replace('DC', '')
# rawData['场地1'] = rawData['场地1'].astype('int64')
# rawData['场地2'] = rawData['场地2'].str.replace('DC', '')
# rawData['场地2'] = rawData['场地2'].astype('int64')
print(rawData.dtypes)
rawData.head()

场地1                      int64
场地2                      int64
2021-01-01 00:00:00    float64
2021-01-02 00:00:00    float64
2021-01-03 00:00:00    float64
                        ...   
2022-12-27 00:00:00    float64
2022-12-28 00:00:00    float64
2022-12-29 00:00:00    float64
2022-12-30 00:00:00    float64
2022-12-31 00:00:00    float64
Length: 732, dtype: object


Unnamed: 0,场地1,场地2,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,...,2022-12-22 00:00:00,2022-12-23 00:00:00,2022-12-24 00:00:00,2022-12-25 00:00:00,2022-12-26 00:00:00,2022-12-27 00:00:00,2022-12-28 00:00:00,2022-12-29 00:00:00,2022-12-30 00:00:00,2022-12-31 00:00:00
0,1,8,,,,,,,,,...,,,,,,,,,,
1,2,3,,,,,,,,,...,,,,,,,,,,
2,2,4,,,,,,,,,...,,,,,,,,,,
3,2,5,,,,,,,,,...,,,,,,,,,,
4,2,8,,,,,,,,,...,,,,,,,,,,


In [10]:
for col in rawData.columns:
    # 判断列是否为日期时间类型
    if isinstance(col, datetime.datetime):
        # 将日期时间转换为日期，并将结果存储回原来的列名
        new_col_name = col.strftime('%Y-%m-%d')
        rawData = rawData.rename(columns={col: new_col_name})

rawData.columns

Index(['场地1', '场地2', '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
       '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
       ...
       '2022-12-22', '2022-12-23', '2022-12-24', '2022-12-25', '2022-12-26',
       '2022-12-27', '2022-12-28', '2022-12-29', '2022-12-30', '2022-12-31'],
      dtype='object', length=732)

In [11]:
rawData.head()

Unnamed: 0,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,1,8,,,,,,,,,...,,,,,,,,,,
1,2,3,,,,,,,,,...,,,,,,,,,,
2,2,4,,,,,,,,,...,,,,,,,,,,
3,2,5,,,,,,,,,...,,,,,,,,,,
4,2,8,,,,,,,,,...,,,,,,,,,,


In [12]:
newData = pd.melt(rawData, id_vars=['场地1', '场地2'], value_name='货量', var_name='日期')
newData.head()

Unnamed: 0,场地1,场地2,日期,货量
0,1,8,2021-01-01,
1,2,3,2021-01-01,
2,2,4,2021-01-01,
3,2,5,2021-01-01,
4,2,8,2021-01-01,


In [13]:
newData = newData.sort_values(by=['场地1', '场地2', '日期'])
newData = newData.reset_index(drop=True)
newData.head()

Unnamed: 0,场地1,场地2,日期,货量
0,1,8,2021-01-01,
1,1,8,2021-01-02,
2,1,8,2021-01-03,
3,1,8,2021-01-04,
4,1,8,2021-01-05,


In [14]:
newData.to_excel('None空值填充-长表-物流网络历史货量数据.xlsx', index=False)

In [15]:
# 进行groupby操作，将结果提取出来形成新的DataFrame
grouped_df_groupby = newData.groupby(['场地1', '场地2'])
grouped_df = [grouped_df_groupby.get_group(x) for x in grouped_df_groupby.groups]

print(len(grouped_df))
gpKeys = list(dict(grouped_df_groupby.groups).keys())
print(gpKeys)
print(gpKeys.index((10, 38)))
grouped_df[gpKeys.index((10, 38))]

1049
[(1, 8), (2, 3), (2, 4), (2, 5), (2, 8), (2, 9), (2, 10), (2, 14), (2, 62), (3, 4), (3, 5), (3, 8), (3, 9), (3, 10), (3, 12), (3, 14), (3, 19), (3, 21), (3, 22), (3, 23), (3, 28), (3, 30), (3, 35), (3, 36), (3, 38), (3, 40), (4, 3), (4, 5), (4, 8), (4, 9), (4, 10), (4, 14), (4, 17), (4, 19), (4, 20), (4, 21), (4, 22), (4, 23), (4, 30), (4, 34), (4, 35), (4, 36), (4, 38), (4, 40), (4, 62), (4, 64), (4, 65), (4, 70), (5, 3), (5, 4), (5, 8), (5, 9), (5, 10), (5, 12), (5, 14), (5, 15), (5, 19), (5, 20), (5, 21), (5, 22), (5, 23), (5, 30), (5, 31), (5, 32), (5, 35), (5, 36), (5, 40), (5, 51), (5, 61), (5, 62), (5, 70), (6, 4), (6, 10), (6, 14), (6, 23), (6, 62), (6, 70), (7, 3), (7, 4), (7, 5), (7, 8), (7, 9), (7, 10), (7, 12), (7, 14), (7, 34), (7, 35), (7, 40), (7, 62), (7, 65), (7, 70), (8, 3), (8, 4), (8, 5), (8, 9), (8, 10), (8, 13), (8, 14), (8, 15), (8, 25), (8, 27), (8, 32), (8, 33), (8, 34), (8, 35), (8, 38), (8, 40), (8, 43), (8, 44), (8, 59), (8, 62), (8, 64), (8, 65), (8, 7

Unnamed: 0,场地1,场地2,日期,货量
118990,10,38,2021-01-01,
118991,10,38,2021-01-02,
118992,10,38,2021-01-03,
118993,10,38,2021-01-04,15.0
118994,10,38,2021-01-05,
...,...,...,...,...
119715,10,38,2022-12-27,
119716,10,38,2022-12-28,
119717,10,38,2022-12-29,
119718,10,38,2022-12-30,


In [16]:
# 用上面已有值替换
for ndf in grouped_df:
    ndf.fillna(method='ffill', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.fillna(method='ffill', inplace=True)


In [17]:
grouped_df[gpKeys.index((10, 38))]

Unnamed: 0,场地1,场地2,日期,货量
118990,10,38,2021-01-01,
118991,10,38,2021-01-02,
118992,10,38,2021-01-03,
118993,10,38,2021-01-04,15.0
118994,10,38,2021-01-05,15.0
...,...,...,...,...
119715,10,38,2022-12-27,8.0
119716,10,38,2022-12-28,8.0
119717,10,38,2022-12-29,8.0
119718,10,38,2022-12-30,8.0


In [18]:
# 用下面已有值替换
for ndf in grouped_df:
    ndf.fillna(method='bfill', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.fillna(method='bfill', inplace=True)


In [19]:
grouped_df[gpKeys.index((10, 38))]

Unnamed: 0,场地1,场地2,日期,货量
118990,10,38,2021-01-01,15.0
118991,10,38,2021-01-02,15.0
118992,10,38,2021-01-03,15.0
118993,10,38,2021-01-04,15.0
118994,10,38,2021-01-05,15.0
...,...,...,...,...
119715,10,38,2022-12-27,8.0
119716,10,38,2022-12-28,8.0
119717,10,38,2022-12-29,8.0
119718,10,38,2022-12-30,8.0


In [20]:
completeDf = pd.concat([i for i in grouped_df], ignore_index=True)
completeDf.head()

Unnamed: 0,场地1,场地2,日期,货量
0,1,8,2021-01-01,3.0
1,1,8,2021-01-02,3.0
2,1,8,2021-01-03,3.0
3,1,8,2021-01-04,3.0
4,1,8,2021-01-05,3.0


In [21]:
completeDf = pd.pivot_table(completeDf, values='货量', index=['场地1', '场地2'], columns='日期').reset_index()
completeDf = completeDf.sort_values(by=['场地1', '场地2'])
completeDf.head()

日期,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,1,8,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
1,2,3,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2,4,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
3,2,5,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0
4,2,8,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [22]:
completeDf.to_excel('左侧填充-物流网络历史货量数据.xlsx', index=False)
newData = pd.melt(completeDf, id_vars=['场地1', '场地2'], value_name='货量', var_name='日期')
newData = newData.sort_values(by=['场地1', '场地2', '日期'])
newData = newData.reset_index(drop=True)
newData.to_excel('左侧填充-长表-物流网络历史货量数据.xlsx', index=False)