In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

plt.rcParams['font.sans-serif'] = ['SimHei']  # 黑体
plt.rcParams['axes.unicode_minus'] = False  # 解决无法显示符号的问题
palette = 'deep'
sns.set(font='SimHei', font_scale=1.2, style='whitegrid', palette=palette)  # 解决Seaborn中文显示问题

rawData = pd.read_excel('None空值填充-物流网络历史货量数据.xlsx')
print(rawData.dtypes)
rawData.head()

场地1                     object
场地2                     object
2021-01-01 00:00:00    float64
2021-01-02 00:00:00    float64
2021-01-03 00:00:00    float64
                        ...   
2022-12-27 00:00:00    float64
2022-12-28 00:00:00    float64
2022-12-29 00:00:00    float64
2022-12-30 00:00:00    float64
2022-12-31 00:00:00    float64
Length: 732, dtype: object


Unnamed: 0,场地1,场地2,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,...,2022-12-22 00:00:00,2022-12-23 00:00:00,2022-12-24 00:00:00,2022-12-25 00:00:00,2022-12-26 00:00:00,2022-12-27 00:00:00,2022-12-28 00:00:00,2022-12-29 00:00:00,2022-12-30 00:00:00,2022-12-31 00:00:00
0,DC1,DC8,,,,,,,,,...,,,,,,,,,,
1,DC10,DC12,306.0,328.0,163.0,193.0,565.0,964.0,623.0,284.0,...,,,,,,,,,,
2,DC10,DC13,,,,,,,,,...,,,,,,,,,,
3,DC10,DC14,2613.0,2941.0,5968.0,4459.0,7646.0,5106.0,7155.0,5939.0,...,8663.0,9818.0,12248.0,13255.0,9896.0,15492.0,8230.0,29021.0,9991.0,13254.0
4,DC10,DC17,,,,,,,,,...,,,,,,,,,,


In [2]:
for col in rawData.columns:
    # 判断列是否为日期时间类型
    if isinstance(col, datetime.datetime):
        # 将日期时间转换为日期，并将结果存储回原来的列名
        new_col_name = col.strftime('%Y-%m-%d')
        rawData = rawData.rename(columns={col: new_col_name})

rawData.columns

Index(['场地1', '场地2', '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
       '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
       ...
       '2022-12-22', '2022-12-23', '2022-12-24', '2022-12-25', '2022-12-26',
       '2022-12-27', '2022-12-28', '2022-12-29', '2022-12-30', '2022-12-31'],
      dtype='object', length=732)

In [3]:
rawData.head()

Unnamed: 0,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,DC1,DC8,,,,,,,,,...,,,,,,,,,,
1,DC10,DC12,306.0,328.0,163.0,193.0,565.0,964.0,623.0,284.0,...,,,,,,,,,,
2,DC10,DC13,,,,,,,,,...,,,,,,,,,,
3,DC10,DC14,2613.0,2941.0,5968.0,4459.0,7646.0,5106.0,7155.0,5939.0,...,8663.0,9818.0,12248.0,13255.0,9896.0,15492.0,8230.0,29021.0,9991.0,13254.0
4,DC10,DC17,,,,,,,,,...,,,,,,,,,,


In [4]:
newData = pd.melt(rawData, id_vars=['场地1', '场地2'], value_name='货量', var_name='日期')
newData.head()

Unnamed: 0,场地1,场地2,日期,货量
0,DC1,DC8,2021-01-01,
1,DC10,DC12,2021-01-01,306.0
2,DC10,DC13,2021-01-01,
3,DC10,DC14,2021-01-01,2613.0
4,DC10,DC17,2021-01-01,


In [5]:
newData = newData.sort_values(by=['场地1', '场地2', '日期'])
newData = newData.reset_index(drop=True)
newData.head()

Unnamed: 0,场地1,场地2,日期,货量
0,DC1,DC8,2021-01-01,
1,DC1,DC8,2021-01-02,
2,DC1,DC8,2021-01-03,
3,DC1,DC8,2021-01-04,
4,DC1,DC8,2021-01-05,


In [6]:
# newData.to_excel('None空值填充-长表-物流网络历史货量数据.xlsx', index=False)

In [7]:
# 进行groupby操作，将结果提取出来形成新的DataFrame
grouped_df_groupby = newData.groupby(['场地1', '场地2'])
grouped_df = [grouped_df_groupby.get_group(x) for x in grouped_df_groupby.groups]

print(len(grouped_df))
gpKeys = list(dict(grouped_df_groupby.groups).keys())
print(gpKeys)
print(gpKeys.index(('DC10', 'DC38')))
grouped_df[gpKeys.index(('DC10', 'DC38'))]

1049
[('DC1', 'DC8'), ('DC10', 'DC12'), ('DC10', 'DC13'), ('DC10', 'DC14'), ('DC10', 'DC17'), ('DC10', 'DC19'), ('DC10', 'DC20'), ('DC10', 'DC21'), ('DC10', 'DC22'), ('DC10', 'DC23'), ('DC10', 'DC25'), ('DC10', 'DC27'), ('DC10', 'DC28'), ('DC10', 'DC3'), ('DC10', 'DC30'), ('DC10', 'DC32'), ('DC10', 'DC33'), ('DC10', 'DC34'), ('DC10', 'DC35'), ('DC10', 'DC36'), ('DC10', 'DC38'), ('DC10', 'DC4'), ('DC10', 'DC40'), ('DC10', 'DC47'), ('DC10', 'DC5'), ('DC10', 'DC51'), ('DC10', 'DC59'), ('DC10', 'DC60'), ('DC10', 'DC61'), ('DC10', 'DC62'), ('DC10', 'DC64'), ('DC10', 'DC65'), ('DC10', 'DC67'), ('DC10', 'DC70'), ('DC10', 'DC73'), ('DC10', 'DC76'), ('DC10', 'DC78'), ('DC10', 'DC8'), ('DC10', 'DC80'), ('DC10', 'DC9'), ('DC11', 'DC10'), ('DC11', 'DC14'), ('DC11', 'DC22'), ('DC11', 'DC3'), ('DC11', 'DC5'), ('DC11', 'DC9'), ('DC12', 'DC10'), ('DC12', 'DC14'), ('DC12', 'DC23'), ('DC12', 'DC3'), ('DC12', 'DC32'), ('DC12', 'DC35'), ('DC12', 'DC36'), ('DC12', 'DC38'), ('DC12', 'DC40'), ('DC12', 'DC5')

Unnamed: 0,场地1,场地2,日期,货量
14600,DC10,DC38,2021-01-01,
14601,DC10,DC38,2021-01-02,
14602,DC10,DC38,2021-01-03,
14603,DC10,DC38,2021-01-04,15.0
14604,DC10,DC38,2021-01-05,
...,...,...,...,...
15325,DC10,DC38,2022-12-27,
15326,DC10,DC38,2022-12-28,
15327,DC10,DC38,2022-12-29,
15328,DC10,DC38,2022-12-30,


In [8]:
# 用上面已有值替换
for ndf in grouped_df:
    ndf.fillna(method='ffill', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.fillna(method='ffill', inplace=True)


In [9]:
grouped_df[gpKeys.index(('DC10', 'DC38'))]

Unnamed: 0,场地1,场地2,日期,货量
14600,DC10,DC38,2021-01-01,
14601,DC10,DC38,2021-01-02,
14602,DC10,DC38,2021-01-03,
14603,DC10,DC38,2021-01-04,15.0
14604,DC10,DC38,2021-01-05,15.0
...,...,...,...,...
15325,DC10,DC38,2022-12-27,8.0
15326,DC10,DC38,2022-12-28,8.0
15327,DC10,DC38,2022-12-29,8.0
15328,DC10,DC38,2022-12-30,8.0


In [10]:
# 用下面已有值替换
for ndf in grouped_df:
    ndf.fillna(method='bfill', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.fillna(method='bfill', inplace=True)


In [11]:
grouped_df[gpKeys.index(('DC10', 'DC38'))]

Unnamed: 0,场地1,场地2,日期,货量
14600,DC10,DC38,2021-01-01,15.0
14601,DC10,DC38,2021-01-02,15.0
14602,DC10,DC38,2021-01-03,15.0
14603,DC10,DC38,2021-01-04,15.0
14604,DC10,DC38,2021-01-05,15.0
...,...,...,...,...
15325,DC10,DC38,2022-12-27,8.0
15326,DC10,DC38,2022-12-28,8.0
15327,DC10,DC38,2022-12-29,8.0
15328,DC10,DC38,2022-12-30,8.0


In [12]:
completeDf = pd.concat([i for i in grouped_df], ignore_index=True)
completeDf.head()

Unnamed: 0,场地1,场地2,日期,货量
0,DC1,DC8,2021-01-01,3.0
1,DC1,DC8,2021-01-02,3.0
2,DC1,DC8,2021-01-03,3.0
3,DC1,DC8,2021-01-04,3.0
4,DC1,DC8,2021-01-05,3.0


In [13]:
completeDf = pd.pivot_table(completeDf, values='货量', index=['场地1', '场地2'], columns='日期').reset_index()
completeDf.sort_values(by=['场地1', '场地2'])
completeDf.head()

日期,场地1,场地2,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,...,2022-12-22,2022-12-23,2022-12-24,2022-12-25,2022-12-26,2022-12-27,2022-12-28,2022-12-29,2022-12-30,2022-12-31
0,DC1,DC8,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
1,DC10,DC12,306.0,328.0,163.0,193.0,565.0,964.0,623.0,284.0,...,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0
2,DC10,DC13,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
3,DC10,DC14,2613.0,2941.0,5968.0,4459.0,7646.0,5106.0,7155.0,5939.0,...,8663.0,9818.0,12248.0,13255.0,9896.0,15492.0,8230.0,29021.0,9991.0,13254.0
4,DC10,DC17,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [14]:
completeDf.to_excel('左侧填充-物流网络历史货量数据.xlsx', index=False)