In [None]:
import numpy as np 
import pandas as pd 

import os
# List every file found under ./data (recursively) so the available inputs are visible.
for root, _subdirs, names in os.walk('./data'):
    for name in names:
        print(os.path.join(root, name))

# Show the working directory that the relative ./data path is resolved against.
print(os.getcwd())


In [None]:
# Load the demand and stock training tables; both CSV files are decoded as GBK.
df_demand = pd.read_csv(
    './data/demand_train.csv',
    encoding="gbk",
)
df_stock = pd.read_csv(
    './data/stock_train.csv',
    encoding="gbk",
)

In [None]:
# Convert the posting-date column ('过账日期') of the demand table to datetime values.
df_demand['过账日期'] = df_demand['过账日期'].pipe(pd.to_datetime)

# Preview the first ten rows.
df_demand.head(n=10)

In [None]:
# Convert the posting-date column ('过账日期') of the stock table to datetime values.
df_stock['过账日期'] = df_stock['过账日期'].pipe(pd.to_datetime)

# Preview the first ten rows.
df_stock.head(n=10)

In [None]:
# Check whether a material code ('物料编码') determines a single factory code ('工厂编码'):
# count the distinct factory codes seen for each material code, then summarise the
# counts (a maximum of 1 in the count column means the mapping is unique).
df_group_test = (
    df_demand
    .groupby(['物料编码'])['工厂编码']
    .nunique()
    .reset_index()
)
df_group_test.describe()

In [None]:
# Check whether a material code ('物料编码') determines a single material category ('物料品类'):
# count the distinct categories seen for each material code, then summarise the
# counts (a maximum of 1 in the count column means the mapping is unique).
df_group_test = (
    df_demand
    .groupby(['物料编码'])['物料品类']
    .nunique()
    .reset_index()
)
df_group_test.describe()

In [None]:
# Check whether a material code ('物料编码') determines a single material brand ('物料品牌'):
# count the distinct brands seen for each material code, then summarise the
# counts (a maximum of 1 in the count column means the mapping is unique).
df_group_test = (
    df_demand
    .groupby(['物料编码'])['物料品牌']
    .nunique()
    .reset_index()
)
df_group_test.describe()

In [None]:
# Check whether a material code ('物料编码') determines a single material type ('物料类型'):
# count the distinct types seen for each material code, then summarise the
# counts (a maximum of 1 in the count column means the mapping is unique).
df_group_test = (
    df_demand
    .groupby(['物料编码'])['物料类型']
    .nunique()
    .reset_index()
)
df_group_test.describe()

In [None]:
# Descriptive statistics for the demand table (displayed as the cell output).
df_demand.describe()

In [None]:
# Descriptive statistics for the stock table (displayed as the cell output).
df_stock.describe()

In [None]:
# Print the demand table's column dtypes, non-null counts and memory usage.
df_demand.info()

In [None]:
# Print the stock table's column dtypes, non-null counts and memory usage.
df_stock.info()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Font families able to render the Chinese column names and labels, in order of
# preference. matplotlib uses the first family in this list that is installed,
# so the same cell works on macOS, Windows and Linux without editing.
plt.rcParams['font.sans-serif'] = [
    'Arial Unicode MS',     # macOS
    'SimHei',               # Windows
    'Microsoft YaHei',      # Windows
    'Noto Sans CJK SC',     # Linux
    'WenQuanYi Micro Hei',  # Linux
    'DejaVu Sans',          # matplotlib's bundled font; last resort (no CJK glyphs)
]
# Draw minus signs with the ASCII hyphen, since CJK fonts may lack the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False


# Order the demand rows chronologically by posting date ('过账日期').
df_demand_sorted = df_demand.sort_values('过账日期')

# Demand quantity ('需求量') against posting date. Rows that share a date are
# aggregated by seaborn's lineplot defaults (mean plus a confidence band).
plt.figure(figsize=(12, 6))
sns.lineplot(x='过账日期', y='需求量', data=df_demand_sorted, marker='o', color='b')
plt.title('Demand Over Time')
plt.xlabel('Date')
plt.ylabel('Demand Quantity')
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()

In [None]:
# Total demand ('需求量') per factory code ('工厂编码'), followed by summary
# statistics of those totals.
df_filtered = df_demand.loc[:, ['需求量', '工厂编码']]
df_grouped = (
    df_filtered
    .groupby('工厂编码')
    .sum()
    .reset_index()
)
df_grouped.describe()

In [None]:
# Bar chart of demand per factory code, drawn from df_grouped. The x variable
# is also assigned to hue so that every bar gets its own viridis shade.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=df_grouped,
    x='工厂编码',
    y='需求量',
    hue='工厂编码',
    palette='viridis',
    ax=ax,
)
ax.set_xlabel('工厂编码')
ax.set_ylabel('需求量')
plt.show()

In [None]:
# Total demand ('需求量') per material code ('物料编码'), followed by summary
# statistics of those totals.
df_filtered = df_demand.loc[:, ['需求量', '物料编码']]
df_grouped = (
    df_filtered
    .groupby('物料编码')
    .sum()
    .reset_index()
)
df_grouped.describe()
# Disabled bar chart of the per-material totals, kept for reference:
# plt.figure(figsize=(10, 6))
# sns.barplot(data=df_grouped, x='物料编码', y='需求量', palette='viridis')
# plt.title('Total Demand by Material Code')
# plt.xlabel('Material Code')
# plt.ylabel('Total Demand')
# plt.show()

In [None]:
# Total demand ('需求量') per material type ('物料类型'), followed by summary
# statistics of those totals.
df_filtered = df_demand.loc[:, ['需求量', '物料类型']]
df_grouped = (
    df_filtered
    .groupby('物料类型')
    .sum()
    .reset_index()
)
df_grouped.describe()

In [None]:
# Bar chart of demand per material type, drawn from df_grouped.
# `palette` only takes effect together with `hue`; passing it alone is
# deprecated in seaborn 0.13. Assigning the x variable to hue keeps one
# viridis shade per bar, the same way the factory-code bar chart is drawn.
plt.figure(figsize=(10, 6))
sns.barplot(
    data=df_grouped,
    x='物料类型',
    y='需求量',
    hue='物料类型',
    dodge=False,  # keep bars full-width on seaborn versions that dodge hue levels by default
    palette='viridis',
)
plt.title('物料类型的需求量')
plt.xlabel('物料类型')
plt.ylabel('需求量')
plt.show()

In [None]:
# Derive calendar features from the posting date ('过账日期').
# Both text columns are formatted straight from the datetime values instead of
# slicing the month out of the year-month string, so a missing date stays
# missing (NaN) rather than turning into the literal strings 'NaT' / 'aT'.
posting_date = df_demand['过账日期']
df_demand['年月'] = posting_date.dt.strftime('%Y-%m')  # year-month, e.g. '2023-07'
df_demand['月'] = posting_date.dt.strftime('%m')        # zero-padded month, '01'..'12'
df_demand['季度'] = posting_date.dt.quarter             # quarter number, 1..4
df_demand.head()

In [None]:
# Total demand for every (year-month, factory code, material code) combination.
df_grouped = (
    df_demand
    .groupby(['年月', '工厂编码', '物料编码'])['需求量']
    .sum()
    .reset_index()
)
df_grouped.head(5)

In [None]:
# df_grouped (built in the preceding cell) holds one row per
# (年月, 工厂编码, 物料编码). Plotting it directly makes seaborn average the
# per-material rows and draw a confidence band, which contradicts the
# "总需求量" (total demand) axis label. Sum over materials first so that each
# point is a factory's total demand for that year-month.
df_factory_totals = (
    df_grouped
    .groupby(['年月', '工厂编码'])['需求量']
    .sum()
    .reset_index()
)

plt.figure(figsize=(12, 6))

sns.lineplot(
    data=df_factory_totals,
    x='年月',
    y='需求量',
    hue='工厂编码',
    style='工厂编码',
    markers=True,
    dashes=False,
    estimator=None,  # data is already one row per point; draw it as-is
    palette='Set2',  # colour scheme
    linewidth=2.5,
    markersize=10
)

plt.title('各工厂年月需求量趋势', fontsize=14, pad=20)
plt.xlabel('年月', fontsize=12)
plt.ylabel('总需求量', fontsize=12)
plt.xticks(rotation=45)

# Place the legend outside the axes, to the right of the plot area.
plt.legend(
    title='工厂编码',
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.
)

# Adjust the layout so labels and the outside legend are not clipped.
plt.tight_layout()
plt.show()

In [None]:
# Total demand for every (month, factory code, material code) combination.
df_grouped = (
    df_demand
    .groupby(['月', '工厂编码', '物料编码'])['需求量']
    .sum()
    .reset_index()
)
df_grouped.head(5)

In [None]:
# df_grouped (built in the preceding cell) holds one row per
# (月, 工厂编码, 物料编码). Plotting it directly makes seaborn average the
# per-material rows and draw a confidence band, which contradicts the
# "总需求量" (total demand) axis label. Sum over materials first so that each
# point is a factory's total demand for that month.
df_factory_totals = (
    df_grouped
    .groupby(['月', '工厂编码'])['需求量']
    .sum()
    .reset_index()
)

plt.figure(figsize=(12, 6))

sns.lineplot(
    data=df_factory_totals,
    x='月',
    y='需求量',
    hue='工厂编码',
    style='工厂编码',
    markers=True,
    dashes=False,
    estimator=None,  # data is already one row per point; draw it as-is
    palette='Set2',  # colour scheme
    linewidth=2.5,
    markersize=10
)

plt.title('各工厂月度需求量趋势', fontsize=14, pad=20)
plt.xlabel('月', fontsize=12)
plt.ylabel('总需求量', fontsize=12)
plt.xticks(rotation=45)

# Place the legend outside the axes, to the right of the plot area.
plt.legend(
    title='工厂编码',
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.
)

# Adjust the layout so labels and the outside legend are not clipped.
plt.tight_layout()
plt.show()

In [None]:
# Total demand for every (quarter, factory code, material code) combination.
df_grouped = (
    df_demand
    .groupby(['季度', '工厂编码', '物料编码'])['需求量']
    .sum()
    .reset_index()
)
df_grouped.head(5)

In [None]:
# df_grouped (built in the preceding cell) holds one row per
# (季度, 工厂编码, 物料编码). Plotting it directly makes seaborn average the
# per-material rows and draw a confidence band, which contradicts the
# "总需求量" (total demand) axis label. Sum over materials first so that each
# point is a factory's total demand for that quarter.
df_factory_totals = (
    df_grouped
    .groupby(['季度', '工厂编码'])['需求量']
    .sum()
    .reset_index()
)

plt.figure(figsize=(12, 6))

sns.lineplot(
    data=df_factory_totals,
    x='季度',
    y='需求量',
    hue='工厂编码',
    style='工厂编码',
    markers=True,
    dashes=False,
    estimator=None,  # data is already one row per point; draw it as-is
    palette='Set2',  # colour scheme
    linewidth=2.5,
    markersize=10
)

plt.title('各工厂季度需求量趋势', fontsize=14, pad=20)
plt.xlabel('季度', fontsize=12)
plt.ylabel('总需求量', fontsize=12)
plt.xticks(rotation=45)

# Place the legend outside the axes, to the right of the plot area.
plt.legend(
    title='工厂编码',
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.
)

# Adjust the layout so labels and the outside legend are not clipped.
plt.tight_layout()
plt.show()