In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import datetime

In [None]:
import os

# Directory (relative to the working directory) where the figures are saved.
FOLDER_PATH = "graph_ver.3.0/"

# Make sure the output folder exists. exist_ok=True makes this a no-op when
# the folder is already there, so no separate existence check is needed.
# Only OSError is caught: the notebook keeps running (best effort), but the
# actual reason for the failure is reported instead of being swallowed.
try:
    os.makedirs(FOLDER_PATH, exist_ok=True)
except OSError as err:
    print(f"Error: could not create folder {FOLDER_PATH}: {err}")

# 지역 추출 전 데이터
---

In [None]:
# CSV with the wholesale data after EDA (used before region extraction).
FILE_PATH_EDA = "도매_EDA_완료_new_v3.0.csv"

# Read the file and discard the "Unnamed: 0" column in a single expression.
# NOTE(review): "Unnamed: 0" is presumably an index column written by an
# earlier to_csv() call - confirm against the file.
raw_data_eda = pd.read_csv(FILE_PATH_EDA).drop(columns="Unnamed: 0")

In [None]:
# Column names of the EDA dataset (not referenced by the cells shown here).
feature_list = [
    "DELNG_DE",
    "PBLMNG_WHSAL_MRKT_NM",
    "CPR_NM",
    "CPR_USE_SPCIES_NM",
    "PRICE",
    "DELNGBUNDLE_QY",
    "STNDRD",
    "DELNG_QY",
    "SANJI_NM",
]

# Independent copy so the loaded frame is never modified.
data_eda1 = raw_data_eda.copy()

# Keep only date, market name and price, and parse the yyyymmdd date values
# into real datetimes.
data_eda2 = data_eda1.loc[:, ["DELNG_DE", "PBLMNG_WHSAL_MRKT_NM", "PRICE"]].assign(
    DELNG_DE=lambda frame: pd.to_datetime(frame["DELNG_DE"], format="%Y%m%d")
)

In [None]:
# Create one DataFrame per calendar year and publish each of them as a
# notebook-level variable named data<year> (data2016, data2017, ...).
START_YEAR = 2016
year_list = [*range(START_YEAR, datetime.date.today().year + 1)]

for year in year_list:
    # Rows whose transaction date falls in this year, ordered by date.
    in_year = data_eda2["DELNG_DE"].dt.year.eq(year)
    temp_ds = data_eda2.loc[in_year].sort_values("DELNG_DE")
    globals()[f"data{year}"] = temp_ds

## 시장별 / 연도별 변동성
---

In [None]:
# Draw one daily price box plot per (year, market) pair and save each one as
# "<FOLDER_PATH><market>_<year>.png".

# Yearly frames to plot; data_list[0] is the frame for START_YEAR.
data_list = [data2016, data2017, data2018, data2019, data2020, data2021]
market_list = \
    ["서울가락도매", "구리도매시장", "부산엄궁도매", "서울강서도매"]

# The theme is the same for every figure, so it is configured once, before
# the first figure is created, instead of once per plot.
# NOTE(review): "AppleGothic" is presumably only available on macOS - confirm
# the font exists on the machine that runs this notebook.
sns.set_theme(
    style="whitegrid",
    font="AppleGothic",
    rc={"axes.unicode_minus": False},
)

for i, data in enumerate(data_list):
    # Year shown in the title and in the file name.
    year = START_YEAR + i

    # Replace the datetime by a zero-padded "MM/DD" label for the x axis.
    ds = data.copy()
    ds["DELNG_DE"] = ds["DELNG_DE"].dt.strftime("%m/%d")

    for market in market_list:
        ds_m = ds.loc[ds["PBLMNG_WHSAL_MRKT_NM"] == market].copy()

        # Keep the figure handle so the figure can be released explicitly.
        fig = plt.figure(figsize=(30, 15))
        sns.boxplot(
            x="DELNG_DE",
            y="PRICE",
            data=ds_m,
            whis=1.5,  # whisker length as a multiple of the IQR
            linewidth=0.5,  # line width of the box artists
            showfliers=False,  # hide outliers
            medianprops=dict(color="white", alpha=0.9, linewidth=1),  # median line
        )

        plt.title(
            f"{market}_{year}년",
            pad=20,
            fontsize=40,
            fontweight="heavy",
            color="gray"
        )
        plt.yticks(  # y tick labels
            fontsize=20,
            color="gray",
        )
        plt.xticks(  # x tick labels
            rotation=90,
            fontsize=6,
            color="gray",
        )
        plt.ylim(0, 10000)  # fixed y range so all plots are comparable
        plt.ylabel(
            "PRICE(₩)",
            labelpad=10,  # gap between axis and label
            fontsize=30,
            fontweight="bold",
            color="gray"
        )
        plt.xlabel(
            "DATES",
            labelpad=10,  # gap between axis and label
            fontsize=30,
            fontweight="bold",
            color="gray"
        )

        plt.savefig(
            f"{FOLDER_PATH}{market}_{year}.png",
            transparent=True,  # transparent background
            dpi=200,  # output resolution
        )

        # Show the figure in the notebook, then free it. Without the close,
        # all 24 large figures stay open until the cell finishes.
        plt.show()
        plt.close(fig)

---
---

# 지역 추출 후 데이터
---

In [None]:
# CSV with the wholesale data after region extraction.
FILE_PATH1 = "지역_추출_도매_데이터_new_v2.0.csv"

# The first CSV column is used as the DataFrame index.
raw_data = pd.read_csv(filepath_or_buffer=FILE_PATH1, index_col=0)

In [None]:
# Build the working frame without touching raw_data:
#   - DELNG_DE is parsed from yyyymmdd values into datetimes,
#   - COUNT is a constant 1 per row, used later to count transactions.
# NOTE(review): the original cell also ran
# rename(columns={"VOLUME": "VOLUME"}), which maps the column onto itself and
# therefore did nothing; it has been removed. If another source column was
# meant to be renamed to "VOLUME", add that rename here.
data1 = raw_data.assign(
    DELNG_DE=lambda frame: pd.to_datetime(frame["DELNG_DE"], format="%Y%m%d"),
    COUNT=1,
)

In [None]:
# Aggregations per source column, given as (output column name, function).
# The functions are pandas' string names rather than NumPy callables:
# passing np.mean / np.median / np.std / np.sum to agg() is deprecated
# (FutureWarning since pandas 2.1). Today pandas maps np.std to its own
# "std" (sample standard deviation, ddof=1); once the callable is used
# directly, np.std would compute ddof=0. The strings keep today's results.
agg_dict = {
    "PRICE": [
        ("PRICE_MAX", "max"),
        ("PRICE_MIN", "min"),
        ("PRICE_MEAN", "mean"),
        ("PRICE_MEDIAN", "median"),
        ("PRICE_STD", "std"),
    ],
    "VOLUME": [("VOLUME", "sum")],
    "COUNT": [("TRANSACTIONS", "count")],
}

# One row per (date, SANJI_NM) pair. agg() with a dict of lists yields
# two-level columns (source column, output name). reset_index(col_level=1)
# puts the group keys on the output-name level, and droplevel removes the
# source-column level, leaving flat column names.
g_data = (
    data1.groupby(["DELNG_DE", "SANJI_NM"])
    .agg(agg_dict)
    .reset_index(col_level=1)
    .droplevel(level=0, axis=1)
)