# Prerequisite

Following packages must be installed to run codes below

```bash
$ pip3 install openpyxl plotly jupyterlab>=3 ipywidgets>=7.6
```

# References
- [Pie charts in Python](https://plotly.com/python/pie-charts/)

# Add project path

In [1]:
import os
import sys
from pathlib import Path

PROJECT_PATH = Path().resolve().parent.parent
SRC_PATH = PROJECT_PATH / 'src'

sys.path.append(str(SRC_PATH))

# Load data

In [2]:
import pandas as pd
from project_paths import DATA_PATH
import os

xlsx_filepath = DATA_PATH / 'raw' / 'KOSIS'
xlsx_filepaths = [x for x in os.listdir(xlsx_filepath) if '.xlsx' in x]

## 지역, 업태별
- 9.8.2_사업자_현황Ⅱ_지역_업태_2005_20220830161041.xlsx
- 9.8.15_폐업자_현황_Ⅵ__지역__업태_2015_20220830161446.xlsx
- 9.8.13_폐업자_현황_Ⅳ__폐업사유__지역__업태_2005_20220830161418.xlsx
- 9.8.21_신규사업자_현황_Ⅴ_지역__업태_2015_20220830161604.xlsx

## 시, 군, 구별
- 9.8.8_가동사업자_현황Ⅲ__시·군·구_2008_20220830161151.xlsx
- 9.8.16__폐업자_현황_Ⅶ__시·군·구_2016_20220830161458.xlsx
- 9.8.22_신규사업자_현황_VI__시·군·구_2016_20220830161618.xlsx
  
## 월, 업종별
- 9.8.18_신규사업자_현황Ⅱ_월_업종_2012_20220830161523.xlsx
- 9.8.4_사업자_현황_Ⅳ_업종_2007_20220830161100.xlsx

# 지역, 업태별

In [3]:
class DataLoader:
    def __init__(self, data_path):
        self.data_path = data_path
    
    def load_total(self): 
        filename = '9.8.2_사업자_현황Ⅱ_지역_업태_2005_20220830161041.xlsx'
        filepath_abs = self.data_path / 'raw' / 'KOSIS' / filename
        data = (
            pd.read_excel(filepath_abs)
            .iloc[:, 1:]
            .set_index('시도․업태별(2)')
            .T
        )
        return data
    
    def load_closed(self):
        filename = '9.8.15_폐업자_현황_Ⅵ__지역__업태_2015_20220830161446.xlsx'
        filepath_abs = DATA_PATH / 'raw' / 'KOSIS' / filename
        data = (
            pd.read_excel(filepath_abs)
            .rename(columns={'사업자별(1)':'사업자별',
                            '업태별(1)':'업태별'})
            .assign(사업자별=lambda x : x['사업자별'].ffill())
            .assign(업태별=lambda x : x['사업자별']+'/'+x['업태별'])
            .iloc[:, 1:]
            .set_index('업태별')
            .T
        )
        return data
    
    def load_new(self):
        filename = '9.8.21_신규사업자_현황_Ⅴ_지역__업태_2015_20220830161604.xlsx'
        filepath_abs = DATA_PATH / 'raw' / 'KOSIS' / filename
        data = (
            pd.read_excel(filepath_abs)
            .rename(columns={'사업자별(1)':'사업자별',
                            '업태별(1)':'업태별'})
            .assign(사업자별=lambda x : x['사업자별'].ffill())
            .assign(업태별=lambda x : x['업태별'].ffill())
            .assign(업태별=lambda x : x['사업자별']+'/'+x['업태별'])
            .iloc[:, 1:]
            .set_index('업태별')
            .T
        )
        return data
    
data_loader = DataLoader(data_path=DATA_PATH)

In [4]:
city_names = ['서울', '인천', '경기',
       '강원', '대전', '충북', '충남', '세종', '광주', '전북', '전남', '대구', '경북', '부산', '울산',
       '경남', '제주']

sector_names = ['농ㆍ임ㆍ어업', '광업', '제조업', '전기ㆍ가스ㆍ수도업', '도매업', '소매업',
       '부동산매매업', '건설업', '음식업', '숙박업', '운수ㆍ창고ㆍ통신업', '부동산임대업', '대리ㆍ중개ㆍ도급업',
       '서비스업']

In [5]:
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe'

In [6]:
data = data_loader.load_closed()#.loc[:, city_names]
for column in data.columns : 
    try : 
        data.loc[:, column] = data.loc[:, column].str.replace('-', '0')
        data.loc[:, column] = data.loc[:, column].astype(int)
    except : 
        pass
    


Workbook contains no default style, apply openpyxl's default



In [7]:
company_types = ['법인사업자', '일반사업자', '간이사업자', '면세사업자']

In [8]:
company_type = company_types[0]

fig = px.pie(data.filter(regex=company_type).iloc[1:, 1:].sum().to_frame().reset_index(drop=False), values=0, names='업태별')
fig.show()

fig = px.line(data.filter(regex=company_type).iloc[1:, 1:])
fig.show()



Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [11]:
company_type = company_types[1]

In [15]:
fig = px.pie(data.filter(regex=company_type).iloc[1:, 1:].sum().to_frame().reset_index(drop=False), values=0, names='업태별')
fig.show()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [16]:
fig = px.line(data.filter(regex=company_type).iloc[1:, 1:])
fig.show()

In [17]:
company_type = company_types[2]

In [18]:
fig = px.pie(data.filter(regex=company_type).iloc[1:, 1:].sum().to_frame().reset_index(drop=False), values=0, names='업태별')
fig.show()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [24]:
company_type = company_types[3]

In [25]:
fig = px.pie(data.filter(regex=company_type).iloc[1:, 1:].sum().to_frame().reset_index(drop=False), values=0, names='업태별')
fig.show()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [26]:
fig = px.line(data.filter(regex=company_type).iloc[1:, 1:])
fig.show()