**필수 라이브러리**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

**matplotlib 한글 설정**

- 운영체제에 따른 한글 지원 설정. 윈도우, 우분투, 구글 코랩 지원.
- 참고: [matplotlib에서 한글 지원하기](https://github.com/codingalzi/datapy/blob/master/matplotlib-korean.md)

In [2]:
import platform

# Configure a Korean-capable matplotlib font for the current operating system.
# Only Windows and Linux (Ubuntu / Google Colab) are handled; on any other
# platform no font setting is changed.
if platform.system() == 'Windows':  # Windows
    from matplotlib import font_manager, rc
    font_path = "C:/Windows/Fonts/NGULIM.TTF"
    # Resolve the family name from the font file so rc() can refer to it by name.
    font = font_manager.FontProperties(fname=font_path).get_name()
    rc('font', family=font)
    # Match the Linux branch: draw minus signs with the ASCII hyphen so that
    # negative tick labels do not depend on the font having a U+2212 glyph.
    rc('axes', unicode_minus=False)
elif platform.system() == 'Linux':  # Ubuntu or Google Colab
    # One-time setup: run the commented-out commands below once to install
    # the Nanum fonts and refresh the font cache.
#     if 'google.colab' in str(get_ipython()):
#         !apt-get install -y fonts-nanum*
#     else:
#         !sudo apt-get install -y fonts-nanum*
#     !fc-cache -fv

    applyfont = "NanumBarunGothic"
    import matplotlib.font_manager as fm
    # Register the font file only if matplotlib does not already list the family.
    if not any(ft.name == applyfont for ft in fm.fontManager.ttflist):
        fm.fontManager.addfont("/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf")
    plt.rc("font", family=applyfont)
    plt.rc("axes", unicode_minus=False)
    

**데이터**

데이터는 연도별로 정리되어 있음.

In [3]:
# Base URL of the GitHub folder that hosts the data files; prepend it to a
# file name to load that file remotely instead of from a local copy.
base_url = "https://github.com/codingalzi/water-data/raw/master/reservoirs/"

**승촌보-광산 (엑셀) 자료를 데이터프레임으로 불러오기**

- `header=0`: 0번 행을 header로 지정, 즉 열 인덱스로 사용.
- `sheet_name=None`: 모든 워크시트 가져오기. 워크시트별로 하나의 df 생성. 반환값은 사전. (주의: 아래 코드에서는 이 옵션을 지정하지 않으므로 기본값에 따라 첫 번째 워크시트만 하나의 df로 불러온다.)
- `na_values=0`: 0으로 입력된 값도 결측치로 처리
- `index_col=1`: 측정일을 행 인덱스로 사용
- `parse_dates=True`: 행 인덱스로 사용되는 날짜 대상 파싱 실행

주의: 아래 모듈을 먼저 설치해야 할 수도 있다.

```python
!pip install openpyxl
```

로컬 데이터 불러오기

- 인터넷에 연결되어 있다면 파일명 앞에 `base_url`을 붙여 원격 저장소에서 직접 불러올 수 있음.

In [4]:
# Load the workbook from the current working directory.
# Remote alternative: pass base_url + "Seungchonbo-Gwangsan.xlsx" as the path.
scb_gw = pd.read_excel(
    "Seungchonbo-Gwangsan.xlsx",
    header=0,          # row 0 supplies the column labels
    na_values=0,       # values entered as 0 are read as missing
    index_col=1,       # column 1 becomes the row index
    parse_dates=True,  # parse the row index as dates
)

측정소와 횟수 삭제

In [5]:
# Discard the first two columns by position and keep everything after them.
# NOTE: this is positional, so re-running the cell removes two more columns.
scb_gw = scb_gw.drop(columns=scb_gw.columns[:2])

특성 종류

In [6]:
# Inspect the column labels currently present in the frame.
scb_gw.columns

Index(['수온(℃)', 'DO(㎎/L)', 'BOD(㎎/L)', 'COD(㎎/L)', '클로로필 a(㎎/㎥)', 'TN(㎎/L)',
       'TP(㎎/L)', 'TOC(㎎/L)', '수소이온농도', '페놀류(㎎/L)', '전기전도도(μS/㎝)',
       '총대장균군수(총대장균군수/100ml)', '용존총질소(㎎/L)', '암모니아성 질소(㎎/L)', '질산성 질소(㎎/L)',
       '용존총인(㎎/L)', '인산염인(㎎/L)', 'SS(㎎/L)', '분원성대장균군수', '유량(㎥/s)'],
      dtype='object')

페놀류는 모두 0으로 되어 있어서 삭제 필요

In [7]:
# Remove the phenol column. `errors="ignore"` keeps the cell re-runnable:
# once the column is gone, a second run is a no-op instead of a KeyError.
# drop() already returns a new frame, so no extra .copy() is needed.
scb_gw = scb_gw.drop(columns=['페놀류(㎎/L)'], errors="ignore")

In [8]:
# Display the frame to confirm the phenol column is gone.
scb_gw

Unnamed: 0_level_0,수온(℃),DO(㎎/L),BOD(㎎/L),COD(㎎/L),클로로필 a(㎎/㎥),TN(㎎/L),TP(㎎/L),TOC(㎎/L),수소이온농도,전기전도도(μS/㎝),총대장균군수(총대장균군수/100ml),용존총질소(㎎/L),암모니아성 질소(㎎/L),질산성 질소(㎎/L),용존총인(㎎/L),인산염인(㎎/L),SS(㎎/L),분원성대장균군수,유량(㎥/s)
년/월/일,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-01-04,5.9,12.7,5.8,8.5,66.9,9.644,0.272,7.2,7.0,631,3400,9.393,5.148,3.405,0.097,0.074,10.7,820,10.306
2021-01-14,5.3,13.5,4.7,8.6,47.4,10.516,0.120,5.1,7.4,508,4800,10.511,6.200,3.350,0.027,0.007,10.9,190,13.200
2021-01-19,5.2,12.4,3.9,7.7,49.4,10.447,0.218,4.0,7.2,478,3400,9.364,6.558,2.790,0.065,0.063,8.3,660,12.458
2021-01-25,8.7,9.0,4.6,8.4,51.4,8.090,0.305,5.6,7.1,500,21000,8.066,5.866,2.183,0.215,0.162,9.7,1200,12.248
2021-02-01,7.7,12.1,4.8,8.7,63.5,9.422,0.150,5.3,7.2,484,5600,8.830,5.695,3.112,0.032,0.009,12.5,800,31.740
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2006-08-10,28.8,5.7,4.8,6.8,0.4,12.864,0.301,,7.9,226,19700,10.260,0.018,0.035,0.251,0.197,13.2,3000,
2006-09-14,22.0,7.6,6.2,4.0,0.3,6.972,0.249,,7.9,220,29000,5.916,0.013,5.286,0.211,0.208,9.2,5400,
2006-10-13,20.0,8.4,4.0,4.0,0.3,12.734,0.488,,8.2,185,29000,11.892,0.355,8.363,0.457,0.420,9.2,5400,
2006-11-09,18.0,8.2,5.9,6.6,2.0,13.626,0.892,,7.5,390,500,13.553,0.014,9.374,0.595,0.430,0.4,1600,


측정일 기준으로 오름차순으로 정렬

In [9]:
# Order the rows by the row index (ascending, the sort_index default).
# Reassigning instead of using inplace=True leaves no object mutated behind
# the reader's back and gives the same result on every re-run.
scb_gw = scb_gw.sort_index(axis=0)

In [10]:
# Display the frame again to check the ascending date order.
scb_gw

Unnamed: 0_level_0,수온(℃),DO(㎎/L),BOD(㎎/L),COD(㎎/L),클로로필 a(㎎/㎥),TN(㎎/L),TP(㎎/L),TOC(㎎/L),수소이온농도,전기전도도(μS/㎝),총대장균군수(총대장균군수/100ml),용존총질소(㎎/L),암모니아성 질소(㎎/L),질산성 질소(㎎/L),용존총인(㎎/L),인산염인(㎎/L),SS(㎎/L),분원성대장균군수,유량(㎥/s)
년/월/일,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2005-12-28,4.2,12.6,6.9,4.0,4.2,14.244,0.856,,6.9,884,300,12.672,0.035,11.530,0.670,0.658,16.0,100,
2006-01-25,6.9,13.5,7.4,4.1,17.3,15.108,0.558,,7.6,3,280,11.304,0.065,5.338,0.496,0.492,24.8,700,
2006-02-24,8.3,11.0,5.9,2.8,2.0,11.880,0.473,,7.2,223,280,11.340,0.070,7.301,0.412,0.389,16.8,700,
2006-03-24,13.6,5.8,9.2,5.7,18.3,14.880,1.002,,8.1,417,180,12.950,0.024,11.574,0.916,0.684,9.2,100,
2006-04-28,16.8,9.5,11.7,6.0,17.7,12.070,1.850,,8.1,539,150,10.699,0.033,11.397,1.727,1.054,34.4,9,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-22,13.3,9.4,4.1,8.3,52.2,4.738,0.097,5.1,6.9,384,2500,4.519,1.253,2.526,0.042,0.033,6.3,570,8.454
2021-11-29,11.6,8.2,4.0,8.0,28.7,6.322,0.206,5.7,6.9,430,1100,6.155,2.643,2.473,0.157,0.149,6.4,120,7.830
2021-12-06,9.3,10.4,4.9,7.1,42.5,5.166,0.191,5.3,7.0,413,6600,4.905,1.826,2.495,0.089,0.086,8.8,640,13.455
2021-12-13,10.4,10.7,5.5,8.9,58.3,6.337,0.158,6.0,7.1,448,4600,5.972,2.735,2.446,0.043,0.027,19.1,510,10.901


In [11]:
# Print the index range plus the non-null count and dtype of every column.
scb_gw.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 775 entries, 2005-12-28 to 2021-12-20
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   수온(℃)                 775 non-null    float64
 1   DO(㎎/L)               775 non-null    float64
 2   BOD(㎎/L)              775 non-null    float64
 3   COD(㎎/L)              775 non-null    float64
 4   클로로필 a(㎎/㎥)           775 non-null    float64
 5   TN(㎎/L)               775 non-null    float64
 6   TP(㎎/L)               775 non-null    float64
 7   TOC(㎎/L)              695 non-null    float64
 8   수소이온농도                775 non-null    float64
 9   전기전도도(μS/㎝)           775 non-null    int64  
 10  총대장균군수(총대장균군수/100ml)  775 non-null    int64  
 11  용존총질소(㎎/L)            775 non-null    float64
 12  암모니아성 질소(㎎/L)         775 non-null    float64
 13  질산성 질소(㎎/L)           774 non-null    float64
 14  용존총인(㎎/L)             775 non-null    float64
 15  인산염인

유량 특성은 233개의 결측치를 포함한다. (위 `info()` 결과를 보면 TOC와 질산성 질소 특성에도 결측치가 일부 있다.)

In [12]:
# Boolean Series: True on the rows whose flow-rate value is missing.
mask = scb_gw.loc[:, '유량(㎥/s)'].isna()

# Count of rows with a missing flow rate (True counts as 1).
mask.sum()

233

In [13]:
# Show only the rows flagged by the mask, i.e. those without a flow-rate value.
scb_gw.loc[mask]

Unnamed: 0_level_0,수온(℃),DO(㎎/L),BOD(㎎/L),COD(㎎/L),클로로필 a(㎎/㎥),TN(㎎/L),TP(㎎/L),TOC(㎎/L),수소이온농도,전기전도도(μS/㎝),총대장균군수(총대장균군수/100ml),용존총질소(㎎/L),암모니아성 질소(㎎/L),질산성 질소(㎎/L),용존총인(㎎/L),인산염인(㎎/L),SS(㎎/L),분원성대장균군수,유량(㎥/s)
년/월/일,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2005-12-28,4.2,12.6,6.9,4.0,4.2,14.244,0.856,,6.9,884,300,12.672,0.035,11.530,0.670,0.658,16.0,100,
2006-01-25,6.9,13.5,7.4,4.1,17.3,15.108,0.558,,7.6,3,280,11.304,0.065,5.338,0.496,0.492,24.8,700,
2006-02-24,8.3,11.0,5.9,2.8,2.0,11.880,0.473,,7.2,223,280,11.340,0.070,7.301,0.412,0.389,16.8,700,
2006-03-24,13.6,5.8,9.2,5.7,18.3,14.880,1.002,,8.1,417,180,12.950,0.024,11.574,0.916,0.684,9.2,100,
2006-04-28,16.8,9.5,11.7,6.0,17.7,12.070,1.850,,8.1,539,150,10.699,0.033,11.397,1.727,1.054,34.4,9,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-12-05,8.1,13.4,1.8,5.2,4.0,4.867,0.408,3.8,7.4,428,4400,4.702,0.534,3.289,0.363,0.360,4.9,1600,
2011-12-14,8.3,13.8,3.3,6.2,15.4,6.262,0.424,4.7,7.1,474,4500,6.094,1.106,3.891,0.396,0.387,6.1,323,
2011-12-21,7.1,11.8,3.1,6.4,12.1,7.850,0.660,5.3,6.5,468,7800,7.769,1.932,4.258,0.593,0.530,5.6,2300,
2019-09-03,24.3,7.7,3.8,6.4,40.7,3.690,0.078,4.9,7.3,280,7900,3.456,1.444,1.526,0.036,0.035,8.7,5700,


유량 결측치가 없는 데이터

In [14]:
# Keep only the rows that have a flow-rate value. dropna(subset=...) selects
# the same rows as filtering with the inverted missing-value mask, but it does
# not depend on a mask computed from an earlier version of the frame, so the
# cell can be re-run safely.
scb_gw = scb_gw.dropna(subset=['유량(㎥/s)'])

In [15]:
# Display the remaining rows, all of which have a flow-rate value.
scb_gw

Unnamed: 0_level_0,수온(℃),DO(㎎/L),BOD(㎎/L),COD(㎎/L),클로로필 a(㎎/㎥),TN(㎎/L),TP(㎎/L),TOC(㎎/L),수소이온농도,전기전도도(μS/㎝),총대장균군수(총대장균군수/100ml),용존총질소(㎎/L),암모니아성 질소(㎎/L),질산성 질소(㎎/L),용존총인(㎎/L),인산염인(㎎/L),SS(㎎/L),분원성대장균군수,유량(㎥/s)
년/월/일,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2011-01-05,6.0,12.6,5.1,8.6,26.7,9.512,0.771,6.2,6.3,474,6261,9.498,4.962,4.401,0.615,0.612,21.9,548,12.336
2011-01-19,4.0,12.3,4.9,8.5,16.9,10.802,0.479,7.0,7.3,541,16882,10.475,6.866,3.515,0.321,0.306,23.0,2677,15.096
2011-01-24,5.0,12.5,6.2,9.1,10.1,10.780,0.653,7.0,7.3,540,24125,10.417,7.420,2.962,0.540,0.511,29.7,6681,11.697
2011-01-31,4.0,11.3,6.3,9.3,10.8,10.810,0.380,7.2,7.0,537,4800,10.577,7.603,2.861,0.377,0.372,21.8,245,14.704
2011-02-08,7.0,10.1,5.9,9.8,25.7,10.625,0.561,6.8,7.2,466,43359,9.513,7.374,2.115,0.365,0.355,37.6,4109,14.325
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-22,13.3,9.4,4.1,8.3,52.2,4.738,0.097,5.1,6.9,384,2500,4.519,1.253,2.526,0.042,0.033,6.3,570,8.454
2021-11-29,11.6,8.2,4.0,8.0,28.7,6.322,0.206,5.7,6.9,430,1100,6.155,2.643,2.473,0.157,0.149,6.4,120,7.830
2021-12-06,9.3,10.4,4.9,7.1,42.5,5.166,0.191,5.3,7.0,413,6600,4.905,1.826,2.495,0.089,0.086,8.8,640,13.455
2021-12-13,10.4,10.7,5.5,8.9,58.3,6.337,0.158,6.0,7.1,448,4600,5.972,2.735,2.446,0.043,0.027,19.1,510,10.901
