In [1]:
import requests
import pandas as pd

url = 'https://apis.data.go.kr/1480523/WaterQualityService/getWaterMeasuringList'
key = '8yJUJUA516mCT6EWW5qx4wczXHJvMSZEtuueDB3fhxlXBxkvLuvquPdwtP4CDBkmiHrbiKunquvt/K46xNulRw=='

In [4]:
# 2. 주소에 있던 파라미터 그대로 설정
params = {
    'serviceKey': key,
    'pageNo': '1',
    'numOfRows': '3000',
    'resultType': 'json',
    'ptNoList': '2022A10,2022A30', # 측정소 코드
    'wmyrList': '2022,2023,2024,2025',      # 연도
    'wmodList': '01,02,03,04,05,06,07,08,09,10,11,12'        # 월(01, 02, 03월)
}

try:
    # 3. API 요청
    response = requests.get(url, params=params, verify=True) # https 대응
    
    if response.status_code == 200:
        data = response.json()
        
        # 4. 데이터 추출 (구조: getWaterMeasuringList -> item)
        items = data.get('getWaterMeasuringList', {}).get('item', [])
        
        if items:
            df = pd.DataFrame(items)
            df.set_index('ROWNO',inplace=True)

            # 5. 주요 수질 항목 한글 이름으로 변경 (보기 좋게)
            rename_map = {
                'PT_NM': '총량지점명',
                'WMCYMD': '일자',
                'ITEM_TEMP': '수온(℃)',
                'ITEM_PH': '수소이온농도(ph)',
                'ITEM_EC': '전기전도도(μS/㎝)',
                'ITEM_DOC': '용존산소(㎎/L)',
                'ITEM_BOD': 'BOD(㎎/L)',
                'ITEM_COD': 'COD(㎎/L)',
                'ITEM_SS': '부유물질(㎎/L)',
                'ITEM_TN': '총질소(T-N)(㎎/L)',
                'ITEM_TP': '총인(T-P)(㎎/L)',
                'ITEM_TOC': '총유기탄소(TOC)(㎎/L)',
                'ITEM_AMNT': '유량(㎥/s)',
                'ITEM_CLOA': '클로로필a'
            }
            
            # 2. 필요한 컬럼만 추출하여 새 DF 생성
            df_analysis = df[list(rename_map.keys())].copy()
            df_analysis.rename(columns=rename_map, inplace=True)

            # 3. 데이터 타입 숫자형으로 변환 (연산 가능하게)
            num_cols = ['수온(℃)', '수소이온농도(ph)', '전기전도도(μS/㎝)', '용존산소(㎎/L)', 'BOD(㎎/L)', 'COD(㎎/L)', '부유물질(㎎/L)', '총질소(T-N)(㎎/L)', '총인(T-P)(㎎/L)', '총유기탄소(TOC)(㎎/L)', '유량(㎥/s)']
            df_analysis[num_cols] = df_analysis[num_cols].apply(pd.to_numeric, errors='coerce')

            # # 4. 날짜 데이터 형식 변환 (YYYYMMDD -> datetime)
            df_analysis['일자'] = pd.to_datetime(df_analysis['일자'], errors='coerce')

            print("--- 분석 준비 완료: 핵심 수질 지표 ---")
            print(df_analysis.head())
        else:
            print("응답은 성공했으나 데이터가 없습니다.")
    else:
        print(f"API 요청 실패: {response.status_code}")

except Exception as e:
    print(f"오류 발생: {e}")

--- 분석 준비 완료: 핵심 수질 지표 ---
      총량지점명         일자  수온(℃)  수소이온농도(ph)  전기전도도(μS/㎝)  용존산소(㎎/L)  BOD(㎎/L)  \
ROWNO                                                                         
1        물금 2022-01-25    4.2         7.5        481.0       14.9       1.7   
2        물금 2022-01-17    3.8         7.5        458.0       15.7       1.6   
3        물금 2022-01-11    4.6         7.5        445.0       15.7       2.1   
4        물금 2022-01-03    4.7         7.6        444.0       14.3       1.9   
5        물금 2022-02-07    3.8         7.6        481.0       14.6       2.1   

       COD(㎎/L)  부유물질(㎎/L)  총질소(T-N)(㎎/L)  총인(T-P)(㎎/L)  총유기탄소(TOC)(㎎/L)  \
ROWNO                                                                      
1           4.8        2.4          2.991         0.030              3.0   
2           4.8        2.0          2.882         0.035              3.0   
3           4.9        3.6          2.613         0.033              3.3   
4           5.5        3.2          2.4

In [5]:
df_analysis

Unnamed: 0_level_0,총량지점명,일자,수온(℃),수소이온농도(ph),전기전도도(μS/㎝),용존산소(㎎/L),BOD(㎎/L),COD(㎎/L),부유물질(㎎/L),총질소(T-N)(㎎/L),총인(T-P)(㎎/L),총유기탄소(TOC)(㎎/L),유량(㎥/s),클로로필a
ROWNO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,물금,2022-01-25,4.2,7.5,481.0,14.9,1.7,4.8,2.4,2.991,0.030,3.0,64.864,9.1
2,물금,2022-01-17,3.8,7.5,458.0,15.7,1.6,4.8,2.0,2.882,0.035,3.0,74.478,10.1
3,물금,2022-01-11,4.6,7.5,445.0,15.7,2.1,4.9,3.6,2.613,0.033,3.3,86.129,12.8
4,물금,2022-01-03,4.7,7.6,444.0,14.3,1.9,5.5,3.2,2.496,0.039,3.2,59.416,19.5
5,물금,2022-02-07,3.8,7.6,481.0,14.6,2.1,5.2,3.2,2.980,0.032,3.1,37.047,16.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,금곡,2025-09-23,25.4,7.7,285.0,7.8,1.3,6.9,6.4,1.928,0.055,3.6,357.260,17.4
316,금곡,2025-09-09,28.6,8.6,219.0,7.6,1.7,8.6,9.6,1.579,0.059,6.1,340.400,52.6
317,금곡,2025-10-13,23.3,8.4,288.0,9.0,1.6,7.0,7.6,1.858,0.041,3.4,443.810,39.5
318,금곡,2025-10-27,19.0,7.8,192.0,9.5,1.1,6.4,6.4,2.719,0.082,3.2,443.180,20.4


In [4]:
df1 = df_analysis[df_analysis['유량(㎥/s)'].notna()]
df1

Unnamed: 0_level_0,총량지점명,일자,수온(℃),수소이온농도(ph),전기전도도(μS/㎝),용존산소(㎎/L),BOD(㎎/L),COD(㎎/L),부유물질(㎎/L),총질소(T-N)(㎎/L),총인(T-P)(㎎/L),총유기탄소(TOC)(㎎/L),유량(㎥/s)
ROWNO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,금곡,2022.01.25,4.3,7.6,471.0,15.2,2.4,5.2,2.8,3.035,0.028,3.1,65.36
2,금곡,2022.01.17,3.2,7.3,449.0,15.5,1.7,5.2,2.8,3.052,0.037,3.4,74.93
3,금곡,2022.01.11,4.7,7.5,447.0,15.8,2.2,5.9,5.2,2.592,0.030,3.8,86.70
4,금곡,2022.02.21,5.0,7.4,496.0,13.4,1.5,5.6,4.0,2.762,0.033,3.5,36.43
5,금곡,2022.02.15,5.7,8.0,488.0,13.8,2.4,5.6,5.6,3.035,0.029,3.6,70.82
...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,금곡,2025.09.23,25.4,7.7,285.0,7.8,1.3,6.9,6.4,1.928,0.055,3.6,357.26
137,금곡,2025.09.09,28.6,8.6,219.0,7.6,1.7,8.6,9.6,1.579,0.059,6.1,340.40
138,금곡,2025.10.13,23.3,8.4,288.0,9.0,1.6,7.0,7.6,1.858,0.041,3.4,443.81
139,금곡,2025.10.27,19.0,7.8,192.0,9.5,1.1,6.4,6.4,2.719,0.082,3.2,443.18


In [5]:
df2 = pd.read_csv('data/nakdong_water_quality.csv', encoding='euc-kr')
df2

Unnamed: 0,총량지점명,일자,수온(℃),수소이온농도(ph),전기전도도(μS/㎝),용존산소(㎎/L),BOD(㎎/L),COD(㎎/L),부유물질(㎎/L),총질소(T-N)(㎎/L),총인(T-P)(㎎/L),총유기탄소(TOC)(㎎/L),유량(㎥/s)
0,낙본M,2020.12.04,9.7,7.7,441,11.1,1.3,5.0,5.2,2.263,0.024,3.3,185.830
1,낙본N,2020.12.04,6.8,7.5,388,11.8,1.3,5.4,11.6,2.955,0.056,3.8,28.922
2,낙본M,2020.12.09,8.9,7.3,357,11.1,0.9,4.8,3.6,2.370,0.024,3.6,179.490
3,낙본N,2020.12.09,5.5,7.2,510,12.4,2.0,5.7,7.6,3.019,0.037,4.2,0.000
4,낙본M,2021.01.20,3.1,7.4,498,14.4,1.8,5.4,6.4,2.988,0.024,4.0,89.020
...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,낙본N,2025.10.13,23.8,8.9,996,11.4,4.0,12.3,14.4,1.909,0.050,6.6,0.000
364,낙본M,2025.10.21,20.6,7.8,257,8.5,1.3,6.2,10.0,2.663,0.069,4.7,365.147
365,낙본N,2025.10.21,19.7,7.5,940,8.2,2.5,9.3,10.4,2.489,0.047,7.4,0.000
366,낙본M,2025.10.27,19.3,8.2,272,9.7,1.4,7.0,8.4,2.661,0.080,3.9,208.869


In [11]:
waterDF = pd.concat([df1, df2], axis=0)
waterDF['일자'] = pd.to_datetime(waterDF['일자'], errors='coerce')

waterDF

Unnamed: 0,총량지점명,일자,수온(℃),수소이온농도(ph),전기전도도(μS/㎝),용존산소(㎎/L),BOD(㎎/L),COD(㎎/L),부유물질(㎎/L),총질소(T-N)(㎎/L),총인(T-P)(㎎/L),총유기탄소(TOC)(㎎/L),유량(㎥/s)
1,금곡,2022-01-25,4.3,7.6,471.0,15.2,2.4,5.2,2.8,3.035,0.028,3.1,65.360
2,금곡,2022-01-17,3.2,7.3,449.0,15.5,1.7,5.2,2.8,3.052,0.037,3.4,74.930
3,금곡,2022-01-11,4.7,7.5,447.0,15.8,2.2,5.9,5.2,2.592,0.030,3.8,86.700
4,금곡,2022-02-21,5.0,7.4,496.0,13.4,1.5,5.6,4.0,2.762,0.033,3.5,36.430
5,금곡,2022-02-15,5.7,8.0,488.0,13.8,2.4,5.6,5.6,3.035,0.029,3.6,70.820
...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,낙본N,2025-10-13,23.8,8.9,996.0,11.4,4.0,12.3,14.4,1.909,0.050,6.6,0.000
364,낙본M,2025-10-21,20.6,7.8,257.0,8.5,1.3,6.2,10.0,2.663,0.069,4.7,365.147
365,낙본N,2025-10-21,19.7,7.5,940.0,8.2,2.5,9.3,10.4,2.489,0.047,7.4,0.000
366,낙본M,2025-10-27,19.3,8.2,272.0,9.7,1.4,7.0,8.4,2.661,0.080,3.9,208.869
