# 서울시 기간별 시간평균 대기환경 정보 데이터 분석
- 데이터 제공 (서울 열린데이터 광장, 서울시 기간별 시간평균 대기환경 정보)
- > https://data.seoul.go.kr/dataList/datasetView.do?infId=OA-2221&srvType=S&serviceKind=1&currentPageNo=1

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

### 대기환경 정보 데이터를 불러옵니다.

In [3]:
# Load the air-quality CSV into df; the file is decoded as CP949.
csv_path = './data/서울시 기간별 시간평균 대기환경 정보.csv'
df = pd.read_csv(csv_path, encoding='cp949')

# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,시간,권역코드,권역명,측정소코드,측정소명,미세먼지 1시간(㎍/㎥),미세먼지 24시간(㎍/㎥),초미세먼지(㎍/㎥),오존(ppm),이산화질소농도(ppm),일산화탄소농도(ppm),아황산가스농도(ppm)
0,201908161200,100,도심권,111123,종로구,15,12,9,0.022,0.018,0.4,0.004
1,201908161200,100,도심권,111131,용산구,13,11,9,0.029,0.018,0.4,0.003
2,201908161200,100,도심권,111121,중구,16,12,9,0.024,0.018,0.1,0.002
3,201908161200,101,서북권,111201,마포구,0,0,13,0.027,0.012,0.6,0.004
4,201908161200,101,서북권,111181,은평구,20,18,13,0.027,0.017,0.4,0.004


In [4]:
# Inspect the column labels of df.

df.columns

Index(['시간', '권역코드', '권역명', '측정소코드', '측정소명', '미세먼지 1시간(㎍/㎥)', '미세먼지 24시간(㎍/㎥)',
       '초미세먼지(㎍/㎥)', '오존(ppm)', '이산화질소농도(ppm)', '일산화탄소농도(ppm)',
       '아황산가스농도(ppm)'],
      dtype='object')

In [6]:
# Rename the '시간' column to '측정일시' (df is modified in place),
# then preview the first rows to confirm the new label.

column_renames = {'시간': '측정일시'}
df.rename(columns=column_renames, inplace=True)
df.head()

Unnamed: 0,측정일시,권역코드,권역명,측정소코드,측정소명,미세먼지 1시간(㎍/㎥),미세먼지 24시간(㎍/㎥),초미세먼지(㎍/㎥),오존(ppm),이산화질소농도(ppm),일산화탄소농도(ppm),아황산가스농도(ppm)
0,201908161200,100,도심권,111123,종로구,15,12,9,0.022,0.018,0.4,0.004
1,201908161200,100,도심권,111131,용산구,13,11,9,0.029,0.018,0.4,0.003
2,201908161200,100,도심권,111121,중구,16,12,9,0.024,0.018,0.1,0.002
3,201908161200,101,서북권,111201,마포구,0,0,13,0.027,0.012,0.6,0.004
4,201908161200,101,서북권,111181,은평구,20,18,13,0.027,0.017,0.4,0.004


In [7]:
# Print a summary of df (index range, per-column non-null counts and dtypes).

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36925 entries, 0 to 36924
Data columns (total 12 columns):
측정일시              36925 non-null int64
권역코드              36925 non-null int64
권역명               36925 non-null object
측정소코드             36925 non-null int64
측정소명              36925 non-null object
미세먼지 1시간(㎍/㎥)     36925 non-null int64
미세먼지 24시간(㎍/㎥)    36925 non-null int64
초미세먼지(㎍/㎥)        36925 non-null int64
오존(ppm)           36464 non-null float64
이산화질소농도(ppm)      36595 non-null float64
일산화탄소농도(ppm)      36472 non-null float64
아황산가스농도(ppm)      36580 non-null float64
dtypes: float64(4), int64(6), object(2)
memory usage: 3.4+ MB


In [91]:
# Convert the '측정일시' column to datetime by parsing its values with the
# '%Y%m%d%H%M' pattern, then print df's summary again to confirm the dtype.

measured_at = pd.to_datetime(df['측정일시'], format='%Y%m%d%H%M')
df['측정일시'] = measured_at
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36925 entries, 0 to 36924
Data columns (total 12 columns):
측정일시              36925 non-null datetime64[ns]
권역코드              36925 non-null int64
권역명               36925 non-null object
측정소코드             36925 non-null int64
측정소명              36925 non-null object
미세먼지 1시간(㎍/㎥)     36925 non-null int64
미세먼지 24시간(㎍/㎥)    36925 non-null int64
초미세먼지(㎍/㎥)        36925 non-null int64
오존(ppm)           36464 non-null float64
이산화질소농도(ppm)      36595 non-null float64
일산화탄소농도(ppm)      36472 non-null float64
아황산가스농도(ppm)      36580 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(5), object(2)
memory usage: 3.4+ MB


In [9]:
# Count the missing values in each column of df.

null_counts = df.isnull().sum()
null_counts

측정일시                0
권역코드                0
권역명                 0
측정소코드               0
측정소명                0
미세먼지 1시간(㎍/㎥)       0
미세먼지 24시간(㎍/㎥)      0
초미세먼지(㎍/㎥)          0
오존(ppm)           461
이산화질소농도(ppm)      330
일산화탄소농도(ppm)      453
아황산가스농도(ppm)      345
dtype: int64

In [10]:
# Show only '측정일시', '측정소코드' and '오존(ppm)' for the rows whose
# '오존(ppm)' value is missing.

# Boolean mask: True where '오존(ppm)' is null.
null_index = df['오존(ppm)'].isnull()
ozone_columns = ['측정일시', '측정소코드', '오존(ppm)']
df.loc[null_index, ozone_columns]

Unnamed: 0,측정일시,측정소코드,오존(ppm)
1699,201908131700,111273,
1722,201908131600,111273,
1732,201908131500,111171,
1747,201908131500,111273,
1758,201908131400,111171,
...,...,...,...
35962,201906171400,111142,
35970,201906171400,111251,
35987,201906171300,111152,
36047,201906171100,111262,


In [94]:
# Replace every missing value in df with 0.
# The original cell only displayed df and never filled the NaNs, so the
# missing values were still present afterwards.
# NOTE: once filled, a 0 in a previously-null column no longer distinguishes
# "no measurement" from a measured value of zero.

df = df.fillna(0)

df

Unnamed: 0,측정일시,권역코드,권역명,측정소코드,측정소명,미세먼지 1시간(㎍/㎥),미세먼지 24시간(㎍/㎥),초미세먼지(㎍/㎥),오존(ppm),이산화질소농도(ppm),일산화탄소농도(ppm),아황산가스농도(ppm)
0,2019-08-16 12:00:00,100,도심권,111123,종로구,15,12,9,0.022,0.018,0.4,0.004
1,2019-08-16 12:00:00,100,도심권,111131,용산구,13,11,9,0.029,0.018,0.4,0.003
2,2019-08-16 12:00:00,100,도심권,111121,중구,16,12,9,0.024,0.018,0.1,0.002
3,2019-08-16 12:00:00,101,서북권,111201,마포구,0,0,13,0.027,0.012,0.6,0.004
4,2019-08-16 12:00:00,101,서북권,111181,은평구,20,18,13,0.027,0.017,0.4,0.004
...,...,...,...,...,...,...,...,...,...,...,...,...
36920,2019-06-16 00:00:00,103,서남권,111241,동작구,22,27,12,0.021,0.029,0.4,0.002
36921,2019-06-16 00:00:00,104,동남권,111262,서초구,19,27,14,0.038,0.012,0.2,0.003
36922,2019-06-16 00:00:00,104,동남권,111273,송파구,27,31,15,0.016,0.032,0.4,0.002
36923,2019-06-16 00:00:00,104,동남권,111261,강남구,16,21,9,0.030,0.013,0.3,0.005


In [11]:
# Sort df by '초미세먼지(㎍/㎥)' from highest to lowest and show the first 10 rows.

by_ultrafine_desc = df.sort_values(by=['초미세먼지(㎍/㎥)'], ascending=False)
by_ultrafine_desc.head(10)

Unnamed: 0,측정일시,권역코드,권역명,측정소코드,측정소명,미세먼지 1시간(㎍/㎥),미세먼지 24시간(㎍/㎥),초미세먼지(㎍/㎥),오존(ppm),이산화질소농도(ppm),일산화탄소농도(ppm),아황산가스농도(ppm)
17933,201907171500,102,동북권,111142,성동구,128,76,98,0.057,0.043,0.6,0.005
17860,201907171800,102,동북권,111142,성동구,107,90,92,0.093,0.026,0.6,0.008
17883,201907171700,102,동북권,111142,성동구,115,88,91,0.084,0.03,0.6,0.006
17475,201907180900,100,도심권,111131,용산구,0,66,91,0.022,0.035,0.7,0.004
17908,201907171600,102,동북권,111142,성동구,118,83,90,0.085,0.031,0.6,0.005
17461,201907181000,102,동북권,111142,성동구,117,83,90,0.031,0.038,0.7,0.006
17491,201907180900,103,서남권,111241,동작구,99,80,87,0.012,0.033,0.6,0.004
17517,201907180800,103,서남권,111241,동작구,105,78,87,0.008,0.037,0.6,0.003
17436,201907181100,102,동북권,111142,성동구,92,83,86,0.057,0.034,0.7,0.005
17443,201907181100,103,서남권,111212,강서구,105,68,86,0.052,0.032,0.7,0.005


In [12]:
# Sort df by '미세먼지 24시간(㎍/㎥)' from lowest to highest and show the last
# 10 rows (sort_values sorts in ascending order by default).

by_dust_24h_asc = df.sort_values(by='미세먼지 24시간(㎍/㎥)')
by_dust_24h_asc.tail(10)

Unnamed: 0,측정일시,권역코드,권역명,측정소코드,측정소명,미세먼지 1시간(㎍/㎥),미세먼지 24시간(㎍/㎥),초미세먼지(㎍/㎥),오존(ppm),이산화질소농도(ppm),일산화탄소농도(ppm),아황산가스농도(ppm)
17785,201907172100,102,동북권,111142,성동구,80,86,72,0.064,0.02,0.5,0.005
17684,201907180100,102,동북권,111142,성동구,94,86,79,0.03,0.035,0.7,0.004
17657,201907180200,102,동북권,111142,성동구,98,86,86,0.028,0.032,0.7,0.003
17813,201907172000,102,동북권,111142,성동구,92,86,75,0.065,0.028,0.6,0.007
17447,201907181100,104,동남권,111261,강남구,84,87,61,0.033,0.03,0.7,0.004
17709,201907180000,102,동북권,111142,성동구,93,87,80,0.044,0.022,0.6,0.004
17424,201907181200,104,동남권,111261,강남구,83,88,65,0.046,0.029,0.7,0.006
17883,201907171700,102,동북권,111142,성동구,115,88,91,0.084,0.03,0.6,0.006
17832,201907171900,102,동북권,111142,성동구,92,88,83,0.084,0.026,0.6,0.008
17860,201907171800,102,동북권,111142,성동구,107,90,92,0.093,0.026,0.6,0.008
