# Air Pollution in Seoul

### Air Pollution Measurement Information in Seoul, Korea

https://www.airkorea.or.kr/web/realSearch?pMENU_NO=97

![image](../../Images/AirPollution.png)

### Library & Data Import

In [1]:
import pandas as pd

In [2]:
# Load the Seoul air-pollution measurements from the CSV file into a DataFrame.
DATA_PATH = '../../Datasets/AirPollution.csv'
df = pd.read_csv(DATA_PATH)

#### 년-월-일:시 컬럼을 pandas에서 인식할 수 있는 datetime 형태로 변경하라. 서울시의 제공데이터의 경우 0시가 24시로 표현된다

In [3]:
# Give the timestamp column a short ASCII name so later cells can refer to it
# as 'date'.
df = df.rename({'(년-월-일:시)': 'date'}, axis='columns')

df

Unnamed: 0,date,PM10등급,PM10,PM2.5등급,PM2.5,오존등급,오존,이산화질소등급,이산화질소,일산화탄소등급,일산화탄소,아황산가스등급,아황산가스
0,2021-05-15:15,보통,47.0,보통,19.0,좋음,0.017,좋음,0.023,좋음,0.4,좋음,0.003
1,2021-05-15:14,보통,43.0,보통,20.0,좋음,0.024,좋음,0.019,좋음,0.3,좋음,0.003
2,2021-05-15:13,보통,34.0,보통,24.0,보통,0.035,좋음,0.017,좋음,0.4,좋음,0.004
3,2021-05-15:12,보통,41.0,보통,27.0,보통,0.037,좋음,0.020,좋음,0.4,좋음,0.004
4,2021-05-15:11,보통,51.0,보통,34.0,보통,0.033,좋음,0.023,좋음,0.4,좋음,0.005
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1234,2021-03-25:05,보통,39.0,보통,18.0,좋음,0.026,좋음,0.025,좋음,0.4,좋음,0.003
1235,2021-03-25:04,보통,34.0,좋음,15.0,좋음,0.017,보통,0.033,좋음,0.4,좋음,0.002
1236,2021-03-25:03,보통,35.0,좋음,13.0,좋음,0.029,좋음,0.025,좋음,0.4,좋음,0.003
1237,2021-03-25:02,보통,35.0,좋음,13.0,보통,0.031,좋음,0.025,좋음,0.3,좋음,0.003


In [4]:
def change_date(x):
    """Convert a 'YYYY-MM-DD:HH' string into a pandas Timestamp.

    The source data writes midnight as hour '24' of the previous day, so that
    case is mapped to 00:00:00 on the following calendar day. Any other hour
    is parsed as-is.
    """
    fields = x.split(':')
    day_part, hour_part = fields[0], fields[1]

    # Every hour other than '24' maps directly onto a clock time.
    if hour_part != '24':
        return pd.to_datetime(f"{day_part} {hour_part}:00:00")

    # '24' is not a valid clock hour: parse midnight of the same day and
    # roll one day forward.
    return pd.to_datetime(f"{day_part} 00:00:00") + pd.Timedelta(days=1)

# Replace the column with plain assignment rather than ``df.loc[:, 'date']``.
# Since pandas 2.0 the ``.loc[:, col] = values`` form writes in place into the
# existing object-dtype column, so the dtype would stay ``object`` and the
# ``.dt`` accessor used in later cells would raise. Assigning the column
# directly swaps in the parsed datetime64 Series.
df['date'] = df['date'].apply(change_date)

df

Unnamed: 0,date,PM10등급,PM10,PM2.5등급,PM2.5,오존등급,오존,이산화질소등급,이산화질소,일산화탄소등급,일산화탄소,아황산가스등급,아황산가스
0,2021-05-15 15:00:00,보통,47.0,보통,19.0,좋음,0.017,좋음,0.023,좋음,0.4,좋음,0.003
1,2021-05-15 14:00:00,보통,43.0,보통,20.0,좋음,0.024,좋음,0.019,좋음,0.3,좋음,0.003
2,2021-05-15 13:00:00,보통,34.0,보통,24.0,보통,0.035,좋음,0.017,좋음,0.4,좋음,0.004
3,2021-05-15 12:00:00,보통,41.0,보통,27.0,보통,0.037,좋음,0.020,좋음,0.4,좋음,0.004
4,2021-05-15 11:00:00,보통,51.0,보통,34.0,보통,0.033,좋음,0.023,좋음,0.4,좋음,0.005
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1234,2021-03-25 05:00:00,보통,39.0,보통,18.0,좋음,0.026,좋음,0.025,좋음,0.4,좋음,0.003
1235,2021-03-25 04:00:00,보통,34.0,좋음,15.0,좋음,0.017,보통,0.033,좋음,0.4,좋음,0.002
1236,2021-03-25 03:00:00,보통,35.0,좋음,13.0,좋음,0.029,좋음,0.025,좋음,0.4,좋음,0.003
1237,2021-03-25 02:00:00,보통,35.0,좋음,13.0,보통,0.031,좋음,0.025,좋음,0.3,좋음,0.003


#### 일자별 영어요일 이름을 dayName 컬럼에 저장하라

In [5]:
# Store the English weekday name of each timestamp in a new 'dayName' column.
df['dayName'] = df['date'].dt.day_name()

df

Unnamed: 0,date,PM10등급,PM10,PM2.5등급,PM2.5,오존등급,오존,이산화질소등급,이산화질소,일산화탄소등급,일산화탄소,아황산가스등급,아황산가스,dayName
0,2021-05-15 15:00:00,보통,47.0,보통,19.0,좋음,0.017,좋음,0.023,좋음,0.4,좋음,0.003,Saturday
1,2021-05-15 14:00:00,보통,43.0,보통,20.0,좋음,0.024,좋음,0.019,좋음,0.3,좋음,0.003,Saturday
2,2021-05-15 13:00:00,보통,34.0,보통,24.0,보통,0.035,좋음,0.017,좋음,0.4,좋음,0.004,Saturday
3,2021-05-15 12:00:00,보통,41.0,보통,27.0,보통,0.037,좋음,0.020,좋음,0.4,좋음,0.004,Saturday
4,2021-05-15 11:00:00,보통,51.0,보통,34.0,보통,0.033,좋음,0.023,좋음,0.4,좋음,0.005,Saturday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1234,2021-03-25 05:00:00,보통,39.0,보통,18.0,좋음,0.026,좋음,0.025,좋음,0.4,좋음,0.003,Thursday
1235,2021-03-25 04:00:00,보통,34.0,좋음,15.0,좋음,0.017,보통,0.033,좋음,0.4,좋음,0.002,Thursday
1236,2021-03-25 03:00:00,보통,35.0,좋음,13.0,좋음,0.029,좋음,0.025,좋음,0.4,좋음,0.003,Thursday
1237,2021-03-25 02:00:00,보통,35.0,좋음,13.0,보통,0.031,좋음,0.025,좋음,0.3,좋음,0.003,Thursday


#### 요일별 각 PM10등급의 빈도수를 파악하라

In [6]:
# Count the rows for every (weekday, PM10 grade) pair and flatten the result
# into a regular DataFrame with a 'size' column.
new_df = (
    df.groupby(['dayName', 'PM10등급'])
    .size()
    .reset_index(name='size')
)

new_df.head(10)

Unnamed: 0,dayName,PM10등급,size
0,Friday,나쁨,31
1,Friday,매우나쁨,17
2,Friday,보통,120
3,Friday,좋음,21
4,Monday,나쁨,1
5,Monday,매우나쁨,21
6,Monday,보통,83
7,Monday,좋음,63
8,Saturday,나쁨,31
9,Saturday,매우나쁨,27


#### 요일별 각 PM10등급의 빈도수를 요일을 행, PM10등급을 열로 하는 피벗 테이블로 나타내라

In [7]:
# Reshape the counts so weekdays are rows and PM10 grades are columns;
# combinations that never occurred are shown as 0 instead of NaN.
grade_by_day = new_df.pivot(index='dayName', columns='PM10등급', values='size')
grade_by_day.fillna(0)

PM10등급,나쁨,매우나쁨,보통,좋음
dayName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Friday,31.0,17.0,120.0,21.0
Monday,1.0,21.0,83.0,63.0
Saturday,31.0,27.0,71.0,54.0
Sunday,2.0,1.0,67.0,98.0
Thursday,41.0,0.0,144.0,5.0
Tuesday,13.0,10.0,71.0,74.0
Wednesday,26.0,0.0,95.0,46.0


#### 시간이 연속적으로 존재하며 결측치가 없는지 확인하라

In [8]:
# Differencing the timestamps gives NaT for the first row; if every remaining
# difference is the same value, the series is judged to be continuous. That
# means exactly two distinct values are expected in the differenced column.
check = len(df['date'].diff().unique())

Ans = (check == 2)

#### 오전 10시와 오후 10시(22시)의 PM10의 평균값을 각각 구하여라

In [9]:
# Average PM10 per hour of day, then pick 10:00 and 22:00 by label.
# PM10 is selected explicitly because averaging the whole frame raises on the
# string grade columns under the pandas 2.x default (numeric_only=False).
# Label-based .loc is used instead of positional .iloc so a missing hour
# raises a KeyError rather than silently returning a different hour's row.
hourly_pm10 = df.groupby(df['date'].dt.hour)[['PM10']].mean()
hourly_pm10.loc[[10, 22]]

Unnamed: 0_level_0,PM10
date,Unnamed: 1_level_1
10,70.384615
22,69.941176


#### 날짜 컬럼을 index로 만들어라

In [10]:
# Promote the 'date' column to the index (the column itself is dropped).
df = df.set_index('date')

df

Unnamed: 0_level_0,PM10등급,PM10,PM2.5등급,PM2.5,오존등급,오존,이산화질소등급,이산화질소,일산화탄소등급,일산화탄소,아황산가스등급,아황산가스,dayName
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-05-15 15:00:00,보통,47.0,보통,19.0,좋음,0.017,좋음,0.023,좋음,0.4,좋음,0.003,Saturday
2021-05-15 14:00:00,보통,43.0,보통,20.0,좋음,0.024,좋음,0.019,좋음,0.3,좋음,0.003,Saturday
2021-05-15 13:00:00,보통,34.0,보통,24.0,보통,0.035,좋음,0.017,좋음,0.4,좋음,0.004,Saturday
2021-05-15 12:00:00,보통,41.0,보통,27.0,보통,0.037,좋음,0.020,좋음,0.4,좋음,0.004,Saturday
2021-05-15 11:00:00,보통,51.0,보통,34.0,보통,0.033,좋음,0.023,좋음,0.4,좋음,0.005,Saturday
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-25 05:00:00,보통,39.0,보통,18.0,좋음,0.026,좋음,0.025,좋음,0.4,좋음,0.003,Thursday
2021-03-25 04:00:00,보통,34.0,좋음,15.0,좋음,0.017,보통,0.033,좋음,0.4,좋음,0.002,Thursday
2021-03-25 03:00:00,보통,35.0,좋음,13.0,좋음,0.029,좋음,0.025,좋음,0.4,좋음,0.003,Thursday
2021-03-25 02:00:00,보통,35.0,좋음,13.0,보통,0.031,좋음,0.025,좋음,0.3,좋음,0.003,Thursday


#### 데이터를 주 단위로 뽑아서 최소, 최대, 평균, 표준편차를 구하여라

In [11]:
# Keep only the numeric measurement columns: min/max/mean/std are only
# meaningful for numbers. Selecting by include='number' (rather than
# excluding 'object') also drops any non-object, non-numeric columns and
# string-dtype columns that would otherwise break the aggregation.
new_df = df.select_dtypes(include='number')

# Weekly bins over the datetime index, with four summary statistics per column.
new_df.resample('W').agg(['min', 'max', 'mean', 'std'])

Unnamed: 0_level_0,PM10,PM10,PM10,PM10,PM2.5,PM2.5,PM2.5,PM2.5,오존,오존,...,이산화질소,이산화질소,일산화탄소,일산화탄소,일산화탄소,일산화탄소,아황산가스,아황산가스,아황산가스,아황산가스
Unnamed: 0_level_1,min,max,mean,std,min,max,mean,std,min,max,...,mean,std,min,max,mean,std,min,max,mean,std
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-03-28,6.0,160.0,72.747368,43.345462,4.0,113.0,44.705263,29.551928,0.002,0.085,...,0.044579,0.023722,0.3,1.4,0.611579,0.232408,0.002,0.006,0.003274,0.000961
2021-04-04,3.0,598.0,97.14881,129.911976,1.0,120.0,23.168675,22.399578,0.003,0.055,...,0.027929,0.014978,0.3,0.9,0.445833,0.135741,0.002,0.004,0.002732,0.000541
2021-04-11,17.0,102.0,41.059524,16.325911,7.0,70.0,21.761905,11.479343,0.009,0.07,...,0.022583,0.009562,0.3,0.7,0.389286,0.087573,0.002,0.004,0.002744,0.000569
2021-04-18,3.0,367.0,48.180723,43.254468,2.0,38.0,17.066265,7.867952,0.002,0.07,...,0.023753,0.013553,0.3,0.6,0.386747,0.084954,0.002,0.004,0.002464,0.000579
2021-04-25,17.0,126.0,55.119048,26.659936,7.0,61.0,26.392857,13.094788,0.006,0.09,...,0.028571,0.01464,0.3,0.8,0.457143,0.122142,0.001,0.011,0.003631,0.001763
2021-05-02,3.0,97.0,40.612121,24.813103,1.0,43.0,16.644578,8.850965,0.003,0.064,...,0.020428,0.011676,0.3,0.6,0.392771,0.092485,0.001,0.006,0.002524,0.000768
2021-05-09,8.0,1024.0,161.660714,239.679148,3.0,172.0,34.738095,39.788248,0.002,0.073,...,0.024187,0.012371,0.3,1.0,0.419277,0.10323,0.002,0.004,0.002771,0.000579
2021-05-16,16.0,111.0,40.014815,21.876855,7.0,76.0,21.577778,15.622633,0.004,0.123,...,0.030793,0.009503,0.3,0.8,0.440741,0.094075,0.001,0.006,0.002459,0.001696
