In [1]:
from __future__ import division
from numpy.random import randn
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
np.random.seed(12345)
plt.rc('figure', figsize=(10, 6))
from pandas import Series, DataFrame
import pandas
import pandas as pd
np.set_printoptions(precision=4, threshold=500)
pd.options.display.max_rows = 100
import json
from datetime import datetime
from dateutil.parser import parse

# 종관기상관측

종관기상관측장비(ASOS)로부터 관측된 자료의 연(年)단위 파일을 조회하고 다운로드 받으실 수 있습니다.
종관기상관측이란 정해진 시각의 대기 상태를 파악하기 위해 모든 관측소에서 같은 시각에 실시하는 지상관측을 말하며, 관측방법은 기압, 기온, 풍향, 풍속, 상대습도, 강수량, 강수유무, 일사량, 일조시간, 지면온도, 초상온도, 지중온도, 토양수분, 지하수위 14개 요소에 대해서는 자동으로 관측하고, 시정, 구름, 증발량, 일기현상 등은 일부 자동과 목측(目測)으로 관측합니다.

## 2015.1.1~ 2017.12.31 의 시간당 날씨 데이터 조회

In [2]:
df2015 = pd.read_csv('origen/SURFACE_ASOS_112_HR_2015_2015_2016.csv', encoding="euc-kr")
df2016 = pd.read_csv('origen/SURFACE_ASOS_112_HR_2016_2016_2017.csv', encoding="euc-kr")
df2017 = pd.read_csv('origen/SURFACE_ASOS_112_HR_2017_2017_2018.csv', encoding="euc-kr")
df = pd.merge(df2015,df2016,how ='outer')
df = pd.merge(df,df2017,how='outer')
df.to_csv('result/toal.csv', index=False)
df

Unnamed: 0,지점,일시,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),이슬점온도(°C),현지기압(hPa),...,운형(운형약어),최저운고(100m ),시정(10m),지면상태(지면상태코드),현상번호(국내식),지면온도(°C),5cm 지중온도(°C),10cm 지중온도(°C),20cm 지중온도(°C),30cm 지중온도(°C)
0,112,2015-01-01 00:00,-6.1,,10.2,360.0,47.0,1.8,-15.6,1014.1,...,,,2000.0,,,-1.2,-0.3,-0.4,1.1,1.7
1,112,2015-01-01 01:00,-6.5,,7.3,360.0,52.0,2.0,-14.7,1014.2,...,,,,,,-1.5,-0.4,-0.4,1.1,1.7
2,112,2015-01-01 02:00,-6.9,,7.6,360.0,52.0,1.9,-15.1,1014.0,...,,,,,,-1.9,-0.6,-0.5,1.1,1.7
3,112,2015-01-01 03:00,-7.1,,8.4,360.0,53.0,1.9,-15.0,1014.3,...,,,1800.0,4.0,,-2.2,-0.8,-0.7,1.1,1.7
4,112,2015-01-01 04:00,-7.7,,8.2,360.0,49.0,1.7,-16.5,1014.5,...,,,1600.0,,,-2.4,-0.9,-0.8,1.1,1.7
5,112,2015-01-01 05:00,-8.0,,10.1,360.0,46.0,1.5,-17.6,1014.3,...,,,1400.0,,,-2.6,-1.0,-0.9,1.1,1.7
6,112,2015-01-01 06:00,-8.2,,8.0,360.0,51.0,1.7,-16.5,1014.5,...,,,1200.0,,42.0,-2.9,-1.2,-1.0,1.0,1.6
7,112,2015-01-01 07:00,-8.4,,8.3,340.0,55.0,1.8,-15.8,1014.9,...,,,1500.0,,42.0,-3.1,-1.3,-1.2,1.0,1.6
8,112,2015-01-01 08:00,-8.7,,7.1,360.0,56.0,1.8,-15.9,1015.6,...,,,2000.0,,,-3.2,-1.5,-1.3,1.0,1.6
9,112,2015-01-01 09:00,-8.4,,6.7,340.0,56.0,1.8,-15.6,1016.1,...,,,2000.0,4.0,,-3.0,-1.4,-1.2,1.1,1.7


# 총 데이터의 스키마

In [3]:
df.columns

Index(['지점', '일시', '기온(°C)', '강수량(mm)', '풍속(m/s)', '풍향(16방위)', '습도(%)',
       '증기압(hPa)', '이슬점온도(°C)', '현지기압(hPa)', '해면기압(hPa)', '일조(hr)',
       '일사(MJ/m2)', '적설(cm)', '3시간신적설(cm)', '전운량(10분위)', '중하층운량(10분위)',
       '운형(운형약어)', '최저운고(100m )', '시정(10m)', '지면상태(지면상태코드)', '현상번호(국내식)',
       '지면온도(°C)', '5cm 지중온도(°C)', '10cm 지중온도(°C)', '20cm 지중온도(°C)',
       '30cm 지중온도(°C)'],
      dtype='object')

## 일조, 일사량에 따른 기온과 지면온도의 상관관계

### 필요한 데이터 분리

In [4]:
ex1_df = df.drop(df.columns[[0,4,5,6,7,8,9,10,13,14,15,16,17,18,19,20,21,23,24,25,26]],axis=1)
ex1_df

Unnamed: 0,일시,기온(°C),강수량(mm),일조(hr),일사(MJ/m2),지면온도(°C)
0,2015-01-01 00:00,-6.1,,,,-1.2
1,2015-01-01 01:00,-6.5,,,,-1.5
2,2015-01-01 02:00,-6.9,,,,-1.9
3,2015-01-01 03:00,-7.1,,,,-2.2
4,2015-01-01 04:00,-7.7,,,,-2.4
5,2015-01-01 05:00,-8.0,,,,-2.6
6,2015-01-01 06:00,-8.2,,,,-2.9
7,2015-01-01 07:00,-8.4,,,,-3.1
8,2015-01-01 08:00,-8.7,,0.0,0.00,-3.2
9,2015-01-01 09:00,-8.4,,0.8,0.21,-3.0


### NaN값을 0로 치환

In [5]:
ex1_df=ex1_df.fillna(0)
ex1_df

Unnamed: 0,일시,기온(°C),강수량(mm),일조(hr),일사(MJ/m2),지면온도(°C)
0,2015-01-01 00:00,-6.1,0.0,0.0,0.00,-1.2
1,2015-01-01 01:00,-6.5,0.0,0.0,0.00,-1.5
2,2015-01-01 02:00,-6.9,0.0,0.0,0.00,-1.9
3,2015-01-01 03:00,-7.1,0.0,0.0,0.00,-2.2
4,2015-01-01 04:00,-7.7,0.0,0.0,0.00,-2.4
5,2015-01-01 05:00,-8.0,0.0,0.0,0.00,-2.6
6,2015-01-01 06:00,-8.2,0.0,0.0,0.00,-2.9
7,2015-01-01 07:00,-8.4,0.0,0.0,0.00,-3.1
8,2015-01-01 08:00,-8.7,0.0,0.0,0.00,-3.2
9,2015-01-01 09:00,-8.4,0.0,0.8,0.21,-3.0


### 일조량이 없는 row를 빼고, column  일조X일사, 온도차를 생성

In [6]:
ex1_df= ex1_df[(ex1_df['일조(hr)']>0)&(ex1_df['일사(MJ/m2)']>0)]
ex1_df['일조X일사']= ex1_df['일조(hr)']*ex1_df['일사(MJ/m2)']
ex1_df['온도차']=ex1_df['지면온도(°C)']-ex1_df['기온(°C)']
ex1_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,일시,기온(°C),강수량(mm),일조(hr),일사(MJ/m2),지면온도(°C),일조X일사,온도차
9,2015-01-01 09:00,-8.4,0.0,0.8,0.21,-3.0,0.168,5.4
10,2015-01-01 10:00,-8.1,0.0,1.0,0.67,-2.5,0.670,5.6
11,2015-01-01 11:00,-7.6,0.0,1.0,1.10,-1.4,1.100,6.2
12,2015-01-01 12:00,-6.9,0.0,1.0,1.41,-0.6,1.410,6.3
13,2015-01-01 13:00,-6.1,0.0,1.0,1.53,-0.3,1.530,5.8
14,2015-01-01 14:00,-5.5,0.0,1.0,1.45,-0.2,1.450,5.3
15,2015-01-01 15:00,-5.3,0.0,1.0,1.22,-0.2,1.220,5.1
16,2015-01-01 16:00,-5.3,0.0,1.0,0.80,-0.1,0.800,5.2
17,2015-01-01 17:00,-5.6,0.0,1.0,0.36,-0.2,0.360,5.4
18,2015-01-01 18:00,-6.1,0.0,0.3,0.02,-1.2,0.006,4.9


### 12:00시를 기준으로 일사X일조량이 있는 행만 추출하는 과정

In [7]:
time = []
for x in ex1_df['일시']:
    if datetime.strptime(x, '%Y-%m-%d %H:%M').hour==12:
        date=datetime.strptime(x, '%Y-%m-%d %H:%M').hour
        time.append(x)
        
time = DataFrame({'일시':time})
time

Unnamed: 0,일시
0,2015-01-01 12:00
1,2015-01-02 12:00
2,2015-01-03 12:00
3,2015-01-04 12:00
4,2015-01-05 12:00
5,2015-01-06 12:00
6,2015-01-07 12:00
7,2015-01-08 12:00
8,2015-01-09 12:00
9,2015-01-10 12:00


### 데이터 처리 (일조량이 1시간동안 1MJ이상 영향을 끼친 경우)

In [8]:
test =ex1_df
test=pd.merge(test,time)
#가독성 증가
test=test.drop(test.columns[[2,3,4]],axis=1)
test=test[test['일조X일사']>1]
test.to_csv('result/ex1.csv', index=False, encoding="euc-kr")
test

Unnamed: 0,일시,기온(°C),지면온도(°C),일조X일사,온도차
0,2015-01-01 12:00,-6.9,-0.6,1.410,6.3
1,2015-01-02 12:00,-4.1,-0.5,1.370,3.6
2,2015-01-03 12:00,0.2,-0.4,1.330,-0.6
4,2015-01-05 12:00,6.4,0.1,1.120,-6.3
5,2015-01-06 12:00,-3.5,0.2,1.490,3.7
6,2015-01-07 12:00,-4.1,-0.3,1.450,3.8
7,2015-01-08 12:00,-3.5,-0.4,1.420,3.1
8,2015-01-09 12:00,0.5,-0.3,1.010,-0.8
9,2015-01-10 12:00,3.6,-0.3,1.400,-3.9
10,2015-01-11 12:00,-0.2,0.0,1.400,0.2


# 비가 12시간 이상 오는 날짜 찾기

### 데이터를 일시와 강수량으로만 분리

In [10]:
ex2_df = df.drop(df.columns[[0,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26]],axis=1)
ex2_df=ex2_df.fillna(0)
#ex2_df=ex2_df[ex2_df['강수량(mm)'] > 0]
ex2_df

Unnamed: 0,일시,강수량(mm)
0,2015-01-01 00:00,0.0
1,2015-01-01 01:00,0.0
2,2015-01-01 02:00,0.0
3,2015-01-01 03:00,0.0
4,2015-01-01 04:00,0.0
5,2015-01-01 05:00,0.0
6,2015-01-01 06:00,0.0
7,2015-01-01 07:00,0.0
8,2015-01-01 08:00,0.0
9,2015-01-01 09:00,0.0


### 연속으로 12시간 이상 비가 온 날들을 뽑고,  연속으로 비가 내린 시간 행을 추가

In [11]:
rain = []
raincounter = []
counter =0
i=0
for x in ex2_df['강수량(mm)']:
    if x>0:
        counter = counter+1
    else :
        if counter >12:
            rain.append(ex2_df['일시'][i-1])
            raincounter.append(counter)
            #rain.append(x)
        counter =0
    i=i+1


rain= DataFrame({'비가 내린 시간':raincounter,'일시':rain})
rain


Unnamed: 0,비가 내린 시간,일시
0,14,2015-04-14 20:00
1,21,2015-04-20 06:00
2,15,2015-06-26 09:00
3,21,2015-07-12 20:00
4,15,2015-07-13 12:00
5,20,2015-07-24 16:00
6,15,2016-04-17 06:00
7,17,2016-05-03 11:00
8,13,2016-05-16 02:00
9,13,2016-07-02 00:00


### 데이터결합

In [12]:
test2 =ex2_df
test2=pd.merge(test2,rain)
#가독성 증가
#test=test.drop(test.columns[[2,3,4]],axis=1)
test2.to_csv('result/ex2.csv', index=False, encoding="euc-kr")
test2

Unnamed: 0,일시,강수량(mm),비가 내린 시간
0,2015-04-14 20:00,0.4,14
1,2015-04-20 06:00,0.4,21
2,2015-06-26 09:00,0.2,15
3,2015-07-12 20:00,0.3,21
4,2015-07-13 12:00,0.1,15
5,2015-07-24 16:00,0.3,20
6,2016-04-17 06:00,0.1,15
7,2016-05-03 11:00,0.7,17
8,2016-05-16 02:00,0.2,13
9,2016-07-02 00:00,0.1,13


## 계절에 따른 바람의 방향
## 3,4,5 봄 / 6,7,8 여름 / 9,10,11 가을 / 12,1,2 겨울

### 데이터를 일시와 풍향으로만 분리

In [16]:
ex3_df = df.drop(df.columns[[0,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26]],axis=1)
ex3_df=ex3_df.fillna(0)
#ex2_df=ex2_df[ex2_df['강수량(mm)'] > 0]
ex3_df

Unnamed: 0,일시,풍향(16방위)
0,2015-01-01 00:00,360.0
1,2015-01-01 01:00,360.0
2,2015-01-01 02:00,360.0
3,2015-01-01 03:00,360.0
4,2015-01-01 04:00,360.0
5,2015-01-01 05:00,360.0
6,2015-01-01 06:00,360.0
7,2015-01-01 07:00,340.0
8,2015-01-01 08:00,360.0
9,2015-01-01 09:00,340.0


### 통합 데이터를 계절별로 분리 (3,4,5 봄/6,7,8 여름/9,10,11 가을/12,1,2 겨울

In [17]:
spring = []
summer = []
fall = []
winter = []

for x in ex3_df['일시']:
    if (datetime.strptime(x, '%Y-%m-%d %H:%M').month>2) and (datetime.strptime(x, '%Y-%m-%d %H:%M').month<6):
        spring.append(x)
    elif (datetime.strptime(x, '%Y-%m-%d %H:%M').month>5) and (datetime.strptime(x, '%Y-%m-%d %H:%M').month<9):
        summer.append(x)
    elif (datetime.strptime(x, '%Y-%m-%d %H:%M').month>8) and (datetime.strptime(x, '%Y-%m-%d %H:%M').month<12):
        fall.append(x)
    else :
        winter.append(x)
        
spring = DataFrame({'일시':spring})
summer = DataFrame({'일시':summer})
fall = DataFrame({'일시':fall})
winter = DataFrame({'일시':winter})

spring=pd.merge(ex3_df,spring)
summer=pd.merge(ex3_df,summer)
fall=pd.merge(ex3_df,fall)
winter=pd.merge(ex3_df,winter)

### 각 데이터프레임에서 풍향을 value_counts 함

In [18]:
spring=DataFrame({'봄':spring['풍향(16방위)'].value_counts()})
summer=DataFrame({'여름':summer['풍향(16방위)'].value_counts()})
fall=DataFrame({'가을':fall['풍향(16방위)'].value_counts()})
winter=DataFrame({'겨울':winter['풍향(16방위)'].value_counts()})

In [19]:
spring

Unnamed: 0,봄
250.0,972
230.0,961
340.0,774
270.0,683
200.0,527
360.0,410
20.0,381
180.0,355
320.0,310
290.0,309


In [20]:
summer

Unnamed: 0,여름
250.0,979
270.0,890
230.0,771
340.0,728
200.0,452
290.0,325
180.0,320
360.0,311
20.0,310
140.0,263


In [21]:
fall

Unnamed: 0,가을
20.0,1083
360.0,938
340.0,893
50.0,420
160.0,374
90.0,372
270.0,317
320.0,315
140.0,306
290.0,274


In [22]:
winter

Unnamed: 0,겨울
340.0,1423
20.0,1040
360.0,994
160.0,377
320.0,366
180.0,336
230.0,233
290.0,230
270.0,228
200.0,224


### 봄 여름 가을 겨울을 한개에 데이터 프레임으로 결합하고, 결과파일 출력

In [27]:
result=spring.join(summer,how='outer')
result=result.join(fall,how='outer')
result=result.join(winter,how='outer')
result.to_csv('result/ex3.csv',index_label='풍향(16방위)', encoding="euc-kr")
result

Unnamed: 0,봄,여름,가을,겨울
0.0,79,163,130,77
20.0,381,310,1083,1040
50.0,175,139,420,218
70.0,45,58,211,53
90.0,120,214,372,143
110.0,97,211,248,148
140.0,170,263,306,198
160.0,256,248,374,377
180.0,355,320,240,336
200.0,527,452,125,224
