In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# 공휴일 API로 2013~2019.03.31 공휴일 데이터 받아오기 
import requests
from bs4 import BeautifulSoup
import time
import datetime

def get_request_query(url, operation, params, serviceKey):
    import urllib.parse as urlparse
    params = urlparse.urlencode(params)
    request_query = url + '/' + operation + '?' + params + '&' + 'serviceKey' + '=' + serviceKey
    return request_query

# url 불러오기
URL = 'http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService'
OPERATION = 'getHoliDeInfo' 

# 서비스키 입력
SERVICEKEY = "개인정보 모자이크"
def year_month(year, month):
    solYear  = str(year)  
    if month >= 10:
        solMonth = str(month)    
    else:
        solMonth = '0'+str(month)
    PARAMS = {'solYear':solYear, 'solMonth':solMonth}


    request_query = get_request_query(URL, OPERATION, PARAMS, SERVICEKEY)
    print('request_query:', request_query)
    response = requests.get(url=request_query)
    print('status_code:' + str(response.status_code))
    
    bsObj = BeautifulSoup(response.text, 'lxml')
    datename = bsObj.find_all("datename")
    datename = bsObj.find_all("datename")
    datename = list(map(lambda x : x.get_text(),datename))
    locdate = bsObj.find_all("locdate")
    locdate = list(map(lambda x : x.get_text(),locdate))
    df = pd.DataFrame(zip(locdate,datename))
    return(df)

df = pd.DataFrame()
for i in  range(2013,2020):
    for j in range(1,13):
        df=df.append(year_month(i,j) , ignore_index=True)

df.rename(columns = {0: "date", 1:"holiday"},inplace=True)

In [3]:
df['date'] = df['date'].str[0:4]+'-'+ df['date'].str[4:6]+'-'+df['date'].str[6:8] # API날짜 연월일 형식 변경
df = df[~df.date.str.contains("07-17",na=False)].reset_index(drop=True) # 제헌절이 API데이타에 있어 삭제 
df = df.drop_duplicates(['date'],keep='last') #2017/10/03 개천절이자 추석. 이런날 제거 
df = df[:106] #train_public 날짜인 2019-03-31 전데이타만 자름 

In [4]:
df1 = df.assign(holi=np.nan)  # holi 빈칸열 만들기 (공휴일이름은 0,1로 바꾸기 위해 )

In [5]:
df1['holi'] = df1['holi'].fillna(1) #모두 1로 채우기 
df1 = df1[['date','holi']] # date랑 holi열만 가져오기 
df1

Unnamed: 0,date,holi
0,2013-01-01,1.0
1,2013-02-09,1.0
2,2013-02-10,1.0
3,2013-02-11,1.0
4,2013-03-01,1.0
...,...,...
102,2019-01-01,1.0
103,2019-02-04,1.0
104,2019-02-05,1.0
105,2019-02-06,1.0


In [6]:
# 2013-2019까지 날짜 뽑아서 주말은 1로 만들기 
date = pd.DataFrame(pd.date_range(start='2013-01-01', end='2019-03-31',  freq='D', name='date'))
date['date'] = pd.to_datetime(date['date']) 
date['dayname'] = date['date'].dt.weekday # 월~일을 0~6으로 맵핑 

In [7]:
date['dayname']= date['dayname'].replace({1:0,2:0, 3:0, 4:0, 5:1,6:1}) # 토,일만 1로 하고 나머지는 0으로 바꾸기 
date['date']= date['date'].astype(str) 
df2 = pd.merge(date,df1,how='outer',on='date')  # 공휴일과 주말 데이타 merge 
df2 = df2.fillna(0)  # 공휴일을 뺀 나머지 날 0으로 바꾸기 

In [8]:
df2

Unnamed: 0,date,dayname,holi
0,2013-01-01,0,1.0
1,2013-01-02,0,0.0
2,2013-01-03,0,0.0
3,2013-01-04,0,0.0
4,2013-01-05,1,0.0
...,...,...,...
2276,2019-03-27,0,0.0
2277,2019-03-28,0,0.0
2278,2019-03-29,0,0.0
2279,2019-03-30,1,0.0


In [9]:
df2.to_csv('./make_data/0A_holiday.csv',index=False) 