In [1]:

# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlencode https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# 시간 생성
import time
import datetime
from pytz import timezone, utc

# 폴더 자동 생성
import os

# 코드 스케줄링
import threading
import schedule

import pandas as pd

# XML 파싱
import xml.etree.ElementTree as ET

In [2]:
def getOnlyDirectory(index):
    """Return the folder path of a sub-category under the current working directory.

    Args:
        index: Key into the module-level ``subCategoryDict`` (for example ``'rid'``).

    Returns:
        str: ``os.getcwd() + category + subCategoryDict[index]``. The pieces are
        concatenated as plain strings, so every separator comes from the
        configured values themselves.
    """
    # Both globals are only read; the declaration just documents the dependency.
    global subCategoryDict, category
    pieces = (os.getcwd(), category, subCategoryDict[index])
    return ''.join(pieces)

In [3]:
def getDirectory(index, routeId):
    """Return the per-route folder path (with trailing '/') inside a sub-category.

    Args:
        index: Key into the module-level ``subCategoryDict``.
        routeId: Route identifier; converted with ``str()`` and used as the folder name.

    Returns:
        str: ``os.getcwd() + category + subCategoryDict[index] + str(routeId) + '/'``.
    """
    # Both globals are only read; the declaration just documents the dependency.
    global subCategoryDict, category
    pieces = [os.getcwd(), category, subCategoryDict[index], str(routeId), '/']
    return ''.join(pieces)

In [4]:
def getToday():
    """Return the notebook-wide ``scheduleDict['today']`` formatted as ``YYYY-MM-DD``."""
    return f"{scheduleDict['today']:%Y-%m-%d}"

In [5]:
def makeTextFile(subcategory, routeId, Dict):
    """Ensure the per-route daily text file exists and record its path in ``Dict``.

    The file lives in ``getDirectory(subcategory, routeId)`` and is named
    ``<YYYY-MM-DD>.txt`` after ``scheduleDict[routeId]['today']``. If the file is
    missing, the folder is created and the file is started with one header line
    ``"<routeId> <ISO timestamp>"``. An existing file is left untouched.

    Args:
        subcategory: Key into ``subCategoryDict`` selecting the output folder.
        routeId: Route identifier; also the key written into ``Dict``.
        Dict: Mapping that is updated in place with ``Dict[routeId] = filePath``.

    Returns:
        The same ``Dict`` object that was passed in.
    """
    folderPath = getDirectory(subcategory, routeId)
    todayDate = scheduleDict[routeId]['today'].strftime("%Y-%m-%d")
    filePath = folderPath + todayDate + '.txt'

    if not os.path.isfile(filePath):
        createFolder(folderPath)
        # ``with`` guarantees the handle is released even if the write fails.
        with open(filePath, 'w', encoding='utf-8-sig', newline='\n') as file:
            file.write(f"{routeId} {getNowTime().isoformat()}\n")

    Dict[routeId] = filePath
    return Dict

In [6]:
def makeOnlyTextFile(Dict, subcategory):
    """Ensure the sub-category's daily text file exists and store its path under ``'today'``.

    The file lives in ``getOnlyDirectory(subcategory)`` and is named
    ``<YYYY-MM-DD>.txt`` after ``scheduleDict['today']``. If the file is missing,
    the folder is created and the file is started with one header line holding
    the current ISO timestamp. An existing file is left untouched.

    Args:
        Dict: Mapping that is updated in place with ``Dict['today'] = filePath``.
        subcategory: Key into ``subCategoryDict`` selecting the output folder.

    Returns:
        The same ``Dict`` object that was passed in.
    """
    folderPath = getOnlyDirectory(subcategory)
    todayDate = scheduleDict['today'].strftime("%Y-%m-%d")
    filePath = folderPath + todayDate + '.txt'

    if not os.path.isfile(filePath):
        createFolder(folderPath)
        # ``with`` guarantees the handle is released even if the write fails.
        with open(filePath, 'w', encoding='utf-8-sig', newline='\n') as file:
            file.write(getNowTime().isoformat() + '\n')

    Dict['today'] = filePath
    return Dict

In [7]:
# Create a folder automatically.
# https://data-make.tistory.com/170
def createFolder(directory):
    """Create ``directory`` together with any missing parents.

    An already existing directory is not an error. Any ``OSError`` raised while
    creating it is reported on stdout instead of being propagated, so callers
    keep running (best-effort behaviour).

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  directory)

In [8]:
# Test timestamp generator.
# https://qastack.kr/programming/993358/creating-a-range-of-dates-in-python
def getSampleTime(count):
    """Return ``count`` datetimes spaced one hour apart, starting from the current local time.

    Args:
        count: Number of timestamps to generate.

    Returns:
        list[datetime.datetime]: ``[now, now + 1h, ..., now + (count - 1)h]``.
    """
    baseTime = datetime.datetime.today()
    dateList = []
    for hourOffset in range(count):
        dateList.append(baseTime + datetime.timedelta(hours=hourOffset))
    return dateList

In [9]:
def getRouteIdList():
    """Read the route IDs to monitor from ``<rid folder>/routeIdList.txt``.

    The file is expected to hold one integer route ID per line. Blank lines are
    skipped.

    Returns:
        list[int]: Route IDs in file order (duplicates are kept).

    Raises:
        FileNotFoundError: The list file does not exist. Its folder is created
            first so the file can simply be dropped in.
        ValueError: A non-blank line is not an integer.
    """
    folderPath = getOnlyDirectory('rid')
    filePath = folderPath + 'routeIdList.txt'

    if not os.path.isfile(filePath):
        # Opening a missing file for reading can only fail; fail with a clear message instead.
        createFolder(folderPath)
        raise FileNotFoundError(f"Route ID list file not found: {filePath}")

    routeIdList = []
    with open(filePath, 'r', encoding = 'utf-8-sig', newline = '\n') as routeIdFile:
        for line in routeIdFile:
            line = line.strip()
            if line:  # tolerate blank lines, e.g. a trailing empty line
                routeIdList.append(int(line))

    return routeIdList

In [10]:
def openAPICall(routeId):
    """Call the bus-location API for one route and return the parsed XML root.

    Increments the module-level ``callCount`` before the request is sent, so
    failed requests are counted as well.

    Args:
        routeId: Route identifier sent as the ``routeId`` query parameter.

    Returns:
        xml.etree.ElementTree.Element: Root element of the response document.

    Raises:
        urllib.error.URLError: The request failed (raised by ``urlopen``).
        xml.etree.ElementTree.ParseError: The response is not well-formed XML.
    """
    global callCount
    url = 'http://openapi.gbis.go.kr/ws/rest/buslocationservice'  # <<< verify the domain

    # ``apiKey`` is the decoded service key; urlencode() re-encodes it for the query string.
    queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
    request = Request(url + queryParams, method='GET')
    callCount += 1

    # Close the HTTP response deterministically instead of leaving it to the garbage collector.
    with urlopen(request) as response:
        oneLineXML = response.read().decode('utf8')

    # Callers in this notebook read the result code from the 'msgHeader' child and
    # the payload from the third child (index 2) of the returned root.
    return ET.fromstring(oneLineXML)

In [11]:
def getNowTime():
    """Return the current time as a timezone-aware datetime in ``Asia/Seoul``."""
    # datetime.utcnow() is deprecated since Python 3.12; now(tz) produces the same aware value.
    return datetime.datetime.now(timezone('Asia/Seoul'))

In [12]:
def readInfoCSV(routeId):
    """Load a route's info file as a DataFrame indexed by the ``INFOFILE`` column.

    Args:
        routeId: Route identifier; the file is ``<info folder>/<routeId>/<routeId>.txt``.

    Returns:
        pandas.DataFrame: Parsed with a single space as separator and the
        ``INFOFILE`` column as index.
    """
    infoFilePath = getDirectory('info', routeId) + str(routeId) + '.txt'
    return pd.read_csv(infoFilePath, sep=' ', index_col='INFOFILE')

In [13]:
def makeInfoFile(routeId):
    """Fetch a route's info from the ``busrouteservice/info`` API and write its info file.

    The output ``<info folder>/<routeId>/<routeId>.txt`` starts with the header
    line ``"INFOFILE <routeId>"``, followed by one ``"<tag> <text>"`` line for
    each child of the first item in the response body. The file is only written
    after the response has been fetched and parsed, so a failed call does not
    leave a truncated file behind.

    Args:
        routeId: Route identifier sent as the ``routeId`` query parameter.

    Raises:
        urllib.error.URLError: The request failed (raised by ``urlopen``).
        xml.etree.ElementTree.ParseError: The response is not well-formed XML.
        IndexError: The response has no third child or that child is empty.
    """
    # Route info lookup: busrouteservice/info.
    # NOTE(review): per the original comment this returns route number, first/last
    # stops, first/last bus times, headway and operator — confirm against the API docs.
    url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/info'
    queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
    request = Request(url + queryParams, method='GET')
    with urlopen(request) as response:
        oneLineXML = response.read().decode('utf8')

    xtree = ET.fromstring(oneLineXML)
    busRouteInfoItem = xtree[2][0]  # first item of the body element

    lines = [f"INFOFILE {routeId}"]
    for info in busRouteInfoItem:
        # An empty element has text None; write an empty value instead of raising TypeError.
        # NOTE(review): readInfoCSV splits on single spaces, so a value that itself
        # contains a space would not round-trip — confirm the fields never do.
        lines.append(f"{info.tag} {info.text or ''}")

    directory = getDirectory('info', routeId)
    infoFilePath = directory + str(routeId) + '.txt'
    createFolder(directory)
    with open(infoFilePath, 'w', encoding = 'utf-8-sig', newline = '\n') as infoFile:
        infoFile.write('\n'.join(lines) + '\n')

In [14]:
def getBusTime(routeId):
    """Build today's polling window for one route from its info file.

    Reads ``upFirstTime`` and ``upLastTime`` (``HH:MM`` strings) from the route's
    info file, creating that file through ``makeInfoFile`` when it cannot be read.

    Known issues recorded by the original author:
      * ISSUE1: the scheduled last-bus time differs a lot from the real arrival time.
      * ISSUE2: a last-bus time after midnight belongs to the next day. This is
        detected by comparing it with the first-bus time: when it is not later
        than the first-bus time, one day is added to the end date.

    Args:
        routeId: Route identifier.

    Returns:
        dict: ``{routeId: {'start': ..., 'end': ..., 'today': ...}}`` where
        ``start`` is the first-bus time minus a 10-minute buffer, ``end`` is the
        last-bus time and ``today`` is midnight of the current date. All three
        are aware datetimes with a fixed +09:00 offset.
    """
    try:
        info_df = readInfoCSV(routeId)
    except Exception:
        # Info file missing or unreadable: rebuild it from the API and retry once.
        # (``Exception`` rather than a bare except, so Ctrl-C is not swallowed.)
        makeInfoFile(routeId)
        info_df = readInfoCSV(routeId)

    upFirstTime = info_df.loc['upFirstTime'].values[0]
    # NOTE(review): named downLastTime but read from 'upLastTime' — confirm which field is intended.
    downLastTime = info_df.loc['upLastTime'].values[0]

    # Take a single clock snapshot so 'start', 'end' and 'today' cannot straddle midnight.
    now = getNowTime()
    todayPrefix = now.isoformat()[:11]  # 'YYYY-MM-DDT'
    if int(upFirstTime.replace(':', '')) < int(downLastTime.replace(':', '')):
        endPrefix = todayPrefix
    else:
        endPrefix = (now + datetime.timedelta(days=1)).isoformat()[:11]

    def toDatetime(datePrefix, hourMinute):
        # Same text format as before: seconds/microseconds zeroed, fixed +09:00 offset.
        stamp = datePrefix + hourMinute + ':00.000000+09:00'
        return datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S.%f%z')

    startDatetime = toDatetime(todayPrefix, upFirstTime) - datetime.timedelta(minutes=10)  # buffer time
    endDatetime = toDatetime(endPrefix, downLastTime)
    todayDatetime = toDatetime(todayPrefix, '00:00')

    return {routeId:{'start':startDatetime, 'end':endDatetime, 'today':todayDatetime}}

In [15]:
def makeScheduleDict(routeIdList):
    """Build the schedule mapping for every route in ``routeIdList``.

    Args:
        routeIdList: Iterable of route IDs; repeated IDs are processed only once.

    Returns:
        dict: ``{'today': <midnight today, +09:00>, routeId: {...}, ...}`` where
        each per-route entry is the value produced by ``getBusTime(routeId)``.
    """
    midnightText = getNowTime().isoformat()[:11] + '00:00:00.000000+09:00'
    schedule = {'today': datetime.datetime.strptime(midnightText, '%Y-%m-%dT%H:%M:%S.%f%z')}
    for routeId in routeIdList:
        if routeId not in schedule:
            schedule.update(getBusTime(routeId))
    return schedule

In [16]:
def makeSwitchDict(routeIdList):
    """Compute the initial on/off polling switch for every route.

    A route's switch is ``True`` exactly when the current time lies inside its
    window: ``scheduleDict[routeId]['start'] <= now < scheduleDict[routeId]['end']``.

    The original code also contained two ``... + datetime.timedelta(days=1)``
    expression statements for routes already past their end time. Their results
    were discarded, so they never changed ``scheduleDict``; they are removed here.
    Moving a finished route to the next day is done by ``getSwitch``, which shifts
    'start', 'end' and 'today' together.

    Args:
        routeIdList: Iterable of route IDs present in the module-level ``scheduleDict``.

    Returns:
        dict: ``{routeId: bool}``.
    """
    global scheduleDict
    curTime = getNowTime()
    switchDict = {}
    for routeId in routeIdList:
        window = scheduleDict[routeId]
        switchDict[routeId] = window['start'] <= curTime < window['end']
    return switchDict

In [17]:
def makeTxtFileDict(routeIdList, subCategory):
    """Create (if needed) today's text file for every route and map route ID to file path.

    Args:
        routeIdList: Iterable of route IDs.
        subCategory: Key into ``subCategoryDict`` selecting the output folder.

    Returns:
        dict: ``{routeId: filePath}`` as accumulated by ``makeTextFile``.
    """
    pathByRoute = {}
    for routeId in routeIdList:
        pathByRoute = makeTextFile(subCategory, routeId, pathByRoute)
    return pathByRoute

In [18]:
# Not used.
def initProcess(routeIdList):
    """Build the schedule, switch and data-file mappings in one call.

    The original body called ``makeSwitchDict`` with two arguments and
    ``makeTxtFileDict`` with one, which raises ``TypeError`` against their
    current signatures. The calls now match those signatures.

    Args:
        routeIdList: Iterable of route IDs.

    Returns:
        tuple: ``(scheduleDict, switchDict, txtFileDict)``.
    """
    # makeSwitchDict and makeTextFile read the module-level scheduleDict, so it has
    # to be published before they run.
    global scheduleDict
    scheduleDict = makeScheduleDict(routeIdList)
    switchDict = makeSwitchDict(routeIdList)
    # NOTE(review): 'dtt' mirrors the first makeTxtFileDict call in the
    # initialisation cell — confirm it is the intended sub-category here.
    txtFileDict = makeTxtFileDict(routeIdList, 'dtt')
    return scheduleDict, switchDict, txtFileDict

In [19]:
"""
함수 [getSwitch] 설명서
작성자: 김종현
기능: 함수 [CallAPI] 에서 특정 routeId에 대해 호출 여부를 결정하는 함수입니다.
      날짜가 바뀌었을때 값이 쓰여지는 파일을 새로 생성하고 API호출 수를 줄이는데 목적이 있습니다.
입력값: routeIdList.csv
        [routeNo, routeId, 첫차출발시간, 막차도착시간, 총 정류장 수]
내용:
모든 routeId(노선번호에 부여된 ID)는 첫차 시간과 막차 시간이 정해져있습니다.
현재 시간에 따라서 스위치(함수를 호출하는...)값을 변화 합니다.

1. [현재시간]이 [첫차시간]과 [막차시간] 사이에 존재하면 스위치는 켜져있어야 합니다.
2. 스위치가 켜져있는 상태에서 [현재시간]이 [막차시간]을 넘어가면(현재시간>막차시간)...
   막차시간보다 늦게 차고지에 도착하는 버스가 있으므로...
   API호출 결과 알 수 있는 [resultCode]의 값에 따라 진행합니다. 
   *2-1. [resultCode] == 0
         - API가 정상적으로 호출되고 있습니다.
         - 변경 사항 없이 계속 진행합니다.
   *2-2. [resultCode] != 0
         - 스위치를 끕니다.
         - [첫차시간]과 [막차시간]을 현재를 기준으로 업데이트(+ 1 Day)합니다.
           [첫차시간]과 [막차시간]이 모두 현재시간보다 뒤에 있으므로 현재시간보다 항상 작음
3. 스위치가 꺼져있는 상태에서 [현재시간]이 [첫차시간]을 넘어가면(현재시간>첫차시간)...
    - 스위치를 켭니다.
      다만 첫차가 조금 일찍 출발 할 수 있으므로 첫차시간을 조정합니다(- 10 Mins).
"""

def _rollRouteToNextDay(routeId):
    """Shift one route's 'start', 'end' and 'today' by a day and make that day's data/log files."""
    global scheduleDict, dttFileDict, logFileDict
    oneDay = datetime.timedelta(days=1)
    scheduleDict[routeId]['start'] += oneDay
    scheduleDict[routeId]['end'] += oneDay
    scheduleDict[routeId]['today'] += oneDay
    dttFileDict = makeTextFile('dtt', routeId, dttFileDict)
    logFileDict = makeTextFile('dtt-log', routeId, logFileDict)


def getSwitch(routeId):
    """Decide whether ``routeId`` should be polled now and, if so, return the API result.

    State changes are appended to the main log file ``logFileDict['today']``.
    The decision uses the route's switch, its schedule window and the API's
    ``resultCode`` (0 is treated as "data available"):

    * Switch off, first-bus time not reached: nothing happens (State #0).
    * Switch off, first-bus time passed: the switch is turned on and State #1 is
      logged. The API is first called on the next pass.
    * Switch on, last-bus time not reached: the API is called. A non-zero
      ``resultCode`` is logged as State #2-1 and nothing is recorded.
    * Switch on, last-bus time passed, non-zero ``resultCode``: State #3 — the
      switch is turned off and the schedule rolls over to the next day.
    * Switch on, last-bus time passed, ``resultCode`` 0: buses are still running,
      so the data is returned. If the next day's first-bus time has already
      passed as well, State #5 is logged and the schedule rolls over while
      polling continues.

    Args:
        routeId: Route identifier present in ``switchDict`` and ``scheduleDict``.

    Returns:
        ``False`` when no data should be recorded, ``None`` when ``resultCode``
        cannot be read from the response, otherwise the XML root element
        returned by ``openAPICall``.
    """
    global scheduleDict, switchDict, dttFileDict, logFileDict
    nowTime = getNowTime()

    # ``with`` closes the log on every return path; several original paths left it open.
    with open(logFileDict['today'], "a", encoding='utf-8-sig') as logFile:
        if not switchDict[routeId]:
            if nowTime > scheduleDict[routeId]['start']:
                logFile.write(f'{nowTime}\tState #1\t{routeId}\n')
                switchDict[routeId] = True
            return False

        xtree = openAPICall(routeId)
        try:
            resultCode = int(xtree.find('msgHeader').find('resultCode').text)
        except Exception:
            # Response without a readable result code.
            return None

        if nowTime > scheduleDict[routeId]['end']:
            if resultCode:
                logFile.write(f'{nowTime}\tState #3\t{routeId}\n')
                switchDict[routeId] = False
                _rollRouteToNextDay(routeId)
                return False

            if getNowTime() > scheduleDict[routeId]['start'] + datetime.timedelta(days=1):
                logFile.write(f'{nowTime}\tState #5\t{routeId}\n')
                _rollRouteToNextDay(routeId)
            return xtree

        if resultCode:
            logFile.write(f'{nowTime}\tState #2-1\t{routeId}\n')
            return False

        return xtree

In [20]:
def parser(routeId):
    """Poll one route and append the returned bus locations to its daily data file.

    For every child of the response body (the root's third child) one line is
    appended to ``dttFileDict[routeId]``:
    ``"<ISO timestamp> <field> <field> ... \\n"``, with the fields being the text
    of that child's sub-elements. Nothing is written when ``getSwitch`` returns
    ``False``, ``None`` or an element without children.

    The per-route log file that the original opened and closed without writing
    to it is no longer opened.

    Args:
        routeId: Route identifier present in ``dttFileDict``.
    """
    global dttFileDict
    # The data file is opened before getSwitch() runs, as in the original: a rollover
    # performed inside getSwitch() therefore takes effect from the next call.
    with open(dttFileDict[routeId], "a", encoding = 'utf-8-sig', newline = '\n') as dttFile:
        xtree = getSwitch(routeId)

        # Explicit checks replace ``if(xtree)``: truth-testing an Element is deprecated
        # and depends on its child count. The original ``if(None)`` never fired.
        if xtree is None or xtree is False or len(xtree) == 0:
            return

        msgBody = xtree[2]
        for busLocationList in msgBody:
            fields = [getNowTime().isoformat()]
            # An empty element has text None; record an empty field instead of raising.
            fields.extend(busLocation.text or '' for busLocation in busLocationList)
            dttFile.write(' '.join(fields) + ' \n')

In [21]:
def parsingRouteIdList(routeIdList):
    """Poll every route once, then sleep for the remainder of a 59-second cycle.

    When a pass takes longer than 59 seconds, no sleep happens and a warning
    line is appended to the main log ``logFileDict['today']``.

    Args:
        routeIdList: Iterable of route IDs to poll.

    Returns:
        int: Number of seconds slept (0 when the pass overran).
    """
    pastTime = getNowTime()
    routeId = None  # stays None for an empty list so the warning below cannot hit an unbound name
    for routeId in routeIdList:
        parser(routeId)

    duration = 59 - int((getNowTime() - pastTime).total_seconds())
    if duration < 0:
        duration = 0
        # The log file is created as utf-8-sig; write the Korean text in the same encoding.
        with open(logFileDict['today'], "a", encoding='utf-8-sig') as logFile:
            # The trailing newline keeps the next log entry on its own line.
            logFile.write(f"{getNowTime()}\t{routeId}\tduration<0 : 노선 수 조정이 필요합니다.\n")
    time.sleep(duration)

    return duration

In [22]:
def updateToday():
    """Roll the notebook-wide day forward once the current time has passed the next midnight.

    On rollover the API call counter is reset to 0, ``scheduleDict['today']``
    advances by one day and a main-log file for the new day is created through
    ``makeOnlyTextFile``. Otherwise nothing changes.
    """
    global scheduleDict, callCount, logFileDict

    oneDay = datetime.timedelta(days=1)
    nextMidnight = scheduleDict['today'] + oneDay
    if not (getNowTime() > nextMidnight):
        return

    callCount = 0
    scheduleDict['today'] += oneDay
    logFileDict = makeOnlyTextFile(logFileDict, 'main-log')

In [23]:
def TODO():
    """Print the author's list of open work items for this notebook."""
    notes = '''
    1. 존재하지 않는 노선번호가 입력되었을때 예외처리 방법이 필요합니다. 예) 224000008, 224000024
       >>> [RouteIdListManager] 에서 예외처리 후 리스트에 추가하려고 합니다: 
       >>> 함수 [makeInfoFile]를 활용합니다.
    2. routeId == 233000031 는 첫차와 막차의 배차간격이 30분이라서 resultCode == 0 인 순간이
       존재하지 않습니다. 이와 같은 노선이 더 있는지 알아보고 해결방법을 생각합니다.
       >>> 조건문을 이용하여 현재시간 > 첫차출발시간 + timedelta(days=1)인 경우 업데이트 합니다.
    '''
    print(notes)

In [24]:
# Sub-folder (relative to the working directory and ``category``) for each kind of file.
subCategoryDict = {
    'dtt': '/daily-time-table/',          # per-route daily location records
    'dtt-log': '/log/daily-time-table/',  # per-route daily log files
    'rid': '/route-id-list/',             # routeIdList.txt
    'main-log': '/log/main/',             # main-loop daily log
    'info': '/BusInfo/',                  # per-route info files
}

In [25]:
# FIXME(security): a service key is committed in this notebook. Supply the key through the
# GBIS_SERVICE_KEY environment variable instead, then rotate the key and delete the
# fallback literal below. Either the URL-encoded or the decoded form of the key works,
# because unquote() leaves a decoded key unchanged as long as it contains no '%'.
KEY = os.environ.get(
    'GBIS_SERVICE_KEY',
    'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D',  # key owner: Jonghyun
)
apiKey = unquote(KEY)  # decoded key; urlencode() re-encodes it when the query string is built
callCount = 0          # number of bus-location API calls made so far today

In [26]:
# Ask whether this is a test run; the answer selects the root data folder.
isTest = int(input("테스트 버전 입니까?... True[1] OR False[0] : "))

# Any non-zero answer writes under '/TEST'; 0 writes under '/DATA'.
category = '/TEST' if isTest else '/DATA'

테스트 버전 입니까?... True[1] OR False[0] :  1


In [27]:
# Initialise the notebook-wide state. The order matters: makeSwitchDict and
# makeTxtFileDict (through makeTextFile) read the module-level scheduleDict.
# NOTE(review): the captured output of this cell shows repeated route IDs
# (e.g. 200000193); every repeat is polled again in each cycle — confirm that is intended.
routeIdList = getRouteIdList()
print(f"{routeIdList} 생성을 완료하였습니다.")
scheduleDict = makeScheduleDict(routeIdList)
switchDict = makeSwitchDict(routeIdList)
# Per-route data files, then per-route log files ...
dttFileDict = makeTxtFileDict(routeIdList, 'dtt')
logFileDict = makeTxtFileDict(routeIdList, 'dtt-log')
# ... and the main-loop log, stored in the same dict under the 'today' key.
logFileDict = makeOnlyTextFile(logFileDict, 'main-log')

[200000104, 200000108, 200000109, 200000119, 200000120, 200000145, 200000175, 200000193, 200000193, 200000205, 204000046, 204000056, 204000057, 204000059, 204000065, 204000081, 204000082, 205000002, 213000019, 213000024, 216000026, 218000010, 218000011, 219000006, 221000003, 221000033, 222000046, 222000075, 222000076, 222000078, 222000107, 222000137, 222000169, 227000019, 227000038, 227000039, 228000176, 228000177, 228000181, 228000263, 228000388, 228000389, 229000023, 229000028, 229000028, 229000072, 229000097, 229000111, 229000112, 232000047, 232000072, 232000072, 232000073, 232000081, 232000092, 232000098, 233000031, 233000125, 233000131, 233000135, 233000142, 233000258, 233000265, 234000002, 234000011, 234000013, 234000031, 234000042, 234000050, 234000075, 234000079, 234000130, 234000310, 234000313, 234000324, 234000878, 234000882, 234000884, 234001203, 234001204, 234001244, 234001511, 234001516, 234001609, 236000050, 236000149, 236000176] 생성을 완료하였습니다.


In [None]:
# Main collection loop: one pass over all routes per cycle, runs until interrupted.
while True:
    pastTime = getNowTime()
    updateToday()
    duration = parsingRouteIdList(routeIdList)

    # Build the summary once so the log file and the console show the same elapsed time.
    finishedAt = getNowTime()
    strTime = finishedAt.strftime("%Y/%m/%d %H:%M")
    summary = f"{strTime}\t누적 호출수: {callCount:6d}\t소요시간: {finishedAt - pastTime}"

    # The log file is created as utf-8-sig; write the Korean text in the same encoding.
    with open(logFileDict['today'], "a", encoding='utf-8-sig') as logFile:
        logFile.write(summary + "\n")
    print(summary)

2020/12/03 17:36	누적 호출수:     81	소요시간: 0:00:59.516254
2020/12/03 17:37	누적 호출수:    167	소요시간: 0:00:59.992187
2020/12/03 17:38	누적 호출수:    248	소요시간: 0:00:59.920429
2020/12/03 17:39	누적 호출수:    329	소요시간: 0:00:59.941283
2020/12/03 17:40	누적 호출수:    410	소요시간: 0:00:59.778390
2020/12/03 17:41	누적 호출수:    491	소요시간: 0:00:59.799497
2020/12/03 17:42	누적 호출수:    572	소요시간: 0:00:59.893338
2020/12/03 17:43	누적 호출수:    653	소요시간: 0:01:00.033259
2020/12/03 17:44	누적 호출수:    734	소요시간: 0:00:59.137935
2020/12/03 17:45	누적 호출수:    815	소요시간: 0:00:59.737865
2020/12/03 17:46	누적 호출수:    896	소요시간: 0:00:59.908693
2020/12/03 17:47	누적 호출수:    977	소요시간: 0:00:59.173632
2020/12/03 17:48	누적 호출수:   1058	소요시간: 0:00:59.946044
2020/12/03 17:49	누적 호출수:   1139	소요시간: 0:00:59.111838
2020/12/03 17:50	누적 호출수:   1220	소요시간: 0:00:59.707850
2020/12/03 17:51	누적 호출수:   1301	소요시간: 0:00:59.903853
2020/12/03 17:52	누적 호출수:   1382	소요시간: 0:00:59.603278
2020/12/03 17:53	누적 호출수:   1463	소요시간: 0:00:59.889563
2020/12/03 17:54	누적 호출수:   1544	소요시간: 0:00:59.

In [None]:
# Inspect the state kept for one route ID when investigating a problem.
TEST_ROUTEID = 200000104
print(
    f"getNowTime:\t{getNowTime()}",
    f"start:\t{scheduleDict[TEST_ROUTEID]['start']}",
    f"end:\t{scheduleDict[TEST_ROUTEID]['end']}",
    f"today:\t{scheduleDict[TEST_ROUTEID]['today']}",
    f"scheduleDict:\t{scheduleDict[TEST_ROUTEID]}",
    f"switchDict:\t{switchDict[TEST_ROUTEID]}",
    f"dttFileDict:\t{dttFileDict[TEST_ROUTEID]}",
    f"logFileDict:\t{logFileDict[TEST_ROUTEID]}",
    f"다음날 첫차:\t{scheduleDict[TEST_ROUTEID]['start'] + datetime.timedelta(days=1)}",
    sep='\n',
)