In [1]:
import requests
from pathlib import Path
import os
import tarfile
import pandas as pd
import io
import datetime
# Root URL of the TDCS history archive; the traffic-type path and archive
# file name are appended to it when a download URL is built.
urlBase = 'http://tisvcloud.freeway.gov.tw/history/TDCS'

# Local data directory. It is joined with the traffic-type name by plain
# string concatenation, so the trailing slash is significant.
baseDir = '../../../data/'
# Check whether a url can actually be downloaded; for example the archive for
# 20190230 does not exist and therefore cannot be downloaded.
def isDownloadable(url, timeout=10):
    """
    Report whether the url points at a downloadable (non-text) resource.

    A HEAD request is sent (redirects are followed) and only the response
    status and headers are inspected; no body is transferred.

    Parameters
    ----------
    url : str
        Address to probe.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout the request could block indefinitely.

    Returns
    -------
    bool
        False when the server answers with an error status (>= 400), when
        the response carries no Content-Type header, or when the
        Content-Type mentions 'text' or 'html'. True otherwise.

    Raises
    ------
    Any exception raised by ``requests.head`` (connection failure, timeout)
    is propagated to the caller unchanged.
    """
    response = requests.head(url, allow_redirects=True, timeout=timeout)

    # An error response (e.g. 404 for a day that does not exist) is never
    # downloadable, whatever Content-Type the server attaches to it.
    if response.status_code >= 400:
        return False

    content_type = response.headers.get('content-type')
    if content_type is None:
        return False

    # Text and HTML answers are pages (typically error pages), not archives.
    content_type = content_type.lower()
    return 'text' not in content_type and 'html' not in content_type

In [2]:
# Download the file at a url and store it in the given directory.
def downloadFileFromUrl(url, directory, timeout=60):
    """
    Save the resource at ``url`` into ``directory`` unless it is already there.

    The local file name is the part of the url after its last '/'. When a
    file of that name already exists in ``directory`` nothing is requested.

    Parameters
    ----------
    url : str
        Address of the file to fetch; must contain at least one '/'.
    directory : str
        Existing directory that receives the file.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60).

    Returns
    -------
    None

    Raises
    ------
    Whatever ``requests.get`` raises on network failure, and whatever
    ``Response.raise_for_status`` raises for a 4xx/5xx answer. In both
    cases no file is written.
    """
    filename = os.path.join(directory, url.rsplit('/', 1)[1])
    if os.path.exists(filename):
        return

    response = requests.get(url, allow_redirects=True, timeout=timeout)
    # Refuse to store an error page under the archive's name: the
    # exists-check above would otherwise treat it as a finished download
    # on every later run.
    response.raise_for_status()

    # The context manager guarantees the handle is flushed and closed.
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)

In [3]:
def downloadTDCSbyDay(trType, day, baseDir):
    """
    Fetch one day's archive of a traffic type into the folder baseDir + trType.

    trType  -- traffic-type code; used as URL path segment, as file-name
               prefix and as the name of the local sub-folder
    day     -- date string such as '20190630'
    baseDir -- local root; concatenated with trType as-is, so it should end
               with a path separator

    The target folder is created when missing and its path is printed before
    the download is delegated to downloadFileFromUrl. Returns None.
    """
    archive_name = trType + '_' + day + '.tar.gz'
    archive_url = '/'.join([urlBase, trType, archive_name])

    target_dir = baseDir + trType
    folder = Path(target_dir)
    folder.mkdir(parents=True, exist_ok=True)
    print(target_dir)

    downloadFileFromUrl(archive_url, target_dir)
    
    

In [5]:
import pymongo
import dns
# Client for the MongoDB server on localhost, port 27017.
client = pymongo.MongoClient(host="mongodb://localhost:27017")
# Handle to the "traffic" database; collections are looked up on it by name.
db = client["traffic"]

def dataframe2Mongo(select_df, trfType):
    """
    Insert every row of a DataFrame into the collection named ``trfType``.

    Parameters
    ----------
    select_df : pandas.DataFrame
        Rows to store; each row becomes one document keyed by column name.
    trfType : str
        Name of the target collection in the module-level ``db``.

    Returns
    -------
    None

    Notes
    -----
    An empty frame is a no-op. Rows are always appended, so calling this
    twice with the same frame stores the rows twice.
    """
    # The 'records' orientation turns each row into its own dict.
    records = select_df.to_dict('records')

    # insert_many rejects an empty document list, so skip the call entirely
    # when there is nothing to store.
    if not records:
        return

    db[trfType].insert_many(records)


In [6]:
def insertMongo(trType, day, baseDir):
    """
    Load one day's archive into a DataFrame and store its rows in MongoDB.

    The archive ``baseDir + trType + '/' + trType + '_' + day + '.tar.gz'``
    is opened, every regular file inside it is parsed as a header-less CSV,
    and the pieces are stacked into one frame whose columns are named
    according to ``trType``.

    Parameters
    ----------
    trType : str
        One of 'M03A', 'M04A', 'M06A', 'M08A'; selects the column names and
        the target collection.
    day : str
        Date part of the archive file name, e.g. '20190630'.
    baseDir : str
        Local root; concatenated with trType as-is, so it should end with a
        path separator.

    Returns
    -------
    pandas.DataFrame
        All rows of the archive. When the schema has a 'TimeInterval'
        column it is converted to datetimes ("%Y-%m-%d %H:%M"). The frame
        is empty (but has the right columns) when the archive holds no rows.

    Raises
    ------
    KeyError
        ``trType`` is not one of the known types (raised before any I/O).
    ValueError
        The number of CSV columns does not match the schema of ``trType``.

    Notes
    -----
    Rows are appended to the collection on every call, so loading the same
    day twice stores its rows twice.
    """
    colNames={
            'M03A': ['TimeInterval','GantryId', 'Direction', 'VehicleType', '交通量'],
            'M04A': ['TimeInterval','GantryFrom','GantryTo', 'VehicleType', 'TravelTime', '交通量'],
            'M06A': ['VehicleType','DetectionTime_O', 'GantryID_O', 'DetectionTime_D', 'GantryID_D', 'TripLength', 'TripEnd', 'TripInformation'],
            'M08A': ['TimeInterval','GantryFrom','GantryTo', 'VehicleType', '交通量']
        }
    # Resolve the schema first so an unsupported type fails before the
    # archive is opened and parsed.
    columns = colNames[trType]

    tarGzFile=baseDir+trType+'/'+ trType + '_' + day+'.tar.gz'
    print(tarGzFile)

    # Collect the per-file frames and concatenate once: DataFrame.append is
    # gone in pandas 2.x and re-copied the whole result on every iteration.
    frames = []
    with tarfile.open(tarGzFile, "r:gz") as tar:
        for member in tar.getmembers():
            f = tar.extractfile(member)
            if f is None:
                # Directories and other non-file members carry no data.
                continue
            text = f.read().decode('utf-8')
            if not text.strip():
                # read_csv raises on input without any data; skip blanks.
                continue
            frames.append(pd.read_csv(io.StringIO(text), header=None, sep=','))

    if frames:
        result = pd.concat(frames, ignore_index=True)
        result.columns = columns
    else:
        result = pd.DataFrame(columns=columns)

    # Only some schemas (not M06A) have a TimeInterval column to parse.
    if 'TimeInterval' in result.columns:
        result['TimeInterval'] = pd.to_datetime(result['TimeInterval'], format="%Y-%m-%d %H:%M")

    # Nothing to store for an empty day; an empty insert would be rejected.
    if not result.empty:
        dataframe2Mongo(result,trType)
    return result

In [7]:
def downloadInsert(trType, day, baseDir):
    """
    Download one day's archive of a traffic type, then load it into MongoDB.

    trType  -- traffic-type code, e.g. 'M03A'
    day     -- date string such as '20190404'
    baseDir -- local root directory handed to both steps

    Runs downloadTDCSbyDay followed by insertMongo with the same three
    arguments. Returns None; the frame produced by insertMongo is discarded.
    """
    job = (trType, day, baseDir)
    downloadTDCSbyDay(*job)
    insertMongo(*job)

In [10]:
# Download and load 24-28 June 2019: all days of M03A first, then M08A.
for tr_type in ('M03A', 'M08A'):
    for day in ('20190624', '20190625', '20190626', '20190627', '20190628'):
        downloadInsert(tr_type, day, baseDir)

../../../data/M03A
../../../data/M03A/M03A_20190624.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190625.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190626.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190627.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190628.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190624.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190625.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190626.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190627.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190628.tar.gz


In [9]:
# Download and load 4-7 April 2019: all days of M03A first, then M08A.
for tr_type in ('M03A', 'M08A'):
    for day in ('20190404', '20190405', '20190406', '20190407'):
        downloadInsert(tr_type, day, baseDir)

../../../data/M03A
../../../data/M03A/M03A_20190404.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190405.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190406.tar.gz
../../../data/M03A
../../../data/M03A/M03A_20190407.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190404.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190405.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190406.tar.gz
../../../data/M08A
../../../data/M08A/M08A_20190407.tar.gz
