In [1]:
# Import libraries
import os
import json
import urllib.error
import shutil

from datetime import datetime, timedelta
from urllib.request import urlopen, urlretrieve

In [2]:
# Data API URL: the package_show endpoint followed by the dataset id
api_url = "https://healthdata.gov/api/3/action/package_show?id="
data_id = "596b5eed-31de-4fd8-a645-249f3f9b19c4"
data_url = api_url + data_id

# Max days between current data's date from URL and local data's date
max_days = 15

# Data paths
data_path = "data/"
data_info_path = "data/info/"
data_backup_path = "data_backup/"
local_data_date_name = "local_data_date"


# Set data name
data_name = "cscpopendata.csv"

# Load Local Data's Date
# The file holds a single timestamp written with `date_format`.
date_format = "%d/%m/%Y - %H:%M"
# `with` guarantees the handle is closed even when parsing fails, and
# strip() tolerates a trailing newline (e.g. added by a text editor), which
# would otherwise make strptime raise "unconverted data remains".
with open(data_path + local_data_date_name, "r") as local_data_file:
    local_data_date = datetime.strptime(local_data_file.read().strip(), date_format)

In [3]:
# Get data from API
# Fetches the package metadata from `data_url` and parses it into `data_json`.
# Raises Exception (with the original error chained as its cause) when the
# request or the JSON parsing fails.
try:
    # The context manager closes the HTTP response even if read() or
    # json.loads() raises.
    with urlopen(data_url) as jsonurl:
        data_json = json.loads(jsonurl.read())

except urllib.error.URLError as err:
    # `from err` keeps the original traceback attached as the explicit cause.
    raise Exception("There is an error with the URL: " + str(err)) from err

except Exception as err:
    raise Exception("There is an unexpected error: " + str(err)) from err

In [4]:
# Function to check if the data request has been successful
def checkSuccessfullRequest(data_json) :
    """Validate the top-level structure of the API response.

    Parameters
    ----------
    data_json : dict
        Parsed JSON response; it must contain the 'help', 'success' and
        'result' keys.

    Returns
    -------
    list
        A two-element list ``[ok, message]``. ``ok`` is False when a required
        key is missing; otherwise it is the value of ``data_json['success']``.
        ``message`` describes the outcome.
    """
    # Keys are checked in a fixed order so the first missing one is reported.
    for key in ('help', 'success', 'result') :
        if key not in data_json :
            return [False, "'" + key + "' key does not exists in JSON data"]

    if data_json['success'] :
        return [data_json['success'], "Request data is successfull"]

    return [data_json['success'], "Request data is not successfull"]

In [5]:
# Validate the API response; `check` is the [ok, message] pair
check = checkSuccessfullRequest(data_json)
request_ok, request_message = check

# Stop the run with the reported message when the request did not succeed
if not request_ok :
    raise Exception(request_message)

In [6]:
def _extract_resource_url(raw_url) :
    """Return the URL embedded in a resource's 'url' field.

    The URL is taken from the first "https:" up to (not including) the next
    "<". When there is no "<" the rest of the string is used; when there is
    no "https:" the scan starts at the beginning of the string.
    """
    start = raw_url.find("https:")
    if start == -1 :
        start = 0

    # find() returns -1 when "<" is absent; slicing with that value would
    # silently drop the last character of the URL, so handle it explicitly.
    end = raw_url.find("<", start)
    if end == -1 :
        return raw_url[start:]
    return raw_url[start:end]


# Transform 'revision_timestamp' string value to date value
date = datetime.strptime(data_json['result'][0]['revision_timestamp'], "%a, %m/%d/%Y - %H:%M")

print("Max days: " + str(max_days))
print("Local Date: " + str(local_data_date))
print("Current Date: " + str(date) + "\n")


# If the days between current data's date and local data's date is higher than 'max_days'
#  then we update local data
if (date - local_data_date) > timedelta(days = max_days) :
    print("Removing " + data_backup_path + "...")
    shutil.rmtree(data_backup_path, ignore_errors=True)

    print("Moving " + data_path + " to " + data_backup_path + "...")
    os.rename(data_path, data_backup_path)

    try :
        print("Creating " + data_path + "...")
        os.mkdir(data_path)

        print("Creating " + data_info_path + "...\n")
        os.mkdir(data_info_path)

        # For each resource
        for resource in data_json['result'][0]['resources'] :
            print("Downloading " + resource['name'] + "...")

            # Get the resource's url
            url = _extract_resource_url(resource['url'])

            print("\tURL: " + url)

            # The csv resource becomes the main data file; every other
            # resource is stored under the info directory.
            if resource['format'] == 'csv' :
                destination = data_path + data_name

            else :
                destination = data_info_path + resource['name'] + "." + resource['format']

            print("\tDestination: " + destination)
            urlretrieve(url, destination)
            print()

        print("Storing new Local Data's Date into " + data_path+local_data_date_name + "...")
        with open(data_path+local_data_date_name, "w") as local_data_file :
            local_data_file.write(datetime.strftime(date, date_format))

    except BaseException :
        # A failed or interrupted refresh would leave a partial data directory
        # without its date file, which breaks the next run. Put the previous
        # data back in place and let the original error propagate.
        print("Update failed, restoring " + data_path + " from " + data_backup_path + "...")
        shutil.rmtree(data_path, ignore_errors=True)
        os.rename(data_backup_path, data_path)
        raise

else :
    print("The download is not necessary")

Max days: 1
Local Date: 2019-02-10 02:46:00
Current Date: 2019-02-10 02:46:00

The download is not necessary
