In [1]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from datetime import datetime
import logging
import traceback
import json
import os
import sys

In [2]:
# Show the kernel's working directory: the relative 'logs/' and '<file_name>.json'
# paths used further down resolve against it.
print(os.getcwd())

C:\Users\Collin\Documents\Juypter Notebook


In [4]:
# Set up basic logging for tracking and troubleshooting

def setup_logging():
    """Configure root logging to a dated file under ``logs/`` plus stdout.

    The log file is named ``logs/api_data_pull_run_<YYYYMMDD>.log``. The
    ``logs`` directory is created if it does not exist, so this function can
    be called on its own.

    Returns:
        logging.Logger: the logger named after this module (``__name__``).
    """
    os.makedirs('logs', exist_ok = True)

    # basicConfig() is a no-op once the root logger has handlers, but the
    # handler objects passed to it would still be constructed. Building a
    # FileHandler opens the file, so repeated calls (e.g. re-running a cell)
    # would leak one file handle each time. Only build them when needed.
    if not logging.getLogger().handlers:
        # Format the date outside the f-string: reusing the same quote
        # character inside an f-string is a syntax error before Python 3.12.
        date_stamp = datetime.now().strftime('%Y%m%d')
        logging.basicConfig(
            level = logging.INFO,
            format = '%(asctime)s - %(levelname)s - %(message)s',
            handlers = [logging.FileHandler(f'logs/api_data_pull_run_{date_stamp}.log'),
                        logging.StreamHandler(sys.stdout)
                       ]
        )

    return logging.getLogger(__name__)

In [5]:
# Set up a retry function in case the original data request times out

def retry_get_data_http_url_timeout(logger, url, retries = 5, backoff_factor = 1, timeout = 10, params = None):
    """GET ``url`` through a session that retries transient failures.

    Retries are handled by urllib3's ``Retry`` mounted on a requests session:
    connection errors, read errors and the status codes in
    ``status_forcelist`` are retried up to ``retries`` times, with the delay
    between attempts governed by ``backoff_factor``.

    On success the decoded JSON is stored in the module-level ``data``
    variable (which ``save_json_file`` reads) and also returned.

    Args:
        logger: logger used for progress and error messages.
        url: http/https URL to fetch.
        retries: maximum number of retries (total, connect and read).
        backoff_factor: backoff factor passed to ``Retry``.
        timeout: timeout passed to ``session.get``.
        params: optional query parameters passed to ``session.get``.

    Returns:
        The decoded JSON payload, or ``None`` if the request failed. Request
        errors are logged, not raised.
    """
    global data

    retry = Retry( #retry rules
        total = retries,
        connect = retries,
        read = retries,
        backoff_factor = backoff_factor,
        status_forcelist = [429, 500, 502, 503, 504], #HTTP status codes worth retrying
        allowed_methods = ['GET'], #only allow data pulls
        raise_on_status = False )

    adapter = HTTPAdapter(max_retries=retry) #use an adapter to handle the HTTP network
    try:
        # The context manager closes the session (and its pooled connections)
        # on every exit path.
        with requests.Session() as session: #use session to automatically apply retry rules
            session.mount('http://', adapter) #only want http/https urls.
            session.mount('https://', adapter)
            logger.info(f'Reattempting to pull data up to {retries} times (backoff factor {backoff_factor}, timeout {timeout})')
            response = session.get(url, params=params, timeout=timeout)
            # raise_on_status=False means an exhausted 429/5xx comes back as a
            # normal response; without this check its body would be stored as data.
            response.raise_for_status()
            payload = response.json()

    except requests.exceptions.HTTPError as e:
        logger.error(f'Failed to pull data')
        logger.error(f'HTTP error: {e}')
        logger.error(traceback.format_exc())
        logger.info(f'Ending API data pull script.')
        return None

    except requests.exceptions.RequestException as e:
        logger.error(f'Failed to pull data')
        logger.error(f'Other error: {e}')
        logger.error(traceback.format_exc())
        logger.info(f'Ending API data pull script.')
        return None

    except ValueError as e:
        # Older requests releases raise a plain ValueError subclass when the
        # body is not valid JSON, which RequestException does not cover.
        logger.error(f'Failed to pull data')
        logger.error(f'Other error: {e}')
        logger.error(traceback.format_exc())
        logger.info(f'Ending API data pull script.')
        return None

    data = payload
    logger.info(f'Data stored in object')
    logger.info(f'Ending API data pull script.')
    return payload
    

In [6]:
# Set up a function that performs a single data pull

def get_data_from_url(logger, url, retries, backoff_factor, params=None, timeout=10):
    """Make one GET request for JSON, falling back to the retry helper on timeout.

    On success the decoded JSON is stored in the module-level ``data``
    variable (which ``save_json_file`` reads) and also returned.

    Args:
        logger: logger used for progress and error messages.
        url: URL to fetch.
        retries: retry count forwarded to ``retry_get_data_http_url_timeout``.
        backoff_factor: backoff factor forwarded to the retry helper.
        params: optional query parameters for the request.
        timeout: timeout for the request; also forwarded to the retry helper.

    Returns:
        The decoded JSON payload on success; whatever the retry helper returns
        when the first attempt times out; ``None`` on any other request error.
        Request errors are logged, not raised.
    """
    global data
    try:
        # Pass the timeout explicitly: without it requests waits indefinitely
        # and the Timeout branch below can never trigger.
        response = requests.get(url, params=params, timeout=timeout) #used requests.get for a single try
        response.raise_for_status()
        logger.info(f'Response code {response}')
        data = response.json()
        logger.info(f'Data successfully pulled')
        logger.info(f'Ending API data pull script.')
        return data

    except requests.exceptions.Timeout as e:
        logger.error(f'Request timed out after {timeout} seconds; retrying with backoff.')
        logger.error(traceback.format_exc())
        # The retry helper is called with a URL only, so fold any query
        # parameters into the URL first; otherwise the retry would request a
        # different resource than the first attempt did.
        retry_url = requests.Request('GET', url, params=params).prepare().url if params else url
        return retry_get_data_http_url_timeout(logger, retry_url, retries, backoff_factor, timeout)

    except requests.exceptions.HTTPError as e:
        logger.error(f'HTTP error: {e}')
        logger.error(traceback.format_exc())
        logger.info(f'Ending API data pull script.')
        return None

    except requests.exceptions.RequestException as e:
        logger.error(f'Other error: {e}')
        logger.error(traceback.format_exc())
        logger.info(f'Ending API data pull script.')
        return None


In [30]:
def save_json_file(logger, file_name, payload=None):
    """Write the pulled payload to ``<file_name>.json``.

    Args:
        logger: logger used to report that there was nothing to save.
        file_name: output path without the ``.json`` extension.
        payload: object to serialise. When omitted, the module-level ``data``
            variable set by the pull functions is used.

    Nothing is written when the payload is missing or empty; an info message
    is logged instead.
    """
    if payload is None:
        # Look the global up defensively: it does not exist if no pull has
        # succeeded yet, and a bare reference would raise NameError.
        payload = globals().get('data')

    if not payload:
        logger.info(f'url did not contain text')
        return

    with open(f'{file_name}.json', 'w') as f:
        json.dump(payload, f)

In [31]:
def main(url, file_name, params, retries, backoff_factor, timeout):
    """Run one API pull end to end: set up logging, fetch, save.

    Args:
        url: URL to fetch JSON from.
        file_name: output path without the ``.json`` extension.
        params: optional query parameters for the request.
        retries: retry count used if the first attempt times out.
        backoff_factor: backoff factor used by the retry helper.
        timeout: request timeout.
    """
    # Reset the shared module-level result. A plain `data = {}` here would
    # only create a local, leaving a previous run's payload in place (or no
    # `data` at all on a fresh kernel) for save_json_file to pick up.
    global data
    data = {}

    os.makedirs('logs', exist_ok = True)
    os.makedirs('data/api_data_pull/logs', exist_ok = True)
    logger = setup_logging()
    logger.info(f'Starting API data pull...')

    # Arguments follow get_data_from_url's signature
    # (logger, url, retries, backoff_factor, params, timeout).
    # No Timeout handler is needed here: get_data_from_url catches timeouts
    # itself and hands off to the retry helper, which logs its own failures.
    get_data_from_url(logger, url, retries, backoff_factor, params, timeout)

    save_json_file(logger, file_name)
    logger.info(f'Session finished')
    

In [35]:
# Run configuration for the API pull.
url = 'https://baconipsum.com/api/?type=meat-and-filler'  # endpoint to fetch; already carries its query string
file_name = 'bacon_test_api_pull'  # output file stem; save_json_file appends '.json'
params = None  # optional extra query parameters for the request
retries = 3  # retry count used by the retry helper
backoff_factor = 1  # backoff factor handed to urllib3's Retry
timeout = 10  # request timeout

In [36]:
# Pass the configured `params` rather than a literal None so that editing the
# configuration actually changes the request.
main(url, file_name, params, retries, backoff_factor, timeout)

2026-01-30 15:42:30,646 - INFO - Starting API data pull...
2026-01-30 15:42:31,137 - INFO - Response code <Response [200]>
2026-01-30 15:42:31,140 - INFO - Data successfully pulled
2026-01-30 15:42:31,143 - INFO - Ending API data pull script.
2026-01-30 15:42:31,150 - INFO - Session finished
