In [None]:
import time
from datetime import datetime, timedelta

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# USAspending bulk-download endpoint; request bodies are sent as JSON.
endpoint = "https://api.usaspending.gov/api/v2/bulk_download/awards/"
headers = {"Content-Type": "application/json"}

# Fiscal year 2023: 1 October 2022 through 30 September 2023.
start_date = datetime(year=2022, month=10, day=1)
end_date = datetime(year=2023, month=9, day=30)

# Retry policy shared by every request: total=5 with backoff_factor=10,
# triggered by HTTP 500/502/503/504 on both POST and GET.
retry = Retry(
    total=5,
    backoff_factor=10,
    status_forcelist=[500, 502, 503, 504],
    allowed_methods=["POST", "GET"],
)
adapter = HTTPAdapter(max_retries=retry)

# A single session carries the retry adapter for both URL schemes.
session = requests.Session()
session.mount("http://", adapter)
session.mount("https://", adapter)

# Function to generate month intervals
def generate_month_intervals(start_date, end_date):
    """Split the inclusive range [start_date, end_date] into calendar-month windows.

    Args:
        start_date: ``datetime`` marking the first day to cover.
        end_date: ``datetime`` marking the last day to cover (inclusive).

    Returns:
        A list of ``(window_start, window_end)`` tuples in chronological order.
        The first window starts at ``start_date``, every later window starts on
        the first day of a month, and the last window ends at ``end_date``.
        The list is empty when ``start_date`` is after ``end_date``.
    """
    intervals = []
    current_date = start_date
    # `<=` keeps the final window when end_date is itself the first day of a
    # month (or when the whole range is a single day).
    while current_date <= end_date:
        # Anchor on day 1 before adding 32 days: from the 1st, 32 days always
        # lands in the following month, whereas from a late day (e.g. Jan 31)
        # it would jump over a short month entirely.
        next_date = (current_date.replace(day=1) + timedelta(days=32)).replace(day=1)
        intervals.append((current_date, min(next_date - timedelta(days=1), end_date)))
        current_date = next_date
    return intervals

# Monthly (first day, last day) windows covering the configured date range.
intervals = generate_month_intervals(start_date=start_date, end_date=end_date)

# Function to get the download URL for a given date range
def get_download_url(start_date, end_date, poll_interval=10, max_wait=None, request_timeout=60):
    """Request a bulk award download for a date range and wait for its file URL.

    POSTs a download request (prime award types A-D, filtered on
    ``action_date``, CSV format) to ``endpoint`` and then polls the returned
    ``status_url`` until the job reports ``finished`` or ``failed``.

    Args:
        start_date: First day of the range; sent as ``YYYY-MM-DD``.
        end_date: Last day of the range; sent as ``YYYY-MM-DD``.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Optional upper bound, in seconds, on the total polling time.
            ``None`` (the default) keeps polling until the job finishes or fails.
        request_timeout: Timeout in seconds passed to every HTTP request so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The ``file_url`` string on success, or ``None`` (after printing the
        reason) when the request or the download job fails.
    """
    payload = {
        "filters": {
            "prime_award_types": ["A", "B", "C", "D"],
            "date_type": "action_date",
            "date_range": {
                "start_date": start_date.strftime('%Y-%m-%d'),
                "end_date": end_date.strftime('%Y-%m-%d')
            }
        },
        "file_format": "csv"
    }

    # The session can raise instead of returning a response (connection
    # errors, timeouts, retries exhausted); report that as a failed month
    # rather than letting it abort the caller's loop.
    try:
        response = session.post(endpoint, headers=headers, json=payload, timeout=request_timeout)
    except requests.RequestException as error:
        print(f"Failed to create download request: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to create download request. Status code: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    try:
        status_url = response.json().get("status_url")
    except ValueError as error:  # body was not valid JSON
        print(f"Failed to create download request: {error}")
        return None

    if not status_url:
        print("No status URL found.")
        return None
    print(f"Status URL: {status_url}")

    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        # Only network and decoding errors are treated as a failed download;
        # programming errors (e.g. a NameError) are allowed to surface.
        try:
            status_response = session.get(status_url, timeout=request_timeout)
        except requests.RequestException as error:
            print(f"Failed to check download status: {error}")
            return None

        if status_response.status_code != 200:
            print(f"Failed to check download status. Status code: {status_response.status_code}")
            return None

        try:
            status_data = status_response.json()
        except ValueError as error:
            print(f"Failed to check download status: {error}")
            return None

        job_status = status_data.get("status")
        file_url = status_data.get("file_url")
        if job_status == "finished" and file_url:
            return file_url
        if job_status == "failed":
            print("Download failed.")
            return None
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Gave up waiting for the download after {max_wait} seconds.")
            return None

        print(f"Download in progress, waiting for {poll_interval} seconds...")
        time.sleep(poll_interval)

# Request one download per monthly window of fiscal year 2023 and report
# either the resulting file URL or the failure.
for start, end in intervals:
    month_label = start.strftime('%B %Y')
    print(f"Generating download URL for {month_label}...")
    file_url = get_download_url(start, end)
    if not file_url:
        print(f"Failed to generate file URL for {month_label}.")
        continue
    print(f"File URL for {month_label}: {file_url}")

In [1]:
import requests
from datetime import datetime, timedelta
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

# USAspending bulk-download endpoint; request bodies are sent as JSON.
endpoint = "https://api.usaspending.gov/api/v2/bulk_download/awards/"
headers = {"Content-Type": "application/json"}

# (start, end) date pairs to download. The first pair begins on 1 March 2019
# and ends on 30 September 2019; the remaining pairs each run from 1 October
# to 30 September for the years ending 2020 through 2023.
fiscal_years = [(datetime(2019, 3, 1), datetime(2019, 9, 30))] + [
    (datetime(fy - 1, 10, 1), datetime(fy, 9, 30)) for fy in range(2020, 2024)
]

# Values sent as the "recipient_type_names" filter of each request.
recipient_types = [
    "Historically Underutilized Business (HUBZone) Firm",
    "Small Business",
]

# Retry policy shared by every request: total=5 with backoff_factor=10,
# triggered by HTTP 500/502/503/504 on both POST and GET.
retry = Retry(
    total=5,
    backoff_factor=10,
    status_forcelist=[500, 502, 503, 504],
    allowed_methods=["POST", "GET"],
)
adapter = HTTPAdapter(max_retries=retry)

# A single session carries the retry adapter for both URL schemes.
session = requests.Session()
session.mount("http://", adapter)
session.mount("https://", adapter)

# Function to generate month intervals
def generate_month_intervals(start_date, end_date):
    """Split the inclusive range [start_date, end_date] into calendar-month windows.

    Args:
        start_date: ``datetime`` marking the first day to cover.
        end_date: ``datetime`` marking the last day to cover (inclusive).

    Returns:
        A list of ``(window_start, window_end)`` tuples in chronological order.
        The first window starts at ``start_date``, every later window starts on
        the first day of a month, and the last window ends at ``end_date``.
        The list is empty when ``start_date`` is after ``end_date``.
    """
    intervals = []
    current_date = start_date
    # `<=` keeps the final window when end_date is itself the first day of a
    # month (or when the whole range is a single day).
    while current_date <= end_date:
        # Anchor on day 1 before adding 32 days: from the 1st, 32 days always
        # lands in the following month, whereas from a late day (e.g. Jan 31)
        # it would jump over a short month entirely.
        next_date = (current_date.replace(day=1) + timedelta(days=32)).replace(day=1)
        intervals.append((current_date, min(next_date - timedelta(days=1), end_date)))
        current_date = next_date
    return intervals

# Function to get the download URL for a given date range
def get_download_url(start_date, end_date, poll_interval=10, max_wait=None, request_timeout=60):
    """Request a bulk award download for a date range and wait for its file URL.

    POSTs a download request (prime award types A-D, filtered on
    ``action_date`` and on the module-level ``recipient_types``, CSV format)
    to ``endpoint`` and then polls the returned ``status_url`` until the job
    reports ``finished`` or ``failed``.

    Args:
        start_date: First day of the range; sent as ``YYYY-MM-DD``.
        end_date: Last day of the range; sent as ``YYYY-MM-DD``.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Optional upper bound, in seconds, on the total polling time.
            ``None`` (the default) keeps polling until the job finishes or fails.
        request_timeout: Timeout in seconds passed to every HTTP request so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The ``file_url`` string on success, or ``None`` (after printing the
        reason) when the request or the download job fails.
    """
    payload = {
        "filters": {
            "prime_award_types": ["A", "B", "C", "D"],  # All contract types
            "date_type": "action_date",
            "date_range": {
                "start_date": start_date.strftime('%Y-%m-%d'),
                "end_date": end_date.strftime('%Y-%m-%d')
            },
            "recipient_type_names": recipient_types
        },
        "file_format": "csv"
    }

    # The session can raise instead of returning a response (connection
    # errors, timeouts, retries exhausted); report that as a failed month
    # rather than letting it abort the caller's loop.
    try:
        response = session.post(endpoint, headers=headers, json=payload, timeout=request_timeout)
    except requests.RequestException as error:
        print(f"Failed to create download request: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to create download request. Status code: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    try:
        status_url = response.json().get("status_url")
    except ValueError as error:  # body was not valid JSON
        print(f"Failed to create download request: {error}")
        return None

    if not status_url:
        print("No status URL found.")
        return None
    print(f"Status URL: {status_url}")

    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        # Only network and decoding errors are treated as a failed download;
        # programming errors are allowed to surface.
        try:
            status_response = session.get(status_url, timeout=request_timeout)
        except requests.RequestException as error:
            print(f"Failed to check download status: {error}")
            return None

        if status_response.status_code != 200:
            print(f"Failed to check download status. Status code: {status_response.status_code}")
            return None

        try:
            status_data = status_response.json()
        except ValueError as error:
            print(f"Failed to check download status: {error}")
            return None

        job_status = status_data.get("status")
        file_url = status_data.get("file_url")
        if job_status == "finished" and file_url:
            return file_url
        if job_status == "failed":
            print("Download failed.")
            return None
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Gave up waiting for the download after {max_wait} seconds.")
            return None

        print(f"Download in progress, waiting for {poll_interval} seconds...")
        time.sleep(poll_interval)

# Walk every monthly window of every configured date range and record a
# (month label, file URL) pair; a failed month keeps a placeholder message
# in place of the URL.
download_links = []
for start_fy, end_fy in fiscal_years:
    monthly_intervals = generate_month_intervals(start_fy, end_fy)
    for start, end in monthly_intervals:
        month_label = start.strftime('%B %Y')
        print(f"Generating download URL for {month_label}...")
        file_url = get_download_url(start, end)
        outcome = file_url if file_url else "Failed to generate file URL"
        download_links.append((month_label, outcome))

Generating download URL for March 2019...
Status URL: https://api.usaspending.gov/api/v2/download/status?file_name=All_PrimeTransactions_2024-06-08_H16M38S49127843.zip
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in prog

In [2]:

# Summary: one "<month>: <url or failure note>" line per collected entry.
print("\nDownload links for each month in fiscal years 2019 to 2023:")
for month, url in download_links:
    print(month, url, sep=": ")


Download links for each month in fiscal years 2019 to 2023:
March 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H16M38S49127843.zip
April 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H17M11S29629810.zip
May 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H17M36S31065315.zip
June 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H18M00S26676928.zip
July 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H18M24S39104466.zip
August 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H18M54S36255885.zip
September 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H19M16S27115174.zip
October 2019: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-08_H19M40S21144919.zip
November 2019: h

In [1]:
import requests
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

# USAspending bulk-download endpoint; request bodies are sent as JSON.
endpoint = "https://api.usaspending.gov/api/v2/bulk_download/awards/"
headers = {"Content-Type": "application/json"}

# Months to request again, given as (year, month, last day of that month):
# September 2020, April 2022 and May 2022.
months_to_retry = [
    (datetime(year, month, 1), datetime(year, month, last_day))
    for year, month, last_day in [(2020, 9, 30), (2022, 4, 30), (2022, 5, 31)]
]

# Values sent as the "recipient_type_names" filter of each request.
recipient_types = [
    "Historically Underutilized Business (HUBZone) Firm",
    "Small Business",
]

# Retry policy shared by every request: total=5 with backoff_factor=10,
# triggered by HTTP 500/502/503/504 on both POST and GET.
retry = Retry(
    total=5,
    backoff_factor=10,
    status_forcelist=[500, 502, 503, 504],
    allowed_methods=["POST", "GET"],
)
adapter = HTTPAdapter(max_retries=retry)

# A single session carries the retry adapter for both URL schemes.
session = requests.Session()
session.mount("http://", adapter)
session.mount("https://", adapter)

# Function to get the download URL for a given date range
def get_download_url(start_date, end_date, poll_interval=10, max_wait=None, request_timeout=60):
    """Request a bulk award download for a date range and wait for its file URL.

    POSTs a download request (prime award types A-D, filtered on
    ``action_date`` and on the module-level ``recipient_types``, CSV format)
    to ``endpoint`` and then polls the returned ``status_url`` until the job
    reports ``finished`` or ``failed``.

    Args:
        start_date: First day of the range; sent as ``YYYY-MM-DD``.
        end_date: Last day of the range; sent as ``YYYY-MM-DD``.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Optional upper bound, in seconds, on the total polling time.
            ``None`` (the default) keeps polling until the job finishes or fails.
        request_timeout: Timeout in seconds passed to every HTTP request so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The ``file_url`` string on success, or ``None`` (after printing the
        reason) when the request or the download job fails.
    """
    payload = {
        "filters": {
            "prime_award_types": ["A", "B", "C", "D"],  # All contract types
            "date_type": "action_date",
            "date_range": {
                "start_date": start_date.strftime('%Y-%m-%d'),
                "end_date": end_date.strftime('%Y-%m-%d')
            },
            "recipient_type_names": recipient_types
        },
        "file_format": "csv"
    }

    # The session can raise instead of returning a response (connection
    # errors, timeouts, retries exhausted); report that as a failed month
    # rather than letting it abort the caller's loop.
    try:
        response = session.post(endpoint, headers=headers, json=payload, timeout=request_timeout)
    except requests.RequestException as error:
        print(f"Failed to create download request: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to create download request. Status code: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    try:
        status_url = response.json().get("status_url")
    except ValueError as error:  # body was not valid JSON
        print(f"Failed to create download request: {error}")
        return None

    if not status_url:
        print("No status URL found.")
        return None
    print(f"Status URL: {status_url}")

    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        # Only network and decoding errors are treated as a failed download;
        # programming errors are allowed to surface.
        try:
            status_response = session.get(status_url, timeout=request_timeout)
        except requests.RequestException as error:
            print(f"Failed to check download status: {error}")
            return None

        if status_response.status_code != 200:
            print(f"Failed to check download status. Status code: {status_response.status_code}")
            return None

        try:
            status_data = status_response.json()
        except ValueError as error:
            print(f"Failed to check download status: {error}")
            return None

        job_status = status_data.get("status")
        file_url = status_data.get("file_url")
        if job_status == "finished" and file_url:
            return file_url
        if job_status == "failed":
            print("Download failed.")
            return None
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Gave up waiting for the download after {max_wait} seconds.")
            return None

        print(f"Download in progress, waiting for {poll_interval} seconds...")
        time.sleep(poll_interval)

# Re-request each listed month and record a (month label, file URL) pair;
# a failed month keeps a placeholder message in place of the URL.
retry_download_links = []
for start, end in months_to_retry:
    month_label = start.strftime('%B %Y')
    print(f"Generating download URL for {month_label}...")
    file_url = get_download_url(start, end)
    outcome = file_url if file_url else "Failed to generate file URL"
    retry_download_links.append((month_label, outcome))


Generating download URL for September 2020...
Status URL: https://api.usaspending.gov/api/v2/download/status?file_name=All_PrimeTransactions_2024-06-10_H16M35S36889999.zip
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in progress, waiting for 10 seconds...
Download in 

In [2]:
# Summary: one "<month>: <url or failure note>" line per retried month.
print("\nRetry download links:")
for month, url in retry_download_links:
    print(month, url, sep=": ")


Retry download links:
September 2020: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-10_H16M35S36889999.zip
April 2022: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-10_H17M14S36345874.zip
May 2022: https://files.usaspending.gov/generated_downloads/All_PrimeTransactions_2024-06-10_H17M47S21986014.zip
