In [2]:
import pandas as pd

# Never truncate columns when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', None)

# Read the two award workbooks; paths are relative to the working directory.
# (Presumably one export is filtered by NAICS code and one by PSC - confirm.)
naics_primes, psc_primes = (
    pd.read_excel(workbook)
    for workbook in ('NAICS_Data.xlsx', 'PSC_Data.xlsx')
)

# Stack both exports and keep a single copy of rows that are identical in
# every column.
primes = (
    pd.concat([naics_primes, psc_primes])
    .drop_duplicates()
)

### Top agencies from USASpending

In [16]:
import duckdb
import pandas as pd

# Clean the data: coerce the obligation column to numbers (values that cannot
# be parsed become NaN) and drop the rows left without an amount.
# A new frame is built with .assign() rather than assigning a column in place:
# `primes` is already the result of a row filter, and in-place column
# assignment on such a frame can raise pandas' SettingWithCopyWarning. The
# chained form is also safe to re-run.
primes = (
    primes
    .assign(
        total_dollars_obligated=lambda frame: pd.to_numeric(
            frame['total_dollars_obligated'], errors='coerce'
        )
    )
    .dropna(subset=['total_dollars_obligated'])
)

# Total obligations and number of rows per awarding agency, largest first.
# DuckDB resolves `primes` in the FROM clause to the pandas DataFrame above.
query = """
SELECT
    awarding_agency_name,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions
FROM primes
WHERE awarding_agency_name IS NOT NULL
GROUP BY awarding_agency_name
ORDER BY total_obligation DESC, number_of_transactions DESC
"""

# Run the query, export the ranking, and display it.
top_agencies = duckdb.query(query).df()
top_agencies.to_excel('top_agencies.xlsx', index=False)
top_agencies

Unnamed: 0,awarding_agency_name,total_obligation,number_of_transactions
0,Department of Defense,1238222000000.0,32432
1,National Aeronautics and Space Administration,136696100000.0,943
2,Department of Transportation,127650100000.0,3772
3,Agency for International Development,103342200000.0,543
4,General Services Administration,61544270000.0,1370
5,Department of Energy,45730570000.0,1104
6,Department of the Interior,16200280000.0,1508
7,Department of State,15224460000.0,555
8,Department of Homeland Security,11499740000.0,887
9,Department of the Treasury,7723752000.0,157


### How much did your top agencies spend on your products/services over the past fiscal year?

In [4]:
# Names of the three agencies at the top of `top_agencies`.
top_agency_names = top_agencies['awarding_agency_name'][:3].tolist()

# Render the names as a comma-separated list of SQL string literals for the
# IN (...) filters in this and later cells. Single quotes inside a name are
# doubled so an apostrophe cannot terminate the literal and break the query.
list_top_agencies = ', '.join(
    "'" + str(agency).replace("'", "''") + "'" for agency in top_agency_names
)

# How much are the top 3 agencies spending on each NAICS code
query = f"""
SELECT
    awarding_agency_name,
    naics_code,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions
FROM primes
WHERE
    awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, naics_code
ORDER BY awarding_agency_name DESC, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_naics_spending = duckdb.query(query).df()
top_agencies_naics_spending.to_excel('top_agencies_naics_spending.xlsx', index=False)
top_agencies_naics_spending

Unnamed: 0,awarding_agency_name,naics_code,total_obligation,number_of_transactions
0,Department of Veterans Affairs,624221,219039900.0,401
1,Department of Veterans Affairs,623220,150765900.0,276
2,Department of Veterans Affairs,624120,43301680.0,4
3,Department of Veterans Affairs,541611,26630220.0,9
4,Department of Veterans Affairs,623990,7317921.0,5
5,Department of Veterans Affairs,561990,2161465.0,1
6,Department of Veterans Affairs,713940,1022960.0,16
7,Department of Veterans Affairs,621330,657473.8,3
8,Department of Veterans Affairs,624410,317250.0,2
9,Department of Veterans Affairs,812990,187926.9,5


In [None]:
# Obligations and row counts for the agencies in `list_top_agencies`,
# restricted to product/service code R401.
query = f"""
SELECT
    awarding_agency_name,
    product_or_service_code,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions
FROM primes
WHERE 
    awarding_agency_name IN ({list_top_agencies})
    AND product_or_service_code = 'R401'
GROUP BY awarding_agency_name, product_or_service_code
ORDER BY awarding_agency_name DESC, total_obligation DESC, number_of_transactions DESC
"""

# Execute, materialise as a DataFrame, export, then display.
psc_relation = duckdb.query(query)
top_agencies_psc_spending = psc_relation.df()
top_agencies_psc_spending.to_excel(
    'top_agencies_psc_spending.xlsx',
    index=False,
)
top_agencies_psc_spending

### What percentage of their awards went to small businesses?

In [None]:
# Fiscal-year 2024 obligations and row counts for the agencies in
# `list_top_agencies`, split by the contracting officer's business-size
# determination. The result is consumed by the percentage query that follows.
query = f"""
SELECT
    awarding_agency_name,
    contracting_officers_determination_of_business_size,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions
FROM primes
WHERE 
    awarding_agency_name IN ({list_top_agencies}) 
    AND action_date_fiscal_year = '2024'
GROUP BY awarding_agency_name, contracting_officers_determination_of_business_size 
ORDER BY total_obligation DESC
"""

# Execute and materialise as a DataFrame (no export for this intermediate).
sb_relation = duckdb.query(query)
top_agencies_sb_spending = sb_relation.df()
top_agencies_sb_spending

In [None]:
# Share of each agency's obligations by business-size determination.
# The denominator is the per-agency sum over top_agencies_sb_spending itself,
# so numerator and denominator cover the same fiscal-year-filtered rows and one
# agency's percentages add up to 100. Using top_agencies.total_obligation (all
# rows in `primes`) as the denominator would understate every share whenever
# `primes` holds more than that one fiscal year.
query = """
SELECT
    awarding_agency_name,
    contracting_officers_determination_of_business_size,
    total_obligation AS size_obligation,
    SUM(total_obligation) OVER (PARTITION BY awarding_agency_name) AS agency_total_obligation,
    ROUND(
        total_obligation * 100.0
        / SUM(total_obligation) OVER (PARTITION BY awarding_agency_name),
        2
    ) AS percentage_of_total
FROM top_agencies_sb_spending
ORDER BY awarding_agency_name, contracting_officers_determination_of_business_size DESC
"""

top_agencies_sb_percentage = duckdb.query(query).df()
top_agencies_sb_percentage.to_excel('top_agencies_small_business_spending.xlsx', index=False)
top_agencies_sb_percentage

### Do your top agencies utilize small business set-asides, and do your certifications match?

In [None]:
# Obligations per set-aside type for the agencies in `list_top_agencies`, with
# each type's share of that agency's total.
# The share uses a window sum over the grouped rows (per agency). A scalar
# subquery of the form "... FROM primes WHERE awarding_agency_name =
# primes.awarding_agency_name" is not correlated with the outer row - both
# sides bind to the inner `primes` - so it would divide every row by one global
# total instead of the agency's own total.
query = f"""
SELECT
    awarding_agency_name,
    type_of_set_aside,
    SUM(total_dollars_obligated) AS total_obligation,
    ROUND(
        SUM(total_dollars_obligated) * 100.0
        / SUM(SUM(total_dollars_obligated)) OVER (PARTITION BY awarding_agency_name),
        2
    ) AS percentage_of_total
FROM primes
WHERE
    awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, type_of_set_aside
ORDER BY total_obligation DESC
"""

top_agencies_set_aside = duckdb.query(query).df()
top_agencies_set_aside.to_excel('top_agencies_set_aside_spending.xlsx', index=False)
top_agencies_set_aside

### Who are your top competitors that are winning those awards? How does your company compare?

In [None]:
import duckdb
# One row per recipient UEI: summed obligations and row count, plus one
# arbitrary (ANY_VALUE) name, business size and organizational type taken from
# that UEI's rows. Sorted by obligations, then by row count, descending.
query = """
SELECT
    ANY_VALUE(recipient_name) AS recipient_name,
    recipient_uei,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions,
    ANY_VALUE(contracting_officers_determination_of_business_size) AS business_size,
    ANY_VALUE(organizational_type) AS organizational_type
FROM primes
GROUP BY recipient_uei
ORDER BY total_obligation DESC, number_of_transactions DESC
"""

# Execute, materialise as a DataFrame, export, then display.
recipients_relation = duckdb.query(query)
top_recipients = recipients_relation.df()
top_recipients.to_excel(
    'top_recipients.xlsx',
    index=False,
)
top_recipients

Unnamed: 0,Recipient Name,Recipient UEI,Total Spending,Number of Transactions,Business Size,Organizational Type
0,THE AEROSPACE CORPORATION,YA8LJBJCND19,1.230310e+11,67,OTHER THAN SMALL BUSINESS,CORPORATE TAX EXEMPT
1,RANGE GENERATION NEXT LLC,E6J8JNNJ6AA5,1.175822e+11,90,OTHER THAN SMALL BUSINESS,PARTNERSHIP
2,"CHEMONICS INTERNATIONAL, INC.",DSUSFJFR9A33,8.693789e+10,71,OTHER THAN SMALL BUSINESS,OTHER
3,THE MITRE CORPORATION,DMHDNDCPWUD1,6.906539e+10,127,OTHER THAN SMALL BUSINESS,CORPORATE TAX EXEMPT
4,SCIENCE APPLICATIONS INTERNATIONAL CORPORATION,MMLKPW9JLX64,5.851599e+10,1394,OTHER THAN SMALL BUSINESS,CORPORATE NOT TAX EXEMPT
...,...,...,...,...,...,...
5530,PROVISION CONSTRUCTION INC,G3L1S7V9N977,0.000000e+00,1,SMALL BUSINESS,CORPORATE NOT TAX EXEMPT
5531,COLLINSON INC,DZ3WKATLV6U5,0.000000e+00,1,SMALL BUSINESS,CORPORATE NOT TAX EXEMPT
5532,"AMPLITUDE LASER, INC.",KZWFXYRNAL66,0.000000e+00,1,SMALL BUSINESS,CORPORATE NOT TAX EXEMPT
5533,"REAGENT WORLD, INC.",WMD6CT8YXG49,0.000000e+00,1,SMALL BUSINESS,CORPORATE NOT TAX EXEMPT


### Are there sub-contracting opportunities available from these competitors? 

In [None]:
# UEIs of the prime recipients found above. Missing UEIs are dropped so they
# are not rendered as the literal text 'None'/'nan' in the SQL filter.
prime_ueis = top_recipients['recipient_uei'].dropna().astype(str).tolist()

# Comma-separated SQL string literals; embedded single quotes are doubled so a
# value cannot terminate its own literal.
primes_awardees = ', '.join(
    "'" + uei.replace("'", "''") + "'" for uei in prime_ueis
)

# Sub-award activity per (prime, subawardee) pair for those primes.
# Every selected column is either a grouping key or wrapped in an aggregate:
# selecting bare name columns while grouping only by subawardee_uei is rejected
# by DuckDB's binder.
# NOTE(review): `subs` is not created in the cells visible here - it must be
# loaded as a DataFrame before this cell runs.
# NOTE(review): the summed column is the prime award's outlayed amount but is
# labelled total_obligation - confirm this is the intended measure.
query = f"""
SELECT
    ANY_VALUE(prime_awardee_name) AS prime_awardee_name,
    prime_awardee_uei,
    ANY_VALUE(subawardee_name) AS subawardee_name,
    subawardee_uei,
    SUM(prime_award_total_outlayed_amount) AS total_obligation,
    COUNT(*) AS number_of_transactions
FROM subs
WHERE prime_awardee_uei IN ({primes_awardees})
GROUP BY prime_awardee_uei, subawardee_uei
ORDER BY total_obligation DESC, number_of_transactions DESC
"""

sub_opportunities = duckdb.query(query).df()
sub_opportunities

### How often does the agency utilize Simplified Acquisition Procedures (SAP)?

In [None]:
# Obligations, row counts and per-agency share of obligations by the
# simplified-procedures flag, for the agencies in `list_top_agencies` (the same
# computed list the earlier "top agencies" cells filter on, rather than a
# hard-coded set of names).
# 'None' is single-quoted on purpose: in DuckDB a double-quoted "None" is an
# identifier (a column reference), not a string. Rows where the flag is NULL
# are excluded as well, because a comparison with NULL is never true.
query = f"""
SELECT
    awarding_agency_name,
    simplified_procedures_for_certain_commercial_items,
    SUM(total_dollars_obligated) AS total_obligation,
    COUNT(*) AS number_of_transactions,
    ROUND(SUM(total_dollars_obligated) * 100.0 / SUM(SUM(total_dollars_obligated)) OVER (PARTITION BY awarding_agency_name), 2) AS percentage_of_obligation
FROM primes
WHERE
    awarding_agency_name IN ({list_top_agencies})
    AND simplified_procedures_for_certain_commercial_items != 'None'
GROUP BY awarding_agency_name, simplified_procedures_for_certain_commercial_items
"""

top_agencies_sap = duckdb.query(query).df()
top_agencies_sap.to_excel('top_agencies_sap_spending.xlsx', index=False)
top_agencies_sap

### Top agencies' preferred buying method

In [None]:
# Row counts and obligations per award type for the agencies in
# `list_top_agencies`. The filter uses the computed top-agency list so this
# cell reports on the same agencies as the earlier cells instead of a
# hard-coded set of names that can drift from the data.
query = f"""
SELECT
    awarding_agency_name,
    award_type,
    COUNT(*) AS number_of_transactions,
    SUM(total_dollars_obligated) AS total_obligation
FROM primes
WHERE awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, award_type
ORDER BY awarding_agency_name, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_award_type = duckdb.query(query).df()
top_agencies_award_type.to_excel('top_agencies_award_type.xlsx', index=False)
top_agencies_award_type

In [None]:
# Row counts and obligations per IDV type for the agencies in
# `list_top_agencies`. The filter uses the computed top-agency list so this
# cell reports on the same agencies as the earlier cells instead of a
# hard-coded set of names that can drift from the data.
query = f"""
SELECT
    awarding_agency_name,
    idv_type,
    COUNT(*) AS number_of_transactions,
    SUM(total_dollars_obligated) AS total_obligation
FROM primes
WHERE awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, idv_type
ORDER BY awarding_agency_name, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_idv_type = duckdb.query(query).df()
top_agencies_idv_type.to_excel('top_agencies_idv_type.xlsx', index=False)
top_agencies_idv_type

In [None]:
# Row counts and obligations per `type_of_idc` value for the agencies in
# `list_top_agencies`. The filter uses the computed top-agency list so this
# cell reports on the same agencies as the earlier cells instead of a
# hard-coded set of names that can drift from the data.
query = f"""
SELECT
    awarding_agency_name,
    type_of_idc,
    COUNT(*) AS number_of_transactions,
    SUM(total_dollars_obligated) AS total_obligation
FROM primes
WHERE awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, type_of_idc
ORDER BY awarding_agency_name, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_type_of_idc = duckdb.query(query).df()
top_agencies_type_of_idc.to_excel('top_agencies_type_of_idc.xlsx', index=False)
top_agencies_type_of_idc

In [None]:
# Row counts and obligations per contract-pricing type for the agencies in
# `list_top_agencies`. The filter uses the computed top-agency list so this
# cell reports on the same agencies as the earlier cells instead of a
# hard-coded set of names that can drift from the data.
query = f"""
SELECT
    awarding_agency_name,
    type_of_contract_pricing,
    COUNT(*) AS number_of_transactions,
    SUM(total_dollars_obligated) AS total_obligation
FROM primes
WHERE awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, type_of_contract_pricing
ORDER BY awarding_agency_name, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_type_of_contract_pricing = duckdb.query(query).df()
top_agencies_type_of_contract_pricing.to_excel('top_agencies_type_of_contract_pricing.xlsx', index=False)
top_agencies_type_of_contract_pricing

In [None]:
# Row counts and obligations by the purchase-card-as-payment-method flag for
# the agencies in `list_top_agencies`. The filter uses the computed top-agency
# list so this cell reports on the same agencies as the earlier cells instead
# of a hard-coded set of names that can drift from the data.
query = f"""
SELECT
    awarding_agency_name,
    purchase_card_as_payment_method,
    COUNT(*) AS number_of_transactions,
    SUM(total_dollars_obligated) AS total_obligation
FROM primes
WHERE awarding_agency_name IN ({list_top_agencies})
GROUP BY awarding_agency_name, purchase_card_as_payment_method
ORDER BY awarding_agency_name, total_obligation DESC, number_of_transactions DESC
"""

top_agencies_purchase_card_as_payment_method = duckdb.query(query).df()
top_agencies_purchase_card_as_payment_method.to_excel('top_agencies_purchase_card_as_payment_method.xlsx', index=False)
top_agencies_purchase_card_as_payment_method