# USAJobs API Investigation: Missing Control Numbers

This notebook investigates why certain control numbers (844807200 and 845022800) may or may not appear in the current USAJobs API results for occupational series 0560.

## Setup and Imports

In [None]:
import requests
import pandas as pd
import os
import json
import time
from datetime import datetime
from dotenv import load_dotenv

# Pull any settings from a local .env file into the process environment
load_dotenv()

# The API token must be present before any request is attempted
api_key = os.environ.get('USAJOBS_API_TOKEN')
if not api_key:
    raise ValueError('API key required. Set USAJOBS_API_TOKEN environment variable')

# Report only whether a key was found, never the key itself
key_status = 'Yes' if api_key else 'No'
run_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print(f"API key loaded: {key_status}")
print(f"Current date: {run_timestamp}")

## 1. Define API Connection and Helper Functions

In [None]:
# API configuration: the host name is used both in the URL and in the Host header
API_HOST = 'data.usajobs.gov'
BASE_URL = f'https://{API_HOST}/api/Search'

# Headers sent with every request to the search endpoint.
# NOTE(review): the USAJobs developer docs also describe a User-Agent header
# (the email address tied to the API key) - confirm whether it must be set here.
headers = {'Host': API_HOST, 'Authorization-Key': api_key}

def flatten_job(job_item):
    """Extract key fields from a single job posting search result.

    Args:
        job_item (dict): One search result item. Posting details are read from
            its ``MatchedObjectDescriptor`` key; grade and service type come
            from ``UserArea -> Details`` inside that descriptor.

    Returns:
        dict: A flat record with the keys ``usajobs_control_number``,
        ``position_id``, ``position_title``, ``department_name``,
        ``organization_name``, ``position_location_display``,
        ``position_start_date``, ``position_end_date``,
        ``publication_start_date``, ``application_close_date``,
        ``position_uri``, ``low_grade``, ``high_grade`` and ``service_type``.
        Fields missing from the input are ``None`` (``position_uri`` is ``''``
        when the key is absent). ``usajobs_control_number`` is a string or
        ``None``.
    """
    # `or {}` covers keys that are present but null, not just absent keys
    job = job_item.get('MatchedObjectDescriptor') or {}
    user_area = (job.get('UserArea') or {}).get('Details') or {}

    # The control number is the path segment that follows '/job/' in PositionURI
    position_uri = job.get('PositionURI', '')
    numeric_control_number = None
    if isinstance(position_uri, str) and '/job/' in position_uri:
        tail = position_uri.split('/job/')[-1]
        # Keep only the first path segment: drop any query string, fragment,
        # trailing slash or sub-path so the value compares cleanly
        for separator in ('?', '#', '/'):
            tail = tail.partition(separator)[0]
        numeric_control_number = tail or None

    if numeric_control_number is None:
        # Fallback: the search result item's own MatchedObjectId.
        # NOTE(review): per the USAJobs API reference this is the control
        # number of the posting - confirm against the current response schema.
        matched_id = job_item.get('MatchedObjectId')
        if matched_id not in (None, ''):
            numeric_control_number = str(matched_id)

    return {
        'usajobs_control_number': numeric_control_number,
        'position_id': job.get('PositionID'),
        'position_title': job.get('PositionTitle'),
        'department_name': job.get('DepartmentName'),
        'organization_name': job.get('OrganizationName'),
        'position_location_display': job.get('PositionLocationDisplay'),
        'position_start_date': job.get('PositionStartDate'),
        'position_end_date': job.get('PositionEndDate'),
        'publication_start_date': job.get('PublicationStartDate'),
        'application_close_date': job.get('ApplicationCloseDate'),
        'position_uri': position_uri,
        'low_grade': user_area.get('LowGrade'),
        'high_grade': user_area.get('HighGrade'),
        'service_type': user_area.get('ServiceType')
    }

## 2. Fetch Current Jobs for Series 0560

We'll fetch all current job postings for occupational series 0560 (Budget Analysis) using pagination.

In [None]:
# Target control numbers we're searching for
TARGET_CONTROL_NUMBERS = ['844807200', '845022800']

# Parameters for the API request
params = {
    'JobCategoryCode': '0560',  # Budget Analysis series
    'ResultsPerPage': 25        # API pagination size
}

# Safety limit so the loop terminates even if the API never returns an empty page
MAX_PAGES = 1000

# Reset on every run so re-executing this cell does not accumulate duplicates
all_jobs = []
flattened_jobs = []
page = 1
total_count = None      # SearchResultCountAll as reported with the first page
fetch_error = None      # the exception that stopped pagination, if any
hit_page_limit = False  # True when MAX_PAGES was reached without an empty page

print("Fetching jobs for series 0560...")
print(f"Looking for control numbers: {', '.join(TARGET_CONTROL_NUMBERS)}")
print("="*50)

while page <= MAX_PAGES:
    params['Page'] = page

    try:
        response = requests.get(BASE_URL, headers=headers, params=params, timeout=120)
        response.raise_for_status()
        data = response.json()

        search_result = data.get('SearchResult', {})
        items = search_result.get('SearchResultItems', [])

        # An empty page marks the end of the result set
        if not items:
            print(f"No more results at page {page}")
            break

        # Keep both the raw item and its flattened record
        for item in items:
            all_jobs.append(item)
            flattened_jobs.append(flatten_job(item))

        # Get total count on first page
        if page == 1:
            total_count = search_result.get('SearchResultCountAll', 0)
            print(f"Total jobs available: {total_count}")

        print(f"Page {page}: fetched {len(items)} jobs (total so far: {len(all_jobs)})")

        page += 1
        time.sleep(0.5)  # Be nice to the API

    except Exception as e:
        # Best effort: keep what was fetched, but remember that the run was cut short
        fetch_error = e
        print(f"Error on page {page}: {e}")
        break
else:
    # Loop condition became false without hitting an empty page
    hit_page_limit = True
    print(f"Stopped after reaching the {MAX_PAGES}-page safety limit")

print(f"\nTotal jobs fetched: {len(all_jobs)}")

# A "NOT FOUND" verdict in later cells is only meaningful if every page was retrieved,
# so make an incomplete fetch impossible to overlook.
fewer_than_reported = isinstance(total_count, int) and len(all_jobs) < total_count
if fetch_error is not None or hit_page_limit or fewer_than_reported:
    print("WARNING: the fetch is INCOMPLETE - results below may be missing postings.")
    if fetch_error is not None:
        print(f"   Pagination stopped on page {page} because of: {fetch_error}")
    if fewer_than_reported:
        print(f"   API reported {total_count} jobs but only {len(all_jobs)} were fetched")

## 3. Convert to DataFrame and Analyze

In [None]:
# Columns shown in the preview at the end of this cell
PREVIEW_COLUMNS = ['usajobs_control_number', 'position_title', 'department_name']

# Create DataFrame from flattened jobs
df = pd.DataFrame(flattened_jobs)

if df.empty:
    # With no records the frame has no columns at all, so the preview below and
    # the control-number lookups in later cells would fail with KeyError.
    # Give the empty frame the columns those lookups need.
    df = pd.DataFrame(columns=PREVIEW_COLUMNS)
    print("WARNING: no jobs were fetched - check the fetch step output above")

print(f"DataFrame created with {len(df)} jobs")
print(f"\nColumns: {', '.join(df.columns)}")

# Display first few jobs
print("\nFirst 5 jobs:")
df[PREVIEW_COLUMNS].head()

## 4. Search for Target Control Numbers

In [None]:
# Report, for every target, whether it appears among the fetched postings
print("SEARCH RESULTS:")
print("=" * 50)

for cn in TARGET_CONTROL_NUMBERS:
    is_target = df['usajobs_control_number'] == cn
    matches = df[is_target]

    # Handle the miss first so the detail printing below stays unnested
    if matches.empty:
        print(f"\n❌ NOT FOUND Control Number: {cn}")
        continue

    print(f"\n✅ FOUND Control Number: {cn}")
    for _, posting in matches.iterrows():
        grade_range = f"{posting['low_grade']}-{posting['high_grade']}"
        print(f"   Title: {posting['position_title']}")
        print(f"   Agency: {posting['department_name']}")
        print(f"   Location: {posting['position_location_display']}")
        print(f"   Grade: {grade_range}")
        print(f"   Open Date: {posting['position_start_date']}")
        print(f"   Close Date: {posting['position_end_date']}")

## 5. Show All Control Numbers in Series 0560

In [None]:
# Display all control numbers found.
# Records whose control number could not be extracted hold None; they are
# dropped first because sorted() cannot compare None with strings.
control_number_column = df['usajobs_control_number']
missing_control_number_count = int(control_number_column.isna().sum())
all_control_numbers = sorted(control_number_column.dropna().unique())

print(f"All {len(all_control_numbers)} control numbers in series 0560:")
print("=" * 50)

for i, cn in enumerate(all_control_numbers, 1):
    print(f"{i:3d}. {cn}")

if missing_control_number_count:
    print(f"\nNote: {missing_control_number_count} posting(s) had no extractable control number and are not listed")

## 6. Analysis: Where Do Missing Numbers Fall?

In [None]:
# Convert control numbers to integers for analysis.
# Only purely numeric values are converted: int() would raise on anything else.
numeric_control_numbers = [x for x in all_control_numbers if str(x).isdecimal()]
skipped_count = len(all_control_numbers) - len(numeric_control_numbers)
control_nums_int = [int(x) for x in numeric_control_numbers]

print("NUMERICAL ANALYSIS:")
print("=" * 50)

if skipped_count:
    print(f"\nIgnoring {skipped_count} non-numeric control number value(s)")

if control_nums_int:
    print("\nRange of control numbers in series 0560:")
    print(f"  Minimum: {min(control_nums_int):,}")
    print(f"  Maximum: {max(control_nums_int):,}")
else:
    # min()/max() raise on an empty list, so say explicitly that there is no range
    print("\nNo numeric control numbers were fetched for series 0560, so there is no range to compare against.")

# Check where our target numbers would fall
for cn in TARGET_CONTROL_NUMBERS:
    cn_int = int(cn)

    if cn in all_control_numbers:
        position = all_control_numbers.index(cn) + 1
        print(f"\n{cn}: FOUND at position {position} of {len(all_control_numbers)}")
    elif not control_nums_int:
        # Nothing to compare with: report the miss without neighbour details
        print(f"\n{cn}: NOT FOUND")
    else:
        # Find the fetched numbers on either side of the target
        smaller = [x for x in control_nums_int if x < cn_int]
        larger = [x for x in control_nums_int if x > cn_int]

        print(f"\n{cn}: NOT FOUND")
        if smaller:
            print(f"  Would fall after: {max(smaller):,}")
        if larger:
            print(f"  Would fall before: {min(larger):,}")

        # Find closest number
        closest = min(control_nums_int, key=lambda x: abs(x - cn_int))
        print(f"  Closest number: {closest:,} (difference: {abs(closest - cn_int):,})")

## 7. Check If Numbers Exist in Other Series

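Let's search for the missing control number (845022800) across all occupational series to see if it exists elsewhere. A keyword search is not an exact lookup, so each returned posting is checked for an exact control-number match.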

In [None]:
# Search for the missing control number across all series.
# NOTE(review): this value is hard-coded; confirm it matches the target that the
# earlier search cell actually reported as NOT FOUND.
missing_number = '845022800'  # The one not found in 0560

print(f"Searching for {missing_number} across all occupational series...")
print("=" * 50)

# Search by the control number directly (no JobCategoryCode filter)
search_params = {
    'Keyword': missing_number,
    'ResultsPerPage': 10
}

try:
    response = requests.get(BASE_URL, headers=headers, params=search_params, timeout=120)
    response.raise_for_status()
    data = response.json()

    search_result = data.get('SearchResult', {})
    total_found = search_result.get('SearchResultCountAll', 0)
    items = search_result.get('SearchResultItems', [])

    print(f"\nResults found: {total_found}")

    # A keyword hit is not necessarily the posting itself, so track whether any
    # returned item carries exactly this control number.
    exact_match_found = False
    for item in items:
        job = item.get('MatchedObjectDescriptor') or {}
        uri = job.get('PositionURI') or ''
        job_id = uri.split('/job/')[-1] if '/job/' in uri else 'Unknown'

        if job_id != missing_number:
            continue

        exact_match_found = True
        # First job category code, or 'Unknown' when the list is missing/empty
        categories = job.get('JobCategory') or [{}]
        series_code = categories[0].get('Code', 'Unknown')

        print(f"\n✅ FOUND {missing_number}!")
        print(f"  Title: {job.get('PositionTitle', 'N/A')}")
        print(f"  Agency: {job.get('DepartmentName', 'N/A')}")
        print(f"  Series: {series_code}")
        print(f"  URI: {uri}")

    if not exact_match_found:
        # Printed both when nothing came back and when only non-matching postings did
        print(f"\n❌ Control number {missing_number} not found in any current job postings")
        if items:
            print(f"   ({len(items)} keyword result(s) were returned, but none has this control number)")
        if isinstance(total_found, int) and total_found > len(items):
            print(f"   Note: only the first {len(items)} of {total_found} results were inspected")

except Exception as e:
    print(f"Error searching: {e}")

## 8. Summary and Conclusions

In [None]:
# Final recap: counts, per-target status, candidate explanations, and a CSV export
OUTPUT_CSV = '0560_series_investigation.csv'

POSSIBLE_REASONS = (
    "Job belongs to a different occupational series",
    "Job posting has been closed/expired",
    "Job was cancelled before posting",
    "Control number is from a different time period",
    "Job is in a different API endpoint (e.g., archived jobs)",
)

print("INVESTIGATION SUMMARY")
print("=" * 50)

print(f"\n1. Total current jobs in series 0560: {len(df)}")

print("\n2. Target control numbers:")
for cn in TARGET_CONTROL_NUMBERS:
    if cn in all_control_numbers:
        status = "FOUND"
    else:
        status = "NOT FOUND"
    print(f"   - {cn}: {status}")

print("\n3. Possible reasons for missing control numbers:")
for reason in POSSIBLE_REASONS:
    print(f"   - {reason}")

# Save results for reference
df.to_csv(OUTPUT_CSV, index=False)
print(f"\n4. Results saved to: {OUTPUT_CSV}")