# Investigation: Missing Monster Questionnaire URLs in USAJobs API

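This notebook investigates why certain jobs (like control number 845022800) don't have Monster questionnaire URLs in the USAJobs API response, even though they use Monster for applications. Note that the search cell in Section 1 currently targets control number 844807200; confirm which posting is the intended subject before relying on the findings.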

## Setup

In [None]:
import requests
import json
import pandas as pd
import os
from dotenv import load_dotenv
from datetime import datetime

# Pull any variables defined in a local .env file into the process environment
load_dotenv()

# The USAJobs token is read from the environment; stop immediately when it is
# missing or empty so later cells never send unauthenticated requests
api_key = os.environ.get('USAJOBS_API_TOKEN')
if api_key is None or api_key == '':
    raise ValueError('API key required. Set USAJOBS_API_TOKEN environment variable')

# Headers attached to every USAJobs request made by this notebook
headers = {'Host': 'data.usajobs.gov'}
headers['Authorization-Key'] = api_key

print(f"API configured at {datetime.now()}")

## 1. Search for All Series 0560 Jobs

First, let's fetch all current 0560 series jobs and then find our specific job.

In [None]:
# Search for all 0560 series jobs
# NOTE(review): the notebook's introduction and conclusion refer to control
# number 845022800, while this cell searches for 844807200 — confirm which
# posting is intended.
target_control_number = '844807200'  # The one we're looking for
BASE_URL = 'https://data.usajobs.gov/api/Search'
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled connection hangs the kernel


def control_number_from_uri(position_uri):
    """Return the text after the last '/job/' in a PositionURI.

    Returns None when the URI is empty/None or does not contain '/job/'.
    """
    if not position_uri or '/job/' not in position_uri:
        return None
    return position_uri.split('/job/')[-1]


# Search for series 0560
params = {
    'JobCategoryCode': '0560',
    'ResultsPerPage': 500  # Ask for a large page; truncation is checked below
}

print("Searching for all series 0560 jobs...")
response = requests.get(
    BASE_URL, headers=headers, params=params, timeout=REQUEST_TIMEOUT_SECONDS
)

if response.status_code == 200:
    data = response.json()
    count = data['SearchResult']['SearchResultCount']
    items = data['SearchResult']['SearchResultItems']

    print(f"\nTotal 0560 jobs found: {count}")
    print(f"Items returned: {len(items)}")

    # If the response reports a larger overall total than what was returned,
    # the target could be on a later page and a "not found" below is inconclusive.
    total_available = data['SearchResult'].get('SearchResultCountAll')
    if isinstance(total_available, int) and total_available > len(items):
        print(f"⚠️ Only {len(items)} of {total_available} matching jobs were returned; "
              "the target job may be on a later page")

    # Look for our specific job
    target_job = None

    for job in items:
        descriptor = job['MatchedObjectDescriptor']
        if control_number_from_uri(descriptor.get('PositionURI', '')) == target_control_number:
            target_job = job
            break

    if target_job:
        descriptor = target_job['MatchedObjectDescriptor']
        print(f"\n✅ Found target job {target_control_number}:")
        print(f"  Position ID: {descriptor.get('PositionID')}")
        print(f"  Title: {descriptor.get('PositionTitle')}")
        print(f"  Agency: {descriptor.get('DepartmentName')}")
        print(f"  Location: {descriptor.get('PositionLocationDisplay')}")
        print(f"  Position URI: {descriptor.get('PositionURI')}")
        print(f"  Apply URI: {descriptor.get('ApplyURI')}")

        # Save for later analysis; later cells test for the existence of
        # target_descriptor to decide whether the job was found
        target_descriptor = descriptor
        target_job_full = target_job
    else:
        print(f"\n❌ Job {target_control_number} not found in 0560 series")
        print("\nFirst few jobs found:")
        for job in items[:5]:
            desc = job['MatchedObjectDescriptor']
            job_id = control_number_from_uri(desc.get('PositionURI', ''))
            if job_id is None:
                job_id = 'Unknown'
            print(f"  {job_id}: {desc.get('PositionTitle')}")
else:
    print(f"Error: {response.status_code}")

## 2. Display Full API Response

Let's examine the complete API response to see all available fields.

In [None]:
# target_descriptor only exists when the search cell located the target job
if 'target_descriptor' in locals():
    # Pretty print the full MatchedObjectDescriptor; the banner uses the
    # configured control number so it cannot drift from the job actually searched
    print(f"Full MatchedObjectDescriptor JSON for job {target_control_number}:")
    print("=" * 80)
    print(json.dumps(target_descriptor, indent=2))
else:
    print("Target job not found - cannot display descriptor")

## 3. Check UserArea for Hidden URLs

Sometimes additional URLs are stored in the UserArea section.

In [None]:
def find_urls(obj, path=''):
    """Recursively collect every string containing 'http' from a JSON-like value.

    Args:
        obj: A dict, list, string, or any other value decoded from JSON.
        path: Label for the location of ``obj``; dict keys are appended as
            ``.key`` and list positions as ``[i]``.

    Returns:
        A list of ``(path, value)`` tuples in traversal order. Values that are
        not strings, dicts, or lists contribute nothing.
    """
    # Handling strings here (rather than only as dict values) is what lets
    # URLs that sit directly inside a list be reported.
    if isinstance(obj, str):
        return [(path, obj)] if 'http' in obj else []

    urls_found = []
    if isinstance(obj, dict):
        for k, v in obj.items():
            urls_found.extend(find_urls(v, f"{path}.{k}"))
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            urls_found.extend(find_urls(item, f"{path}[{i}]"))
    return urls_found


# Inspect the target job specifically. target_descriptor only exists when the
# search cell found the job, so this guard also covers a failed search request.
if 'target_descriptor' in locals():
    # "or {}" tolerates keys that are present but null in the API response
    user_area = target_descriptor.get('UserArea') or {}
    details = user_area.get('Details') or {}

    print("UserArea.Details content:")
    print("=" * 80)
    print(json.dumps(details, indent=2))

    # Search for any URLs in the details
    print("\n" + "=" * 80)
    print("Searching for URLs in UserArea...")

    urls = find_urls(details, 'UserArea.Details')

    if urls:
        print("\nURLs found:")
        for path, url in urls:
            print(f"  {path}: {url}")
    else:
        print("\nNo URLs found in UserArea.Details")
else:
    print("Target job not found - cannot inspect UserArea")

## 4. Compare with Jobs That DO Have Monster URLs

Let's find a job that has a Monster URL and compare the structure.

In [None]:
# Load the current jobs data
print("Loading current jobs data to find examples with Monster URLs...")
df = pd.read_parquet('data/current_jobs_2025.parquet')

# Find jobs with Monster URLs
MAX_ROWS_TO_SCAN = 1000  # Check first 1000 for efficiency
MAX_EXAMPLES = 3         # Stop once this many examples are collected

monster_jobs = []
for idx, row in df.head(MAX_ROWS_TO_SCAN).iterrows():
    if 'MatchedObjectDescriptor' in row and pd.notna(row['MatchedObjectDescriptor']):
        # Cheap substring test on the raw text; the match may be anywhere in
        # the descriptor, not necessarily in ApplyURI
        if 'monstergovt.com' in str(row['MatchedObjectDescriptor']):
            monster_jobs.append(row)
            if len(monster_jobs) >= MAX_EXAMPLES:
                break

print(f"\nFound {len(monster_jobs)} examples with Monster URLs")

if monster_jobs:
    # Show first example. A dedicated name is used so the descriptor of the
    # target job from the search cell is not overwritten by an unrelated job.
    example = monster_jobs[0]
    example_descriptor = json.loads(example['MatchedObjectDescriptor'])

    print("\nExample job WITH Monster URL:")
    print(f"  Control Number: {example.get('usajobs_control_number', 'N/A')}")
    print(f"  Title: {example.get('positionTitle', 'N/A')}")
    print(f"  Agency: {example.get('hiringAgencyName', 'N/A')}")
    print(f"  ApplyURI: {example_descriptor.get('ApplyURI')}")

    # Look for the Monster URL. ApplyURI may be a single string or a list;
    # normalise to a list so a bare string is not iterated character by character.
    apply_uris = example_descriptor.get('ApplyURI') or []
    if isinstance(apply_uris, str):
        apply_uris = [apply_uris]

    monster_uris = [uri for uri in apply_uris if 'monstergovt.com' in str(uri)]
    for uri in monster_uris:
        print(f"\n  🎯 Monster URL found: {uri}")
    if not monster_uris:
        print("\n  Monster reference is outside ApplyURI (matched elsewhere in the descriptor)")

## 5. Direct API Call for Job Details

Let's try to get the job details using different API endpoints or parameters.

In [None]:
# Try searching by Position ID instead
position_id = 'WAS-2025-0025'  # From the earlier search

# Cell-specific names keep the Section 1 search results (response/data/count)
# intact for anyone re-running earlier cells.
position_params = {
    'PositionID': position_id,
    'ResultsPerPage': 10
}

print(f"Searching by Position ID: {position_id}")
position_response = requests.get(
    BASE_URL,
    headers=headers,
    params=position_params,
    timeout=30,  # seconds; without a timeout a stalled connection hangs the kernel
)

if position_response.status_code == 200:
    position_data = position_response.json()
    position_count = position_data['SearchResult']['SearchResultCount']
    print(f"Results found: {position_count}")

    if position_count > 0:
        # Check ApplyURI in all results
        for i, item in enumerate(position_data['SearchResult']['SearchResultItems']):
            desc = item['MatchedObjectDescriptor']
            apply_uri = desc.get('ApplyURI', [])
            print(f"\nResult {i+1}:")
            print(f"  Position ID: {desc.get('PositionID')}")
            print(f"  Apply URI: {apply_uri}")
else:
    # Report failures the same way the series search does instead of staying silent
    print(f"Error: {position_response.status_code}")

## 6. Analysis: Why Are Monster URLs Missing?

Let's analyze the pattern of missing URLs.

In [None]:
# Check how many jobs have questionnaire URLs
print("Analyzing questionnaire URL patterns in current jobs...")

total_jobs = len(df)
has_questionnaire = 0
has_monster = 0
has_usastaffing = 0
only_usajobs = 0
unparseable = 0  # rows whose descriptor could not be decoded or had an unexpected shape

for idx, row in df.iterrows():
    if 'MatchedObjectDescriptor' in row and pd.notna(row['MatchedObjectDescriptor']):
        try:
            descriptor = json.loads(row['MatchedObjectDescriptor'])
            apply_uris = descriptor.get('ApplyURI') or []

            # Convert to list if string
            if isinstance(apply_uris, str):
                apply_uris = [apply_uris]

            uri_str = ' '.join(str(uri) for uri in apply_uris)
        except (ValueError, TypeError, AttributeError):
            # Still best-effort (bad rows are skipped), but counted and limited
            # to decode/shape errors so real failures and interrupts surface.
            unparseable += 1
            continue

        if 'ViewQuestionnaire' in uri_str or 'previewVacancyQuestions' in uri_str:
            has_questionnaire += 1

            if 'monstergovt.com' in uri_str:
                has_monster += 1
            elif 'usastaffing.gov' in uri_str:
                has_usastaffing += 1
        elif apply_uris and all('usajobs.gov' in str(uri) for uri in apply_uris):
            only_usajobs += 1

# Guard the percentages so an empty frame reports 0.0% instead of raising
questionnaire_pct = has_questionnaire / total_jobs * 100 if total_jobs else 0.0
only_usajobs_pct = only_usajobs / total_jobs * 100 if total_jobs else 0.0

print("\nResults:")
print(f"Total jobs: {total_jobs:,}")
print(f"Jobs with questionnaire URLs: {has_questionnaire:,} ({questionnaire_pct:.1f}%)")
print(f"  - Monster questionnaires: {has_monster:,}")
print(f"  - USAStaffing questionnaires: {has_usastaffing:,}")
print(f"Jobs with only USAJobs URL: {only_usajobs:,} ({only_usajobs_pct:.1f}%)")
if unparseable:
    print(f"Rows skipped (unparseable descriptor): {unparseable:,}")
print(f"\nMissing questionnaire URLs: {only_usajobs:,} jobs")

## 7. Conclusion

This investigation shows that the USAJobs API is not providing Monster questionnaire URLs for certain jobs, even though these jobs use Monster for their application process.

Key findings:
1. Job 845022800 only returns `https://www.usajobs.gov:443/job/845022800` in ApplyURI
2. The actual questionnaire URL `https://jobs.monstergovt.com/bopmp/vacancy/previewVacancyQuestions.hms?orgId=2&jnum=178332` is not provided by the API
3. This affects thousands of jobs that only show USAJobs URLs despite using external application systems

This is a limitation of the USAJobs API that prevents complete questionnaire collection.