# pycancensus Testing Notebook

This notebook is for testing the basic functionality of the pycancensus package.

## Setup

First, install the package in development mode:
```bash
cd /path/to/pycancensus
pip install -e .
```

Or, to run the notebook against the source tree without installing the package, install its dependencies manually:
```bash
pip install requests pandas geopandas
```

In [None]:
# Add the package to Python path if not installed
import sys
import os

# Resolve the parent of the current working directory to an absolute,
# normalised path, and only insert it when it is not already present so that
# re-running this cell does not stack duplicate entries on sys.path.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

import pycancensus as pc
import pandas as pd
import geopandas as gpd

print(f"pycancensus version: {pc.__version__}")

## 1. Set API Key

Get a free API key at: https://censusmapper.ca/users/sign_up

In [None]:
# Set your API key here
# pc.set_api_key("your_api_key_here")

# Or set as environment variable
# import os
# os.environ['CANCENSUS_API_KEY'] = 'your_api_key_here'

# Check if API key is set
api_key = pc.get_api_key()
if api_key:
    # Do not echo any part of the key: notebook outputs are saved with the
    # file and are easily committed or shared, and a short key would be
    # exposed in full by a fixed-length prefix.
    print(f"API key is set ({len(api_key)} characters, value hidden)")
else:
    print("⚠️  No API key set. Please set one to test API functions.")

## 2. Test Basic Functions (No API Key Required)

In [None]:
# Smoke-test the helpers in pycancensus.utils and show what each one returns
from pycancensus.utils import validate_dataset, validate_level, process_regions

print("Testing utility functions:")

# Each helper is called immediately before its result is printed, so output
# produced by earlier helpers is still shown if a later one raises.
dataset_result = validate_dataset('ca16')
print(f"validate_dataset('ca16'): {dataset_result}")

level_result = validate_level('CMA')
print(f"validate_level('CMA'): {level_result}")

regions_result = process_regions({'CMA': '59933'})
print(f"process_regions({{'CMA': '59933'}}): {regions_result}")

In [None]:
# Round-trip a small DataFrame through the cache helpers, then list the cache
from pycancensus.cache import cache_data, get_cached_data, list_cache

print("Testing cache functions:")

# Store a tiny throwaway frame under a fixed key
test_df = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})
cache_data('test_key', test_df)

# Read it back; the lookup is considered successful when it is not None
cached_df = get_cached_data('test_key')
was_retrieved = cached_df is not None
print(f"Cached data retrieved successfully: {was_retrieved}")
if was_retrieved:
    print(cached_df)

# Report how many entries the cache holds and show them when there are any
cache_list = list_cache()
entry_count = len(cache_list)
print(f"\nCache entries: {entry_count}")
if entry_count > 0:
    print(cache_list)

## 3. Test API Functions (API Key Required)

**Note:** These tests require a valid API key. If no key is set, each cell below prints a skip message instead of calling the API.

In [None]:
# Test if we can make API calls
# Use truthiness rather than `is not None` so that an empty-string key (for
# example an environment variable set to "") is treated as missing. This
# matches the `if api_key:` check used when the key is first read.
has_api_key = bool(pc.get_api_key())
print(f"Can test API functions: {has_api_key}")

if not has_api_key:
    print("\n⚠️  Skipping API tests. Set API key to test these functions.")
    print("   Get a free key at: https://censusmapper.ca/users/sign_up")
    print("   Then run: pc.set_api_key('your_key_here')")

In [None]:
# List the available census datasets (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping dataset test (no API key)")
else:
    try:
        print("Testing list_census_datasets()...")
        datasets = pc.list_census_datasets()
        dataset_count = len(datasets)
        print(f"✅ Success! Found {dataset_count} datasets")
        print(datasets.head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")

In [None]:
# List the regions of the CA16 dataset (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping regions test (no API key)")
else:
    try:
        print("Testing list_census_regions('CA16')...")
        regions = pc.list_census_regions('CA16')
        region_count = len(regions)
        print(f"✅ Success! Found {region_count} regions")
        print(regions.head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")

In [None]:
# Search CA16 regions for "Vancouver" (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping region search test (no API key)")
else:
    try:
        print("Testing search_census_regions('Vancouver', 'CA16')...")
        vancouver_regions = pc.search_census_regions('Vancouver', 'CA16')
        match_count = len(vancouver_regions)
        print(f"✅ Success! Found {match_count} Vancouver regions")
        if match_count > 0:
            # Preview only the identifying columns of the first few matches
            preview_columns = ['region', 'name', 'level']
            print(vancouver_regions[preview_columns].head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")

In [None]:
# List the vectors of the CA16 dataset (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping vectors test (no API key)")
else:
    try:
        print("Testing list_census_vectors('CA16')...")
        vectors = pc.list_census_vectors('CA16')
        vector_count = len(vectors)
        print(f"✅ Success! Found {vector_count} vectors")
        print(vectors.head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")

In [None]:
# Search CA16 vectors for "population" (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping vector search test (no API key)")
else:
    try:
        print("Testing search_census_vectors('population', 'CA16')...")
        pop_vectors = pc.search_census_vectors('population', 'CA16')
        match_count = len(pop_vectors)
        print(f"✅ Success! Found {match_count} population vectors")
        if match_count > 0:
            # Preview only the identifying columns of the first few matches
            preview_columns = ['vector', 'label', 'type']
            print(pop_vectors[preview_columns].head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")

## 4. Test Data Retrieval (Advanced API Tests)

In [None]:
# Fetch tabular census data for the Vancouver CMA at CSD level
# (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping data retrieval test (no API key)")
else:
    try:
        print("Testing get_census() for Vancouver CMA...")
        census_query = dict(
            dataset='CA16',
            regions={'CMA': '59933'},  # Vancouver CMA
            vectors=['v_CA16_1', 'v_CA16_2'],  # Population vectors
            level='CSD',
        )
        data = pc.get_census(**census_query)
        print(f"✅ Success! Retrieved data shape: {data.shape}")
        column_names = list(data.columns)
        print(f"Columns: {column_names}")
        print(data.head())
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")
        print("This might be due to API endpoint differences - we may need to adjust the implementation")

In [None]:
# Fetch census data together with geometry for the Vancouver CMA
# (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping geometry test (no API key)")
else:
    try:
        print("Testing get_census() with geometry...")
        geo_query = dict(
            dataset='CA16',
            regions={'CMA': '59933'},  # Vancouver CMA
            vectors=['v_CA16_1'],
            level='CSD',
            geo_format='geopandas',
        )
        geo_data = pc.get_census(**geo_query)
        print(f"✅ Success! Retrieved GeoDataFrame shape: {geo_data.shape}")
        print(f"Columns: {list(geo_data.columns)}")
        print(f"CRS: {geo_data.crs}")
        # Only look at the first geometry when at least one row came back
        if len(geo_data) > 0:
            first_geom_type = geo_data.geometry.geom_type.iloc[0]
        else:
            first_geom_type = 'N/A'
        print(f"Geometry type: {first_geom_type}")
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")
        print("This might be due to API endpoint differences - we may need to adjust the implementation")

In [None]:
# Fetch geometries only (no vectors) for the same region and level
# (skipped when no API key is configured)
if not has_api_key:
    print("⏭️  Skipping geometry-only test (no API key)")
else:
    try:
        print("Testing get_census_geometry()...")
        geometry_query = dict(
            dataset='CA16',
            regions={'CMA': '59933'},
            level='CSD',
        )
        geometries = pc.get_census_geometry(**geometry_query)
        print(f"✅ Success! Retrieved geometries shape: {geometries.shape}")
        column_names = list(geometries.columns)
        print(f"Columns: {column_names}")
        print(f"CRS: {geometries.crs}")
    except Exception as err:
        # Report the failure and carry on so later cells can still run
        print(f"❌ Error: {err}")
        print("This might be due to API endpoint differences - we may need to adjust the implementation")

## 5. Test Error Handling

In [None]:
# Test error handling with invalid inputs
# The validators are imported once here rather than separately inside each
# try block, so an import problem surfaces immediately and is not mixed up
# with the validation checks themselves.
from pycancensus.utils import validate_dataset, validate_level, process_regions

print("Testing error handling...")

# Each case is (validator, deliberately invalid argument, label for messages).
invalid_input_cases = [
    (validate_dataset, 'invalid', "invalid dataset"),
    (validate_level, 'invalid', "invalid level"),
    (process_regions, {}, "empty regions"),
]

for validator, bad_input, label in invalid_input_cases:
    try:
        validator(bad_input)
        # Reaching this line means the validator accepted bad input
        print(f"❌ Should have raised error for {label}")
    except ValueError as e:
        # Only ValueError counts as the expected rejection; any other
        # exception type propagates and stops the cell.
        print(f"✅ Correctly caught {label}: {e}")

## 6. Performance and Caching Tests

In [None]:
# Test caching performance
# Times two identical list_census_regions() calls and compares them.
if has_api_key:
    import time

    print("Testing caching performance...")

    try:
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can be coarse or jump if the
        # system clock is adjusted.

        # First call (should hit API)
        # NOTE(review): if an earlier call already cached this request, this
        # first call may be served from the cache as well - confirm.
        start_time = time.perf_counter()
        regions1 = pc.list_census_regions('CA16', use_cache=True, quiet=True)
        first_call_time = time.perf_counter() - start_time

        # Second call (should use cache)
        start_time = time.perf_counter()
        regions2 = pc.list_census_regions('CA16', use_cache=True, quiet=True)
        second_call_time = time.perf_counter() - start_time

        print(f"First call (API): {first_call_time:.2f}s")
        print(f"Second call (cache): {second_call_time:.2f}s")
        # Guard the ratio: a zero elapsed time would otherwise raise
        # ZeroDivisionError and skip the identity check below.
        if second_call_time > 0:
            print(f"Speedup: {first_call_time/second_call_time:.1f}x")
        else:
            print("Speedup: n/a (second call was too fast to measure)")
        print(f"Data identical: {regions1.equals(regions2)}")

    except Exception as e:
        print(f"❌ Error in caching test: {e}")
else:
    print("⏭️  Skipping caching performance test (no API key)")

## 7. Summary and Next Steps

Use this section to note any issues found and improvements needed.

In [None]:
# Print a closing summary whose content depends on whether an API key is set
print("🧪 Testing Summary", "=" * 50, sep="\n")

if has_api_key:
    status_lines = (
        "✅ API key configured",
        "✅ Ready for full testing",
        "\n📝 Notes:",
        "- Test all functions with various parameters",
        "- Check API response formats match expectations",
        "- Verify error handling works correctly",
        "- Test with different datasets (CA16, CA21, etc.)",
        "- Test with different geographic levels",
        "- Verify geometry data works correctly",
    )
else:
    status_lines = (
        "⚠️  API key not configured",
        "✅ Basic functionality tests passed",
        "\n📝 Next steps:",
        "1. Get API key from https://censusmapper.ca/users/sign_up",
        "2. Set API key: pc.set_api_key('your_key')",
        "3. Re-run notebook for full testing",
    )

# This list is shown regardless of whether an API key is set
improvement_lines = (
    "\n🔧 Potential improvements:",
    "- Add more robust error handling",
    "- Implement retry logic for API calls",
    "- Add data validation for API responses",
    "- Add progress bars for long operations",
    "- Add examples for common use cases",
    "- Add visualization helpers",
)

for line in status_lines + improvement_lines:
    print(line)