# Libraries

In [1]:
import requests
import pandas as pd
import json
from typing import Optional, Dict, Any, List

# Data extraction

## API setup

In [2]:
class LongBeachAnimalShelterAPI:
    """
    A Python client for extracting data from the Long Beach Animal Shelter API
    using the Opendatasoft Explore API v2.1
    """
    
    def __init__(self):
        self.base_url = "https://longbeach.opendatasoft.com/api/explore/v2.1"
        self.dataset_id = "animal-shelter-intakes-and-outcomes"
        
    def get_all_records(self, 
                       select: Optional[str] = None,
                       where: Optional[str] = None,
                       order_by: Optional[str] = None) -> pd.DataFrame:
        """
        Extract all records from the animal shelter dataset without limits.
        
        Args:
            select: Fields to select (default: all fields)
            where: Filter conditions using ODSQL syntax
            order_by: Order by clause
            
        Returns:
            pandas.DataFrame with all records
        """
        
        all_records = []
        offset = 0
        limit = 100  # Maximum allowed per request
        
        while True:
            # Build API request
            url = f"{self.base_url}/catalog/datasets/{self.dataset_id}/records"
            
            params = {
                'limit': limit,
                'offset': offset
            }
            
            # Add optional parameters
            if select:
                params['select'] = select
            if where:
                params['where'] = where
            if order_by:
                params['order_by'] = order_by
                
            try:
                response = requests.get(url, params=params)
                response.raise_for_status()
                
                data = response.json()
                
                # Extract records from response
                records = data.get('results', [])
                
                if not records:
                    break
                    
                all_records.extend(records)
                
                # Check if we've retrieved all records
                total_count = data.get('total_count', 0)
                if len(all_records) >= total_count:
                    break
                    
                # Move to next batch
                offset += limit
                
                print(f"Retrieved {len(all_records)} of {total_count} records...")
                
            except requests.RequestException as e:
                print(f"Error fetching data: {e}")
                break
                
        # Convert to DataFrame
        df = pd.DataFrame(all_records)
        print(f"Successfully retrieved {len(df)} total records")
        
        return df
    
    def get_dataset_info(self) -> Dict[str, Any]:
        """
        Get metadata about the dataset including field information.
        
        Returns:
            Dictionary with dataset metadata
        """
        url = f"{self.base_url}/catalog/datasets/{self.dataset_id}"
        
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response.json()
            
        except requests.RequestException as e:
            print(f"Error fetching dataset info: {e}")
            return {}
    
    def export_to_csv(self, 
                     filename: str = "animal_shelter_data.csv",
                     select: Optional[str] = None,
                     where: Optional[str] = None) -> bool:
        """
        Export data directly to CSV using the API's export endpoint.
        
        Args:
            filename: Output CSV filename
            select: Fields to select
            where: Filter conditions
            
        Returns:
            True if successful, False otherwise
        """
        url = f"{self.base_url}/catalog/datasets/{self.dataset_id}/exports/csv"
        
        params = {'limit': -1}  # -1 means no limit for exports
        
        if select:
            params['select'] = select
        if where:
            params['where'] = where
            
        try:
            response = requests.get(url, params=params)
            response.raise_for_status()
            
            with open(filename, 'wb') as f:
                f.write(response.content)
                
            print(f"Data exported to {filename}")
            return True
            
        except requests.RequestException as e:
            print(f"Error exporting data: {e}")
            return False

## Run

In [3]:
if __name__ == "__main__":
    
    # Initialize the API client
    api = LongBeachAnimalShelterAPI()
    
    # Example 1: Get all records
    print("Fetching all animal shelter records...")
    df = api.get_all_records()
    print(f"Retrieved {len(df)} records")
    print(f"Columns: {list(df.columns)}")
    
    
    # Example 5: Export directly to CSV
    print("\nExporting all data to CSV...")
    api.export_to_csv("longbeach_animal_shelter_complete.csv")
    
    
    # Display sample data
    if not df.empty:
        print("\nSample data:")
        print(df.head())

Fetching all animal shelter records...
Retrieved 100 of 32487 records...
Retrieved 200 of 32487 records...
Retrieved 300 of 32487 records...
Retrieved 400 of 32487 records...
Retrieved 500 of 32487 records...
Retrieved 600 of 32487 records...
Retrieved 700 of 32487 records...
Retrieved 800 of 32487 records...
Retrieved 900 of 32487 records...
Retrieved 1000 of 32487 records...
Retrieved 1100 of 32487 records...
Retrieved 1200 of 32487 records...
Retrieved 1300 of 32487 records...
Retrieved 1400 of 32487 records...
Retrieved 1500 of 32487 records...
Retrieved 1600 of 32487 records...
Retrieved 1700 of 32487 records...
Retrieved 1800 of 32487 records...
Retrieved 1900 of 32487 records...
Retrieved 2000 of 32487 records...
Retrieved 2100 of 32487 records...
Retrieved 2200 of 32487 records...
Retrieved 2300 of 32487 records...
Retrieved 2400 of 32487 records...
Retrieved 2500 of 32487 records...
Retrieved 2600 of 32487 records...
Retrieved 2700 of 32487 records...
Retrieved 2800 of 32487 r

In [8]:
df = pd.read_csv("longbeach_animal_shelter_complete.csv", sep=None, engine='python')

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32487 entries, 0 to 32486
Data columns (total 23 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ﻿animal_id         32487 non-null  object 
 1   animal_name        19089 non-null  object 
 2   animal_type        32487 non-null  object 
 3   primary_color      32487 non-null  object 
 4   secondary_color    15487 non-null  object 
 5   sex                32487 non-null  object 
 6   dob                28391 non-null  object 
 7   intake_date        32487 non-null  object 
 8   intake_cond        32487 non-null  object 
 9   intake_type        32487 non-null  object 
 10  intake_subtype     32069 non-null  object 
 11  reason             2150 non-null   object 
 12  outcome_date       32211 non-null  object 
 13  crossing           32487 non-null  object 
 14  jurisdiction       32486 non-null  object 
 15  outcome_type       32202 non-null  object 
 16  outcome_subtype    287