In [3]:
import requests
import pandas as pd
from pathlib import Path

def fetch_and_save_asylum_applications(
    output_file: str,
    year_from: int = 2010,
    year_to: int = 2024,
    *,
    timeout: float = 30.0,
) -> None:
    """
    Fetch asylum application data from the UNHCR API and save it as a CSV file.

    The endpoint is queried page by page (100 results per page). Each page's
    "items" list is flattened with ``pd.json_normalize``; all pages are then
    concatenated and written to ``output_file`` without the index column.

    Paging stops when a page has no "items", when the requested page number
    reaches the response's "maxPages" value (treated as 1 when absent), or
    when a request fails. If a request fails after some pages were fetched,
    those pages are still written, but the output is reported as partial
    instead of as a success.

    :param output_file: Path where the CSV file will be saved. Missing parent
        directories are created.
    :param year_from: Value sent as the ``yearFrom`` query parameter.
    :param year_to: Value sent as the ``yearTo`` query parameter.
    :param timeout: Seconds to wait for the server on each request before
        giving up. Without a timeout a stalled connection would block forever.
    """
    url = "https://api.unhcr.org/population/v1/asylum-applications/"

    params = {
        "yearFrom": year_from,
        "yearTo": year_to,
        "coo_all": "true",  # Include all countries of origin
        "coa_all": "true",  # Include all countries of asylum
        "cf_type": "ISO",  # Use ISO codes
        "limit": 100,  # Number of results per page
        "page": 1,  # Initial page; incremented as paging proceeds
    }

    headers = {"Accept": "application/json"}

    # One DataFrame per successfully fetched page.
    all_data = []
    # Set when paging stopped because of an error rather than end of data.
    fetch_failed = False

    # A single session reuses the underlying connection across page requests.
    with requests.Session() as session:
        while True:
            try:
                response = session.get(
                    url, params=params, headers=headers, timeout=timeout
                )
                response.raise_for_status()
                response_json = response.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers a non-JSON body on requests versions where
                # the JSON decode error is not a RequestException subclass.
                print(f"Request error: {e}")
                fetch_failed = True
                break

            page_data = response_json.get("items", [])
            if not page_data:
                break  # No results on this page: nothing more to fetch

            all_data.append(pd.json_normalize(page_data))

            # Stop once the page just fetched is the last one reported.
            if params["page"] >= response_json.get("maxPages", 1):
                break

            params["page"] += 1

    if not all_data:
        if fetch_failed:
            print("No data saved: the request failed before any page was fetched.")
        else:
            print("No data found for the specified range.")
        return

    final_data = pd.concat(all_data, ignore_index=True)

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    final_data.to_csv(output_path, index=False)

    if fetch_failed:
        # Do not claim success when the download stopped part-way through.
        print(
            f"Warning: download was interrupted; partial data "
            f"({len(all_data)} page(s)) saved to: {output_file}"
        )
    else:
        print(f"Data successfully saved to: {output_file}")

# Example invocation: runs only when this file is executed as a script.
if __name__ == "__main__":
    example_kwargs = {
        "output_file": r"rutaparaelarchivo.csv",
        "year_from": 2010,
        "year_to": 2024,
    }
    fetch_and_save_asylum_applications(**example_kwargs)




Unnamed: 0,year,coo_id,coo_name,coo,coo_iso,coa_id,coa_name,coa,coa_iso,procedure_type,app_type,dec_level,app_pc,applied
0,2024,2,Afghanistan,AFG,AFG,8,Egypt,ARE,EGY,U,N,FI,P,26
1,2024,2,Afghanistan,AFG,AFG,10,Armenia,ARM,ARM,G,N,FI,P,6
2,2024,2,Afghanistan,AFG,AFG,11,Australia,AUL,AUS,G,A,AR,P,15
3,2024,2,Afghanistan,AFG,AFG,11,Australia,AUL,AUS,G,N,FI,P,284
4,2024,2,Afghanistan,AFG,AFG,12,Austria,AUS,AUT,G,N,FA,P,1365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4878,2024,214,Zimbabwe,ZIM,ZWE,159,South Africa,RSA,ZAF,G,NR,FA,P,89
4879,2024,214,Zimbabwe,ZIM,ZWE,183,Sweden,SWE,SWE,G,N,FI,P,5
4880,2024,214,Zimbabwe,ZIM,ZWE,202,United States of America,USA,USA,G,N,EO,P,13
4881,2024,214,Zimbabwe,ZIM,ZWE,202,United States of America,USA,USA,G,N,IN,P,628
