In [2]:
import pandas as pd
from dotenv import load_dotenv
import os
import requests
import random

In [3]:
# Base URL of the OpenCorporates API.
# NOTE(review): not referenced by the cells visible in this notebook, which
# spell out full URLs instead -- confirm whether it is still needed.
endpoint = "https://api.opencorporates.com/"
# Presumably loads a local .env file into the process environment (python-dotenv)
# so the key does not have to be hard-coded in the notebook -- TODO confirm.
load_dotenv()
# os.getenv returns None when OPENCORPORATES_API_KEY is not set.
api_key = os.getenv("OPENCORPORATES_API_KEY")
# API endpoints of interest:
# GET companies/:jurisdiction_code/:company_number   (a single company record)
#   e.g. https://api.opencorporates.com/v0.4/companies/gb/00102498?sparse=true

# GET companies/search                                (company search)

Example code from Gemini

In [4]:
def get_random_companies(state, api_key, num_companies=10, timeout=30):
  """
  Downloads attributes for a random subset of active companies in a given US state
  and returns the data in a pandas DataFrame.

  The sample is drawn uniformly over the positions ``0 .. total_count - 1`` of the
  search result list. Only the result pages that actually contain a sampled
  position are requested, followed by one detail request per sampled company.

  NOTE(review): for large jurisdictions the sampled positions can fall on very
  deep result pages. Whether the API serves arbitrarily deep pages is not
  verified here; a rejected page surfaces as ``requests.HTTPError``.

  Args:
    state: The US state abbreviation (e.g., "CA" for California).
    api_key: Your OpenCorporates API key.
    num_companies: The number of random companies to retrieve.
    timeout: Seconds to wait for each HTTP request before giving up.

  Returns:
    A pandas DataFrame containing the company data, one row per company, in
    ascending order of search-result position. Empty if the search matches
    nothing.

  Raises:
    requests.HTTPError: If any search or detail request returns a bad status.
    ValueError: If num_companies is negative (raised by random.sample).
  """
  jurisdiction = f"us_{state.lower()}"
  search_url = "https://api.opencorporates.com/v0.4/companies/search"
  # Kept separate from the per-company request parameters so that later search
  # requests still carry the full query and filters.
  search_params = {
      "q": "*",  # Search for all companies
      "jurisdiction_code": jurisdiction,
      "current_status": "Active",  # Filter for active companies
      "api_token": api_key,
      "per_page": 100
  }

  def _fetch_page(page):
    # Return the "results" object of one page of the company search.
    response = requests.get(
        search_url, params={**search_params, "page": page}, timeout=timeout)
    response.raise_for_status()  # Raise an exception for bad status codes
    return response.json()["results"]

  first_page = _fetch_page(1)
  total_companies = first_page["total_count"]
  if total_companies == 0:
    return pd.DataFrame()  # Return an empty DataFrame if no companies found

  # Use the page size the response reports, if any, in case the API did not
  # honor the requested value -- assumes a "per_page" field; falls back to 100.
  page_size = first_page.get("per_page") or search_params["per_page"]

  # Pick random positions in the full result list and group them by the page
  # they live on, so that only the needed pages are downloaded.
  random_indices = random.sample(range(total_companies), min(num_companies, total_companies))
  offsets_by_page = {}
  for global_index in random_indices:
    page_index, offset = divmod(global_index, page_size)
    offsets_by_page.setdefault(page_index + 1, []).append(offset)

  companies = []
  for page in sorted(offsets_by_page):
    # The first page was already downloaded to learn total_count; reuse it.
    results = first_page if page == 1 else _fetch_page(page)
    listings = results["companies"]
    for offset in sorted(offsets_by_page[page]):
      if offset >= len(listings):
        # The page is shorter than total_count implied; skip instead of looping
        # forever looking for a position that does not exist.
        continue
      company_number = listings[offset]["company"]["company_number"]
      company_url = f"https://api.opencorporates.com/v0.4/companies/{jurisdiction}/{company_number}"
      detail_response = requests.get(
          company_url, params={"api_token": api_key}, timeout=timeout)
      detail_response.raise_for_status()
      companies.append(detail_response.json()["results"]["company"])

  # Create a pandas DataFrame from the list of company dictionaries
  return pd.DataFrame(companies)


# Demo run: sample five active companies for one state and print the table.
state = "CA"  # US state abbreviation to sample from
df = get_random_companies(state=state, api_key=api_key, num_companies=5)
print(df)

KeyboardInterrupt: 

In [5]:
# Display the sampled companies as the cell's output.
# Depends on the previous cell having run to completion: if that run is
# interrupted before `df` is assigned, this cell raises NameError.
df

NameError: name 'df' is not defined

In [6]:
# Query the company search endpoint directly for active Georgia (us_ga) companies.
search_url = "https://api.opencorporates.com/v0.4/companies/search"
params = dict(
    q="*",                      # wildcard: match every company
    jurisdiction_code="us_ga",
    current_status="Active",    # keep only active companies
    api_token=api_key,
    per_page=100,               # ask for 100 results per page
)
response = requests.get(search_url, params=params)
response.raise_for_status()  # stop here on a 4xx/5xx response
search_data = response.json()

# Number of companies the search reports as matching.
total_companies = search_data["results"]["total_count"]

In [8]:
# Inspect the raw JSON payload returned by the search request in the previous cell.
search_data

{'api_version': '0.4',
 'results': {'companies': [{'company': {'name': '10X Health Franchising LLC',
     'company_number': 'CTS-24077864',
     'jurisdiction_code': 'us_ga',
     'incorporation_date': '2024-04-16',
     'dissolution_date': None,
     'company_type': 'Consent to Service',
     'registry_url': None,
     'branch': None,
     'branch_status': None,
     'inactive': False,
     'current_status': 'Active',
     'created_at': '2024-05-01T12:39:58+00:00',
     'updated_at': '2024-09-29T13:09:33+00:00',
     'retrieved_at': '2024-09-25T00:00:00+00:00',
     'opencorporates_url': 'https://opencorporates.com/companies/us_ga/CTS-24077864',
     'previous_names': [],
     'alternative_names': [],
     'source': {'publisher': 'Georgia Secretary of State',
      'url': None,
      'retrieved_at': '2024-09-25T00:00:00+00:00'},
     'registered_address': {'street_address': '2920 NE 207th Street, 901',
      'locality': 'Miami',
      'region': 'FL',
      'postal_code': '33180',
    