# Companies House Public Data API Project Using PySpark

In [64]:
import requests
import json
import time
import csv
import os

In [65]:
def load_api_key(filepath="config.txt"):
    with open(filepath, "r") as file:
        for line in file:
            if line.startswith("API_KEY="):
                return line.strip().split("=")[1]
    raise ValueError("API_KEY not found in config file.")

In [66]:
# load API key from config file
API_KEY = load_api_key()
BASE_URL = "https://api.company-information.service.gov.uk/company/"
HEADERS = {"Accept": "application/json"}

In [67]:
# example list of company numbers
COMPANY_NUMBERS = [
    "09277122",  # The East India Company
    "01691920",  # ROYAL BANK OF SCOTLAND GROUP PLC
    "SC117120",  # SCOTTISH POWER UK PLC
    "00445790",  # TESCO PLC
    "01026167",  # BARCLAYS BANK PLC
    "04006623",  # ASOS PLC
    "00014259",  # HSBC BANK PLC
    "03261722",  # SAINSBURY'S SUPERMARKETS LTD
    "01800000",  # BRITISH TELECOMMUNICATIONS PUBLIC LIMITED COMPANY
    "00102498",  # BP P.L.C.
    "03034606",  # EASYJET AIRLINE COMPANY LIMITED
    "01003142",  # ROLLS-ROYCE PLC
    "00041424",  # UNILEVER PLC
    "00612172",  # OXFAM
    "01915767",  # CINEWORLD CINEMAS LIMITED
    "04031152",  # NATIONAL GRID PLC
    "00002065",  # LLOYDS BANK PLC
    "02906991",  # SKY UK LIMITED
    "01107406",  # ICELAND FOODS LIMITED
    "03279730",  # SAINSBURY’S BANK PLC
]

In [68]:
OUTPUT_FILE = "company_profiles.csv"
DELAY_BETWEEN_REQUESTS = 1

In [69]:
API_KEY

'21045d38-a79b-4c00-9adf-5d1080d02ae9'

In [70]:
# define fetch_company_profile function
def fetch_company_profile(company_number):
    url = BASE_URL + company_number
    try:
        response = requests.get(url, auth=(API_KEY, ""), headers=HEADERS)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"[{response.status_code}] Failed to fetch {company_number}")
            return None
    except Exception as e:
        print(f"[ERROR] {company_number}: {str(e)}")
        return None

In [71]:
def main():
    print(f"Fetching data for {len(COMPANY_NUMBERS)} companies...")
    results = []

    for number in COMPANY_NUMBERS:
        data = fetch_company_profile(number)
        if data:
            results.append({
                "company_number": data.get("company_number"),
                "company_name": data.get("company_name"),
                "status": data.get("company_status"),
                "date_of_creation": data.get("date_of_creation"),
                "type": data.get("type"),
                "jurisdiction": data.get("jurisdiction"),
                "registered_office_address": json.dumps(data.get("registered_office_address", {})),
                "sic_codes": ",".join(data.get("sic_codes", []))
            })
        time.sleep(DELAY_BETWEEN_REQUESTS)

        # save as csv
        keys = results[0].keys() if results else []
        with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(results)

        print(f"Saved {len(results)} company profiles to {OUTPUT_FILE}")

if __name__ == "__main__":
    main()

Fetching data for 20 companies...
Saved 1 company profiles to company_profiles.csv
Saved 2 company profiles to company_profiles.csv
Saved 3 company profiles to company_profiles.csv
Saved 4 company profiles to company_profiles.csv
Saved 5 company profiles to company_profiles.csv
Saved 6 company profiles to company_profiles.csv
Saved 7 company profiles to company_profiles.csv
Saved 8 company profiles to company_profiles.csv
Saved 9 company profiles to company_profiles.csv
Saved 10 company profiles to company_profiles.csv
Saved 11 company profiles to company_profiles.csv
Saved 12 company profiles to company_profiles.csv
Saved 13 company profiles to company_profiles.csv
Saved 14 company profiles to company_profiles.csv
Saved 15 company profiles to company_profiles.csv
Saved 16 company profiles to company_profiles.csv
Saved 17 company profiles to company_profiles.csv
Saved 18 company profiles to company_profiles.csv
Saved 19 company profiles to company_profiles.csv
Saved 20 company profiles