# Extracting all data from the Brønnøysund API into raw data

In [1]:
# Library imports
import requests
import time
import pickle
import os.path as path

Documentation for the Brønnøysund Enhetsregisteret API:
https://data.brreg.no/enhetsregisteret/api/docs/index.html#enheter-oppslag

In [2]:
# Load the raw Mattilsynet data; it supplies the org numbers to query the API with.
# Joining "../../.." onto the notebook name resolves to the directory two levels
# above the working directory, so the data file can be targeted directly.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
parent = path.abspath(path.join("brønnøysund_api.ipynb", "../../.."))
mattilsynet_raw_path = f'{parent}/Data/Raw/mattilsynet_rawdata.obj'
with open(mattilsynet_raw_path, 'rb') as file:
    list_of_all_reports = pickle.load(file)

In [3]:
# Sanity check: number of report records loaded from the Mattilsynet pickle.
print(len(list_of_all_reports))

5897


#### Find all the unique orgnumbers

In [5]:
# Collect the org number of every report, then reduce to the distinct values
# (the unique places in Oslo). The order of the unique list is arbitrary.
list_of_all_org_num = [report['orgnummer'] for report in list_of_all_reports]
unique_org_num_set = set(list_of_all_org_num)
list_of_unique_org_num = list(unique_org_num_set)
# Last expression of the cell: displays the number of unique org numbers.
len(list_of_unique_org_num)

1163

##### Now get the company info and classify as sub or main units

In [6]:
def get_unit(org_num, sub_and_main_units):
    """Look up one org number in the Brønnøysund API and file the result.

    The sub-unit endpoint (``underenheter/``) is tried first; only when that
    response is not OK is the main-unit endpoint (``enheter/``) tried. The JSON
    payload of the first OK response is appended in place to the matching inner
    list. If neither endpoint answers OK, a message is printed and the lists
    are left unchanged.

    Args:
        org_num (string): A company id number.
        sub_and_main_units (list): Two-element list ``[sub_units, main_units]``
            that is mutated in place.

    Returns:
        list: The same ``sub_and_main_units`` object that was passed in. It is
            returned on every path - also when the unit is not found - so a
            caller that reassigns its accumulator from the return value never
            ends up with ``None``.
    """
    url = "https://data.brreg.no/enhetsregisteret/api/"
    # Without a timeout a stalled connection would block the notebook forever.
    timeout_seconds = 30
    # (endpoint, index of the inner list to append to), in lookup order.
    endpoints = (("underenheter/", 0), ("enheter/", 1))

    for endpoint, bucket in endpoints:
        response = requests.get(url + endpoint + org_num, timeout=timeout_seconds)
        if response.ok:
            sub_and_main_units[bucket].append(response.json())
            return sub_and_main_units

    print(f"Could not get org_num: {org_num} not found in the database")
    return sub_and_main_units

In [7]:
def get_company_info(org_num_list):
    """Fetch the register data for every org number in ``org_num_list``.

    Each org number is passed to ``get_unit``, which appends the fetched data
    to either the sub-unit list or the main-unit list.

    Args:
        org_num_list (list): Org numbers (strings) to look up.

    Returns:
        list: Returns a list of lists, one for sub and one for main units:
            ``[sub_units, main_units]``.
    """

    #                 [[sub],[main]]
    sub_and_main_units = [[], []]

    for org_num in org_num_list:
        # get_unit fills the inner lists in place. Its return value is
        # deliberately not reassigned, so a lookup that returns nothing can
        # never replace the accumulator and discard what was collected.
        get_unit(org_num, sub_and_main_units)
        # Brief pause between lookups (presumably to go easy on the API).
        time.sleep(0.005)

    return sub_and_main_units

In [8]:
# Query the API for every unique org number (one request or more per number).
sub_and_main_units = get_company_info(list_of_unique_org_num)
# The result is [sub_units, main_units]; unpack it to report both sizes.
fetched_sub_units, fetched_main_units = sub_and_main_units
print(f"The lenght of the sub list: {len(fetched_sub_units)}")
print(f"The lenght of the main list: {len(fetched_main_units)}")

The lenght of the sub list: 1152
The lenght of the main list: 11


##### Now get the corresponding main units (parent units) of the sub units

In [15]:
def get_all_main_units(list_of_sub_units):
    """Takes a list of sub unit data, and returns a list of main unit org num.

    The main unit org num is read from each sub unit's ``'overordnetEnhet'``
    entry. A sub unit without that key is printed and skipped, so the result
    can be shorter than the input. Duplicates are kept.

    Args:
        list_of_sub_units (list): list of all sub units (dict-like records)

    Returns:
        list: list of all main unit org num, in input order
    """

    list_of_main_unit_org_num = []

    for sub_unit in list_of_sub_units:
        try:
            list_of_main_unit_org_num.append(sub_unit['overordnetEnhet'])
        except KeyError:
            # Only a missing key is expected here; any other error is a real
            # problem and is allowed to propagate instead of being swallowed.
            print(sub_unit)

    return list_of_main_unit_org_num


In [17]:
def get_only_unique_main_units(list_of_sub_units):
    """Return the distinct main unit org num referenced by the given sub units.

    Args:
        list_of_sub_units (list): list of all sub units

    Returns:
        list: every main unit org num exactly once; the order is arbitrary
    """
    # Passing through a set drops the duplicates produced by sub units that
    # share the same main unit.
    distinct_org_nums = set(get_all_main_units(list_of_sub_units))
    return list(distinct_org_nums)

In [18]:
# The first inner list holds the units returned by the sub-unit endpoint.
list_of_sub_units = sub_and_main_units[0]
# Distinct parent org numbers referenced by those sub units; sub units without
# a parent reference are printed by the helper and left out.
unique_parent_unit_orgnumbers = get_only_unique_main_units(list_of_sub_units)
# Last expression of the cell: displays how many distinct parent units exist.
len(unique_parent_unit_orgnumbers)

{'organisasjonsnummer': '911737795', 'navn': 'BAKER NORDBY GUNERIUS AVD 29', 'organisasjonsform': {'kode': 'BEDR', 'beskrivelse': 'Underenhet til næringsdrivende og offentlig forvaltning', '_links': {'self': {'href': 'https://data.brreg.no/enhetsregisteret/api/organisasjonsformer/BEDR'}}}, 'slettedato': '2023-02-09', 'nedleggelsesdato': '2022-12-31', '_links': {'self': {'href': 'https://data.brreg.no/enhetsregisteret/api/underenheter/911737795'}}}


992

The sub unit printed above has no `overordnetEnhet` field (the record carries a `slettedato`, i.e. it has been deleted from the register), so its parent company cannot be included.

#### Final data structure:
A dictionary with three keys. `sub_unit` holds the sub units, which are generally the companies that the reports from Mattilsynet are linked to. A small number of Mattilsynet reports are instead connected directly to main units; these are stored under `main_unit`. Finally, `parent_unit` holds the unique organisation numbers of the parent units, i.e. the companies that the sub units are part of. The Mattilsynet reports are not linked to these units directly.

In [19]:
# Bundle everything that was fetched into one dictionary:
#   sub_unit    - data for units returned by the sub-unit endpoint
#   main_unit   - data for units returned by the main-unit endpoint
#   parent_unit - distinct parent org numbers referenced by the sub units
unique_brønnøysund_company_data = {
    'sub_unit': sub_and_main_units[0],
    'main_unit': sub_and_main_units[1],
    'parent_unit': unique_parent_unit_orgnumbers,
}

In [20]:
# Print the number of entries stored under each key, one count per line.
for unit_key in ('sub_unit', 'main_unit', 'parent_unit'):
    print(len(unique_brønnøysund_company_data[unit_key]))

1152
11
992


##### Store the data as a pickle file in the raw data folder.

In [21]:
# Persist the collected data as a pickle file in the raw data folder.
# Joining "../../.." onto the notebook name resolves to the directory two levels
# above the working directory, so the file lands directly in the desired folder.
parent = path.abspath(path.join("brønnøysund_api.ipynb", "../../.."))
company_data_path = f'{parent}/Data/Raw/unique_brønnøysund_company_data.obj'
with open(company_data_path, 'wb') as file:
    pickle.dump(unique_brønnøysund_company_data, file)