# Opencorporates code snippets

The aim of this notebook is to create some code snippets to query the opencorporates database. These snippets can be reused in other code, or on other platforms such as recon-ng or maltego. 

Opencorporates allows you to query the API without a key up to approximately 100 times a day, and provides free keys for some open source projects. You can find the opencorporates API reference [here](https://api.opencorporates.com/documentation/API-Reference) and more information on API keys [here](https://opencorporates.com/api_accounts/new).

In [None]:
import requests
import pprint

First define a basic search function with pagination

In [None]:
def search_company(company_name, jurisdiction=None):

    '''Search opencorporates for companies by name and return every match.

    The first request is sent without a page number; the ``total_pages``
    value of that first response decides how many further pages
    (``page=2`` .. ``page=total_pages``) are requested.

    Args:
        company_name: search term, sent as the ``q`` query parameter.
        jurisdiction: optional value sent as ``jurisdiction_code``; the
            parameter is left out of the request when this is None.

    Returns:
        A list holding the entries of ``results -> companies`` from all
        fetched pages, in page order.

    Raises:
        requests.HTTPError: if a response carries an error status code.
        requests.RequestException: on connection problems or a timeout.
        KeyError: if a response lacks the ``results`` / ``companies`` keys.
    '''

    search_endpoint = 'https://api.opencorporates.com/v0.4/companies/search'

    # Hand the parameters to requests instead of gluing the URL together,
    # so characters such as spaces, '&' or '#' in the name are percent-encoded
    # rather than corrupting the query string.
    params = {'q': company_name}
    if jurisdiction is not None:
        params['jurisdiction_code'] = jurisdiction

    companies = []
    page = 1
    total_pages = 1

    while page <= total_pages:
        if page > 1:
            params['page'] = page

        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(search_endpoint, params=params, timeout=30)
        # Fail with a clear HTTP error instead of a KeyError further down.
        response.raise_for_status()
        results = response.json()['results']
        companies.extend(results['companies'])

        if page == 1:
            # A missing or null total_pages is treated as a single page;
            # comparing None with an int would raise TypeError.
            total_pages = results.get('total_pages') or 1

        page += 1

    return companies

see if it works...

In [None]:
# Same search term twice: once limited to the 'nl' jurisdiction,
# once without any jurisdiction filter.
nl = search_company('fox-it', 'nl')
world = search_company('fox-it', jurisdiction=None)

In [None]:
# search_company returns a list, so len() gives the number of matches
# directly; no manual counting loop is needed.
count = len(nl)
print('found ' + str(count) + ' companies')

# Dump the raw search results for inspection.
pprint.pprint(nl)

In [None]:
# search_company returns a list, so len() gives the number of matches
# directly; no manual counting loop is needed.
count = len(world)
print('found ' + str(count) + ' companies')

# Dump the raw search results for inspection.
pprint.pprint(world)

Parse the data and extract relevant information, such as name, company_number, company_type, current_status, incorporation_date, dissolution_date, inactive, jurisdiction_code, opencorporates_url, previous_names, street_address, postal_code, country, address_in_full, source_url, source.

In [None]:
def _nested_section(record, key):

    '''Return record[key] when it is a dict, otherwise an empty dict.'''

    section = record.get(key)
    return section if isinstance(section, dict) else {}


def parse_company_data(result):

    '''Yield one flat tuple of selected fields per company search result.

    This is a generator: wrap the call in ``list()`` to get a list.

    Args:
        result: iterable of search results, each a dict whose ``'company'``
            key holds the company record.

    Yields:
        A 16-tuple ``(name, company_number, company_type, current_status,
        incorporation_date, dissolution_date, inactive, jurisdiction_code,
        opencorporates_url, previous_names, street_address, postal_code,
        country, address_in_full, source_url, source)``.

        A placeholder string ('unknown', 'still active' or
        'no previous names') is substituted only when a key is absent; a key
        that is present with a null value is passed through as None.

    Raises:
        KeyError: while iterating, if an entry has no ``'company'`` key.
    '''

    for entry in result:
        record = entry['company']

        name = record.get('name', 'unknown')
        company_number = record.get('company_number', 'unknown')
        company_type = record.get('company_type', 'unknown')
        current_status = record.get('current_status', 'unknown')
        incorporation_date = record.get('incorporation_date', 'unknown')
        dissolution_date = record.get('dissolution_date', 'still active')
        inactive = record.get('inactive', 'unknown')
        # Placeholder previously read 'unknown.' (stray period), unlike
        # every other field.
        jurisdiction_code = record.get('jurisdiction_code', 'unknown')
        opencorporates_url = record.get('opencorporates_url', 'unknown')
        previous_names = record.get('previous_names', 'no previous names')

        # registered_address may be absent or null; an empty dict makes the
        # lookups below fall back to 'unknown' without a catch-all except.
        address = _nested_section(record, 'registered_address')
        street_address = address.get('street_address', 'unknown')
        postal_code = address.get('postal_code', 'unknown')
        country = address.get('country', 'unknown')

        address_in_full = record.get('registered_address_in_full', 'unknown')
        source_url = record.get('registry_url', 'unknown')
        # Same tolerance for a missing or null source section.
        source = _nested_section(record, 'source').get('publisher', 'unknown')

        yield (
            name, company_number, company_type, current_status,
            incorporation_date, dissolution_date, inactive, jurisdiction_code,
            opencorporates_url, previous_names, street_address, postal_code,
            country, address_in_full, source_url, source,
        )
            

In [None]:
# parse_company_data is a generator; unpack it into a list so the rows
# can be iterated more than once.
companies = [*parse_company_data(nl)]

In [None]:
# Unpack each 16-field row into named variables and print a short summary.
for result in companies:
    (
        name, company_number, company_type, current_status,
        incorporation_date, dissolution_date, inactive, jurisdiction_code,
        opencorporates_url, previous_names, street_address, postal_code,
        country, address_in_full, source_url, source,
    ) = result
    print(name, company_number, jurisdiction_code, inactive)