In [1]:
import pandas as pd
import csv
from demyst.analytics import Analytics
from collections import Counter
import numpy as np

# Single Sign On / SAML

First, log in to the Demyst Console through the SSO link your company provides. Once you are authenticated, click the "API Key" button in the menu bar and copy the token to your clipboard. In the cell below, paste the token into the `jwt` argument and your email address into the `username` argument.

In [2]:
# Create the Demyst client. Both placeholder values must be replaced with real
# credentials (email address and API token) before this cell is run.
analytics = Analytics(
    username="REPLACE WTIH EMAIL ADDRESS",
    jwt="REPLACE WTIH KEY",
)

# Read In Your Input Data File

In [3]:
# Load the company registration numbers; the file has no header row.
# dtype=str keeps identifiers such as '09797821' intact: without it, a file that
# holds only numeric registration numbers is parsed as integers and the leading
# zeros are lost.
inputs = pd.read_csv(
    'https://demyst-apis.s3.amazonaws.com/demos/all_reg_no.csv',
    header=None,
    dtype=str,
)
inputs.columns = ['company_number']
# client_id mirrors the row index; later cells use it as the key when merging
# each provider's results back together.
inputs['client_id'] = inputs.index

# List All Of The Relevant Providers For This Use Case

In [4]:
# Every provider considered for this use case. The list is used near the end of
# the notebook so that providers returning no columns are still reported.
all_providers = [
    'acuris_business_search',
    'companies_house_company_officers',
    'companies_house_company_profile',
    'duedil_company_charges',
    'duedil_company_group_parents',
    'duedil_company_group_subsidiaries',
    'duedil_company_officers',
    'duedil_company_related_companies',
    'duedil_company_vitals',
    'duedil_find_company',
    'globaldatabase_company_details',
    'globaldatabase_find_companies',
    'matchdeck_search',
    'matchdeck_details',
    'opencorporates_companies_search',
    'orb_search',
    'owler_company_premium',
    'pipl_premium_search',
]

# Get Company Name and Address through Companies House

In [5]:
# Request the Companies House profile and officer list for every input row.
providers = [
    'companies_house_company_profile',
    'companies_house_company_officers',
]
company_basic = analytics.enrich_and_download(providers, inputs, validate=False)

# Give the key columns plain names so later cells can merge on `client_id`
# and report `company_number` without a provider/inputs prefix.
company_basic = company_basic.rename(
    columns={
        'companies_house_company_profile.client_id': 'client_id',
        'inputs.company_number': 'company_number',
    }
)
company_basic.head()

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 0.1 credits of the 998063049 credits your organization currently has.


Enrich Job ID: 6169


IntProgress(value=1, max=2)

Label(value='Checking status...')

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,companies_house_company_officers.items[9].occupation,companies_house_company_officers.items[9].officer_role,companies_house_company_officers.items[9].resigned_on,companies_house_company_officers.items_per_page,companies_house_company_officers.kind,companies_house_company_officers.links.self,companies_house_company_officers.resigned_count,companies_house_company_officers.start_index,companies_house_company_officers.total_results,companies_house_company_officers.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,Commercial Director,director,,35,officer-list,/company/SC005364/officers,23,0,33,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,35,officer-list,/company/09797821/officers,4,0,8,
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,Private Equity Executive,director,2018-04-01,35,officer-list,/company/09631707/officers,3,0,11,
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,secretary,1997-07-01,35,officer-list,/company/SC036374/officers,18,0,26,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,Master Baker,director,2016-10-14,35,officer-list,/company/08802727/officers,6,0,14,


# Add Returned Company Name And Address To Input File

In [80]:
# Build a lookup from county name (lower-cased, surrounding whitespace removed)
# to the value in the spreadsheet's `parent` column, which is used below as the
# region (state) code.
county_code = pd.read_excel('https://demyst-apis.s3.amazonaws.com/demos/county_code.xlsx')
code = {
    county.lower().strip(): parent
    for county, parent in zip(county_code['county'], county_code['parent'])
}
    
def find_code(x, lookup=None):
    """Return the region code for a county/locality name, or '' when unknown.

    Parameters
    ----------
    x : object
        Value to look up. Anything that is not a string (for example the NaN
        pandas uses for missing cells, or None) yields ''.
    lookup : dict, optional
        Mapping from normalised name to code. Defaults to the module-level
        `code` dictionary, resolved at call time.

    Returns
    -------
    The mapped code, or '' if `x` is not a string or has no entry.
    """
    if lookup is None:
        lookup = code
    if not isinstance(x, str):
        return ''
    # Normalise exactly as the dictionary keys were built (lower-case, then
    # strip) so values with stray surrounding whitespace still match.
    return lookup.get(x.lower().strip(), '')

In [81]:
# Copy the company name and registered-office address returned by Companies
# House onto the input frame (values are aligned by row index), under the
# column names used as inputs for the next stage of providers.
inputs['business_name'] = company_basic['companies_house_company_profile.company_name']
inputs['country'] = 'UK'

# The same company name is supplied under two further column names.
for alias in ('name', 'search_term'):
    inputs[alias] = inputs['business_name']

for target, source in (
    ('street', 'companies_house_company_profile.registered_office_address.address_line_1'),
    ('city', 'companies_house_company_profile.registered_office_address.locality'),
    ('post_code', 'companies_house_company_profile.registered_office_address.postal_code'),
):
    inputs[target] = company_basic[source]

# The state is derived from the locality through the county -> code lookup;
# localities with no entry in the lookup become ''.
inputs['state'] = company_basic[
    'companies_house_company_profile.registered_office_address.locality'
].apply(find_code)

# Replace missing values with empty strings.
inputs.fillna('', inplace=True)

In [82]:
# Preview the first rows of the input frame, including the name and address columns added above.
inputs.head()

Unnamed: 0,company_number,client_id,business_name,country,name,search_term,street,city,post_code,state
0,SC005364,0,Aberdeen Football Club Plc,UK,Aberdeen Football Club Plc,Aberdeen Football Club Plc,Pittodrie Stadium,Aberdeen,AB24 5QH,SCT
1,09797821,1,Freetrade Limited,UK,Freetrade Limited,Freetrade Limited,Second Home,London,E1 5JL,ENG
2,09631707,2,Erm Worldwide Group Limited,UK,Erm Worldwide Group Limited,Erm Worldwide Group Limited,2nd Floor Exchequer Court,London,EC3A 8AA,ENG
3,SC036374,3,The Edrington Group Limited,UK,The Edrington Group Limited,The Edrington Group Limited,100 Queen Street,Glasgow,G1 3DN,SCT
4,08802727,4,Bako North Western (group) Limited,UK,Bako North Western (group) Limited,Bako North Western (group) Limited,74 Roman Way Industrial Estate,Preston,PR2 5BE,


# Execute First Stage Of Appends - Vendors Which Use Company Name And Address As Input

In [52]:
# Stage-one providers, enriched from the `inputs` frame in the next cell.
providers = [
    'duedil_find_company',
    'acuris_business_search',
    'globaldatabase_find_companies',
    'opencorporates_companies_search',
    'matchdeck_search',
    'orb_search',
]

In [53]:
# Run the stage-one providers against the augmented input frame.
first_results = analytics.enrich_and_download(
    providers,
    inputs,
    validate=False,
)
first_results.head()

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 0.95 credits of the 998063049 credits your organization currently has.


Enrich Job ID: 6170


IntProgress(value=1, max=2)

Label(value='Checking status...')

Unnamed: 0,inputs.business_name,inputs.city,inputs.company_number,inputs.country,inputs.name,inputs.post_code,inputs.search_term,inputs.state,inputs.street,duedil_find_company.row_id,...,orb_search.results[9].orb_num,orb_search.results[9].parent_name,orb_search.results[9].parent_orb_num,orb_search.results[9].state,orb_search.results[9].subsidiaries_count,orb_search.results[9].ultimate_parent_name,orb_search.results[9].ultimate_parent_orb_num,orb_search.results[9].zip,orb_search.results_count,orb_search.error
0,Aberdeen Football Club Plc,Aberdeen,SC005364,UK,Aberdeen Football Club Plc,AB24 5QH,Aberdeen Football Club Plc,SCT,Pittodrie Stadium,0,...,,,,,,,,,0,
1,Freetrade Limited,London,09797821,UK,Freetrade Limited,E1 5JL,Freetrade Limited,ENG,Second Home,1,...,96116834.0,,,ENG,0.0,,,E14 5AP,17,
2,Erm Worldwide Group Limited,London,09631707,UK,Erm Worldwide Group Limited,EC3A 8AA,Erm Worldwide Group Limited,ENG,2nd Floor Exchequer Court,2,...,96116834.0,,,ENG,0.0,,,E14 5AP,17,
3,The Edrington Group Limited,Glasgow,SC036374,UK,The Edrington Group Limited,G1 3DN,The Edrington Group Limited,SCT,100 Queen Street,3,...,,,,,,,,,0,
4,Bako North Western (group) Limited,Preston,08802727,UK,Bako North Western (group) Limited,PR2 5BE,Bako North Western (group) Limited,,74 Roman Way Industrial Estate,4,...,14301100.0,,,Lancashire,0.0,,,PR2 8UR,15365,


#### Merge the returned results against unique client ID

In [62]:
# Left-join the stage-one results onto the Companies House frame so every
# company is kept; rows are matched on the client ID.
results = company_basic.merge(
    first_results,
    how='left',
    left_on='client_id',
    right_on='duedil_find_company.client_id',
)
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,orb_search.results[9].orb_num,orb_search.results[9].parent_name,orb_search.results[9].parent_orb_num,orb_search.results[9].state,orb_search.results[9].subsidiaries_count,orb_search.results[9].ultimate_parent_name,orb_search.results[9].ultimate_parent_orb_num,orb_search.results[9].zip,orb_search.results_count,orb_search.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,,,,,,,,,0,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,96116834.0,,,ENG,0.0,,,E14 5AP,17,
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,96116834.0,,,ENG,0.0,,,E14 5AP,17,
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,0,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,14301100.0,,,Lancashire,0.0,,,PR2 8UR,15365,


# Execute Second Stage Of Appends - Sources Which Require More Inputs

#### Create Input for Pipl

In [57]:
def _name_part(full_name, position):
    """Return the whitespace-separated token of `full_name` at `position`, or '' if there is none."""
    # Missing names arrive as NaN (a float) or None; blank names split to [].
    if not isinstance(full_name, str):
        return ''
    tokens = full_name.split()
    return tokens[position] if tokens else ''


# Create the Pipl input, keeping client_id so results can be matched back to
# each company. Address and name come from the first officer (items[0])
# returned by Companies House; values are aligned by row index.
pipl_input = inputs[['client_id', 'country']].copy()
pipl_input['street'] = company_basic['companies_house_company_officers.items[0].address.address_line_1']
pipl_input['city'] = company_basic['companies_house_company_officers.items[0].address.locality']
pipl_input['post_code'] = company_basic['companies_house_company_officers.items[0].address.postal_code']
pipl_input['state'] = company_basic['companies_house_company_officers.items[0].address.locality'].apply(lambda x: find_code(x))
# First and last whitespace-separated tokens of the officer name; missing or
# blank names produce '' instead of raising.
pipl_input['first_name'] = company_basic['companies_house_company_officers.items[0].name'].apply(lambda x: _name_part(x, 0))
pipl_input['last_name'] = company_basic['companies_house_company_officers.items[0].name'].apply(lambda x: _name_part(x, -1))
pipl_input.fillna('', inplace=True)
pipl_input.head()

Unnamed: 0,client_id,country,street,city,post_code,state,first_name,last_name
0,0,UK,Pittodrie Stadium,Aberdeen,AB24 5QH,SCT,Roy,Johnston
1,1,UK,68 Hanbury Street,London,E1 5JL,ENG,Daniel,Tedman
2,2,UK,Exchequer Court,London,EC3A 8AA,ENG,Roy,Burrows
3,3,UK,Queen Street,Glasgow,G1 3DN,SCT,Martin,Cooke
4,4,UK,Roman Way Industrial Estate,Preston,PR2 5BE,,Kirti,Hirani


#### Run Pipl and Merge Results

In [58]:
# Only the first row of pipl_input is submitted; widen the slice to enrich more rows.
pipl_results = analytics.enrich_and_download(
    ['pipl_premium_search'],
    pipl_input.iloc[0:1],
    validate=False,
)
# Left join keeps every company; rows that were not submitted get NaN in the Pipl columns.
results = results.merge(
    pipl_results,
    how='left',
    left_on='client_id',
    right_on='pipl_premium_search.client_id',
)

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 5.5 credits of the 998063049 credits your organization currently has.


Enrich Job ID: 6171


IntProgress(value=1, max=2)

Label(value='Checking status...')

In [64]:
# Preview the merged frame after the Pipl step.
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,pipl_premium_search.query.names[0].last_seen,pipl_premium_search.query.names[0].middle_name,pipl_premium_search.query.names[0].prefix,pipl_premium_search.query.names[0].suffix,pipl_premium_search.query.names[0].type,pipl_premium_search.query.names[0].valid_since,pipl_premium_search.query.search_pointer,pipl_premium_search.search_id,pipl_premium_search.visible_sources,pipl_premium_search.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,,,,,,,,1905302101092615289220499064332010274,0.0,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,,,,,,,
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,,
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,,


#### Run DueDil and Merge Results

In [60]:
try:
    # Keep only the rows where the stage-one DueDil search returned a company
    # ID, and rename the key columns to the `client_id` / `id` inputs passed on
    # to the DueDil detail providers. Selecting and renaming in one expression
    # avoids an in-place rename on a filtered slice.
    duedil_input = results.loc[
        results['duedil_find_company.companies[0].company_id'].notna(),
        ['duedil_find_company.client_id', 'duedil_find_company.companies[0].company_id'],
    ].rename(columns={'duedil_find_company.client_id': 'client_id',
                      'duedil_find_company.companies[0].company_id': 'id'})
    duedil_results = analytics.enrich_and_download(['duedil_company_vitals',
                                      'duedil_company_charges',
                                      'duedil_company_related_companies',
                                      'duedil_company_group_parents', 
                                      'duedil_company_group_subsidiaries', 
                                      'duedil_company_officers'], duedil_input, validate=False)
    results = pd.merge(results, duedil_results, how = 'left',left_on = 'client_id', right_on = 'duedil_company_vitals.client_id')
except Exception as exc:
    # Best-effort step: the notebook carries on without DueDil details, but the
    # underlying cause is shown so real failures are not mistaken for "no
    # match". KeyboardInterrupt and SystemExit are no longer swallowed.
    print('No company found for Duedil')
    print('Reason: {!r}'.format(exc))

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 0.3 credits of the 998063044 credits your organization currently has.


Enrich Job ID: 6172


IntProgress(value=1, max=2)

Label(value='Checking status...')

In [66]:
# Preview the merged frame after the DueDil step.
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,duedil_company_officers.officers[9].person.honorific,duedil_company_officers.officers[9].person.last_name,duedil_company_officers.officers[9].person.middle_name,duedil_company_officers.officers[9].person.nationalities[0].country_code,duedil_company_officers.officers[9].person.nationalities[0].demonym,duedil_company_officers.officers[9].type,duedil_company_officers.pagination.limit,duedil_company_officers.pagination.offset,duedil_company_officers.pagination.total,duedil_company_officers.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,Mr,Yule,Kynoch,GB,British,person,10,0,30,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,,,,10,0,8,
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,Mr,Doherty,,US,American,person,10,0,11,
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,Mr,Macphail,Alexander Rose,GB,British,person,10,0,24,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,Mr,Taylor,Anthony,GB,British,person,10,0,14,


#### Run GlobalDatabase And Merge Results

In [67]:
try:
    # Keep only the rows where the stage-one GlobalDatabase search returned a
    # company ID, and rename the key columns to the `client_id` / `id` inputs
    # passed on to the details provider. Selecting and renaming in one
    # expression avoids an in-place rename on a filtered slice.
    globaldatabase_input = results.loc[
        results['globaldatabase_find_companies.companies[0].id'].notna(),
        ['globaldatabase_find_companies.client_id', 'globaldatabase_find_companies.companies[0].id'],
    ].rename(columns={'globaldatabase_find_companies.client_id': 'client_id',
                      'globaldatabase_find_companies.companies[0].id': 'id'})
    globaldatabase_results = analytics.enrich_and_download(['globaldatabase_company_details'], globaldatabase_input, validate=False)
    results = pd.merge(results, globaldatabase_results, how = 'left',left_on = 'client_id', right_on = 'globaldatabase_company_details.client_id')
except Exception as exc:
    # Best-effort step: the notebook carries on without GlobalDatabase details,
    # but the underlying cause is shown so real failures are not mistaken for
    # "no match". KeyboardInterrupt and SystemExit are no longer swallowed.
    print('No company found for global database')
    print('Reason: {!r}'.format(exc))

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 0.05 credits of the 998063044 credits your organization currently has.


Enrich Job ID: 6173


IntProgress(value=1, max=2)

Label(value='Checking status...')

In [68]:
# Preview the merged frame after the GlobalDatabase step.
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,globaldatabase_company_details.location,globaldatabase_company_details.phone,globaldatabase_company_details.post_code,globaldatabase_company_details.region,globaldatabase_company_details.registration_number,globaldatabase_company_details.size,globaldatabase_company_details.street,globaldatabase_company_details.vat_number,globaldatabase_company_details.website,globaldatabase_company_details.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,PITTODRIE STREET,441224650400.0,AB24 5QH,Aberdeen,SC005364,,Pittodrie Stadium,,http://afc.co.uk,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,,,,,,,"type: insufficient_input , message: Inputs are..."
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,ST MARY AXE,,EC3A 8AA,London,09631707,,2nd Floor Exchequer Court,,,
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,GLASGOW,,G1 3DN,,SC036374,,100 Queen Street,,http://edringtongroup.com,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,LONGRIDGE ROAD,448438164618.0,PR2 5BE,Preston,08802727,,74 Roman Way Industrial Estate,,http://callbakolondon.com,


#### Run Owler And Merge Results

In [70]:
try:
    # Owler is queried by URL, taken from the website GlobalDatabase returned;
    # rows without a website are skipped. Selecting and renaming in one
    # expression avoids an in-place rename on a filtered slice.
    owler_input = results.loc[
        results['globaldatabase_company_details.website'].notna(),
        ['globaldatabase_company_details.client_id', 'globaldatabase_company_details.website'],
    ].rename(columns={'globaldatabase_company_details.client_id': 'client_id',
                      'globaldatabase_company_details.website': 'url'})
    owler_results = analytics.enrich_and_download(['owler_company_premium'], owler_input, validate=False)
    results = pd.merge(results, owler_results, how = 'left',left_on = 'client_id', right_on = 'owler_company_premium.client_id')
except Exception as exc:
    # Best-effort step: the notebook carries on without Owler data, but the
    # underlying cause is shown so real failures are not mistaken for "no
    # match". KeyboardInterrupt and SystemExit are no longer swallowed.
    print('No company found for Owler')
    print('Reason: {!r}'.format(exc))

Verifying providers...
Starting enrichment...
Uploading data...


This enrichment will use 20.5 credits of the 998063044 credits your organization currently has.


Enrich Job ID: 6174


IntProgress(value=1, max=2)

Label(value='Checking status...')

In [71]:
# Preview the merged frame after the Owler step.
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,owler_company_premium.perm_id,owler_company_premium.profile_url,owler_company_premium.revenue,owler_company_premium.short_name,owler_company_premium.stock.exchange,owler_company_premium.stock.ticker,owler_company_premium.twitter_link,owler_company_premium.website,owler_company_premium.youtube_link,owler_company_premium.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,https://permid.org/1-4295897304,https://www.owler.com/iaApp/2298926/aberdeen-f...,,Aberdeen Football Club,,,http://twitter.com/AberdeenFC,http://www.afc.co.uk,,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,,,,,,,"type: insufficient_input , message: Inputs are..."
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,,"type: insufficient_input , message: Inputs are..."
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,https://permid.org/1-4296446788,https://www.owler.com/iaApp/280154/edringtongr...,24876417.0,Edringtongroup,,,,http://www.edringtongroup.com/,,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,https://www.owler.com/iaApp/1836466/callbakolo...,,Callbakolondon,,,,http://callbakolondon.com,,


#### Run Matchdeck And Merge Results

In [72]:
try:
    # Keep only the rows where the stage-one Matchdeck search returned an ID,
    # and rename the key columns to the `client_id` / `id` inputs passed on to
    # the details provider. Selecting and renaming in one expression avoids an
    # in-place rename on a filtered slice.
    matchdeck_input = results.loc[
        results['matchdeck_search.id'].notna(),
        ['matchdeck_search.client_id', 'matchdeck_search.id'],
    ].rename(columns={'matchdeck_search.client_id': 'client_id',
                      'matchdeck_search.id': 'id'})
    matchdeck_results = analytics.enrich_and_download(['matchdeck_details'], matchdeck_input, validate=False)
    results = pd.merge(results, matchdeck_results, how = 'left',left_on = 'client_id', right_on = 'matchdeck_details.client_id')
except Exception as exc:
    # Best-effort step: the notebook carries on without Matchdeck details, but
    # the underlying cause is shown so real failures are not mistaken for "no
    # match". KeyboardInterrupt and SystemExit are no longer swallowed.
    print('No company found for matchdeck')
    print('Reason: {!r}'.format(exc))

No company found for matchdeck


In [73]:
# Preview the merged frame after the Matchdeck step.
results.head()

Unnamed: 0,company_number,companies_house_company_profile.row_id,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,...,owler_company_premium.perm_id,owler_company_premium.profile_url,owler_company_premium.revenue,owler_company_premium.short_name,owler_company_premium.stock.exchange,owler_company_premium.stock.ticker,owler_company_premium.twitter_link,owler_company_premium.website,owler_company_premium.youtube_link,owler_company_premium.error
0,SC005364,0,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,...,https://permid.org/1-4295897304,https://www.owler.com/iaApp/2298926/aberdeen-f...,,Aberdeen Football Club,,,http://twitter.com/AberdeenFC,http://www.afc.co.uk,,
1,09797821,1,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,...,,,,,,,,,,"type: insufficient_input , message: Inputs are..."
2,09631707,2,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,,,,,,,,,"type: insufficient_input , message: Inputs are..."
3,SC036374,3,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,https://permid.org/1-4296446788,https://www.owler.com/iaApp/280154/edringtongr...,24876417.0,Edringtongroup,,,,http://www.edringtongroup.com/,,
4,08802727,4,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,...,,https://www.owler.com/iaApp/1836466/callbakolo...,,Callbakolondon,,,,http://callbakolondon.com,,


# Clean Output For Analysis - Remove Empty Columns, Errors, Row ID and Input Columns

In [74]:
# Drop bookkeeping columns: anything whose name contains 'error', 'inputs' or
# 'row_id', plus every provider-prefixed client_id (the plain `client_id` key
# column is kept).
columns_to_drop = [
    column
    for column in results.columns
    if any(marker in column for marker in ('error', 'inputs', 'row_id'))
    or ('client_id' in column and column != 'client_id')
]
results.drop(columns=columns_to_drop, inplace=True)

# Treat empty strings as missing, then remove columns with no values at all.
results.replace([''], [None], inplace=True)
results.dropna(axis=1, how='all', inplace=True)

# Display Number of Columns Returned for Each Vendor

In [76]:
# The vendor is the part of each column name before the first '.'; the two key
# columns are not attributed to any vendor.
output_columns = sorted(
    column.split('.')[0]
    for column in results.columns
    if column not in ('client_id', 'company_number')
)
vendor_count = Counter(output_columns)

# Vendors with no surviving columns are still listed, with a count of zero.
for vendor in all_providers:
    vendor_count.setdefault(vendor, 0)
vendor_count

Counter({'acuris_business_search': 14213,
         'companies_house_company_officers': 506,
         'companies_house_company_profile': 44,
         'duedil_company_charges': 86,
         'duedil_company_group_parents': 6,
         'duedil_company_group_subsidiaries': 27,
         'duedil_company_officers': 245,
         'duedil_company_related_companies': 116,
         'duedil_company_vitals': 34,
         'duedil_find_company': 75,
         'globaldatabase_company_details': 12,
         'globaldatabase_find_companies': 5,
         'matchdeck_search': 2,
         'opencorporates_companies_search': 829,
         'orb_search': 140,
         'owler_company_premium': 19,
         'pipl_premium_search': 12,
         'matchdeck_details': 0})

# Display Final Results and Save to Local Folder

In [77]:
# Preview the first rows of the final results frame.
results.head()

Unnamed: 0,company_number,client_id,companies_house_company_profile.accounts.accounting_reference_date.day,companies_house_company_profile.accounts.accounting_reference_date.month,companies_house_company_profile.accounts.last_accounts.made_up_to,companies_house_company_profile.accounts.last_accounts.period_end_on,companies_house_company_profile.accounts.last_accounts.period_start_on,companies_house_company_profile.accounts.last_accounts.type,companies_house_company_profile.accounts.next_accounts.due_on,companies_house_company_profile.accounts.next_accounts.overdue,...,owler_company_premium.industries[0].industry,owler_company_premium.is_hit,owler_company_premium.logo_url,owler_company_premium.name,owler_company_premium.perm_id,owler_company_premium.profile_url,owler_company_premium.revenue,owler_company_premium.short_name,owler_company_premium.twitter_link,owler_company_premium.website
0,SC005364,0,30,6,2018-06-30,2018-06-30,2017-07-01,group,2019-12-31,False,...,"Travel, Recreation & Leisure",True,https://s3.amazonaws.com/owler-image/logo/aber...,Aberdeen Football Club Plc,https://permid.org/1-4295897304,https://www.owler.com/iaApp/2298926/aberdeen-f...,,Aberdeen Football Club,http://twitter.com/AberdeenFC,http://www.afc.co.uk
1,09797821,1,30,9,2017-09-30,2017-09-30,2016-10-01,micro-entity,2019-06-30,False,...,,,,,,,,,,
2,09631707,2,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,False,...,,,,,,,,,,
3,SC036374,3,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,False,...,"Travel, Recreation & Leisure",True,https://s3.amazonaws.com/owler-image/logo/edri...,The Edrington Group,https://permid.org/1-4296446788,https://www.owler.com/iaApp/280154/edringtongr...,24876417.0,Edringtongroup,,http://www.edringtongroup.com/
4,08802727,4,31,3,2018-03-31,2018-03-31,2017-04-01,group,2019-12-31,False,...,,True,https://s3.amazonaws.com/owler-image/logo/bako...,Bako South Eastern Wholesaler,,https://www.owler.com/iaApp/1836466/callbakolo...,,Callbakolondon,,http://callbakolondon.com


In [None]:
# Destination for the results; change this to wherever the file should be
# saved. to_csv needs a real file path: an empty string cannot be opened for
# writing.
output_path = 'company_enrichment_results.csv'
results.to_csv(output_path, index=False)