In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os

import pandas as pd

from bankreg import BankReg

from sources.banktrack.banktrack import Banktrack
from sources.bocc.bocc import BOCC
from sources.gabv.gabv import Gabv
from sources.fairfinance.fairfinance import Fairfinance
from sources.switchit.switchit import Switchit
from sources.marketforces.marketforces import Marketforces
from sources.custombank.custombank import Custombank
from sources.wikidata.wikidata import Wikidata
from sources.usnic.usnic import USNIC

from airtableutils import BankGreenAirtable

In [13]:
%%time

BankReg.__instance__ = None
bankreg = BankReg()

Banktrack.load_and_create(bankreg, load_from_api=True)
print('Banktrack Added. New Length: ' + str(len(bankreg.reg)))

BOCC.load_and_create(bankreg)
print('BOCC Added. New Length: ' + str(len(bankreg.reg)))

Gabv.load_and_create(bankreg)
print('GABV and B-Impact Added. New Length: ' + str(len(bankreg.reg)))

Fairfinance.load_and_create(bankreg)
print('Fairfinance Added. New Length: ' + str(len(bankreg.reg)))

Switchit.load_and_create(bankreg)
print('Switchit Added. New Length: ' + str(len(bankreg.reg)))

Marketforces.load_and_create(bankreg)
print('Marketforces Added. New Length: ' + str(len(bankreg.reg)))

Wikidata.load_and_create(bankreg, load_from_api=True)
print('Wikidata Added. New Length: ' + str(len(bankreg.reg)))

USNIC.load_and_create(bankreg)
print('USNIC Added. New Length: ' + str(len(bankreg.reg)))

Custombank.load_and_create(bankreg, load_from_api=True)
print('Custom Data Added. New Length: ' + str(len(bankreg.reg)))

Banktrack Added. New Length: 227
BOCC Added. New Length: 227
GABV and B-Impact Added. New Length: 324
Fairfinance Added. New Length: 371
Switchit Added. New Length: 393
Marketforces Added. New Length: 495
Wikidata Added. New Length: 1768
USNIC Added. New Length: 62778
Custom Data Added. New Length: 62880
CPU times: user 51.5 s, sys: 656 ms, total: 52.2 s
Wall time: 1min


# Manual Cleanup

In [14]:
# The generic Triodos entries must be removed because they are duplicated by the
# country-specific Triodos instances. pop(..., None) tolerates a missing tag, so
# the cell can be re-run safely.
removed_banks = [bankreg.reg.pop(generic_tag, None) for generic_tag in ('triodos', 'triodos_bank')]
# Display the last removed entry (None if it was already gone).
removed_banks[-1]

<triodos_bank <class 'bank.Bank'> with >

# Spot Checks

In [15]:
# Every 'Bank Tag' in the custom data should be unique, so this cell should display
# an empty frame. If rows show up, fix the duplicates in the source spreadsheet:
# https://docs.google.com/spreadsheets/d/17UwBPNJ7kuLYnD4xhCc4c0rWWFKpVerWrGwXDVhXX84/edit#gid=302562551
from sources.source import Source, URIs

df = pd.read_csv(URIs.CUSTOM_BANK.value).fillna('')
# keep=False flags every member of a duplicate group, not just the repeats.
df.loc[df.duplicated(subset=['Bank Tag'], keep=False)]

Unnamed: 0,Preferred Bank Name,Bank Tag,Subsidiary Of Tag,Rating,Rating Reason,Country,Website,Added By,Notes,Date Added


In [16]:
# Optional: export the registry as a DataFrame for exploring the data (uncomment one of the lines below)
# df = bankreg.return_registry_as_df(allowed_ratings=['great', 'ok', 'bad', 'worst'])
# df = bankreg.return_registry_as_df(allowed_ratings=['great', 'ok', 'bad', 'worst', 'unk'])

In [17]:
# Sanity check: this cell should print nothing. Any output means a bank's wikidata
# tag differs from the tag it is registered under, i.e. something is wrong.
for tag, bank in bankreg.reg.items():
    wikidata_entry = bank.wikidata
    # Skip banks without wikidata and banks whose tags agree.
    if not wikidata_entry or wikidata_entry.tag == tag:
        continue
    # One line each: registry tag, wikidata tag, separator.
    print(tag, wikidata_entry.tag, '---', sep='\n')

In [19]:
# Spot check Nordea. Expectations:
#   * there should be banks originating in wikidata;
#   * some of the country-specific Nordea banks should be subsidiary_of nordea;
#   * country-specific Nordea banks should have the same rating as nordea. This isn't
#     always the case, because the US data can have poor subsidiary information
#     attached to it.
df = bankreg.return_registry_as_df(allowed_ratings=['great', 'ok', 'bad', 'worst', 'unk'])
# na=False: a row with a missing alias counts as "no match" instead of putting NaN in
# the mask (boolean indexing raises on a mask containing NaN).
# regex=False: 'nordea' is a plain substring, not a pattern.
df[df.aliases.str.contains('nordea', na=False, regex=False)]

Unnamed: 0,tag,name,aliases,country,data_sources,website,rating,reason,subsidiary_of,Rank - Total,...,total-GBP,total-AUD,total-CAD,permid,isin,viafid,lei,rssd,googleid,wikiid
12576,nordea_bk_norge_asa,NORDEA BK NORGE ASA,"nordea bank norge asa,nordea bk norge asa",United States,usnic,WWW.NORDEA.COM,unk,We do not have enough information to rate this...,,,...,,,,,,,,1242937,,
19335,nordea_bk_danmark_as,NORDEA BK DANMARK AS,"nordea bank danmark as,nordea bk danmark as",United States,usnic,0,unk,We do not have enough information to rate this...,,,...,,,,,,,,1838228,,
19594,nordea_inv_mgmt_north_amer,NORDEA INV MGMT NORTH AMER,"nordea inv mgmt north amer,nordea investment m...",United States,usnic,0,unk,We do not have enough information to rate this...,nordea_inv_mgmt_ab,,...,,,,,,,,1842177,,
25470,nordea_bk_abp,NORDEA BK ABP,"nordea bank abp,nordea bk abp",United States,usnic,WWW.NORDEA.COM,unk,We do not have enough information to rate this...,,,...,,,,,,,,2858830,,
28120,nordea_inv_mgmt_bk_denmark_as,NORDEA INV MGMT BK DENMARK AS,"nordea inv mgmt bk denmark as,nordea investmen...",United States,usnic,0,unk,We do not have enough information to rate this...,,,...,,,,,,,,3133758,,
28866,unibank_nordea_inv_mgmt,UNIBANK NORDEA INV MGMT,"unibank nordea inv mgmt,unibanknordea investme...",United States,usnic,0,unk,We do not have enough information to rate this...,,,...,,,,,,,,3065251,,
58188,nordea_mkts_hc,NORDEA MKTS HC,"nordea markets holding company inc,nordea mkts hc",United States,usnic,0,unk,We do not have enough information to rate this...,nordea_bk_abp,,...,,,,,,,,5493700,,
58189,nordea_sctys_llc,NORDEA SCTYS LLC,"nordea sctys llc,nordea securities llc",United States,usnic,0,unk,We do not have enough information to rate this...,,,...,,,,,,,,5493719,,
60889,nordea_asset_mgmt_hold_ab,NORDEA ASSET MGMT HOLD AB,"nordea asset management holding ab,nordea asse...",United States,usnic,0,unk,We do not have enough information to rate this...,nordea_bk_abp,,...,,,,,,,,5429905,,
60890,nordea_inv_mgmt_ab,NORDEA INV MGMT AB,"nordea inv mgmt ab,nordea investment managemen...",United States,usnic,0,unk,We do not have enough information to rate this...,nordea_asset_mgmt_hold_ab,,...,,,,,,,,5429914,,


In [8]:
# i = 0
# for tag, bank in bankreg.reg.items():
#     if i < 100:
#         print(tag)
#         print(bank.rssd)
#     i += 1

# Airtable

In [9]:
# bg_at = BankGreenAirtable(table_name="staging", local_df=bankreg.return_registry_as_df(), preservation_columns=['name', 'website', 'subsidiary_of'])

In [10]:
# bg_at.airtable_backup()

'./airtable_backups/2021.13.08 16.22.26 staging.pkl'

In [11]:
# flushed = bg_at.airtable_flush()
# print('Num Flushed: ' + str(len(flushed)))

# inserted = bg_at.airtable_insert()
# print('Num Inserted: ' + str(len(inserted)))

# updated = bg_at.airtable_update()
# print('Num Updated: ' + str(len(updated)))

Num Flushed: 0
Num Inserted: 0
Num Updated: 405
