# data

> Module with constant data and helpers used when processing the data

In [None]:
#| default_exp data

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export

from pydantic import root_validator, validator, ValidationError
from typing import Dict, Any, Optional
import os

In [None]:
#| export
def get_from_dict_or_env(
    data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None
) -> str:
    """Return the setting stored under `key` in `data`, else consult the environment.

    A truthy `data[key]` wins. When the key is absent or its value is falsy
    (e.g. `None` or `""`), the lookup is handed to `get_from_env`, which tries
    the environment variable `env_key`, then `default`, and finally raises
    `ValueError`.
    """
    value = data.get(key)
    if value:
        return value
    return get_from_env(key, env_key, default=default)

In [None]:
#| export
def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str:
    """Return the value of the environment variable `env_key`.

    Lookup order:
      1. `os.environ[env_key]`, if it is set and non-empty;
      2. `default`, if it is not `None`.

    `key` is only used to build the error message.

    Raises:
        ValueError: neither the environment variable nor a default is available.
    """
    env_value = os.environ.get(env_key)
    if env_value:
        return env_value
    if default is not None:
        return default
    raise ValueError(
        f"Did not find {key}, please add an environment variable"
        f" `{env_key}` which contains it, or pass"
        f"  `{key}` as a named parameter."
    )

In [None]:
#| export
# Human-readable search field name -> bracketed field tag.
SEARCH_TAGS = {
    "Affiliation": "[ad]",
    "All Fields": "[all]",
    "Article Identifier": "[aid]",
    "Author": "[au]",
    "Author Identifier": "[auid]",
    "EC/RN Number": "[rn]",
    "First Author Name": "[1au]",
    "Full Author Name": "[fau]",
    "Full Investigator Name": "[fir]",
    "Grant Number": "[gr]",
    "Investigator": "[ir]",
    "Journal": "[ta]",
    "Last Author Name": "[lastau]",
    "Location ID": "[lid]",
    "MeSH Major Topic": "[majr]",
    "MeSH Subheadings": "[sh]",
    "MeSH Terms": "[mh]",
    "Other Term": "[ot]",
    "PMID": "[pmid]",
    "Subset": "[sb]",
    "Text Words": "[tw]",
    "Title": "[ti]",
    "Title/Abstract": "[tiab]",
}


In [None]:
#| export
#keyword list

# Lower-case keywords indicating a department-level unit
# (lab, institute, department, faculty, ...).
DEPARMENT = frozenset({
    'laboratorio',
    'laboratories',
    'laboratory',
    'laboratoire',
    'institute',
    'instituto',
    'academico',
    'academic',
    'departamento',
    'department',
    'division',
    'faculty of ',
    'facultad de',
    'faculdade de',
    'pesquisa',
    'genomics center',
    'research station',
})

In [None]:
#| export
# Keywords indicating an institution-level entity (university, hospital,
# company, agency, ...). Leading/trailing spaces inside an entry are
# significant and must be preserved.
# NOTE(review): 'u.s. Department' is the only mixed-case entry; if matching is
# done against lower-cased text it can never hit -- confirm against the caller.
INSTITUTE = frozenset(['college', 'university', 'universitat', 'universite',
    'unversiteit', 'universita', 'universidad', 'universiti', 'hospital', 'hopitaux de', 'unidade de',
    "ha'pital", 'istituti', 'istituto', 'institucio', 'institut', 'medical center', ' pharma',
    'riuniti', 'clinic', ' school of medicine', 'karolinska sjukhuset',
    'national institutes of health', 'cancer center', 'bioscience institute',
    'national institute for', 'national center for ', 'national centre for',
    'unilever research', 'national cardiovascular center',
    'centro operativo', 'animal research centre', 'nutrition research center',
    'national perinatal epidemiology unit', 'tanabe seiyaku', 'animal health trust',
    'marine biological laboratory', ' medical school', ' research laboratories',
    'baxter diagnostics', 'inserm', 'sylvius laboratory', 'broad institute', ' inra', '/inra',
    'health chemical laboratory', 'genecor, inc', 'infirmary',
    'national center for health', 'john innes centre', 'chru de la timone',
    'chu de bordeaux', 'ecole nationale ', 'cape technologies',
    'national chemical laboratory', ' national laboratory', 'department of research and development',
    'academy of sciences', 'centre chirurgical de la porte', 'international centre of ',
    'lawrence berkeley laboratory', 'albert einstein college', 'gedeon richter ltd',
    ' nih', 'ufrgs', 'national research centre', ' co.', ' ltd.', ' ltd', 'inc.', 'research limited',
    'clinic college of', 'center for', 'research center', 'research centre',
    'schon klinik', 'innovaderm research', 'novartis', 'aquarium', 'foundation',
    'permanente', 'healthcare system', 'national oncology institute',
    'global research and development', 'health service', 'national primate research center',
    'faculdade de ', ' urmc', ' pllc', ' pgimer', 'center for disease control',
    'london school of ', 'ggze', 'health service executive', 'council for scientific',
    'cnrs', 'eth zurich', 'johns hopkins', 'isconova', 'barts health', 'ceinge',
    # The comma after 'federal joint committee' was missing, which silently
    # fused it with 'nationale contre' into a single, never-matching keyword.
    'national jewish health', 'german institute', 'iqwig', 'federal joint committee',
    'nationale contre', 'cura villa maria', 'centre de psychologie', 'centro diagnostico',
    'international reference centre', 'complesso integrato', 'health care centre',
    'idiphim', 'cytogenetic laboratory', 'fondazione', 'facebook', 'google', 'association for',
    ' llc', 'national museum', 'national research council', 'rehabilitation center',
    'rehabilitation institute', 'oncology center', 'cancer centre', 'virginia tech',
    'ciberesp', 'department of food', 'rothamsted research', 'evangelisches',
    'ziekenhuizen', 'academy of ', 'chinese national ', 'pathology associates',
    'science magnet', 'ucla ', ' ucsd', 'uc berkeley', 'uc san diego', 'trial group',
    'acdi', 'specialty center', 'agemetra', 'national research institute', 'diabetes center',
    'rothamsted research', 'affichem', 'disease association', 'ministry of health',
    'incorporation', 'medical research council', 'develogen', 'innovation campus',
    'flemish government', "centre d'etudes", 'kaist', 'epfl', ' eth', 'ecole normale',
    'ecole polytechnique', 'mental health center', 'charite centrum', 'phc affairs',
    'afmc', 'cdsr', 'chu de ', 'harvard school', 'karnavati school', 'academic centre for',
    'school of public health', 'school of sport sciences', 'medical center', 'medical centre',
    'neocodex', 'umc utrecht', 'centers for disease', 'cardiac surgery center',
    'medical city', 'wisconsin department', "doctor's data", 'drug development office',
    'research unit', 'ecogen', 'international corporation', 'tourism agency',
    'naval research laboratory', 'infection research', 'health solutions',
    'us military', 'us department', 'human genome center', 'siemens', 'swiss institute',
    'usda', 'marine science center', 'u.s. geological', 'u.s. positive', 'u.s. Department',
    'botanical center', 'municipal centre', 'municipal health', 'research council',
    'national serology', 'national sexually', "d'aragona", 'metropolitan health',
    'rosa and company', 'laboratory of oncology', 'oncology r&d', 'assessment service',
    'cancer registry', 'technology agency', 'district health', 'irccs', 'pharmexa',
    'scientific service', 'limited company', 'health authority', 'biodiversity center',
    'national park', 'corporation', 'ucl ', 'escola nacional', 'va health system',
    'agri-food', 'agrotech', 'agroforestry', 'umr micalis', 'allan rosenfield',
    'allan wilson', 'allen institute', 'ameripath', 'biotechnologies', 'anaerobe systems',
    'nhs trust'])

In [None]:
#| export
# Phrases that contain an institution keyword but are listed separately from
# INSTITUTE. NOTE(review): the name suggests they are excluded from
# institution matches -- confirm against the consumer of this set.
REMOVE_INSTITUE = frozenset({
    'pharmacology',
    'college of pharmacy',
    'institute of zoology',
    'institute of population',
    'institute of bioinformatics',
    'institute of plant',
    'section for ',
    'institute of clinical medicine',
    'department of clinical',
})

In [None]:
#| export
# One row per country: the first element is the canonical lower-case country
# name; the remaining elements are alternative spellings, cities or
# institutions listed for that country. Leading spaces inside an entry
# (e.g. ' lima') are significant.
# NOTE(review): 'south africa' and 'hungary' each appear in two separate rows,
# and 'keio University' is the only mixed-case entry -- confirm whether the
# consumer expects unique rows / lower-case terms.
COUNTRY = (
    ('brazil', 'rio de janeiro', 'são paulo', 'porto alegre', 'brasil', 'cordeiropolis', 'florianopolis', 'fortaleza', 'sao paulo', 'belo horizonte', 'uberlandia', 'recife'),
    ('argentina', 'buenos aires', 'rosario', 'cordoba', 'la plata'),
    ('chile', 'santiago', 'concepción', 'valparaiso', 'valdivia', 'valparaíso', 'talca'),
    ('mexico', 'mexico city', 'méxico', 'monterrey', 'sonora', 'puebla'),
    ('uruguay', 'montevideo'),
    ('peru', ' lima'),
    ('colombia', 'bogota', 'medellin', 'medellín'),
    ('paraguay', 'asuncion'),
    ('italy', 'italia', 'torino', 'turin', 'portici', 'palermo'),
    # r'\buk\b' must be a raw string: in a normal literal '\b' is the
    # backspace character (U+0008), not a regex word boundary.
    ('united kingdom', 'u.k.', r'\buk\b', 'uk.', 'england', ' uk', 'uk', 'liverpool', 'london', 'crumpsall', 'leicester', 'manchester', 'cardiff', 'salford', 'bradford', 'oxford', 'clwyd', 'edinburgh', 'edinburge'),
    ('united states', 'united states of america', 'u.s.a', 'u. s. a.', 'usa', 'massachusetts', 'boston', 'howard university', 'torrance', 'san francisco', 'duarte', 'los alamos', 'houston', 'united state', 'california'),
    ('germany', 'frg', 'brd', 'f.r.g.', 'deutschland', 'engelskirchen', 'berlin', 'dresden',
     'hannover', 'marburg', 'mainz', 'leipzig', 'frankfurt'),
    ('japan', 'keio University', 'jikei university', 'shiga university', 'jikei university','niigata university', 'sendai city', 'shiba', 'asahikawa', 'tokyo', 'yokohama', 'osaka', 'nagoya', 'sapporo', 'kobe', 'kyoto', 'fukuoka', 'kawasaki', 'saitama', 'hiroshima', 'sendai', 'kitakyushu', 'chiba', 'sakai', 'hamamatsu', 'niigata', 'shizuoka', 'okayama', 'asahikawa', 'yamaguchi', 'okayama', 'gunma', 'hyogo', 'kanazawa', 'fukui', 'tajimi', 'komagome', 'akita', 'suita', 'kochi', 'nara medical', 'keio', 'kobe university', 'gunma', 'gifu university', 'toranomon', 'bunkyo gakuin'),
    ('south korea', 'korea', 'seoul'),
    ('russia', 'moscow'),
    ('austria', 'linz', 'wien', ' graz', 'innsbruck'),
    ('israel', 'jerusalem', 'haifa', 'tel aviv'),
    ('norway', ' oslo'),
    ('finland', 'helsinki'),
    ('south africa', 'johannesburg'),
    ('france', 'paris', 'marseille', 'brest', 'limoges', 'toulouse'),
    ('canada', 'vancouver', 'ontario', 'ottawa', 'nova scotia', 'calgary', 'alberta'),
    ('denmark', 'copenhagen'),
    ('taiwan', 'taiwan, ', 'taipei', 'chi-mei medical'),
    ('china', 'beijing', 'pr china', 'hangzhou', 'zhejiang', 'shenyang', 'shanghai'),
    ('egypt', 'cairo'),
    ('poland', 'gdansk', 'krakow'),
    ('turkey', 'istanbul', ' ankara'),
    ('netherlands', 'utrecht', 'nijmegen', 'amsterdam', 'leiden'),
    ('belgium', 'belgique', ' namur', 'bruxelles', ' genk', 'brussels'),
    ('sweden', 'karlstad', 'uppsala', 'stockholm'),
    ('australia', ' perth', 'queensland', 'canberra', 'melbourne'),
    ('south africa', 'johannesburg', 'onderstepoort', 'pretoria'),
    ('hungary', 'ungarn'),
    ('india', 'chandigarh', 'hyderabad', 'delhi', 'calcutta', 'wardha', ' ucms', 'ludhiana', 'vellore'),
    ('ireland', 'dublin', 'belfast'),
    ('spain', ' madrid', ' toledo', ' alicante', 'zaragoza', 'saragossa', 'barcelona', 'hospital vega baja', 'valencia', 'malaga'),
    ('greece', 'athens'),
    ('new zealand', 'upper hutt', 'auckland'),
    ('saudi arabia', 'riyadh'),
    ('nigeria', 'ibadan'),
    ('croatia', 'yugoslavia', 'belgrade'),
    ('switzerland', 'basel', ' geneva'),
    ('thailand', 'bangkok'),
    ('tunisia', 'tunis', 'tunisie'),
    ('czech republic', ' praha', 'prague', 'czechoslovakia'),
    ('hungary', 'budapest'),
    ('zimbabwe', 'bulawayo'),
    ('malaysia', 'kelantan'),
    ('vietnam', 'hanoi'),
    ('hong kong', 'hong kong'),
    ('iran', 'tehran', 'shiraz', 'mashhad'),
    ('romania', 'bucharest'),
    ('congo', 'democratic republic of congo'),
    ('armenia', 'yerevan'),
    ('bosnia and herzegovina', 'bosnia', 'herzegovina'),
    ('albania', 'tirana'),
    ('tanzania', 'morogoro'),
    ('sri lanka', 'sri lanka'),
    ('cyprus', 'nicosia'),
    ('gambia', ' banjul'),
    ('lithuania', 'lietuvos'),
    ('portugal', 'lisboa'),
    ('philippines', 'manila'),
    ('kuwait', 'kuwait city'),
    ('slovenia', 'ljubljana'),
    ('jamaica', 'kingston'),
    ('united arab emirates', 'uae', 'abu dhabi'),
    ('pakistan', 'islamabad')
)

In [None]:
#| export
# US state names (plus 'Washington DC') and their two-letter codes.
# Codes carry a leading space; ' GA' and ' MA' are only listed with a trailing
# space or comma (' GA ', ' GA,', ' MA ', ' MA,').
STATES = frozenset({
    # Full names
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
    'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
    'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
    'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
    'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
    'Washington DC',
    # Two-letter codes
    ' AL', ' AK', ' AZ', ' AR', ' CA', ' CO', ' CT', ' DE', ' FL',
    ' GA ', ' GA,',
    ' HI', ' ID', ' IL', ' IN', ' IA', ' KS', ' KY', ' LA', ' ME', ' MD',
    ' MA,', ' MA ',
    ' MI', ' MN', ' MS', ' MO', ' MT', ' NE', ' NV', ' NH', ' NJ', ' NM',
    ' NY', ' NC', ' ND', ' OH', ' OK', ' OR', ' PA', ' RI', ' SC', ' SD',
    ' TN', ' TX', ' UT', ' VT', ' VA', ' WA', ' WV', ' WI', ' WY', ' DC',
})

In [None]:
#| export
# Two-letter codes of the Brazilian federative units, each with a leading space.
BR_STATES = frozenset({
    ' AC', ' AL', ' AP', ' AM', ' BA', ' DF', ' ES', ' GO', ' MA',
    ' MT', ' MS', ' MG', ' PA', ' PB', ' PR', ' PE', ' PI', ' RJ',
    ' RN', ' RS', ' RO', ' RR', ' SC', ' SP', ' SE', ' TO', ' CE',
})

In [None]:
#| export
# full name and abbreviation
UNIVERSITY_ABBR = (
    # Each row: the full university name first, then its abbreviations /
    # short forms.
    ('university of california los angeles', 'UCLA', 'UC Los Angeles'),
    ('university of california san diego', 'UCSD', 'UC San Diego'),
    ('california institute of technology', 'caltech'),
    ('city college of new york', 'CCNY'),
    ('carnegie mellon university', 'CMU'),
    ('georgia institute of technology', 'georgia tech'),
    ('pennsylvania state university', 'penn state'),
    ('university of massachusetts amherst', 'UMass'),
    ('city university of new york', ' cuny'),
    # Full name was misspelled 'havard'.
    ('harvard university school of', 'harvard school of')
)

In [None]:
#| export
# used to concatenate the university name with a campus string
UNIVERSITY_MULTIPLE_CAMPUS = (
    # Each row: the base university name first, then the campus / site
    # strings listed for it.
    ('university of california', 'berkeley', 'los angeles', 'davis', 'davis medical center',
     'irvine', 'irvine medical center', 'san francisco', 'santa barbara', 'riverside'),
    ('university of illinois urbana champaign', 'urbana champaign', 'urbana', 'champaign'),
    ('university of illinois', 'urbana champaign', 'chicago', 'springfield', 'peoria'),
    ('university of pittsburgh', 'medical center'),
    ('university of michigan', 'ann arbor', 'dearborn', 'health system', 'flint'),
    # 'medical center' was misspelled 'medical Ccnter'.
    ('university of minnesota', 'duluth', 'morris', 'medical center', 'rochester', 'crookston')
)

In [None]:
#| export
# Country -> {region name or alias -> {'state_code': canonical code/name}}.
# Written as (key, state_code) pairs and expanded into the nested mapping.
# For France, Switzerland and Canada the "state_code" is the canonical region
# name, and abbreviations are additional keys pointing at it.
# NOTE(review): two US entries use German names ('Nördliche Marianen',
# 'Amerikanisch-Ozeanien') -- confirm whether English aliases are wanted.
COUNTRIES = {
    country: {region: {'state_code': code} for region, code in regions}
    for country, regions in (
        ('United States', (
            ('Alaska', 'AK'),
            ('Alabama', 'AL'),
            ('Armed Forces Pacific', 'AP'),
            ('Arkansas', 'AR'),
            ('American Samoa', 'AS'),
            ('Arizona', 'AZ'),
            ('California', 'CA'),
            ('Colorado', 'CO'),
            ('Connecticut', 'CT'),
            ('District of Columbia', 'DC'),
            ('Delaware', 'DE'),
            ('Florida', 'FL'),
            ('Federated Micronesia', 'FM'),
            ('Georgia', 'GA'),
            ('Guam', 'GU'),
            ('Hawaii', 'HI'),
            ('Iowa', 'IA'),
            ('Idaho', 'ID'),
            ('Illinois', 'IL'),
            ('Indiana', 'IN'),
            ('Kansas', 'KS'),
            ('Kentucky', 'KY'),
            ('Louisiana', 'LA'),
            ('Massachusetts', 'MA'),
            ('Maryland', 'MD'),
            ('Maine', 'ME'),
            ('Marshall Islands', 'MH'),
            ('Michigan', 'MI'),
            ('Minnesota', 'MN'),
            ('Missouri', 'MO'),
            ('Nördliche Marianen', 'MP'),
            ('Mississippi', 'MS'),
            ('Montana', 'MT'),
            ('North Carolina', 'NC'),
            ('North Dakota', 'ND'),
            ('Nebraska', 'NE'),
            ('New Hampshire', 'NH'),
            ('New Jersey', 'NJ'),
            ('New Mexico', 'NM'),
            ('Nevada', 'NV'),
            ('New York', 'NY'),
            ('Ohio', 'OH'),
            ('Oklahoma', 'OK'),
            ('Oregon', 'OR'),
            ('Pennsylvania', 'PA'),
            ('Puerto Rico', 'PR'),
            ('Palau', 'PW'),
            ('Rhode Island', 'RI'),
            ('South Carolina', 'SC'),
            ('South Dakota', 'SD'),
            ('Tennessee', 'TN'),
            ('Texas', 'TX'),
            ('Amerikanisch-Ozeanien', 'UM'),
            ('Utah', 'UT'),
            ('Virginia', 'VA'),
            ('US Virgin Islands', 'VI'),
            ('Vermont', 'VT'),
            ('Washington', 'WA'),
            ('Wisconsin', 'WI'),
            ('West Virginia', 'WV'),
            ('Wyoming', 'WY'),
        )),
        ('Germany', (
            ('Brandenburg', 'BB'),
            ('Berlin', 'BE'),
            ('Baden-Württemberg', 'BW'),
            ('Bavaria', 'BY'),
            ('Bremen', 'HB'),
            ('Hesse', 'HE'),
            ('Hessen', 'HE'),
            ('Hamburg', 'HH'),
            ('Mecklenburg-Vorpommern', 'MV'),
            ('Lower Saxony', 'NI'),
            ('Schleswig-Holstein', 'SH'),
            ('Saarland', 'SL'),
            ('Saxony', 'SN'),
            ('Saxony-Anhalt', 'ST'),
            ('Thuringia', 'TH'),
            ('North Rhine-Westphalia', 'NW'),
            ('Rhineland-Palatinate', 'RP'),
        )),
        ('France', (
            ('Auvergne-Rhône-Alpes', 'Auvergne-Rhône-Alpes'),
            ('Burgundy-Franche-Comté', 'Burgundy-Franche-Comté'),
            ("Bourgogne-Franche-Comté", "Burgundy-Franche-Comt\u00e9"),
            ('Brittany', 'Brittany'),
            ('Corse', 'Corse'),
            ('Centre-Val de Loire', 'Centre-Val de Loire'),
            ('Grand-Est', 'Grand-Est'),
            ("Grand Est", "Grand-Est"),
            ('Hauts-de-France', 'Hauts-de-France'),
            ('Île-de-France', 'Île-de-France'),
            ('Ile-de-France', 'Île-de-France'),
            ('Ile–de–France', 'Île-de-France'),
            ('New Aquitaine', 'New Aquitaine'),
            ('Nouvelle-Aquitaine', 'New Aquitaine'),
            ('Normandy', 'Normandy'),
            ('Occitania', 'Occitania'),
            ('Occitanie', 'Occitania'),
            ("Provence-Alpes-Côte d'Azur", "Provence-Alpes-Côte d'Azur"),
            ('Pays de la Loire', 'Pays de la Loire'),
            ('ARA', 'Auvergne-Rhône-Alpes'),
            ('BFC', 'Burgundy-Franche-Comté'),
            ('BRE', 'Brittany'),
            ('COR', 'Corse'),
            ('CVL', 'Centre-Val de Loire'),
            ('GES', 'Grand-Est'),
            ('HDF', 'Hauts-de-France'),
            ('IDF', 'Île-de-France'),
            ('NAQ', 'New Aquitaine'),
            ('PAC', "Provence-Alpes-Côte d'Azur"),
            ('PDL', 'Pays de la Loire'),
        )),
        ('Switzerland', (
            ('Aargau', 'Aargau'),
            ('Appenzell Innerrhoden', 'Appenzell Innerrhoden'),
            ('Appenzell Ausserrhoden', 'Appenzell Ausserrhoden'),
            ('Bern', 'Bern'),
            ('Basel-Landschaft', 'Basel-Landschaft'),
            ("Basel-Country", "Basel-Landschaft"),
            ('Basel-Stadt', 'Basel-Stadt'),
            ('Basel-City', 'Basel-Stadt'),
            ('Basel', 'Basel-Stadt'),
            ('Fribourg', 'Fribourg'),
            ('Geneva', 'Geneva'),
            ('Glarus', 'Glarus'),
            ('Graubünden', 'Graubünden'),
            ('Jura', 'Jura'),
            ('Luzern', 'Luzern'),
            ('Neuchâtel', 'Neuchâtel'),
            ('Nidwalden', 'Nidwalden'),
            ('Obwalden', 'Obwalden'),
            ('St. Gallen', 'St. Gallen'),
            ('Schaffhausen', 'Schaffhausen'),
            ('Solothurn', 'Solothurn'),
            ('Schwyz', 'Schwyz'),
            ('Thurgau', 'Thurgau'),
            ('Ticino', 'Ticino'),
            ('Uri', 'Uri'),
            ('Vaud', 'Vaud'),
            ('Valais', 'Valais'),
            ('Zug', 'Zug'),
            ('Zürich', 'Zürich'),
            ('Zurich', 'Zürich'),
            ('AG', 'Aargau'),
            ('AI', 'Appenzell Innerrhoden'),
            ('AR', 'Appenzell Ausserrhoden'),
            ('BE', 'Bern'),
            ('BL', 'Basel-Landschaft'),
            ('BS', 'Basel-Stadt'),
            ('FR', 'Fribourg'),
            ('GE', 'Geneva'),
            ('GL', 'Glarus'),
            ('GR', 'Graubünden'),
            ('JU', 'Jura'),
            ('LU', 'Luzern'),
            ('NE', 'Neuchâtel'),
            ('NW', 'Nidwalden'),
            ('OW', 'Obwalden'),
            ('SG', 'St. Gallen'),
            ('SH', 'Schaffhausen'),
            ('SO', 'Solothurn'),
            ('SZ', 'Schwyz'),
            ('TG', 'Thurgau'),
            ('TI', 'Ticino'),
            ('UR', 'Uri'),
            ('VD', 'Vaud'),
            ('VS', 'Valais'),
            ('ZG', 'Zug'),
            ('ZH', 'Zürich'),
        )),
        ('Canada', (
            ('Alberta', 'Alberta'),
            ('British Columbia', 'British Columbia'),
            ('Manitoba', 'Manitoba'),
            ('New Brunswick', 'New Brunswick'),
            ('Newfoundland and Labrador', 'Newfoundland and Labrador'),
            ('Nova Scotia', 'Nova Scotia'),
            ('Northwest Territories', 'Northwest Territories'),
            ('Nunavut', 'Nunavut'),
            ('Ontario', 'Ontario'),
            ('Prince Edward Island', 'Prince Edward Island'),
            ('Quebec', 'Quebec'),
            ('Saskatchewan', 'Saskatchewan'),
            ('Yukon Territory', 'Yukon Territory'),
            ('AB', 'Alberta'),
            ('BC', 'British Columbia'),
            ('MB', 'Manitoba'),
            ('NB', 'New Brunswick'),
            ('NL', 'Newfoundland and Labrador'),
            ('NS', 'Nova Scotia'),
            ('NT', 'Northwest Territories'),
            ('NU', 'Nunavut'),
            ('ON', 'Ontario'),
            ('PE', 'Prince Edward Island'),
            ('QC', 'Quebec'),
            ('SK', 'Saskatchewan'),
            ('YT', 'Yukon Territory'),
        )),
    )
}

In [None]:
#| export
def lower_countries(COUNTRIES):
    """Return a copy of the nested mapping with both key levels lower-cased.

    `COUNTRIES` maps country -> {region -> info}. Country and region keys are
    lower-cased; the `info` values are reused as-is (not copied and not
    modified). The input mapping itself is left untouched.
    """
    return {
        country.lower(): {region.lower(): info for region, info in regions.items()}
        for country, regions in COUNTRIES.items()
    }

def countries2list(COUNTRIES):
    """Collect, per country, the set of all region keys and `state_code` values.

    `COUNTRIES` maps country -> {region -> {'state_code': ...}}. The result
    maps each country to a `set` containing every region key together with
    every `state_code` found under that country.
    """
    names_by_country = {}
    for country, regions in COUNTRIES.items():
        names = set()
        for region, info in regions.items():
            names.add(region)
            names.add(info['state_code'])
        names_by_country[country] = names
    return names_by_country

In [None]:
#| export
# Rebind COUNTRIES to a version whose country and region keys are lower-case
# (the nested {'state_code': ...} values keep their original casing).
COUNTRIES = lower_countries(COUNTRIES)
# Per-country set holding every region key together with every state_code value.
COUNTRIES_L = countries2list(COUNTRIES)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()