## Metadata spreadsheet batch converter

This notebook converts metadata from a multi-tabbed spreadsheet into JSON records for upload into the Data Discovery Engine (DDE)

The tabbed sheets available in the spreadsheet are as follows:
- resource_base
- funding
- collectionSize
- related
- author
- definedTerms
- distribution

All sheets use the url field as the index/linking field

Notes:
* resource_base: contains various metadata properties and their expected values. For citation.pmid, a helper function should be used to pull the citation name based on the pmid so that it will be able to pass the schema validation
* funding: Note a single grant ID may be associated with multiple funding organizations. Convert 'type' to '@type'
* related: relationship properties and their expected objects. Convert 'type' to '@type'
* definedTerms: save only the urls to a list for the DDE

How it works: 
Every sheet except resource_base is converted into a dictionary where the key is the url, and the value is either an array of objects (funding, collectionSize, author, distribution) or a dictionary of properties (related, definedTerms)
The resource_base is converted into a base dictionary, and additional objects are added to the base dictionary using the url
The json records are then dumped into a batch_file for upload


In [1]:
import os
import pandas as pd
import json
from datetime import datetime
from Bio import Entrez
from Bio import Medline
import requests
from math import isnan

In [2]:
# Contact e-mail for Bio.Entrez, set before any Entrez.efetch call (see get_pmids).
# The value below is a placeholder and must be replaced before running.
Entrez.email = "your email here"

In [3]:
# All paths are derived from the current working directory of the kernel.
script_path = os.getcwd()
# Directory one level above the working directory.
parent_path = os.path.abspath(os.path.join(script_path, os.pardir))
# Input workbooks are read from ./data
data_path = os.path.join(script_path,'data')
# Every entry in the data directory (not filtered by extension).
filelist = os.listdir(data_path)
# Output directory for the generated batch file, located under parent_path.
result_path = os.path.join(parent_path,'nde-metadata-corrections','metadata_for_DDE','resourceCatalogs')

In [4]:
# Show the resolved parent directory and the candidate input files.
print(parent_path)
print(filelist)

C:\Users\gtsueng\Anaconda3\envs\nde
['2024_05_14_RepoMetaCuration.xlsx', '2024_05_21_RepoMetaCuration.xlsx', '2024_06_18_RepoMetaCuration.xlsx', '~$2024_06_18_RepoMetaCuration.xlsx']


In [5]:
def clean_rawdf(df_raw):
    """Return a cleaned copy of a raw sheet.

    Missing cells are replaced by the sentinel -1, a 'pmid' column (when
    present) is cast to int, and a 'type' column is renamed to '@type'.
    The input frame is not modified.
    """
    filled = df_raw.fillna(-1)
    if 'pmid' in filled.columns:
        filled['pmid'] = filled['pmid'].astype(int)
    return filled.rename(columns={'type': '@type'})


def clean_nones(a_dict):
    """Remove placeholder / missing values from a dictionary, in place.

    An entry is removed when its value is None, the string "None", the
    sentinel -1, or a NaN number. Strings, dicts and lists are otherwise
    always kept. Values that cannot be NaN-tested (for example datetimes)
    are kept instead of raising.

    Parameters:
        a_dict: dictionary to clean; it is mutated.

    Returns:
        The same dictionary object, for convenience.
    """
    for key, value in list(a_dict.items()):
        if value is None:
            missing = True
        elif isinstance(value, str):
            missing = value == "None"
        elif isinstance(value, (dict, list)):
            missing = False
        elif value == -1:
            missing = True
        else:
            try:
                missing = isnan(value)
            except TypeError:
                # Not a real number (e.g. a datetime): it cannot be NaN.
                missing = False
        # Exactly one decision per key, so a key is never deleted twice.
        if missing:
            del a_dict[key]
    return a_dict


def clean_dict_array(dict_array):
    """Run clean_nones over every dictionary in a list.

    Each dictionary is cleaned in place; the same list object is returned.
    """
    for record in dict_array:
        clean_nones(record)
    return dict_array

### Process the resource_base sheet

In [6]:
def add_date(df):
    """Set the 'date' column of df to today's date (YYYY-MM-DD) and return df.

    The frame is modified in place.
    """
    df['date'] = datetime.now().strftime("%Y-%m-%d")
    return df

def format_date(datefield):
    """Normalise a date cell to a 'YYYY-MM-DD' string where possible.

    Parameters:
        datefield: cell value from a date column.

    Returns:
        - datetime instances formatted as 'YYYY-MM-DD';
        - any other value (strings, and missing-value sentinels such as -1
          or "None") unchanged, so that clean_nones can still recognise and
          drop the sentinels afterwards.
    """
    if isinstance(datefield, datetime):
        return datefield.strftime("%Y-%m-%d")
    # Previously a non-str/non-datetime value left the result unbound and
    # raised UnboundLocalError; pass such values through instead.
    return datefield

def get_pmids(citation_field):
    """Look up article titles on PubMed for the PMIDs listed in a cell.

    Parameters:
        citation_field: a single PMID or a comma separated list of PMIDs,
            optionally wrapped in square brackets (e.g. "[123, 456]").
            Numeric cells are accepted as well.

    Returns:
        A list of {'@type': 'ScholarlyArticle', 'name': <title>, 'pmid': <pmid>}
        dictionaries, one per record returned by PubMed, in the order the
        PMIDs first appear in the cell. Empty tokens and the missing-value
        sentinels ("None", "nan", "-1") are skipped without a lookup.

    Note:
        Performs one Entrez.efetch network request per distinct PMID.
    """
    # A whole-number float would otherwise be rendered as "123.0".
    if isinstance(citation_field, float) and citation_field.is_integer():
        citation_field = int(citation_field)
    raw_ids = str(citation_field).replace('[', '').replace(']', '').split(',')

    # De-duplicate while preserving order (a set would shuffle the output).
    clean_citations = []
    for raw_id in raw_ids:
        pmid = raw_id.strip()
        if not pmid or pmid in ("None", "nan", "-1"):
            continue
        if pmid not in clean_citations:
            clean_citations.append(pmid)

    citation_array = []
    for eachpmid in clean_citations:
        handle = Entrez.efetch(db="pubmed", id=eachpmid, rettype="medline", retmode="text")
        try:
            for record in Medline.parse(handle):
                # "TI" is read as the article title; "?" is used when it is absent.
                citation_array.append({'@type': 'ScholarlyArticle',
                                       'name': record.get("TI", "?"),
                                       'pmid': eachpmid})
        finally:
            handle.close()
    return citation_array

def format_language(language_field):
    """Convert a language cell into a list of Language objects.

    Parameters:
        language_field: a language name or a comma separated list of names,
            optionally wrapped in square brackets (e.g. "[English, Spanish]").

    Returns:
        A list of {'@type': 'Language', 'name': <name>} dictionaries. Names
        are stripped of surrounding whitespace and empty tokens are skipped.
    """
    tokens = language_field.replace('[', '').replace(']', '').split(',')
    names = [token.strip() for token in tokens]
    return [{'@type': 'Language', 'name': name} for name in names if name]

def format_usage(usage_field):
    """Wrap a usage-conditions url in a CreativeWork object."""
    return {
        '@type': 'CreativeWork',
        'name': 'Conditions of use',
        'url': usage_field,
    }

def add_type(df):
    """Tag every row as an nde:ResourceCatalog (in place) and return df."""
    resource_type = 'nde:ResourceCatalog'
    df['@type'] = resource_type
    return df

def run_quick_clean(df):
    """Clean the resource_base sheet and expand its structured fields.

    Steps:
      * missing cells are replaced by the string sentinel "None";
      * date columns that are present are normalised with format_date;
      * the 'License type' column is removed when present;
      * 'inLanguage' and 'usageInfo' are converted to objects;
      * 'citation pmid' is resolved through get_pmids into a new 'citation'
        column, and the raw 'citation pmid' column is then removed.

    Cells that are missing keep the "None" sentinel rather than being turned
    into objects, so clean_nones removes them from the final record.

    Parameters:
        df: the raw resource_base frame; it is not modified.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: if 'inLanguage', 'usageInfo' or 'citation pmid' is absent.

    Note:
        get_pmids performs network requests for every populated citation cell.
    """
    def convert_present(column, converter):
        # Leave the missing-value sentinel alone; convert everything else.
        return df[column].map(lambda cell: cell if isinstance(cell, str) and cell == "None"
                              else converter(cell))

    ## fill the na's
    df = df.fillna("None")
    ## format the date fields
    for eachprop in ('date', 'dateModified', 'dateCreated', 'datePublished'):
        if eachprop in df.columns:
            df[eachprop] = df[eachprop].map(format_date)
    df = df.drop(columns=['License type'], errors='ignore')
    ## clean up the language field
    df['inLanguage'] = convert_present('inLanguage', format_language)
    ## clean up the usage info
    df['usageInfo'] = convert_present('usageInfo', format_usage)
    ## resolve the citations; the result must go in its own column, because
    ## the raw pmid column is dropped immediately afterwards
    df['citation'] = convert_present('citation pmid', get_pmids)
    df = df.drop(columns=['citation pmid'])
    return df

In [7]:
# Select the workbook to convert.
# NOTE(review): index 2 is positional into os.listdir() output, whose order is
# arbitrary and which also lists non-workbook entries (e.g. '~$' lock files) -
# confirm the intended file is picked.
filepath = os.path.join(data_path,filelist[2])
# Load the 'resource_base' sheet and run it through the cleaning helpers.
df_base = pd.read_excel(filepath, 'resource_base', engine='openpyxl')
df_clean = add_type(run_quick_clean(df_base))
#print(df_samples.head(n=2))
#print(df_samples.iloc[0]['citation pmid'])
print(df_clean.head(n=2))

       name                    url             identifier alternateName  \
0  WormBase  https://wormbase.org/  https://wormbase.org/          None   
1   FlyBase   https://flybase.org/   https://flybase.org/          None   

                                          license conditionsOfAccess  \
0  https://creativecommons.org/public-domain/cc0/               Open   
1    https://creativecommons.org/licenses/by/4.0/               Open   

                                           usageInfo  \
0  {'@type': 'CreativeWork', 'name': 'Conditions ...   
1  {'@type': 'CreativeWork', 'name': 'Conditions ...   

                                            abstract  \
0  WormBase is a NIH supported biomedical reposit...   
1  FlyBase is a NIH supported basic science repos...   

                                         description  collectionType  hasAPI  \
0  WormBase is an international consortium of bio...  Knowledge base    True   
1  The aim of the FlyBase project is to provide a...  Knowl

### process the funding sheet


In [8]:
def process_single_funder(row):
    """Build a funding object from one funding-sheet row.

    The funder sub-object is assembled from the 'funder.*' columns and passed
    through clean_nones; the funding object takes its '@type' from the row's
    'type' column and its identifier from 'identifier'.
    """
    funder_object = clean_nones({
        '@type': row['funder.@type'],
        'name': row['funder.name'],
        'alternateName': row['funder.alternateName'],
        'parentOrganization': row['funder.parentOrganization'],
    })
    return {
        '@type': row['type'],
        'identifier': row['identifier'],
        'funder': funder_object,
    }

def process_multi_funders(df_funding, multi_funder_ids):
    """Build funding objects for grants that list several funders.

    Parameters:
        df_funding: the funding sheet, with 'url', 'identifier' and the
            'funder.*' columns.
        multi_funder_ids: grant identifiers that have more than one funder
            row for at least one url.

    Returns:
        A DataFrame with columns 'url' and 'temp'. Each row holds one
        MonetaryGrant object ('temp') whose 'funder' is the list of funders
        recorded for that grant under that url. The frame is empty (but
        still has both columns) when there is nothing to report.
    """
    funder_columns = {'funder.@type': '@type',
                      'funder.name': 'name',
                      'funder.alternateName': 'alternateName',
                      'funder.parentOrganization': 'parentOrganization'}
    funder_array = []
    for eachid in multi_funder_ids:
        id_rows = df_funding.loc[df_funding['identifier'] == eachid]
        # The same grant may be listed under several urls; keep each url's
        # funders separate instead of attributing them all to the first url.
        for eachurl, url_rows in id_rows.groupby('url', sort=False):
            if len(url_rows) < 2:
                # Only one funder for this url: not a multi-funder grant here.
                continue
            cleandf = url_rows[list(funder_columns)].rename(columns=funder_columns).fillna(-1)
            funderlist = clean_dict_array(cleandf.to_dict(orient='records'))
            funder_array.append({'url': eachurl,
                                 'temp': {'@type': 'MonetaryGrant',
                                          'identifier': eachid,
                                          'funder': funderlist}})
    # Built once, with explicit columns, so an empty result is still usable.
    return pd.DataFrame(funder_array, columns=['url', 'temp'])

In [9]:
def generate_funding_dict(df_funding):
    """Group the funding sheet into a {url: [funding objects]} dictionary.

    A (url, identifier) pair with exactly one row becomes a funding object
    with a single funder (process_single_funder); identifiers that have
    several rows for a url are handled by process_multi_funders.

    Parameters:
        df_funding: the raw funding sheet.

    Returns:
        A dictionary mapping each url in the sheet to its list of funding
        objects (single-funder grants first, then multi-funder grants).
    """
    funding_grouped = df_funding.groupby(['url', 'identifier']).size().reset_index(name='counts')
    multi_rows = funding_grouped.loc[funding_grouped['counts'] > 1]
    single_rows = funding_grouped.loc[funding_grouped['counts'] == 1]
    multi_funder_ids = multi_rows['identifier'].unique().tolist()

    # Select single-funder rows by (url, identifier) pair. Matching on the
    # identifier alone would also pick up rows of a grant that has several
    # funders under a different url.
    single_pairs = set(zip(single_rows['url'], single_rows['identifier']))
    is_single = pd.Series(
        [pair in single_pairs for pair in zip(df_funding['url'], df_funding['identifier'])],
        index=df_funding.index, dtype=bool)
    single_funders = df_funding.loc[is_single].copy()
    single_funders['temp'] = [process_single_funder(row) for _, row in single_funders.iterrows()]

    # Skip the multi-funder pass entirely when there is nothing to process.
    if multi_funder_ids:
        funder_array = process_multi_funders(df_funding, multi_funder_ids)
    else:
        funder_array = pd.DataFrame(columns=['url', 'temp'])

    funding_dict = {}
    for eachurl in df_funding['url'].unique().tolist():
        clean_singles = single_funders.loc[single_funders['url'] == eachurl, 'temp'].tolist()
        clean_multi = funder_array.loc[funder_array['url'] == eachurl, 'temp'].tolist()
        funding_dict[eachurl] = clean_singles + clean_multi
    return funding_dict

In [10]:
# Load the 'funding' sheet as-is (it is not passed through clean_rawdf) and group it by url.
df_funding = pd.read_excel(filepath, 'funding', engine='openpyxl')
#print(single_funders.head(n=2))
funding_dict = generate_funding_dict(df_funding)
# Spot-check one resource.
print(funding_dict['https://flybase.org/'])

[{'@type': 'MonetaryGrant', 'identifier': 'U24HG013300', 'funder': {'@type': 'Organization', 'name': 'National Human Genome Research Institute', 'alternateName': 'NHGRI', 'parentOrganization': 'NIH'}}, {'@type': 'MonetaryGrant', 'identifier': 'MR/W024233/1', 'funder': {'@type': 'Organization', 'name': 'UK Medical Research Council', 'alternateName': 'MRC', 'parentOrganization': 'UKRI'}}, {'@type': 'MonetaryGrant', 'identifier': 2035515, 'funder': {'@type': 'Organization', 'name': 'National Science Foundation', 'alternateName': 'NSF'}}, {'@type': 'MonetaryGrant', 'identifier': 'BB/T014008/1', 'funder': {'@type': 'Organization', 'name': 'Biotechnology and Biological Sciences Research Council', 'alternateName': 'BBSRC', 'parentOrganization': 'UKRI'}}, {'@type': 'MonetaryGrant', 'identifier': 'PLM13398', 'funder': {'@type': 'Organization', 'name': 'Wellcome Trust'}}, {'@type': 'MonetaryGrant', 'identifier': 2039324, 'funder': {'@type': 'Organization', 'name': 'National Science Foundation', 

### process collection_size 

In [11]:
def create_collection_dict(df_collection):
    """Group the collectionSize sheet into {url: [PropertyValue objects]}.

    Side effect: a constant '@type' column ('PropertyValue') is added to the
    frame that is passed in.
    """
    urls = df_collection['url'].unique().tolist()
    df_collection['@type'] = 'PropertyValue'
    collection_dict = {}
    for eachurl in urls:
        size_rows = df_collection.loc[df_collection['url'] == eachurl].drop('url', axis=1)
        collection_dict[eachurl] = clean_dict_array(size_rows.to_dict(orient='records'))
    return collection_dict

In [12]:
# Load and clean the 'collectionSize' sheet, then group its rows by url.
df_collection_raw = pd.read_excel(filepath, 'collectionSize', engine='openpyxl')
df_collection = clean_rawdf(df_collection_raw)
collection_dict = create_collection_dict(df_collection)
# Spot-check one resource.
print(collection_dict['https://wormbase.org/'])

[{'minValue': 19984, 'unitText': 'Protein-coding genes', '@type': 'PropertyValue'}, {'minValue': 27668, 'unitText': 'Non-coding RNA and pseudogene', '@type': 'PropertyValue'}, {'minValue': 1523, 'unitText': 'Uncloned genes', '@type': 'PropertyValue'}, {'minValue': 28587, 'unitText': 'CDS', '@type': 'PropertyValue'}, {'minValue': 19981, 'unitText': 'protein-coding loci', '@type': 'PropertyValue'}, {'minValue': 28587, 'unitText': 'sequences', '@type': 'PropertyValue'}]


### process related

In [13]:
def handle_urls(df):
    """Return the distinct 'url' values of the rows whose '@type' is 'URL'."""
    is_url_row = df['@type'] == 'URL'
    return df.loc[is_url_row, 'url'].unique().tolist()


def create_related_dict(df_related):
    """Group the related sheet into {url: {property: [values]}}.

    For every resource url, each distinct value of the 'property' column
    becomes a key. Its value is a list of plain urls when any of the rows
    for that property has '@type' == 'URL', otherwise a list of cleaned
    row dictionaries (with 'prop.url' exposed as 'url').

    The Data Discovery Engine catalog is always recorded as an sdPublisher:
    it is appended to the list when the sheet supplies sdPublisher rows, and
    set as the sole value (a single object, not a list) when it does not.

    Parameters:
        df_related: the related sheet after clean_rawdf.

    Returns:
        The nested dictionary described above.
    """
    nde_dict = {"@type": "DataCatalog", "name": "Data Discovery Engine", "url": "https://discovery.biothings.io/portal/nde"}
    related_dict = {}
    for eachurl in df_related['url'].unique().tolist():
        tmpdf = (df_related.loc[df_related['url'] == eachurl]
                 .drop('url', axis=1)
                 .rename(columns={'prop.url': 'url'}))
        prop_dict = {}
        # Distinct properties only: every property is built from all of its
        # rows at once, so revisiting it per row would repeat the same work.
        proplist = tmpdf['property'].unique().tolist()
        if 'sdPublisher' not in proplist:
            # A fresh copy per record, so records never share one mutable dict.
            prop_dict['sdPublisher'] = dict(nde_dict)
        for eachprop in proplist:
            prop_rows = tmpdf.loc[tmpdf['property'] == eachprop]
            if 'URL' in prop_rows['@type'].tolist():
                tmp_array = handle_urls(prop_rows)
            else:
                tmp_array = clean_dict_array(
                    prop_rows.drop('property', axis=1).to_dict(orient='records'))
            if eachprop == 'sdPublisher':
                tmp_array.append(dict(nde_dict))
            prop_dict[eachprop] = tmp_array
        related_dict[eachurl] = prop_dict
    return related_dict
        

In [14]:
# Load and clean the 'related' sheet, then group its rows by url and property.
df_related_raw = pd.read_excel(filepath, 'related', engine='openpyxl')
df_related = clean_rawdf(df_related_raw)
#print(df_related.head(n=2))
related_dict = create_related_dict(df_related)
# Spot-check one resource.
print(related_dict['https://wormbase.org/'])

{'hasPart': [{'@type': 'ComputationalTool', 'name': 'Wormicloud', 'url': 'https://wormicloud.textpressolab.com/'}, {'@type': 'SoftwareSourceCode', 'name': 'Vennter', 'url': 'https://github.com/WormBase/website-public/releases/tag/WS276.1'}], 'isBasedOn': [{'@type': 'ScholarlyArticle', 'name': 'Tissue enrichment analysis for C. elegans genomics', 'url': 'https://pubmed.ncbi.nlm.nih.gov/27618863/', 'pmid': 27618863, 'doi': '10.1186/s12859-016-1229-9'}, {'@type': 'CreativeWork', 'name': 'Worm Phenotype Ontology', 'url': 'http://purl.obolibrary.org/obo/wbphenotype.owl'}, {'@type': 'SoftwareSourceCode', 'name': 'Wormbase code repositories', 'url': 'https://github.com/WormBase'}], 'isBasisFor': [{'@type': 'CreativeWork', 'name': 'Worm Phenotype Ontology', 'url': 'http://purl.obolibrary.org/obo/wbphenotype.owl'}, {'@type': 'ResourceCatalog', 'name': 'WormBase ParaSite', 'url': 'https://parasite.wormbase.org/index.html'}], 'citedBy': [{'@type': 'ScholarlyArticle', 'name': 'Worm Phenotype Ontol

### Process the author list

In [15]:
def process_orgs(df):
    """Turn Organization author rows into cleaned dictionaries.

    The columns 'givenName', 'familyName' and 'affiliation.name' are removed
    before conversion. The input frame is not modified.
    """
    dropped_columns = ['givenName', 'familyName', 'affiliation.name']
    org_records = df.drop(dropped_columns, axis=1).to_dict(orient='records')
    return clean_dict_array(org_records)

def process_affiliations(an_affiliation):
    """Wrap an affiliation name in an Organization object.

    The missing-value sentinel -1 is returned unchanged.
    """
    if an_affiliation == -1:
        return -1
    return {'@type': 'Organization', 'name': an_affiliation}

def process_ppl(df):
    """Turn Person author rows into cleaned dictionaries.

    An 'affiliation' value is derived from 'affiliation.name' via
    process_affiliations; 'parentOrganization' and 'affiliation.name' are
    then removed. The input frame is not modified.
    """
    people = df.copy()
    people['affiliation'] = people.apply(
        lambda person: process_affiliations(person['affiliation.name']), axis=1)
    person_records = (people
                      .drop(['parentOrganization', 'affiliation.name'], axis=1)
                      .to_dict(orient='records'))
    return clean_dict_array(person_records)

In [16]:
def create_author_dict(df_author):
    """Group the author sheet into {url: [author objects]}.

    For each url, Organization rows (via process_orgs) are listed first,
    followed by Person rows (via process_ppl). Rows with any other '@type'
    are ignored.
    """
    author_dict = {}
    for eachurl in df_author['url'].unique().tolist():
        url_rows = df_author.loc[df_author['url'] == eachurl].drop('url', axis=1)
        author_array = []
        for author_kind, converter in (('Organization', process_orgs), ('Person', process_ppl)):
            if author_kind in url_rows['@type'].tolist():
                author_array.extend(converter(url_rows.loc[url_rows['@type'] == author_kind]))
        author_dict[eachurl] = author_array
    return author_dict

In [17]:
# Load and clean the 'author' sheet, then group organizations and people by url.
df_author_raw = pd.read_excel(filepath, 'author', engine='openpyxl')
df_author = clean_rawdf(df_author_raw)
author_dict = create_author_dict(df_author)
# Spot-check one resource.
print(author_dict['https://wormbase.org/'])
#print(df_author.head(n=2))

[{'@type': 'Organization', 'name': 'California Institute of Technology', 'alternateName': 'CalTech'}, {'@type': 'Organization', 'name': 'Wellcome Trust Sanger Institute ', 'alternateName': 'WTSI'}, {'@type': 'Organization', 'name': 'European Bioinformatics Institute', 'alternateName': 'EBI'}, {'@type': 'Organization', 'name': 'Ontario Institute for Cancer Research ', 'alternateName': 'OICR'}, {'@type': 'Organization', 'name': 'Washington University at St. Louis ', 'alternateName': 'WUSTL'}, {'@type': 'Organization', 'name': 'The WormBase Consortium'}, {'@type': 'Person', 'name': 'Paul Sternberg', 'givenName': 'Paul', 'familyName': 'Sternberg', 'affiliation': {'@type': 'Organization', 'name': 'California Institute of Technology'}}, {'@type': 'Person', 'name': 'Matt Berriman', 'givenName': 'Matt', 'familyName': 'Berriman', 'affiliation': {'@type': 'Organization', 'name': 'Wellcome Trust Sanger Institute '}}, {'@type': 'Person', 'name': 'Sarah Dyer', 'givenName': 'Sarah', 'familyName': 'D

### Process the definedTerms sheet

In [18]:
def generate_dt_dict(df_dt):
    """Group the definedTerms sheet into {url: {property: [term urls]}}.

    Parameters:
        df_dt: frame with the columns 'url', 'property' and 'prop.url'.

    Returns:
        A dictionary keyed by resource url. Each value maps a property name
        to the distinct 'prop.url' values recorded for it, in order of first
        appearance. The input frame is not modified.
    """
    dfdt_dict = {}
    # Distinct urls only: each url's entry is built from all of its rows at
    # once, so visiting the url once per row would just repeat that work.
    for eachurl in df_dt['url'].unique().tolist():
        url_rows = df_dt.loc[df_dt['url'] == eachurl]
        prop_dict = {}
        for eachprop in url_rows['property'].unique().tolist():
            is_prop = url_rows['property'] == eachprop
            prop_dict[eachprop] = url_rows.loc[is_prop, 'prop.url'].unique().tolist()
        dfdt_dict[eachurl] = prop_dict
    return dfdt_dict

In [19]:
# Load and clean the 'definedTerms' sheet, then collect the term urls per resource and property.
df_definedTerm_raw = pd.read_excel(filepath, 'definedTerms', engine='openpyxl')
df_dt = clean_rawdf(df_definedTerm_raw)
dfdt_dict = generate_dt_dict(df_dt)
#print(df_dt.head(n=2))
# Spot-check one resource.
print(dfdt_dict['https://wormbase.org/'])

{'species': ['http://purl.obolibrary.org/obo/NCBITaxon_6239', 'http://purl.obolibrary.org/obo/NCBITaxon_135651', 'http://purl.obolibrary.org/obo/NCBITaxon_281687', 'http://purl.obolibrary.org/obo/NCBITaxon_31234', 'http://purl.obolibrary.org/obo/NCBITaxon_54126'], 'infectiousAgent': ['http://purl.obolibrary.org/obo/NCBITaxon_6279', 'http://purl.obolibrary.org/obo/NCBITaxon_6282', 'http://purl.obolibrary.org/obo/NCBITaxon_34506', 'http://purl.obolibrary.org/obo/NCBITaxon_70415'], 'topicCategory': ['http://edamontology.org/topic_0621', 'http://edamontology.org/topic_3053', 'http://edamontology.org/topic_0622'], 'keywords': ['http://purl.obolibrary.org/obo/NCIT_C48292', 'http://edamontology.org/topic_0625', 'http://edamontology.org/topic_3067'], 'healthCondition': ['http://purl.obolibrary.org/obo/MONDO_0005761', 'http://purl.obolibrary.org/obo/MONDO_0017137'], 'measurementTechnique': ['http://edamontology.org/operation_0226', 'http://purl.obolibrary.org/obo/OBI_0002628', 'http://purl.obol

### Process the distribution sheet

In [20]:
def create_distro_dict(df):
    """Group the distribution sheet into {url: [distribution objects]}.

    Side effect: the 'dateModified' column of the frame that is passed in is
    rewritten with the output of format_date.
    """
    df['dateModified'] = df.apply(lambda record: format_date(record['dateModified']), axis=1)
    distro_dict = {}
    for eachurl in df['url'].unique().tolist():
        download_rows = df.loc[df['url'] == eachurl].drop('url', axis=1)
        distro_dict[eachurl] = clean_dict_array(download_rows.to_dict(orient='records'))
    return distro_dict

In [21]:
# Load and clean the 'distribution' sheet, then group its rows by url.
df_distro_raw = pd.read_excel(filepath, 'distribution', engine='openpyxl')
df_distro = clean_rawdf(df_distro_raw)
distro_dict = create_distro_dict(df_distro)

# Spot-check one resource.
print(distro_dict['https://wormbase.org/'])
#print(df_distro.head(n=2))

[{'@type': 'DataDownload', 'contentUrl': 'ftp.wormbase.org/pub/wormbase/releases', 'dateModified': '2024-03-29'}]


### Assemble the json records

In [22]:
def process_records(filepath, context_dict):
    """Convert one metadata workbook into a list of JSON-ready records.

    The resource_base sheet supplies one base record per row; the other
    sheets are grouped by url and merged into the matching base record.

    Parameters:
        filepath: path of the .xlsx workbook to convert.
        context_dict: value stored under '@context' in every record.

    Returns:
        A list of dictionaries, one per resource_base row.

    Raises:
        KeyError: if a resource url has no entry in the funding, author,
            related or definedTerms sheets. collectionSize and distribution
            are optional and simply omitted when a url has no rows there.
    """
    sheet_names = ['resource_base', 'funding', 'collectionSize', 'related',
                   'author', 'definedTerms', 'distribution']
    # Open the workbook once and pull every sheet in a single pass.
    sheets = pd.read_excel(filepath, sheet_name=sheet_names, engine='openpyxl')

    df_clean = add_type(run_quick_clean(sheets['resource_base']))
    df_clean['date'] = datetime.now().strftime("%Y-%m-%d")

    funding_dict = generate_funding_dict(sheets['funding'])
    collection_dict = create_collection_dict(clean_rawdf(sheets['collectionSize']))
    related_dict = create_related_dict(clean_rawdf(sheets['related']))
    author_dict = create_author_dict(clean_rawdf(sheets['author']))
    dfdt_dict = generate_dt_dict(clean_rawdf(sheets['definedTerms']))
    distro_dict = create_distro_dict(clean_rawdf(sheets['distribution']))

    batchlist = []
    for eachdict in clean_dict_array(df_clean.to_dict(orient='records')):
        url = eachdict['url']
        eachdict['@context'] = context_dict
        eachdict['funding'] = funding_dict[url]
        eachdict['author'] = author_dict[url]
        # Optional sections: an explicit membership test instead of a bare
        # except, so unrelated errors are no longer silently swallowed.
        if url in collection_dict:
            eachdict['collectionSize'] = collection_dict[url]
        if url in distro_dict:
            eachdict['distribution'] = distro_dict[url]
        eachdict.update(related_dict[url])
        eachdict.update(dfdt_dict[url])
        batchlist.append(eachdict)
    return batchlist

In [23]:
# Namespace prefix -> IRI mapping; process_records stores it under '@context' in every record.
context_dict = {"owl": "http://www.w3.org/2002/07/owl#",
                      "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
                      "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
                      "schema": "http://schema.org/",
                      "niaid": "https://discovery.biothings.io/view/niaid/",
                      "nde": "https://discovery.biothings.io/view/nde/"}

In [24]:
# Build every record from the selected workbook and write them to a dated batch file.
batchlist = process_records(filepath,context_dict)
today = datetime.now()
# NOTE(review): open() does not create result_path; the directory must already exist.
with open(os.path.join(result_path,f'{today.strftime("%Y-%m-%d")}_batch_file.json'),'w') as outfile:
    outfile.write(json.dumps(batchlist, indent=4))
# Print the first record's name as a quick sanity check.
print(batchlist[0]['name'])

WormBase


### test functions

In [None]:
# Scratch cell: runs the funding-dict assembly steps (grouping, single/multi
# split, per-url merge) at notebook scope so the intermediates can be inspected.
# Requires df_funding from the funding cell; overwrites the global funding_dict.
funding_dict = {}
# Count rows per (url, identifier) pair.
funding_grouped = df_funding.groupby(['url','identifier']).size().reset_index(name='counts')
multi_funder_ids = funding_grouped.loc[funding_grouped['counts']>1]['identifier'].unique().tolist()
single_funder_ids = funding_grouped.loc[funding_grouped['counts']==1]['identifier'].unique().tolist()
single_funders = df_funding.loc[df_funding['identifier'].isin(single_funder_ids)].copy()
urllist = df_funding['url'].unique().tolist()
single_funders['temp'] = single_funders.apply(lambda row: process_single_funder(row), axis=1)
funder_array = process_multi_funders(df_funding, multi_funder_ids)
for eachurl in urllist:
    # NOTE(review): funding_array is assigned but never read.
    funding_array = []
    ## get all single funding objects and add to array
    clean_singles = single_funders['temp'].loc[single_funders['url']==eachurl].tolist()
    ## add any multi funding objects to the array
    clean_multi= funder_array['temp'].loc[funder_array['url']==eachurl].tolist()
    clean_singles.extend(clean_multi)
    ## add the funding array to the funding_dict
    funding_dict[eachurl]=clean_singles

print(funding_dict)