# Wikidata - OECD TG - Endpoint - Material

https://web-archive.oecd.org/2016-11-07/361917-dossiers-and-endpoints-testing-programme-manufactured-nanomaterials.htm used to find the NMs tested for each endpoint


https://www.oecd.org/ehs/templates/harmonised-templates.htm used to find the TGs associated to each endpoint

In [28]:
def remove_lr_whitespace(text):
    text = str(text).lstrip()
    return text

In [74]:
import pandas as pd
import numpy as np
import regex as re
import json

## Endpoints and tested materials table
this one we can download directly

In [30]:
endpoint_material = pd.read_excel('oecd/endpoint_material.xlsx').applymap(remove_lr_whitespace)

endpoint_material.sample()

  endpoint_material = pd.read_excel('oecd/endpoint_material.xlsx').applymap(remove_lr_whitespace)


Unnamed: 0,ENDPOINTS,Fullerenes (C60),Single-walled carbon nanotubes (SWCNTs),Multi-walled carbon nanotubes (MWCNTs),Silver nanoparticles,Titanium dioxide NM105,Titanium dioxide NM100,Titanium dioxide NM101,Titanium dioxide NM102,Titanium dioxide NM103,...,Silicon dioxide NM200,Silicon dioxide NM201,Silicon dioxide NM202,Silicon dioxide NM203,Silicon dioxide NM204,Silicon dioxide JP AIST data on SiO2 UFP-80 and NanoTek,Dendrimers,Nanoclays,Gold nanoparticles,TOTALS
36,4.6 Vapour pressure,1.0,,,1.0,1.0,,1.0,,1.0,...,,,,,,,,,,6.0


In [31]:
endpoint_material = endpoint_material.drop(columns=["TOTALS"], errors="ignore")

Which materials?

In [32]:
endpoint_material.columns

Index(['ENDPOINTS', 'Fullerenes (C60)',
       'Single-walled carbon nanotubes (SWCNTs)',
       'Multi-walled carbon nanotubes (MWCNTs)', 'Silver nanoparticles',
       'Titanium dioxide NM105', 'Titanium dioxide NM100',
       'Titanium dioxide NM101', 'Titanium dioxide NM102',
       'Titanium dioxide NM103', 'Titanium dioxide NM104', 'Cerium oxide',
       'Zinc oxide', 'Silicon dioxide NM200 ', 'Silicon dioxide NM201 ',
       'Silicon dioxide NM202 ', 'Silicon dioxide NM203 ',
       'Silicon dioxide NM204 ',
       'Silicon dioxide JP AIST data on SiO2 UFP-80 and NanoTek',
       'Dendrimers ', 'Nanoclays ', 'Gold nanoparticles '],
      dtype='object')

In [33]:
endpoint_material['ENDPOINTS']

0                                1. GENERAL INFORMATION 
1                                     1.1 Identification
2                                       1.2 Composition 
3                                       1.3 Identifiers 
4                            1.4 Analytical information 
                             ...                        
183    14.3.8 Measures to protect humans, animals and...
184          14.3.9 Packaging (12.7 in Annex III of BPR)
185                       14.3.10 Summary and evaluation
186                                               TOTALS
187                                          GRAND TOTAL
Name: ENDPOINTS, Length: 188, dtype: object

Need cleanup

In [34]:
endpoint_material['ENDPOINTS'] = endpoint_material['ENDPOINTS'].str.replace(r'\d+', '', regex=True)
endpoint_material['ENDPOINTS'] = endpoint_material['ENDPOINTS'].str.replace(r'\.', '', regex=True)
endpoint_material['ENDPOINTS']

0                                   GENERAL INFORMATION 
1                                         Identification
2                                           Composition 
3                                           Identifiers 
4                                Analytical information 
                             ...                        
183     Measures to protect humans, animals and the e...
184                     Packaging ( in Annex III of BPR)
185                               Summary and evaluation
186                                               TOTALS
187                                          GRAND TOTAL
Name: ENDPOINTS, Length: 188, dtype: object

In [35]:
endpoint_material.sample(3)

Unnamed: 0,ENDPOINTS,Fullerenes (C60),Single-walled carbon nanotubes (SWCNTs),Multi-walled carbon nanotubes (MWCNTs),Silver nanoparticles,Titanium dioxide NM105,Titanium dioxide NM100,Titanium dioxide NM101,Titanium dioxide NM102,Titanium dioxide NM103,...,Zinc oxide,Silicon dioxide NM200,Silicon dioxide NM201,Silicon dioxide NM202,Silicon dioxide NM203,Silicon dioxide NM204,Silicon dioxide JP AIST data on SiO2 UFP-80 and NanoTek,Dendrimers,Nanoclays,Gold nanoparticles
172,INFORMATION REQUIREMENTS,,,,,,,,,,...,,,,,,,,,,
140,Toxicity to reproduction: other studies,,,,,,,,,,...,,,,,,,,,,
159,Metabolism in rotational crops,,,,,,,,,,...,,,,,,,,,,


## Endpoints and TGs
Retrieved from navigating https://www.oecd.org/ehs/templates/harmonised-templates.htm

Physchem templates: https://www.oecd.org/ehs/templates/harmonised-templates-physico-chemical-properties.htm
![screenshot_physchem_oecd](physchem.png)

The corresponding OECD Test Guidelines are listed in the 5th row

In [36]:
import numpy as np
endpoints_tg = pd.read_csv('oecd/endpoint_guidelines.csv', sep='\t').applymap(remove_lr_whitespace)
endpoints_tg.sample(1)

  endpoints_tg = pd.read_csv('oecd/endpoint_guidelines.csv', sep='\t').applymap(remove_lr_whitespace)


Unnamed: 0,Endpoint,Last update,Corresponding OECD Test Guideline(s)
74,Metabolism of residues in livestock,Jul. 2023,TG503


Match columns endpoints_tg['Template'] and endpoint_material['ENDPOINT']

In [37]:
endpoints_tg.columns

Index(['Endpoint', 'Last update', 'Corresponding OECD Test Guideline(s)'], dtype='object')

In [78]:
seen = []
materials = list(endpoint_material.columns)[1:]
tgs_endpoints_materials = {i: {mat: [] for mat in materials} for i in list(endpoints_tg['Endpoint'])}

for i, rowi in endpoints_tg.iterrows():
    tgs = rowi['Corresponding OECD Test Guideline(s)']
    if tgs != 'none':
        if ',' in tgs:
            tgs = tgs.split(", ")  # Splitting the string correctly
        else:
            tgs = [tgs]
        endpoint_i = rowi['Endpoint']
        if endpoint_i not in seen:
            seen.append(endpoint_i)
            for j, rowj in endpoint_material.iterrows():
                endpoint_j = rowj['ENDPOINTS']
                pattern = re.compile(endpoint_i, re.IGNORECASE)  # No need for parentheses here
                if pattern.search(endpoint_j):
                    tested_mats = [mat for mat in materials if not pd.isnull(rowj[mat])]  # Checking for NaN values
                    for mat in tested_mats:
                        for tg in tgs:
                            tgs_endpoints_materials[endpoint_i][mat].append(tg)  # Using add() to add to a set

print(tgs_endpoints_materials)

{'Water solubility': {'Fullerenes (C60)': ['TG105'], 'Single-walled carbon nanotubes (SWCNTs)': ['TG105'], 'Multi-walled carbon nanotubes (MWCNTs)': ['TG105'], 'Silver nanoparticles': ['TG105'], 'Titanium dioxide NM105': ['TG105'], 'Titanium dioxide NM100': ['TG105'], 'Titanium dioxide NM101': ['TG105'], 'Titanium dioxide NM102': ['TG105'], 'Titanium dioxide NM103': ['TG105'], 'Titanium dioxide NM104': ['TG105'], 'Cerium oxide': ['TG105'], 'Zinc oxide': ['TG105'], 'Silicon dioxide NM200 ': ['TG105'], 'Silicon dioxide NM201 ': ['TG105'], 'Silicon dioxide NM202 ': ['TG105'], 'Silicon dioxide NM203 ': ['TG105'], 'Silicon dioxide NM204 ': ['TG105'], 'Silicon dioxide JP AIST data on SiO2 UFP-80 and NanoTek': ['TG105'], 'Dendrimers ': ['TG105'], 'Nanoclays ': ['TG105'], 'Gold nanoparticles ': ['TG105']}, 'Viscosity': {'Fullerenes (C60)': ['TG114'], 'Single-walled carbon nanotubes (SWCNTs)': ['TG114'], 'Multi-walled carbon nanotubes (MWCNTs)': ['TG114'], 'Silver nanoparticles': ['TG114'], 'Ti

In [79]:
file_path = "data_tg_material_endpoint.json"

# Write the dictionary to the JSON file
with open(file_path, "w") as json_file:
    json.dump(tgs_endpoints_materials, json_file, indent=2)

In [81]:
pd.read_json(file_path)

Unnamed: 0,Water solubility,Viscosity,Vapour pressure,Use and exposure information:Widespread use by professional workers,Use and exposure information:Uses at industrial sites,Use and exposure information:Service life,Use and exposure information:Manufacture,Use and exposure information:Formulating or re-packing,Use and exposure information:Consumer uses,Toxicity to terrestrial plants,...,Adsorption/desorption,Additional toxicological information,Additional physico-chemical information,Additional information onenvironmentalfateand behaviour,Additional information on residuechemistry,Additional ecotoxicological information,Acute toxicity: other routes,Acute toxicity: oral,Acute toxicity: inhalation,Acute toxicity: dermal
Fullerenes (C60),[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Single-walled carbon nanotubes (SWCNTs),[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Multi-walled carbon nanotubes (MWCNTs),[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Silver nanoparticles,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM105,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM100,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM101,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM102,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM103,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
Titanium dioxide NM104,[TG105],[TG114],[TG104],[],[],[],[],[],[],"[TG208, TG227]",...,[],[],[],[],[],[],[],"[TG401, TG420, TG423,TG425]","[TG403, TG433, TG436]",[TG402]
