In [12]:
from os import environ
from citrination_client import CitrinationClient
from citrination_client import *
from pypif import pif
import csv

In [17]:
# The API key is read from the environment so it never appears in the notebook.
api_key = environ['CITRINATION_API_KEY']
client = CitrinationClient(api_key, 'https://citrination.com')

# Identifier of the Citrination dataset that the searches are filtered on.
dataset_id = '151803'

In [18]:
def _has_value(value):
    """Return True unless ``value`` is None or an empty string.

    Unlike a plain truthiness test, a numeric zero counts as a value.
    """
    return value is not None and value != ""


def _collect_prop_temp_rows(search_result, prop_name):
    """Build ``[formula, property value, temperature]`` rows from search hits.

    Missing (None) hits, properties, conditions or scalars are treated as
    empty instead of raising ``TypeError``.
    """
    rows = []
    for hit in search_result.hits or []:
        system = hit.system
        formula = system.chemical_formula
        # Skip systems without a formula and formulas containing "x" or ".".
        if formula is None or "x" in formula or "." in formula:
            continue
        for prop in system.properties or []:
            if prop.name != prop_name:
                continue
            for cond in prop.conditions or []:
                if cond.name != "Temperature":
                    continue
                prop_scalars = prop.scalars or []
                cond_scalars = cond.scalars or []
                # Values and temperatures are paired by position, so the two
                # lists are only usable when they have the same length.
                if len(prop_scalars) != len(cond_scalars):
                    continue
                for prop_sca, cond_sca in zip(prop_scalars, cond_scalars):
                    if not (_has_value(prop_sca.value) and _has_value(cond_sca.value)):
                        continue
                    # Only temperatures made up purely of digits are kept;
                    # str() lets numeric temperatures through the same check.
                    # Negative or fractional temperatures are dropped.
                    if not str(cond_sca.value).isdigit():
                        continue
                    try:
                        value = float(prop_sca.value)
                    except ValueError as e:
                        # Values such as '3.1 (10%)' cannot be parsed; report
                        # the problem and move on to the next pair.
                        print(e)
                        continue
                    rows.append([formula, value, cond_sca.value])
    return rows


def parse_prop_and_temp(search_result, prop_name):
    """Write the values of one property and their temperatures to a CSV file.

    For every hit in ``search_result`` whose chemical formula contains neither
    "x" nor ".", each property named ``prop_name`` is paired position by
    position with its "Temperature" condition. A pair is kept when both values
    are present, the temperature consists only of digits and the property
    value converts to ``float``. Conversion failures are printed and skipped.

    The rows are written to ``<prop_name>.csv`` in the current working
    directory, under the header ``Formula, <prop_name>, Temperature``. An
    existing file of that name is overwritten.

    Args:
        search_result: Object with a ``hits`` iterable; each hit exposes a
            ``system`` with ``chemical_formula`` and ``properties``.
        prop_name: Name of the property to extract; also the CSV file stem
            and the title of the value column.

    Returns:
        None.
    """
    rows = _collect_prop_temp_rows(search_result, prop_name)

    # newline='' is what the csv module asks for; without it extra blank rows
    # appear on platforms that translate line endings.
    with open(prop_name+'.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Formula', prop_name, "Temperature"])
        writer.writerows(rows)

In [19]:
# Properties to search for; each one gets its own search and its own CSV file.
prop_names = ["Fracture Toughness", "Density", "Flexural Strength", "Elastic Modulus"]

for prop_name in prop_names:
    # Match properties whose name equals prop_name, extracting every value
    # (extract_all=True) under a key named after the property.
    property_query = PropertyQuery(
        name=FieldQuery(filter=[Filter(equal=prop_name)]),
        value=FieldQuery(extract_as=prop_name, extract_all=True),
    )
    system_query = PifSystemQuery(
        chemical_formula=ChemicalFieldQuery(extract_as="formula"),
        properties=property_query,
    )
    # Filter on the configured dataset id.
    data_query = DataQuery(
        dataset=DatasetQuery(id=[Filter(equal=dataset_id)]),
        system=system_query,
    )
    pif_query = PifSystemReturningQuery(size=5000, random_results=True, query=data_query)

    search_result = client.search.pif_search(pif_query)
    hits = search_result.hits

    # Report the hit count and preview the extracted fields of the first two hits.
    print("We found {} records".format(len(hits)))
    print([hit.extracted for hit in hits[:2]])
    parse_prop_and_temp(search_result, prop_name)
    


We found 396 records
[{'Fracture Toughness': ['2.7+-0.2', '2.7+-0.2', '2.6+-0.2', '2.5+-0.1', '2.5+-0.3', '2.4+-0.2', '2.4+-0.2', '2.4+-0.2'], 'formula': 'Si3N4'}, {'Fracture Toughness': ['9.0', '9.1', '9.0', '9.0', '11.2', '10.7', '10.2', '9.3'], 'formula': 'WC.xAl2O3.yCo'}]
could not convert string to float: '3.1 (10%)'
could not convert string to float: '3.1 (10%)'
We found 398 records
[{'formula': 'Si3N4', 'Density': ['2.67']}, {'formula': 'ZrO2', 'Density': ['3.2', '3.4', '3.5', '3.5', '3.6', '4.0', '4.8']}]
could not convert string to float: '3.16 (1%)'
We found 484 records
[{'Flexural Strength': ['436+-55'], 'formula': 'Si3N4.wAl2O3.xMo2C.ySiO2.zY2O3'}, {'Flexural Strength': ['420', '465', '375', '325', '305', '260', '515', '435'], 'formula': 'Si3N4.xAl2O3.yY2O3'}]
We found 381 records
[{'formula': 'La2Zr2O7', 'Elastic Modulus': ['175+-11']}, {'formula': '3Al2O3.2SiO2.xZrO2', 'Elastic Modulus': ['192', '180', '174', '167', '163', '157', '161', '163', '166', '170', '177', '180', 

In [41]:
from matminer.utils.conversions import str_to_composition
from matminer.featurizers.composition import ElementProperty
import pandas as pd

# The featurizer does not depend on the property, so it is built once and
# reused for every CSV file.
ep_feat = ElementProperty.from_preset(preset_name="magpie")
feature_labels = ep_feat.feature_labels()

# Featurized frame of every property, keyed by property name, so the results
# stay available after the loop instead of being overwritten each iteration.
featurized_frames = {}

for prop_name in prop_names:
    df = pd.read_csv(prop_name+'.csv')
    df["composition"] = df["Formula"].transform(str_to_composition)
    df_feat = ep_feat.featurize_dataframe(df, col_id="composition", ignore_errors=True)
    featurized_frames[prop_name] = df_feat
    # With ignore_errors=True a row that fails to featurize is kept and filled
    # with NaN, so len(df_feat) always equals len(df). Count the rows that
    # actually received at least one feature value instead.
    n_featurized = int(df_feat[feature_labels].notna().any(axis=1).sum())
    print("["+prop_name+"]", "rows in csv ==", len(df), "featurized rows =", n_featurized)


[Fracture Toughness] rows in csv- 515 featurized rows= 515
[Density] rows in csv- 245 featurized rows= 245
[Flexural Strength] rows in csv- 886 featurized rows= 886
[Elastic Modulus] rows in csv- 1588 featurized rows= 1588
