In [1]:
from urllib.parse import urlencode
import httplib2
import json
import time
import os

# Output directory for fetched data (JSON property entries and CIF structure files)
data_dir = "data_volume/"

# Create the directory if it is missing. exist_ok=True makes this cell safe to
# re-run and avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(data_dir, exist_ok=True)

In [2]:
# API KEY
# The key is read from the MPDS_KEY environment variable so that the secret is
# never stored in (or committed with) the notebook.
api_key = os.environ.get("MPDS_KEY")
if not api_key:
    raise RuntimeError("Set the MPDS_KEY environment variable to your MPDS API key")
endpoint = "https://api.mpds.io/v0/download/facet"

# API TEC SEARCH CRITERIA
# API HITS (counts noted by the author; they may differ from what the API returns now)
# linear TEC, cubic: 356, 250 unique
# linear TEC, all: 1597, 791 unique
# volume TEC, all: 288, 177 unique
# volume TEC, cubic: 73, 38 unique
search = {
    "classes": "oxide",
    "props": "volume thermal expansion coefficient",
    #"lattices": "cubic"
}

# Remember which files were saved: entry ID (also the JSON file stem) -> phase_id
entry_log = dict()

## DOWNLOAD PROPERTIES ##
req = httplib2.Http()
# Cycle through available pages
num_pages = 1  # Placeholder; replaced by the 'npages' value of each response
pg = 0
while pg < num_pages:
    response, content = req.request(
        uri=endpoint + '?' + urlencode({
            'q': json.dumps(search),
            'pagesize': 100,
            'dtype': 1, # PEER REVIEWED  1, MACHINE_LEARNING 2,  MPDSDataTypes.AB_INITIO 4, and ALL 7
            'page' : pg
        }),
        method='GET',
        headers={'Key': api_key}
    )
    pg += 1
    # Check for Errors in the Request
    if response.status != 200:
        # NB 400 means wrong input, 403 means authorization issue etc.
        # see https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
        raise RuntimeError("Error code %s" % response.status)
    content = json.loads(content)
    if content.get('error'):
        raise RuntimeError(content['error'])

    # Sift through Data
    num_pages = content['npages']

    # Save each entry of this page as its own JSON file
    for entry in content['out']:
        material = entry['sample']['material']
        filename = material['entry']
        entry_log[filename] = material['phase_id']  # Log the phase ID for finding structural stuff later
        # The with-statement closes the file; no explicit close() is needed
        with open(os.path.join(data_dir, filename + ".json"), "w") as outfile:
            json.dump(entry, outfile)
    time.sleep(1)  # Pause between page requests
    print(content['count'])

280
280
280


In [3]:
# Group the logged entries by phase: phase_id -> list of entry IDs #
unique_mat = {}
for entry_name, phase_num in entry_log.items():
    # setdefault creates the list the first time a phase is seen
    unique_mat.setdefault(phase_num, []).append(entry_name)
print("Number of entries: ", len(entry_log))
print("Unique Materials: ", len(unique_mat))

Number of entries:  280
Unique Materials:  177


In [4]:
# Display the phase_id -> [entry IDs] mapping built in the previous cell.
# A bare expression on the last line of a cell is rendered as the cell's output.
unique_mat

{5251: ['P1116243-9'],
 6123: ['P606711-10',
  'P606711-11',
  'P606711-12',
  'P606711-13',
  'P606711-14',
  'P606711-15',
  'P606711-16',
  'P606711-17',
  'P606711-18'],
 6133: ['P800730-5', 'P800730-6'],
 6375: ['P601465-5', 'P601465-6'],
 6476: ['P601617-4', 'P601618-4'],
 6597: ['P1706098-7', 'P1917502-1', 'P1917502-2'],
 7128: ['P1703292-4', 'P1703292-5', 'P1703292-6', 'P1703292-7'],
 7151: ['P1915982-4', 'P1915982-5'],
 7279: ['P800031-2'],
 8411: ['P1708770-13',
  'P1708770-14',
  'P1708770-15',
  'P1708770-16',
  'P1708770-17',
  'P1708770-18'],
 9013: ['P1524130-4'],
 9227: ['P800731-4', 'P800731-5'],
 9769: ['P1105079-4'],
 9770: ['P1105078-5'],
 9953: ['P1120220-4', 'P1911406-5'],
 10680: ['P1811325-7'],
 10682: ['P1308593-19', 'P1308593-20', 'P1308593-21'],
 12734: ['P1915449-2'],
 12735: ['P1915450-4'],
 12736: ['P1915448-4'],
 12924: ['P1524094-5'],
 13143: ['P1128947-9'],
 13147: ['P1128948-12'],
 13167: ['P1522355-4', 'P1800127-2'],
 13174: ['P800732-2'],
 13364: ['P

In [5]:
## FETCH STRUCTURAL FILES ##
# For every unique phase_id, request one crystal-structure entry in CIF format
# and save it as <phase_id>.cif inside data_dir.

# Retry settings for HTTP 429 (too many requests)
max_attempts = 5   # requests per phase before giving up
retry_wait = 10    # seconds to wait after the first 429; doubled on every retry

# API STRUCTURE SEARCH CRITERIA (identical for every phase, so built once)
search = { "props": "crystal structure" }

print("Searching for ", len(unique_mat)," structure files in MPDS...")
for phase_id in unique_mat.keys():
    print(phase_id)
    uri = endpoint + '?' + urlencode({
        'q': json.dumps(search),
        'pagesize': 1,
        'phases': phase_id,
        'dtype': 1, # PEER REVIEWED  1, MACHINE_LEARNING 2,  MPDSDataTypes.AB_INITIO 4, and ALL 7
        'fmt' : 'cif'
    })

    # Repeat the request while the server answers 429, backing off in between.
    # Any other status (or the last attempt) falls through to the check below.
    for attempt in range(max_attempts):
        response, content = req.request(
            uri=uri,
            method='GET',
            headers={'Key': api_key}
        )
        if response.status != 429 or attempt == max_attempts - 1:
            break
        time.sleep(retry_wait * 2 ** attempt)

    # Check for Errors in the Request
    if response.status != 200:
        # NB 400 means wrong input, 403 means authorization issue etc.
        # see https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
        raise RuntimeError("Error code %s" % response.status)

    # Save each cif
    # TODO: using first response, in future check for same source as data
    # Keep only the text before the first "end of data item" marker
    cif_str = content.decode("utf-8").split("end of data item")[0]
    filename = str(phase_id)+".cif"
    # Write with the same encoding used for decoding, independent of the platform default
    with open(os.path.join(data_dir, filename), "w", encoding="utf-8") as outfile:
        outfile.write(cif_str)
    time.sleep(2)  # Pause between phases to stay clear of error code 429


Searching for  177  structure files in MPDS...
5251
6123
6133
6375
6476
6597
7128
7151
7279
8411
9013
9227
9769
9770
9953
10680
10682
12734
12735
12736
12924
13143
13147
13167
13174
13364
13577
13605
13606
13703
13884
13920
13952
13995
13996
14013
14051
14097
14652
14774
14861
15284
15904
15933
15947
15966
16103
16378
17239
17742
17756
18590
18591
18678
18679
18858
18987
18989
19423
19425
22034
26449
26506
26531
26534
26641
26652
26722
26847
26984
27058
27291
27302
27330
28085
28091
28148
30787
31133
31761
32428
32442
32568
32672
32993
33074
33181
33762
33836
34388
35258
35301
35303
37323
37999
45697
46262
46263
46265
48146
48147
48692
48693
49967
50120
50133
50163
51611
51772
51860
52994
53160
57462
57463
57464
57465
57466
57472
60109
62777
63656
67009
67011
68041
69072
69671
71625
71776
71777
71819
71825
71826
73665
74302
74310
74502
75159
75431
76923
77138
77535
77537
77538
77539
78504
78607
80614
81343
81611
81612
84107
85670
88303
94428
95425
95569
96784
96834
97590
104799
104958
