# Get the MP-Computed XANES Spectra for our Target Metals from the Mathew data and the Website

 # Goals:
- Query the Materials Project database for spectra which correspond to MP IDs of transition metal oxides. 
- Save only those which pass sanity checks. 
- For structures which lack coordination number info, compute that exclusively for the absorbing atoms.
- Write said spectra (to be post-processed further in future steps).


In future steps, these will be associated with MP Collations.

## Inputs:
{}\_{}\_MP\_API\_collations.json (one file per metal/oxygen pair, e.g. Co\_O\_MP\_API\_collations.json): File containing the structures + associated Bader charges based on MP ID and Structure matching in XAS_collation form, serialized as dictionaries using the as_dict() method built into MSONables, one JSON document per line. Produced by PP-A.

## Outputs:
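{}\_{}\_scraped\_spectra+coord.json (one file per metal/oxygen pair, written to the MP\_OQMD\_combined directory): File containing the spectra scraped from the database which satisfy sanity checks, decorated with coordination number on the absorbing atoms, one JSON document per line.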

In [None]:
import os
from pymatgen.ext.matproj import MPRester
# Root of the local data tree; the inputs and outputs of this notebook live in
# the 'MP_OQMD_combined' sub-directory beneath it.
data_prefix = '/Users/steventorrisi/Documents/TRIXS/data/'
storage_directory = os.path.join(data_prefix, 'MP_OQMD_combined')

# (metal, anion) element pairs to process; every file name below is built
# from these two symbols.
target_elements_groups = [('Co', 'O'), ('Fe', 'O'), ('V', 'O'), ('Cu', 'O'),
                          ('Ni', 'O'), ('Cr', 'O'), ('Mn', 'O'), ('Ti', 'O')]
# Derived from the pairs above so the two collections cannot drift apart.
target_metals = {metal for metal, _ in target_elements_groups}

# NOTE(security): an API key is committed in this file. Prefer supplying the
# key through the PMG_MAPI_KEY environment variable; the literal below is kept
# only as a fallback so existing runs keep working, and should be rotated and
# removed.
mpr = MPRester(os.environ.get('PMG_MAPI_KEY') or '80n2gkFfpXbPxZJTxD')

## Import necessary items

In [None]:
from pymatgen.core import Structure
from trixs.spectra.core import XAS_Spectrum, spectrum_sanity_check
from tqdm import tqdm_notebook
from pprint import pprint
import json
import numpy as np
from monty.json import MSONable
import gc
import matplotlib.pyplot as plt
from pymatgen.analysis.local_env import CrystalNN



#  Load Pre-computed MP IDs

Loop through the MP structures obtained from the written output of an earlier scrape from the Materials Project API.
Each line of those files is one serialized XANES collation; for each one, record its MP ID, grouped by (metal, oxygen) element pair.

In [None]:
# Map each (metal, 'O') pair to the list of MP IDs read from its collation
# file. Each non-blank line of the file is parsed as one JSON document and its
# 'mp_id' entry is recorded.
mp_ids = {pair: [] for pair in target_elements_groups}
for pair in tqdm_notebook(target_elements_groups):
    file_name = '{}_{}_MP_API_collations.json'.format(pair[0], pair[1])
    read_target = os.path.join(storage_directory, file_name)
    with open(read_target, 'r') as f:
        # Stream the file line by line rather than loading it all at once.
        for line in f:
            # A blank line (e.g. a trailing newline at end of file) is not a
            # JSON document and would make json.loads raise; skip it.
            if not line.strip():
                continue
            mp_ids[pair].append(json.loads(line)['mp_id'])
    print("Loaded in {} for {}".format(len(mp_ids[pair]), pair))

## Scrape spectra by querying via the MP Rester using the MP IDs.
Check to see that absorbing atoms have a coordination number; if not, decorate them with one computed by CrystalNN.
(ETA: 2.5 minutes per 1000 materials)

In [None]:
# For every target (metal, 'O') pair, download the FEFF XAS documents of each
# known MP ID, keep the spectra whose absorbing atom is the target metal and
# which pass the sanity check, make sure the absorbing site carries a
# coordination number, and write one JSON document per spectrum to the pair's
# output file. `num_scraped` counts the spectra written per pair.
cnn = CrystalNN(distance_cutoffs=None)
num_scraped = {pair: 0 for pair in target_elements_groups}
for pair in target_elements_groups:
    target_metal = pair[0]
    # Written next to the input collations, in the shared storage directory.
    file_name = '{}_{}_scraped_spectra+coord.json'.format(pair[0], pair[1])
    scrape_path = os.path.join(storage_directory, file_name)

    with open(scrape_path, 'w') as f:
        for mp_id in tqdm_notebook(mp_ids[pair], desc="Downloading {}".format(pair)):
            docs = mpr.get_data(mp_id, data_type='feff', prop='xas')
            # The query can come back empty for a material; indexing [0]
            # unconditionally would abort the whole scrape with IndexError.
            if not docs:
                continue
            doc = docs[0]
            if len(doc['xas']) == 0:
                continue

            for xas in doc['xas']:
                abs_idx = xas['absorbing_atom']
                struc = xas['structure'].as_dict()
                # Only the absorbing site is inspected; the other sites'
                # labels are never needed.
                abs_site = struc['sites'][abs_idx]
                spec = xas['spectrum']

                # Skip empty spectra, ELNES entries, and spectra whose
                # absorbing atom is not the metal currently being processed.
                if len(spec) == 0 \
                        or xas.get('xas_id', '') == 'ELNES' \
                        or (len(spec) == 2 and len(spec[0]) == 0) \
                        or str(abs_site['label']) != target_metal:
                    continue

                # The serialized site may lack a 'properties' entry entirely;
                # treat that the same as a missing coordination number.
                site_props = abs_site.get('properties') or {}
                # A missing (or falsy, e.g. 0) coordination number is
                # recomputed for the absorbing atom only.
                if not site_props.get('coordination_no', False):
                    cur_coord = cnn.get_cn(xas['structure'], abs_idx)
                    xas['structure'].sites[abs_idx].properties['coordination_no'] = cur_coord

                new_spec = XAS_Spectrum.from_atomate_document(xas)
                # Per the original author's note, this catches negative values.
                if not new_spec.sanity_check():
                    continue
                new_spec.metadata = {'id': mp_id, 'origin': 'scrape'}
                f.write(json.dumps(new_spec.as_dict()) + '\n')
                num_scraped[pair] += 1

    print(pair, num_scraped[pair])