# Create Table.
- ### This file creates a table with density of states and lattice structure based on the materials.

In [None]:
import numpy as np
import pandas as pd
import warnings
# Install a blanket "ignore" filter so no warning messages appear in the notebook output.
warnings.filterwarnings(action='ignore')

### Read in material dataset and take only relevant entries.

In [25]:
# Load the tab-separated file NewAllPearson.txt (ISO-8859-1 encoded) into a DataFrame.
pearson_data = pd.read_csv(
    'NewAllPearson.txt',
    sep='\t',
    encoding="ISO-8859-1",
)

In [10]:
def sep_mag_distance(nearest_spacing):
    """Split every nearest-spacing entry into an atom label and a distance.

    Parameters
    ----------
    nearest_spacing : pandas.Series of str
        Each entry must contain at least one ':'; the text before the first
        '-' is taken as the atom label. Presumably entries look like
        ``"Cr-Cr:3.559"`` -- confirm against the input file.

    Returns
    -------
    tuple of pandas.Series
        ``(Mag_atom, Near_Distance)``. ``Mag_atom`` holds the text before the
        first '-'; ``Near_Distance`` holds the text between the first and the
        second ':' and is left as a string (no numeric conversion is done).
    """
    def leading_atom(entry):
        # Everything in front of the first dash.
        return entry.split('-')[0]

    def distance_text(entry):
        # Second colon-separated field.
        return entry.split(':')[1]

    return nearest_spacing.apply(leading_atom), nearest_spacing.apply(distance_text)

# Re-read the raw table inside this cell, then split its 'Nearest spacing'
# column into the atom label and the distance text.
pearson_data = pd.read_csv('NewAllPearson.txt', sep='\t', encoding="ISO-8859-1")
Mag_atom, Near_Distance = sep_mag_distance(pearson_data['Nearest spacing'])

# Keep only the columns used downstream; 'SGR' is the 'SGR no. (std.)' column.
pearson_distance = pd.DataFrame(
    {
        'Formula': pearson_data['Formula'],
        'Mag_atom': Mag_atom,
        'Near_Distance': Near_Distance,
        'SGR': pearson_data['SGR no. (std.)'],
    }
)
pearson_distance.head(5)


Unnamed: 0,Formula,Mag_atom,Near_Distance,SGR
0,Ag0.5Al0.5Cr2S4,Cr,3.559221983102483,227
1,AgAl0.45F6Fe0.55Rb,Fe,3.6949999999999994,62
2,Ag0.66Al3.2Fe9.8La6,Fe,2.444147898893569,140
3,AgAl0.12Fe2.88[OH]6[SO4]2,Fe,3.6698999999999984,166
4,AgxAl0.9Mn1.1,Mn,2.661765955150828,123


### Read in another material dataset with $T_{Curie}$.
### Calculate average and std of $T_{Curie}$ for the same material.

In [26]:
def Tc_filter(tc_data, column=1, rel_tol=0.1):
    """Average the temperatures listed in each row and reject inconsistent rows.

    Each cell of the selected column is expected to hold one or more numbers
    separated by ';' (for example ``"320 ;325 ;"``). For every row the mean
    and the population standard deviation of those numbers are computed. The
    row is kept only if the standard deviation is strictly smaller than
    ``rel_tol`` times the mean; otherwise both results are NaN.

    Parameters
    ----------
    tc_data : pandas.DataFrame
        Table whose ``column``-th column (by position) holds the
        ';'-separated values.
    column : int, optional
        Positional index of the column to parse. Defaults to 1, the column
        the original implementation always read.
    rel_tol : float, optional
        Maximum allowed ratio std/mean (exclusive). Defaults to 0.1.

    Returns
    -------
    list
        ``[Tc_avg, Tc_std]``: two lists with one entry per row of
        ``tc_data``. Rows that are missing, contain no numbers, or fail the
        consistency check get NaN in both lists. Because the comparison is
        strict, a row whose mean is 0 (or negative) is always rejected.

    Raises
    ------
    ValueError
        If a non-blank token cannot be converted to ``float``.
    """
    Tc_avg = []
    Tc_std = []
    for raw_cell in tc_data.iloc[:, column]:
        temperatures = []
        if not pd.isna(raw_cell):
            # Split on the bare ';' and ignore blank tokens so that stray
            # whitespace or a trailing separator no longer breaks float().
            temperatures = [
                float(token)
                for token in str(raw_cell).split(';')
                if token.strip()
            ]

        mean = spread = np.nan
        if temperatures:
            candidate_mean = np.average(temperatures)
            candidate_spread = np.std(temperatures)
            if candidate_spread < rel_tol * candidate_mean:
                mean, spread = candidate_mean, candidate_spread

        Tc_avg.append(mean)
        Tc_std.append(spread)
    return [Tc_avg, Tc_std]

# Load the tab-separated file Curie_Neel.txt and attach the per-row mean and
# standard deviation returned by Tc_filter.
tc_data = pd.read_csv('Curie_Neel.txt', sep='\t', encoding="ISO-8859-1")
tc_data['Tc_avg'], tc_data['Tc_std'] = Tc_filter(tc_data)

# Keep the formula plus the two statistics, rename the formula column to
# 'Formula', drop rows with any NaN, and renumber the index from 0.
tc_data = (
    tc_data[['ChemicalFormula', 'Tc_avg', 'Tc_std']]
    .rename(columns={"ChemicalFormula": "Formula"})
    .dropna()
    .reset_index(drop=True)
)
tc_data.head(5)

Unnamed: 0,Formula,Tc_avg,Tc_std
0,Al0.4Fe1.6Y,320.0,0.0
1,Al0.6Fe1.4Y,72.0,0.0
2,Al0.8Fe1.2Ti,73.0,0.0
3,Al0.8Fe1.2Y,49.0,0.0
4,Al10Fe2Tb,17.6,0.0


### Merge the two tables.

In [15]:
# Join the two tables on 'Formula' using the default join type of merge.
# A formula that occurs several times in pearson_distance yields several rows.
table = pearson_distance.merge(tc_data, on='Formula')
table.head(5)

Unnamed: 0,Formula,Mag_atom,Near_Distance,SGR,Tc_avg,Tc_std
0,AlAu2Mn,Mn,4.495784914784067,225,216.5,16.5
1,AlAu2Mn,Mn,4.497199128346442,225,216.5,16.5
2,AlAu2Mn,Mn,4.495077808002882,225,216.5,16.5
3,AlB2Fe2,Fe,2.685837244036948,65,288.0,13.735599
4,AlB2Fe2,Fe,2.7211215947079803,65,288.0,13.735599


### Fetch DOS and structure from the Materials Project (through the pymatgen `MPRester` API client) and put them in the table.

In [15]:
import os

from pymatgen import MPRester, Composition
from tqdm import tqdm

# NOTE(security): an API key should not live in source control. The key is now
# taken from the PMG_MAPI_KEY environment variable when it is set (presumably
# the same name pymatgen itself uses for this setting -- confirm); the literal
# below is kept only as a fallback so the notebook keeps running unchanged and
# should be rotated and removed.
mpr = MPRester(api_key=os.environ.get('PMG_MAPI_KEY', 'HiiwBuKvSNiOROq9In'))

# Result accumulators, appended to by the query loop and later stored as the
# 'material_id', 'structure' and 'DOS' columns of `table`.
material_id = []
structure = []
DOS = []

def find_SGR(MP_IDs):
    """Map space-group values to a Materials Project entry that has them.

    For every id in ``MP_IDs`` the structure is downloaded through the
    module-level ``mpr`` client and element ``[1]`` of
    ``get_space_group_info()`` is used as the dictionary key (presumably the
    international space-group number -- confirm against pymatgen's docs).

    Parameters
    ----------
    MP_IDs : iterable
        Material ids accepted by ``mpr.get_structure_by_material_id``.

    Returns
    -------
    dict
        ``{space_group: [material_id, structure]}``. When several ids share a
        space group, the one that comes last in ``MP_IDs`` is the one kept.
    """
    by_space_group = {}
    for entry_id in MP_IDs:
        entry_structure = mpr.get_structure_by_material_id(entry_id)
        space_group = entry_structure.get_space_group_info()[1]
        # Later ids deliberately overwrite earlier ones with the same key.
        by_space_group[space_group] = [entry_id, entry_structure]
    return by_space_group

# For every row of `table`, look up the matching Materials Project entries and
# record one material id, its structure and its DOS. Rows without any match
# get NaN in all three lists so the lists stay aligned with the table.
for formula, wanted_sgr in tqdm(zip(table["Formula"], table["SGR"]), total=len(table)):
    reduced_formula = Composition(formula).get_integer_formula_and_factor()[0]
    MP_IDs = mpr.get_materials_ids(reduced_formula)

    if len(MP_IDs) < 1:
        # Nothing found for this formula.
        material_id.append(np.nan)
        structure.append(np.nan)
        DOS.append(np.nan)
        continue

    SGR_dict = find_SGR(MP_IDs)
    if wanted_sgr in SGR_dict:
        # Prefer the entry whose space group equals the table's 'SGR' value.
        MP_ID, matched_structure = SGR_dict[wanted_sgr]
        structure.append(matched_structure)
    else:
        # No space-group match: fall back to a randomly chosen candidate.
        pick = np.random.randint(0, len(MP_IDs))
        MP_ID = MP_IDs[pick]
        structure.append(mpr.get_structure_by_material_id(MP_ID))
    material_id.append(MP_ID)
    DOS.append(mpr.get_dos_by_material_id(MP_ID))

100%|██████████| 5659/5659 [2:34:11<00:00,  1.55s/it]  


### Save to pickle file.

In [16]:
# Add the collected material ids, structures and DOS objects to the table
# (which already holds the Tc columns) and write everything to a pickle file.
for column_name, column_values in (
    ('material_id', material_id),
    ('structure', structure),
    ('DOS', DOS),
):
    table[column_name] = column_values
table.to_pickle('material_2.pkl')