In [1]:
#Pymatgen Imports
from monty.serialization import loadfn, dumpfn
from pymatgen import Structure, Composition, Element
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.entries.entry_tools import EntrySet
from pymatgen.util.plotting import periodic_table_heatmap

#Python Imports
from itertools import groupby
from re import sub

#Data Imports
import pandas as pd

#Plotting Imports
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
init_notebook_mode()
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the polymorph-pair table from JSON into a DataFrame and display it as the cell output.
matt_structure_pairs = pd.read_json('polymorph_pairs_matt.json')
matt_structure_pairs

Unnamed: 0,task_id,e_above_hull,icsd_ids,formula,count,closest_e_above_hull,band_gap,small_gap,nelements,theoretical
0,mp-4691,0.000000,2,Ag2CO3,2,0.026723,0.4668,False,3,False
1,mp-560717,0.026723,1,Ag2CO3,2,0.026723,0.7414,False,3,False
2,mp-23485,0.000000,3,Ag2HgI4,2,0.001559,1.2206,False,3,False
3,mp-570256,0.001559,1,Ag2HgI4,2,0.001559,1.0992,False,3,False
4,mp-27966,0.000000,1,Ag2Mo2O7,2,0.000818,1.8486,False,3,False
...,...,...,...,...,...,...,...,...,...,...
3566,mp-20935,0.020340,1,ZrSnIr,2,0.020340,0.0000,True,3,False
3567,mp-4667,0.000000,2,ZrSnRh,2,0.010518,0.0000,True,3,False
3568,mp-1080815,0.010518,1,ZrSnRh,2,0.010518,0.0000,True,3,False
3569,mp-1539,0.000000,4,ZrTe,2,0.024372,0.0000,True,2,False


In [3]:
# Deserialize the paired SCAN and GGA entries (snapshots dated 2020-03-31, per the file names).
# Both objects are later iterated entry-by-entry by sort_by_formula.
paired_scan_entries = loadfn('paired_scan_entries_2020-03-31.json')
paired_gga_entries = loadfn('paired_gga_entries_2020-03-31.json')



In [159]:
def sort_by_formula(entries_list):
    """Group entries by reduced formula and rank each group by energy per atom.

    Args:
        entries_list: iterable of entries exposing
            ``.structure.composition.reduced_formula`` and ``.energy_per_atom``.

    Returns:
        pd.DataFrame with one row per reduced formula (in order of first
        appearance) and two columns: 'Formula' (str) and 'Entries' (list of the
        entries with that formula, lowest energy_per_atom first).
    """
    grouped = {}
    for item in entries_list:
        formula = item.structure.composition.reduced_formula
        grouped.setdefault(formula, []).append(item)

    ranked_groups = [
        sorted(group, key=lambda member: member.energy_per_atom)
        for group in grouped.values()
    ]
    return pd.DataFrame({'Formula': list(grouped), 'Entries': ranked_groups})

#'Formula' column: compound reduced formula (str)
#'Entries' column: list of the entries sharing that reduced formula, lowest energy_per_atom first
#(the entries' own compositions are not necessarily written in reduced form)
scan_sorted = sort_by_formula(paired_scan_entries)
gga_sorted = sort_by_formula(paired_gga_entries)

In [160]:
# As of 4/3, there are 139 (1785 - 1646) compounds present in GGA data that are not in SCAN data
scan_keys, gga_keys = (set(frame['Formula']) for frame in (scan_sorted, gga_sorted))
# Formulas that only the GGA data set contains, in alphabetical order
not_in_scan = sorted(gga_keys - scan_keys)
len(not_in_scan)

139

In [173]:
#Trimmed versions with only compounds that have exactly 2 SCAN entries and at least 2 GGA entries (down to 1391)
#
#Downstream cells combine scan_trimmed, gga_trimmed and compounds *positionally* (zip) and read
#Entries[0] / Entries[1] from both frames, so the three objects must
#  1. contain exactly the same formulas,
#  2. list them in the same order, and
#  3. hold at least two entries per formula.
#Filtering gga_sorted by membership alone keeps GGA's own row order, which only matches SCAN's
#by coincidence of how the input files were written, so the GGA rows are selected by label instead.

# Formulas for which GGA can provide both a ground state and an unstable polymorph
gga_paired_formulas = set(gga_sorted.loc[gga_sorted['Entries'].map(len) >= 2, 'Formula'])

scan_has_pair = scan_sorted['Entries'].map(len) == 2
scan_has_gga_partner = scan_sorted['Formula'].isin(gga_paired_formulas)
scan_trimmed = scan_sorted[scan_has_pair & scan_has_gga_partner].reset_index(drop=True)
compounds = list(scan_trimmed['Formula'])

# Look the GGA rows up by formula, in SCAN order, so row i of both frames is the same compound.
# 'Formula' is unique in gga_sorted (it is built from dict keys), so .loc returns one row per formula.
gga_trimmed = gga_sorted.set_index('Formula').loc[compounds].reset_index()

In [456]:
# Every 'Entries' list is sorted by energy per atom, so position 0 is the ground state
# and position 1 is the next-lowest (unstable) polymorph.
scan_ranked = list(scan_trimmed['Entries'])
gga_ranked = list(gga_trimmed['Entries'])

scan_gs = [ranked[0] for ranked in scan_ranked]
scan_unstable = [ranked[1] for ranked in scan_ranked]
gga_gs = [ranked[0] for ranked in gga_ranked]
gga_unstable = [ranked[1] for ranked in gga_ranked]

# One row per compound; the five lists are combined positionally.
column_names = ['Formula', 'SCAN Ground State', 'SCAN Unstable', 'GGA Ground State', 'GGA Unstable']
rows = zip(compounds, scan_gs, scan_unstable, gga_gs, gga_unstable)
all_data = pd.DataFrame(rows, columns=column_names)
all_data

Unnamed: 0,Formula,SCAN Ground State,SCAN Unstable,GGA Ground State,GGA Unstable
0,GaTe,ComputedStructureEntry 8241 - Ga6 Te6\nEnergy ...,ComputedStructureEntry 8577 - Ga4 Te4\nEnergy ...,ComputedStructureEntry mp-542812 - Ga6 Te6\nEn...,ComputedStructureEntry mp-10009 - Ga4 Te4\nEne...
1,TaS2,ComputedStructureEntry 5578 - Ta2 S4\nEnergy =...,ComputedStructureEntry 7129 - Ta1 S2\nEnergy =...,ComputedStructureEntry mp-1984 - Ta2 S4\nEnerg...,ComputedStructureEntry mp-10014 - Ta1 S2\nEner...
2,LuCuS2,ComputedStructureEntry 7135 - Lu1 Cu1 S2\nEner...,ComputedStructureEntry 8480 - Lu4 Cu4 S8\nEner...,ComputedStructureEntry mp-1001780 - Lu1 Cu1 S2...,ComputedStructureEntry mp-12457 - Lu4 Cu4 S8\n...
3,LiTiS2,ComputedStructureEntry 4496 - Li1 Ti1 S2\nEner...,ComputedStructureEntry 2322 - Li1 Ti1 S2\nEner...,ComputedStructureEntry mp-9615 - Li1 Ti1 S2\nE...,ComputedStructureEntry mp-1001784 - Li1 Ti1 S2...
4,MnGa,ComputedStructureEntry 2187 - Mn1 Ga1\nEnergy ...,ComputedStructureEntry 9511 - Mn13 Ga13\nEnerg...,ComputedStructureEntry mp-1001836 - Mn1 Ga1\nE...,ComputedStructureEntry mp-636105 - Mn13 Ga13\n...
...,...,...,...,...,...
1385,CsPrS2,ComputedStructureEntry 6452 - Cs2 Pr2 S4\nEner...,ComputedStructureEntry 6336 - Cs1 Pr1 S2\nEner...,ComputedStructureEntry mp-9037 - Cs2 Pr2 S4\nE...,ComputedStructureEntry mp-9080 - Cs1 Pr1 S2\nE...
1386,CsTbS2,ComputedStructureEntry 6441 - Cs2 Tb2 S4\nEner...,ComputedStructureEntry 7048 - Cs1 Tb1 S2\nEner...,ComputedStructureEntry mp-9085 - Cs1 Tb1 S2\nE...,ComputedStructureEntry mp-972199 - Cs2 Tb2 S4\...
1387,CsDyS2,ComputedStructureEntry 6447 - Cs2 Dy2 S4\nEner...,ComputedStructureEntry 7037 - Cs1 Dy1 S2\nEner...,ComputedStructureEntry mp-984555 - Cs2 Dy2 S4\...,ComputedStructureEntry mp-9086 - Cs1 Dy1 S2\nE...
1388,K3SbS4,ComputedStructureEntry 8032 - K6 Sb2 S8\nEnerg...,ComputedStructureEntry 6421 - K3 Sb1 S4\nEnerg...,ComputedStructureEntry mp-9781 - K6 Sb2 S8\nEn...,ComputedStructureEntry mp-9911 - K3 Sb1 S4\nEn...


In [457]:
#Matching: 'Yes' where matcher.fit reports that the SCAN and GGA ground-state structures match,
#          'No' otherwise (stored as strings, one per row of all_data)
matcher = StructureMatcher()
ground_state_pairs = zip(all_data['SCAN Ground State'], all_data['GGA Ground State'])
matching = [
    'Yes' if matcher.fit(scan_entry.structure, gga_entry.structure) else 'No'
    for scan_entry, gga_entry in ground_state_pairs
]
all_data['Matching'] = matching

In [458]:
'''
(energy_per_atom of the SCAN ground state minus energy_per_atom of the unstable SCAN polymorph that is also the GGA ground state )
(energy_per_atom of the unstable GGA polymorph that is also the SCAN ground state minus energy_per_atom of GGA ground state)
'''

# E1: SCAN ground state minus SCAN unstable polymorph (energy per atom)
e1 = [
    stable.energy_per_atom - unstable.energy_per_atom
    for stable, unstable in zip(all_data['SCAN Ground State'], all_data['SCAN Unstable'])
]
# E2: GGA unstable polymorph minus GGA ground state (energy per atom)
e2 = [
    unstable.energy_per_atom - stable.energy_per_atom
    for stable, unstable in zip(all_data['GGA Ground State'], all_data['GGA Unstable'])
]
# Energy Difference: E1 - E2, row by row
e3 = [scan_gap - gga_gap for scan_gap, gga_gap in zip(e1, e2)]

all_data['E1'] = e1
all_data['E2'] = e2
all_data['Energy Difference'] = e3
all_data

Unnamed: 0,Formula,SCAN Ground State,SCAN Unstable,GGA Ground State,GGA Unstable,Matching,E1,E2,Energy Difference
0,GaTe,ComputedStructureEntry 8241 - Ga6 Te6\nEnergy ...,ComputedStructureEntry 8577 - Ga4 Te4\nEnergy ...,ComputedStructureEntry mp-542812 - Ga6 Te6\nEn...,ComputedStructureEntry mp-10009 - Ga4 Te4\nEne...,Yes,-0.003229,0.003981,-0.007210
1,TaS2,ComputedStructureEntry 5578 - Ta2 S4\nEnergy =...,ComputedStructureEntry 7129 - Ta1 S2\nEnergy =...,ComputedStructureEntry mp-1984 - Ta2 S4\nEnerg...,ComputedStructureEntry mp-10014 - Ta1 S2\nEner...,Yes,-0.000505,0.001142,-0.001647
2,LuCuS2,ComputedStructureEntry 7135 - Lu1 Cu1 S2\nEner...,ComputedStructureEntry 8480 - Lu4 Cu4 S8\nEner...,ComputedStructureEntry mp-1001780 - Lu1 Cu1 S2...,ComputedStructureEntry mp-12457 - Lu4 Cu4 S8\n...,Yes,-0.032833,0.005108,-0.037940
3,LiTiS2,ComputedStructureEntry 4496 - Li1 Ti1 S2\nEner...,ComputedStructureEntry 2322 - Li1 Ti1 S2\nEner...,ComputedStructureEntry mp-9615 - Li1 Ti1 S2\nE...,ComputedStructureEntry mp-1001784 - Li1 Ti1 S2...,Yes,-0.011940,0.021890,-0.033830
4,MnGa,ComputedStructureEntry 2187 - Mn1 Ga1\nEnergy ...,ComputedStructureEntry 9511 - Mn13 Ga13\nEnerg...,ComputedStructureEntry mp-1001836 - Mn1 Ga1\nE...,ComputedStructureEntry mp-636105 - Mn13 Ga13\n...,Yes,-0.059527,0.038018,-0.097545
...,...,...,...,...,...,...,...,...,...
1385,CsPrS2,ComputedStructureEntry 6452 - Cs2 Pr2 S4\nEner...,ComputedStructureEntry 6336 - Cs1 Pr1 S2\nEner...,ComputedStructureEntry mp-9037 - Cs2 Pr2 S4\nE...,ComputedStructureEntry mp-9080 - Cs1 Pr1 S2\nE...,Yes,-0.001781,0.001087,-0.002867
1386,CsTbS2,ComputedStructureEntry 6441 - Cs2 Tb2 S4\nEner...,ComputedStructureEntry 7048 - Cs1 Tb1 S2\nEner...,ComputedStructureEntry mp-9085 - Cs1 Tb1 S2\nE...,ComputedStructureEntry mp-972199 - Cs2 Tb2 S4\...,No,-0.002279,0.000447,-0.002726
1387,CsDyS2,ComputedStructureEntry 6447 - Cs2 Dy2 S4\nEner...,ComputedStructureEntry 7037 - Cs1 Dy1 S2\nEner...,ComputedStructureEntry mp-984555 - Cs2 Dy2 S4\...,ComputedStructureEntry mp-9086 - Cs1 Dy1 S2\nE...,Yes,-0.002305,0.001818,-0.004123
1388,K3SbS4,ComputedStructureEntry 8032 - K6 Sb2 S8\nEnerg...,ComputedStructureEntry 6421 - K3 Sb1 S4\nEnerg...,ComputedStructureEntry mp-9781 - K6 Sb2 S8\nEn...,ComputedStructureEntry mp-9911 - K3 Sb1 S4\nEn...,Yes,-0.012292,0.007862,-0.020154


In [459]:
def _any_element(entry, predicate):
    """Return True if `predicate` holds for at least one element of the entry's composition."""
    return any(predicate(elem) for elem in entry.structure.composition.elements)


def oxide(entry):
    """Return True if the entry's composition contains oxygen.

    Compares element symbols rather than testing equality with ``Element.O``, so the
    check also holds when the composition carries oxidation-state-decorated species.
    """
    return _any_element(entry, lambda elem: elem.symbol == 'O')


def transition(entry):
    """Return True if the entry's composition contains at least one transition metal."""
    return _any_element(entry, lambda elem: elem.is_transition_metal)


# Classifiers that are currently disabled:
# post_transition = lambda entry: any([elem.is_post_transition_metal for elem in entry.structure.composition.elements])
# alkali = lambda entry: any([elem.is_alkali for elem in entry.structure.composition.elements])
# alkaline = lambda entry: any([elem.is_alkaline for elem in entry.structure.composition.elements])
# metalloid = lambda entry: any([elem.is_metalloid for elem in entry.structure.composition.elements])


def halogen(entry):
    """Return True if the entry's composition contains at least one halogen."""
    return _any_element(entry, lambda elem: elem.is_halogen)

In [460]:
# Classify every compound by its SCAN ground-state entry.
ground_states = list(all_data['SCAN Ground State'])

# One boolean flag per row for each chemistry class
oxide_l = [bool(oxide(entry)) for entry in ground_states]
transition_l = [bool(transition(entry)) for entry in ground_states]
halogen_l = [bool(halogen(entry)) for entry in ground_states]

# Per-row list of the class names that apply, always in the order oxide, transition, halogen
labels = [
    [tag for tag, present in (('oxide', has_oxide), ('transition', has_transition), ('halogen', has_halogen)) if present]
    for has_oxide, has_transition, has_halogen in zip(oxide_l, transition_l, halogen_l)
]

all_data['Oxide?'] = oxide_l
all_data['Transition?'] = transition_l
all_data['Halogen?'] = halogen_l
all_data['Labels'] = labels

In [461]:
# Number of distinct elements in each compound, counted on the SCAN ground-state composition
num_elems = [
    len(entry.structure.composition.elements)
    for entry in all_data['SCAN Ground State']
]
all_data['Unique Elements'] = num_elems
all_data

Unnamed: 0,Formula,SCAN Ground State,SCAN Unstable,GGA Ground State,GGA Unstable,Matching,E1,E2,Energy Difference,Oxide?,Transition?,Halogen?,Labels,Unique Elements
0,GaTe,ComputedStructureEntry 8241 - Ga6 Te6\nEnergy ...,ComputedStructureEntry 8577 - Ga4 Te4\nEnergy ...,ComputedStructureEntry mp-542812 - Ga6 Te6\nEn...,ComputedStructureEntry mp-10009 - Ga4 Te4\nEne...,Yes,-0.003229,0.003981,-0.007210,False,False,False,[],2
1,TaS2,ComputedStructureEntry 5578 - Ta2 S4\nEnergy =...,ComputedStructureEntry 7129 - Ta1 S2\nEnergy =...,ComputedStructureEntry mp-1984 - Ta2 S4\nEnerg...,ComputedStructureEntry mp-10014 - Ta1 S2\nEner...,Yes,-0.000505,0.001142,-0.001647,False,True,False,[transition],2
2,LuCuS2,ComputedStructureEntry 7135 - Lu1 Cu1 S2\nEner...,ComputedStructureEntry 8480 - Lu4 Cu4 S8\nEner...,ComputedStructureEntry mp-1001780 - Lu1 Cu1 S2...,ComputedStructureEntry mp-12457 - Lu4 Cu4 S8\n...,Yes,-0.032833,0.005108,-0.037940,False,True,False,[transition],3
3,LiTiS2,ComputedStructureEntry 4496 - Li1 Ti1 S2\nEner...,ComputedStructureEntry 2322 - Li1 Ti1 S2\nEner...,ComputedStructureEntry mp-9615 - Li1 Ti1 S2\nE...,ComputedStructureEntry mp-1001784 - Li1 Ti1 S2...,Yes,-0.011940,0.021890,-0.033830,False,True,False,[transition],3
4,MnGa,ComputedStructureEntry 2187 - Mn1 Ga1\nEnergy ...,ComputedStructureEntry 9511 - Mn13 Ga13\nEnerg...,ComputedStructureEntry mp-1001836 - Mn1 Ga1\nE...,ComputedStructureEntry mp-636105 - Mn13 Ga13\n...,Yes,-0.059527,0.038018,-0.097545,False,True,False,[transition],2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,CsPrS2,ComputedStructureEntry 6452 - Cs2 Pr2 S4\nEner...,ComputedStructureEntry 6336 - Cs1 Pr1 S2\nEner...,ComputedStructureEntry mp-9037 - Cs2 Pr2 S4\nE...,ComputedStructureEntry mp-9080 - Cs1 Pr1 S2\nE...,Yes,-0.001781,0.001087,-0.002867,False,False,False,[],3
1386,CsTbS2,ComputedStructureEntry 6441 - Cs2 Tb2 S4\nEner...,ComputedStructureEntry 7048 - Cs1 Tb1 S2\nEner...,ComputedStructureEntry mp-9085 - Cs1 Tb1 S2\nE...,ComputedStructureEntry mp-972199 - Cs2 Tb2 S4\...,No,-0.002279,0.000447,-0.002726,False,False,False,[],3
1387,CsDyS2,ComputedStructureEntry 6447 - Cs2 Dy2 S4\nEner...,ComputedStructureEntry 7037 - Cs1 Dy1 S2\nEner...,ComputedStructureEntry mp-984555 - Cs2 Dy2 S4\...,ComputedStructureEntry mp-9086 - Cs1 Dy1 S2\nE...,Yes,-0.002305,0.001818,-0.004123,False,False,False,[],3
1388,K3SbS4,ComputedStructureEntry 8032 - K6 Sb2 S8\nEnerg...,ComputedStructureEntry 6421 - K3 Sb1 S4\nEnerg...,ComputedStructureEntry mp-9781 - K6 Sb2 S8\nEn...,ComputedStructureEntry mp-9911 - K3 Sb1 S4\nEn...,Yes,-0.012292,0.007862,-0.020154,False,False,False,[],3


In [467]:
#Computing the dimensionality with the code below takes a very long time!
#The results are already stored in data.json ('Dimensions' column), so this cell does not need to be re-run.

# from pymatgen.analysis.dimensionality import get_dimensionality_gorai
# dimensions = lambda entry: get_dimensionality_gorai(entry.structure)
# dims = list()
# for i in range(len(all_data)):
#     entry = all_data.at[i, 'SCAN Ground State']
#     dims.append(dimensions(entry))
# all_data['Dimensions'] = dims
# all_data

In [463]:
# Reuse the 'Dimensions' column stored in data.json instead of recomputing it.
# NOTE(review): data.json is also written by a later cell of this notebook (data_copy.to_json),
# so this cell needs a data.json from an earlier session — a fresh top-to-bottom run cannot create it first.
dimensions = pd.read_json('data.json')['Dimensions']
# Assigning a Series aligns on index labels; assumes data.json rows are in the same order as all_data — TODO confirm.
all_data['Dimensions'] = dimensions

In [465]:
# Serializable copy of the table: every entry object is replaced by its as_dict() form,
# leaving all_data itself untouched.
data_copy = all_data.copy()
entry_columns = ['SCAN Ground State', 'SCAN Unstable', 'GGA Ground State', 'GGA Unstable']
for column in entry_columns:
    data_copy[column] = [entry.as_dict() for entry in data_copy[column]]
data_copy

Unnamed: 0,Formula,SCAN Ground State,SCAN Unstable,GGA Ground State,GGA Unstable,Matching,E1,E2,Energy Difference,Oxide?,Transition?,Halogen?,Labels,Unique Elements,Dimensions
0,GaTe,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.003229,0.003981,-0.007210,False,False,False,[],2,3
1,TaS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.000505,0.001142,-0.001647,False,True,False,[transition],2,2
2,LuCuS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.032833,0.005108,-0.037940,False,True,False,[transition],3,3
3,LiTiS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.011940,0.021890,-0.033830,False,True,False,[transition],3,2
4,MnGa,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.059527,0.038018,-0.097545,False,True,False,[transition],2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,CsPrS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.001781,0.001087,-0.002867,False,False,False,[],3,2
1386,CsTbS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,No,-0.002279,0.000447,-0.002726,False,False,False,[],3,2
1387,CsDyS2,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.002305,0.001818,-0.004123,False,False,False,[],3,2
1388,K3SbS4,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,{'@module': 'pymatgen.entries.computed_entries...,Yes,-0.012292,0.007862,-0.020154,False,False,False,[],3,1


In [466]:
# Write the serialized table to data.json. This is the same file the 'Dimensions' column
# is read from earlier in the notebook, so running this cell overwrites that source.
data_copy.to_json('data.json')

In [None]:
#Below Here: Old Code!

In [None]:
# The 'Matching' column holds the strings 'Yes' / 'No' rather than booleans, so it cannot be used
# directly as a boolean mask (and `not x` is False for every non-empty string). Compare explicitly.
matching = all_data[all_data['Matching'] == 'Yes']
not_matching = all_data[all_data['Matching'] == 'No']

#For each formula whose ground states differ, look for the SCAN ground state among the GGA
#entries and for the GGA ground state among the SCAN entries.
#Key: formula, value: associated matching entry (0 if no match found)
# NOTE(review): `no_match` is not defined in the visible part of this notebook, and the
# `scan_trimmed[formula]` / `gga_trimmed[formula]` lookups appear to assume containers keyed by
# formula, whereas the trimming cell above builds DataFrames with 'Formula'/'Entries' columns — confirm before running.
matching_in_gga, matching_in_scan = dict(), dict()

for formula in no_match:
    # 0 is the "no match found" sentinel; it is falsy, which the list comprehensions below rely on
    matching_in_gga[formula], matching_in_scan[formula] = 0, 0
    for entry in gga_trimmed[formula]:
        if matcher.fit(scan_trimmed[formula][0].structure, entry.structure):
            matching_in_gga[formula] = entry
            break
    for entry in scan_trimmed[formula]:
        if matcher.fit(gga_trimmed[formula][0].structure, entry.structure):
            matching_in_scan[formula] = entry
            break

#scan ground states not in gga, gga ground states not in scan
scan_notin_gga = [formula for formula in matching_in_gga.keys() if not matching_in_gga[formula]]
gga_notin_scan = [formula for formula in matching_in_scan.keys() if not matching_in_scan[formula]]
len(scan_notin_gga), len(gga_notin_scan)

#Find all non-problematic formulas for non-matching ground states
#(a formula is "problematic" when either ground state has no counterpart in the other data set)
problems = set(scan_notin_gga).union(set(gga_notin_scan))
no_problems = list(set(no_match).difference(problems))

'''
(energy_per_atom of the SCAN ground state minus energy_per_atom of the unstable SCAN polymorph that is also the GGA ground state )
(energy_per_atom of the unstable GGA polymorph that is also the SCAN ground state minus energy_per_atom of GGA ground state)
'''
scan_gs_minus_in_gga, in_scan_minus_gga_gs = list(), list()
# l1 / l2 are aliases of the two lists above, so appending to them fills those lists
l1, l2 = scan_gs_minus_in_gga, in_scan_minus_gga_gs
for formula in no_problems:
    # The -1 factor flips the sign of (SCAN ground state - matching SCAN entry)
    l1.append(-1*(scan_trimmed[formula][0].energy_per_atom - matching_in_scan[formula].energy_per_atom))
    l2.append(matching_in_gga[formula].energy_per_atom - gga_trimmed[formula][0].energy_per_atom)
    
# l3: formula -> (SCAN ground-state energy_per_atom - GGA ground-state energy_per_atom)
# NOTE(review): `match` is not defined in the visible part of this notebook — confirm where it comes from.
l3 = dict()
for formula in match:
    l3.update({formula: scan_trimmed[formula][0].energy_per_atom - gga_trimmed[formula][0].energy_per_atom})
    
# Collect the l3 value of each formula into one list per chemistry class; a formula is added to
# every class whose test passes for its SCAN ground state, so it can land in several lists.
# NOTE(review): no active definition of `metalloid` is visible in this part of the notebook
# (only a commented-out lambda), so the metalloid test below would raise NameError as written.
oxides, transitions, metalloids, halogens = list(), list(), list(), list()
for formula in match:
    if oxide(scan_trimmed[formula][0]):
        oxides.append(l3[formula])
    if transition(scan_trimmed[formula][0]):
        transitions.append(l3[formula])
    if metalloid(scan_trimmed[formula][0]):
        metalloids.append(l3[formula])
    if halogen(scan_trimmed[formula][0]):
        halogens.append(l3[formula])
        
# Split the l3 values by the number of distinct elements in the SCAN ground state.
is_binary = lambda entry: len(entry.structure.composition.elements) == 2
is_ternary = lambda entry: len(entry.structure.composition.elements) == 3
binaries, ternaries = list(), list()
for formula in match:
    if is_binary(scan_trimmed[formula][0]):
        binaries.append(l3[formula])
    elif is_ternary(scan_trimmed[formula][0]):
        ternaries.append(l3[formula])

# Build the scatter-plot table: one row per non-problematic formula.
# The accumulator is created at top level. It used to be indented into the `elif` branch above,
# so it only existed if a ternary compound had been seen and was re-created on every ternary hit.
data = {'E1':[], 'E2':[], 'Type':[], 'Formula':[]}
for formula in no_problems:
    data['E1'].append(-1*(scan_trimmed[formula][0].energy_per_atom - matching_in_scan[formula].energy_per_atom))
    data['E2'].append(matching_in_gga[formula].energy_per_atom - gga_trimmed[formula][0].energy_per_atom)
    # Exactly one type per formula: the first test that passes wins, in this order.
    # NOTE(review): no active definition of `metalloid` is visible in this part of the notebook — confirm.
    if oxide(scan_trimmed[formula][0]):
        data['Type'].append('Oxide')
    elif transition(scan_trimmed[formula][0]):
        data['Type'].append('Transition')
    elif halogen(scan_trimmed[formula][0]):
        data['Type'].append('Halogen')
    elif metalloid(scan_trimmed[formula][0]):
        data['Type'].append('Metalloid')
    else:
        data['Type'].append('Other')
    data['Formula'].append(formula)


df = pd.DataFrame(data, columns = ['E1', 'E2', 'Type', 'Formula'])


# Interactive scatter of E1 against E2, coloured by type, with the formula shown on hover
fig = px.scatter(df, x='E1', y='E2', color='Type', hover_name='Formula')
fig.show()