In [1]:
# Install the required packages
try:
    import google.colab

    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    !pip install "smact[featurisers] @ git+https://github.com/WMD-group/SMACT.git@master" --quiet

In [3]:
"""
This module imports necessary libraries and modules for generating and analyzing
quaternary oxide compositions using SMACT and machine learning techniques.

"""
# Standard library imports
import multiprocessing
from itertools import combinations, product

# Third-party imports
import pandas as pd

"""
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
from pymatgen.core import Composition

"""

# Local imports
import smact
from smact import screening
from smact import Element

"""
Imported modules:
- multiprocessing: For parallel processing capabilities
- itertools: For generating combinations and products
- pandas: For data manipulation and analysis
- matminer: For materials data mining and feature extraction
- pymatgen: For materials analysis
- smact: For structure prediction and analysis of new materials
"""

'\nImported modules:\n- multiprocessing: For parallel processing capabilities\n- itertools: For generating combinations and products\n- pandas: For data manipulation and analysis\n- matminer: For materials data mining and feature extraction\n- pymatgen: For materials analysis\n- smact: For structure prediction and analysis of new materials\n'

Step 1: Build a list of acceptable elements by removing unwanted ones (radioactive elements, lanthanides, actinides, hydrogen, and elements with no known oxidation states)

Step 2: Generate all the combinations that these elements can form in threes

In [5]:
# Define the elements we are interested in
all_el = smact.element_dictionary()
symbol_list = list(all_el.keys())

# List of element symbols to exclude:
# all lanthanides and actinides, the radioactive elements, and hydrogen.
do_not_want = [
    # Actinides
    "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm",
    "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
    # Lanthanides
    "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
    "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu",
    # Radioactive / superheavy elements
    "Tc", "Fr", "Ra", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
    "Ds", "Rg", "Cn", "Uut", "Fl", "Uup", "Lv", "Uus",
    "Uuo",  # was misspelled "UUo", which could never match a symbol
    "Po", "At", "Rn",
    # Hydrogen
    "H",
]

# Elements that survive the exclusion list, in the order of `symbol_list`
candidate_elements = [all_el[x] for x in symbol_list if x not in do_not_want]

# Split the candidates into elements with and without known oxidation states.
# Two new lists are built instead of calling good_elements.remove() inside a
# loop over good_elements: removing while iterating shifts the remaining items
# left, so the element directly after each removed one would never be checked.
good_elements = [el for el in candidate_elements if el.oxidation_states != []]
no_ox = [el for el in candidate_elements if el.oxidation_states == []]

# Independent copy of the accepted elements, used later for reporting
no_ge = list(good_elements)

# Generate all possible combinations of 3 elements from good_elements.
# `all_el_combos` is a one-shot iterator; it is exhausted by list() below,
# so use `no_comb` for any further work.
all_el_combos = combinations(good_elements, 3)
no_comb = list(all_el_combos)




In [9]:
# Report the size of the search space, one number per line:
# first the number of three-element combinations, then the number of
# valid elements they were built from.
for size in (len(no_comb), len(no_ge)):
    print(size)

39711
63


Step 3: Use the Pauling electronegativity test and the neutral-ratio check to obtain the possible oxidation states and stoichiometries formed by these elements

In [12]:

def step_3(w):
    """Screen one three-element combination for charge-neutral compositions.

    Every combination of oxidation states (one per element) is first passed
    through ``screening.pauling_test``; those that pass are handed to
    ``smact.neutral_ratios`` (``threshold=6``) to enumerate the allowed
    stoichiometries.

    Args:
        w: Indexable of three element objects. Each must expose
            ``oxidation_states`` (iterable), ``pauling_eneg`` and ``symbol``.

    Returns:
        tuple: ``(f_r, count, c_2)`` where

        * ``f_r`` is a list of ``[(sym_a, sym_b, sym_c), ox_states, ratio]``
          entries, one per allowed stoichiometry,
        * ``count`` is the number of Pauling tests run,
        * ``c_2`` is the number of oxidation-state sets that passed the
          Pauling test (i.e. the number of neutral-ratio tests run).
    """
    a = w[0]
    b = w[1]
    c = w[2]

    symbols = (a.symbol, b.symbol, c.symbol)
    paulings = [a.pauling_eneg, b.pauling_eneg, c.pauling_eneg]

    # Electronegativity values are used to check if ion formation makes sense,
    # i.e. the element with a lower electronegativity should have a more
    # positive oxidation state.
    count = 0
    results = []
    for i in a.oxidation_states:
        for j in b.oxidation_states:
            for k in c.oxidation_states:
                count += 1
                if screening.pauling_test([i, j, k], paulings):
                    results.append((i, j, k))

    # Oxidation-state sets which passed the Pauling test are put through the
    # neutral-ratio function to find the possible stoichiometries.
    f_r = []
    c_2 = 0
    for ox_states in results:
        c_2 += 1
        # Call neutral_ratios once and reuse the result; index 0 is the
        # "a neutral ratio exists" flag, index 1 the list of ratios.
        outcome = smact.neutral_ratios(ox_states, threshold=6)
        if outcome[0]:
            for ratio in outcome[1]:
                f_r.append([symbols, ox_states, ratio])

    return f_r, count, c_2




In [13]:
import time

# Start both clocks for the screening stage:
# `start` is the CPU-time reading, `start_2` the wall-clock reading.
start, start_2 = time.process_time(), time.time()

# Running step 3 with multiprocessing
def run_multiprocessing(no_comb, processes=6):
    """Run ``step_3`` over every element combination in a process pool.

    Prints a summary of the screening (test counts and up to the first five
    results) and returns the aggregated results.

    Args:
        no_comb: Sized iterable of three-element combinations; each item is
            passed to ``step_3`` as its single argument.
        processes: Number of worker processes in the pool. Defaults to 6.

    Returns:
        list: All ``[symbols, oxidation_states, stoichiometry]`` entries
        produced by ``step_3``, concatenated in input order.

    Note:
        The summary also reports ``len(no_ge)``, which is read from the
        notebook's global namespace.
    """
    with multiprocessing.Pool(processes) as p:
        # map() applies step_3 to every combination and keeps input order
        results = p.map(step_3, no_comb)

    # Aggregating the results from all processes
    all_f_r = []
    total_count = 0
    total_c_2 = 0
    for res in results:
        all_f_r.extend(res[0])  # Collecting all valid compounds
        total_count += res[1]  # Summing the counts of Pauling tests
        total_c_2 += res[2]    # Summing the count of ratio tests run

    # Print the results after aggregation
    print("Starting with", len(no_ge), "valid elements, we generated", len(no_comb),
          "valid combinations of 3 elements which yielded the following results.")
    print("Number of Pauling tests run:", total_count)
    print("Number of neutral_ratio tests run:", total_c_2)
    print("Number of valid compounds:", len(all_f_r))
    print("Symbol, Oxidation state, stoichiometry")
    # Slice instead of indexing 0..4 so fewer than five results cannot raise
    for entry in all_f_r[:5]:
        print(entry)
    return all_f_r
# Screen every three-element combination
all_f_r = run_multiprocessing(no_comb)

# Stop both clocks for the screening stage.
# NOTE: time.process_time() measures CPU time of the current process only,
# so it does not include work done inside the pool's worker processes.
end, end_2 = time.process_time(), time.time()

print(f"CPU Time: {end - start} seconds")
print(f"Elapsed Time: {end_2 - start_2} seconds")

# Restart both clocks to time the CSV export separately
start, start_2 = time.process_time(), time.time()

import csv
file_path = 'output_3.csv'

# Dump every result to CSV, one result per row (no header row)
with open(file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(all_f_r)

print(f"Data saved to {file_path}")

end, end_2 = time.process_time(), time.time()

print(f"CPU Time: {end - start} seconds")
print(f"Elapsed Time: {end_2 - start_2} seconds")

Starting with 63 valid elements, we generated 39711 valid combinations of 3 elements which yielded the following results.
Number of Pauling tests run: 3546977
Number of neutral_ratio tests run: 2626076
Number of valid compounds: 3062481
Symbol, Oxidation state, stoichiometry
[('Li', 'Be', 'B'), (1, 2, -3), (1, 1, 1)]
[('Li', 'Be', 'B'), (1, 2, -3), (1, 4, 3)]
[('Li', 'Be', 'B'), (1, 2, -3), (2, 5, 4)]
[('Li', 'Be', 'B'), (1, 2, -3), (3, 6, 5)]
[('Li', 'Be', 'B'), (1, 2, -3), (4, 1, 2)]
CPU Time: 7.978283842 seconds
Elapsed Time: 61.78952169418335 seconds
Data saved to output_3.csv
CPU Time: 5.336790414999999 seconds
Elapsed Time: 5.336146354675293 seconds


Step 4: Check that the stoichiometry matches [2,1,6] (in any order)

In [14]:
# Keep only the results whose stoichiometry contains 1, 2 and 6

import time

start, start_2 = time.process_time(), time.time()

check = [1,2,6]

# A result qualifies when every value in `check` appears among its ratios
# (entry[2] is the stoichiometry tuple).
per_list = [
    entry
    for entry in all_f_r
    if all(num in entry[2] for num in check)
]
per = len(per_list)

print("There are",per,"combinations with the 2,6,1 stoichiometry")
print(per_list[:3])

file_path = 'perovskite.csv'

# Save the filtered results: a header row followed by one row per result
with open(file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Element Combination", "Oxidation States", "Stoichiometry"])
    writer.writerows(per_list)

print(f"Data saved to {file_path}")

end, end_2 = time.process_time(), time.time()


print(f"CPU Time: {end - start} seconds")
print(f"Elapsed Time: {end_2 - start_2} seconds")

There are 105100 combinations with the 2,6,1 stoichiometry
[[('Li', 'Be', 'C'), (1, 2, -4), (6, 1, 2)], [('Li', 'Be', 'N'), (1, 2, -4), (6, 1, 2)], [('Li', 'Be', 'Si'), (1, 2, -4), (6, 1, 2)]]
Data saved to perovskite.csv
CPU Time: 2.0523475240000018 seconds
Elapsed Time: 2.0500810146331787 seconds


Step 5: Check that there are 2 cations and 1 anion

In [15]:
# Keep only the combinations with exactly one negative oxidation state
# (one anion); the other two elements are then the cations.

an_check = [
    entry
    for entry in per_list
    # entry[1] holds the oxidation states; count how many are negative
    if sum(1 for state in entry[1] if state < 0) == 1
]

print("There are",len(an_check),"combinations with the 2 cations and 1 anion.")
print(an_check[:3])

file_path = 'Anion_check.csv'

# Save the filtered results: a header row followed by one row per result
with open(file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Element Combination", "Oxidation States", "Stoichiometry"])
    writer.writerows(an_check)

print(f"Data saved to {file_path}")
        

There are 87469 combinations with the 2 cations and 1 anion.
[[('Li', 'Be', 'C'), (1, 2, -4), (6, 1, 2)], [('Li', 'Be', 'N'), (1, 2, -4), (6, 1, 2)], [('Li', 'Be', 'Si'), (1, 2, -4), (6, 1, 2)]]
Data saved to Anion_check.csv


In [16]:
# Keep only the combinations in which the anion has a stoichiometry of 6

import math

# entry[1] holds the oxidation states and entry[2] the stoichiometry, in the
# same element order; an entry is kept once for every position where the
# oxidation state is negative and the matching amount is 6.
an_stoich_check = [
    entry
    for entry in an_check
    for state, amount in zip(entry[1], entry[2])
    if state < 0 and amount == 6
]

print(an_stoich_check[:3])
print(len(an_stoich_check))

file_path = 'Anion and stoich check.csv'

# Save the filtered results: a header row followed by one row per result
with open(file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Element Combination", "Oxidation States", "Stoichiometry"])
    writer.writerows(an_stoich_check)

print(f"Data saved to {file_path}")

[[('Li', 'B', 'Si'), (1, -1, 4), (2, 6, 1)], [('Li', 'B', 'Ti'), (1, -1, 4), (2, 6, 1)], [('Li', 'B', 'V'), (1, -1, 4), (2, 6, 1)]]
80317
Data saved to Anion and stoich check.csv


In [17]:
# Check if they have a reasonable oxidation state

# Accepted sets of oxidation states (order of the elements does not matter)
ox_1 = [1,4,-1]
ox_2 = [2,2,-1]
ox_3 = [4,4,-2]

# Compare sorted lists so each accepted set is matched as a multiset.
# A plain membership test (`num in states`) would accept any combination that
# merely contains 2 and -1 for ox_2 (or 4 and -2 for ox_3) without requiring
# the repeated value to actually occur twice.
allowed_ox = [sorted(ox) for ox in (ox_1, ox_2, ox_3)]

an_ox_check = [
    entry
    for entry in an_stoich_check
    if sorted(entry[1]) in allowed_ox  # entry[1] holds the oxidation states
]

print("There are",len(an_ox_check),"combinations with reasonable oxidation states.")
print(an_ox_check[:3])

file_path = 'Anion and stoich check 2.csv'

# Save the filtered results: a header row followed by one row per result
with open(file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Element Combination", "Oxidation States", "Stoichiometry"])
    writer.writerows(an_ox_check)

print(f"Data saved to {file_path}")

There are 66709 combinations with reasonable oxidation states.
[[('Li', 'B', 'Si'), (1, -1, 4), (2, 6, 1)], [('Li', 'B', 'Ti'), (1, -1, 4), (2, 6, 1)], [('Li', 'B', 'V'), (1, -1, 4), (2, 6, 1)]]
Data saved to Anion and stoich check 2.csv
