### Packages

In [1]:
%pip install numpy 
%pip install pandas 
%pip install pycalphad 

In [None]:
import numpy as np
import pandas as pd
import itertools as itr
from pycalphad import Database, equilibrium
from pycalphad import variables as v

# Path to the CALPHAD thermodynamic database file goes here; the empty string
# is a placeholder and must be replaced before running the notebook.
db = Database('')

### Metals, phases and elements

In [2]:
# Element symbols from which the binary / ternary systems are combined.
metals = tuple(
    'LI BE NA MG AL K CA SC TI V CR MN FE '
    'CO NI CU ZN GA Y ZR NB MO TC '
    'RH PD AG CD IN SN BA LA CE PR ND PM SM EU '
    'GD TB DY HO ER HF TA W RE OS IR '
    'PT AU HG TL PB BI C SI'.split()
)
# Every phase name and every element known to the loaded database.
main_phases = [*db.phases.keys()]
main_elements = [*db.elements]

### Data processing functions

In [5]:
def binary_gb_en(t_start, t_step, w_step, main_eq, main_met, conc_met, w_start=0):
    '''
    Results processing function for binary system

    Flattens the temperature x concentration grid stored in ``main_eq`` into
    one dataframe row per grid point.

    t_start : initial temperature
    t_step : temperature step
    w_step : concentration step
    main_eq : the results of the calculation of the Gibbs energy by conditions and elements (pycalphad)
    main_met : main element
    conc_met : concentration element
    w_start : first value of the concentration grid (default 0); the
              concentration written for column ``k`` is ``w_start + k * w_step``,
              so the label matches a grid that begins at ``w_start``

    Returns the result as a dataframe (pandas) with the columns
    't', 'materials', 'conc', 'G', 'NP', 'Phases'
    '''
    columns = ['t', 'materials', 'conc', 'G', 'NP', 'Phases']

    # Take the first entry along the two leading axes (presumably the
    # single-valued N and P conditions), leaving temperature x concentration.
    gm_grid = main_eq.GM[0][0]
    np_grid = main_eq.NP[0][0]
    phase_grid = main_eq.Phase[0][0]

    # Collect plain rows and build the frame once; growing a DataFrame row by
    # row with .loc re-allocates on every insert.
    rows = []
    for id_t, item in enumerate(gm_grid):
        # start + index * step reproduces the regular grid without the float
        # drift of repeated addition.
        t_temp = t_start + id_t * t_step
        for ind_g, value_g in enumerate(item.values):
            rows.append([
                t_temp,
                (main_met, conc_met),
                [w_start + w_step * ind_g],
                value_g,
                np_grid[id_t][ind_g].values.tolist(),
                phase_grid[id_t][ind_g].values.tolist(),
            ])
    return pd.DataFrame(rows, columns=columns)

def ternary_gb_en(t_start, t_step, main_eq, main_met, conc_met_f, conc_met_s, w_conc_f, w_conc_s):
    '''
    Results processing function for ternary system

    Produces one dataframe row per temperature for a single, fixed pair of
    concentrations.

    t_start : initial temperature
    t_step : temperature step
    main_eq : the results of the calculation of the Gibbs energy by conditions and elements (pycalphad)
    main_met : main element
    conc_met_f : first element
    conc_met_s : second element
    w_conc_f : concentration of the first element
    w_conc_s : concentration of the second element

    Returns the result as a dataframe (pandas) with the columns
    't', 'materials', 'conc', 'G', 'NP', 'Phases'
    '''
    columns = ['t', 'materials', 'conc', 'G', 'NP', 'Phases']

    # Take the first entry along the two leading axes (presumably the
    # single-valued N and P conditions); the next axis is iterated as temperature.
    gm_grid = main_eq.GM[0][0]
    np_grid = main_eq.NP[0][0]
    phase_grid = main_eq.Phase[0][0]

    # Collect plain rows and build the frame once; growing a DataFrame row by
    # row with .loc re-allocates on every insert.
    rows = []
    for id_t, item in enumerate(gm_grid):
        rows.append([
            # start + index * step avoids the float drift of repeated addition
            t_start + id_t * t_step,
            (main_met, conc_met_f, conc_met_s),
            [w_conc_f, w_conc_s],
            # both concentration axes hold a single point, hence [0][0]
            item[0][0].values.tolist(),
            np_grid[id_t][0][0].values.tolist(),
            phase_grid[id_t][0][0].values.tolist(),
        ])
    return pd.DataFrame(rows, columns=columns)

### Training data

In [None]:
# pressure
P = 101325

# mole
N = 1

# elements concentration
w_step = 0.1
w_start = 0
w_end = 1

# temperature
t_step = 40
t_start = 298
t_end = 3000

# binary - 2, ternary - 3
num_elements = 3

# Gibbs energy and phase data generation for binary systems
if num_elements == 2:
    combinations = itr.combinations(metals, 2)
    # islice(..., 0, None) walks every pair; NOTE(review): presumably the start
    # index is edited by hand to resume after the pair stored in last_mets_binary.txt.
    for i, j in itr.islice(combinations, 0, None):
        elem = [i, j]
        try:
            cond = {v.X(j):(w_start, w_end, w_step), v.T:(t_start, t_end, t_step), v.P:P, v.N: N}
            main_eq = equilibrium(db, elem, main_phases, cond, output="GM")
            df_temp = binary_gb_en(t_start, t_step, w_step, main_eq, i, j)
            df_temp.to_csv('binary.csv', mode='a', index=False, header=False)
        except Exception:
            # Best effort: note the failing system and continue with the next
            # one. Exception (not a bare except) lets Ctrl-C stop the run.
            with open('gb_errors_binary.txt', 'a') as errors:
                errors.write(str(elem) + '\n')
        # Remember the last system that was attempted.
        with open('last_mets_binary.txt', 'w') as last:
            last.write(str(elem))

# Gibbs energy and phase data generation for ternary systems
elif num_elements == 3:
    combinations = itr.combinations(metals, 3)
    accuracy = 1
    conc = np.arange(w_start, w_end, w_step)

    # Concentration pairs for the second and third element of the ternary
    # system; the first element takes the remainder (1 - second - third).
    # Both concentrations must be non-zero and leave a non-zero remainder; the
    # small tolerance keeps sums that are 1 up to float error out of the grid.
    # np.round replaces np.round_, which was removed in NumPy 2.0.
    conc_array = np.asarray([
        (np.round(c_second, accuracy), np.round(c_third, accuracy))
        for c_second, c_third in itr.product(conc, conc)
        if c_second != 0 and c_third != 0 and c_second + c_third < 1 - 1e-9
    ])

    # islice(..., 0, None) walks every triple; NOTE(review): presumably the start
    # index is edited by hand to resume after the triple stored in last_mets_ternary.txt.
    for i, j, k in itr.islice(combinations, 0, None):
        elem = [i, j, k]
        try:
            # Gather the per-composition frames and concatenate once; nothing
            # is written if any composition of this system fails.
            frames = []
            for item in conc_array:
                cond = {v.X(j):(item[0]), v.X(k):(item[1]), v.T:(t_start, t_end, t_step), v.P:P, v.N: N}
                main_eq = equilibrium(db, elem, main_phases, cond, output="GM")
                frames.append(ternary_gb_en(t_start, t_step, main_eq, i, j, k, item[0], item[1]))
            if frames:
                pd.concat(frames).to_csv('ternary.csv', mode='a', index=False, header=False)
        except Exception:
            # Best effort: note the failing system and continue with the next
            # one. Exception (not a bare except) lets Ctrl-C stop the run.
            with open('gb_errors_ternary.txt', 'a') as errors:
                errors.write(str(elem) + '\n')
        # Remember the last system that was attempted.
        with open('last_mets_ternary.txt', 'w') as last:
            last.write(str(elem))


### Validation data (randomize)

In [None]:
import random
import traceback

# pressure
P = 101325
# mole
N = 1
# elements concentration
w_step = 0.18
w_start = 0
w_end = 1
# temperature
t_step = 325
t_start = 298.15
t_end = 3000

# random choice elements
pairs = list(itr.combinations(metals, 2))
# Draw 30 zero-based positions into `pairs` (duplicates collapse, so fewer than
# 30 systems may remain). The bound comes from the actual number of pairs, so
# the first pair can be drawn and no index falls outside the list.
combinations_ind = np.unique([random.randrange(len(pairs)) for _ in range(30)])

print(combinations_ind)

# Gibbs energy and phase data generation for binary systems
for count in combinations_ind:
    i, j = pairs[count]
    elem = [i, j]
    try:
        cond = {v.X(j):(w_start, w_end-w_step, w_step), v.T:(t_start, t_end, t_step), v.P:P, v.N: N}
        main_eq = equilibrium(db, elem, main_phases, cond, output="GM")
        df_temp = binary_gb_en(t_start, t_step, w_step, main_eq, i, j)
        df_temp.to_csv('test.csv', mode='a', index=False, header=False)
    except Exception:
        # Best effort: show the traceback, note the failing system and go on.
        # Exception (not a bare except) lets Ctrl-C stop the run.
        print(traceback.format_exc())
        with open('valid_gb_errors.txt', 'a') as errors:
            errors.write(str(elem) + '\n')
    # Remember the last system that was attempted.
    with open('valid_last_mets.txt', 'w') as last:
        last.write(str(elem))

### Misc

In [None]:
# Column layout of the header-less CSV files written by the generation cells.
csv_columns = ['t', 'materials', 'conc', 'G', 'NP', 'Phases']
# NOTE(review): both frames are read from 'test.csv', so every row appears
# twice - confirm whether one of the reads was meant to load a different file.
frames = [pd.read_csv('test.csv', sep=',', names=csv_columns) for _ in range(2)]
data = pd.concat(frames)
data.describe()

In [None]:
# List every three-element system that can be formed from `metals`.
combinations = itr.combinations(metals, 3)
for triple in combinations:
    print(*triple)