In [1]:
import IPython
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sympy
import datetime

# thermodynamic database (TDB) calculations
from pycalphad import Database, equilibrium, variables as v, binplot

# MCMC sampling
import pymc as pm  # MCMC sampling package
import arviz as az # tools for working with the ArviZ data type
import pytensor
import pytensor.tensor as pt
# import theano
# theano.config.exception_verbosity = 'high' # was supposed to print a detailed error description, but it does not help

# NOTE(review): aesara is not referenced in the visible cells - confirm whether this import is still needed
import aesara

import seaborn as sns

# paths to the TDB files
cc10_path = "tdbs/CoCr-01Oik_with_new_functions.tdb"

# Record the library versions this notebook was run with (values seen by the author are in the trailing comments).
print(f"Running on PyMC v{pm.__version__}") # 5.1.2
print(f"Running on NumPy v{np.__version__}") # 1.22.1
print(f"Running on ArviZ v{az.__version__}") # 0.12.1



Running on PyMC v5.1.2
Running on NumPy v1.22.1
Running on ArviZ v0.12.1


# Функции и данные

In [2]:
def from_xarray_to_pandas(xarray_data, phase_str):
    """Extract the Cr content of one phase from an equilibrium result.

    Three members of ``xarray_data`` are read:

    * ``X.sel(component='CR').data[0][0]`` - Cr fractions,
    * ``Phase.data[0][0]`` - phase names,
    * ``T.data`` - temperatures.

    After the two leading ``[0][0]`` selections both arrays are indexed as
    ``[temperature][0][vertex]``; only entry 0 of the middle axis is used.
    (Presumably the dropped axes are N and P and the middle axis is the
    composition condition of a pycalphad ``equilibrium`` result - confirm.)

    Any number of vertex slots is supported (the previous implementation
    assumed exactly three).

    Parameters
    ----------
    xarray_data : object
        Equilibrium result exposing ``X``, ``Phase`` and ``T`` as described
        above.
    phase_str : str
        Phase name to look for, compared for exact equality.

    Returns
    -------
    pandas.DataFrame
        Columns ``phase``, ``cr_conc`` and ``T`` with one row per
        (temperature, vertex) pair whose phase equals ``phase_str``.
        Rows are ordered by temperature position (then by vertex), and the
        index holds the position of the temperature in ``T.data``.  This
        keeps ``cr_conc`` aligned with the temperature axis even when the
        phase occupies different vertex slots at different temperatures.
    """
    columns = ['phase', 'cr_conc', 'T']

    cr_slice = np.asarray(xarray_data.X.sel(component='CR').data[0][0])
    phase_slice = np.asarray(xarray_data.Phase.data[0][0])
    temperatures = np.asarray(xarray_data.T.data)

    n_temps = len(temperatures)
    if n_temps == 0:
        # Nothing was calculated: return an empty frame with the usual columns.
        return pd.DataFrame(columns=columns)

    # Keep the first composition point only -> 2-D grids (temperature, vertex).
    phase_grid = phase_slice[:n_temps, 0, :]
    cr_grid = cr_slice[:n_temps, 0, :]

    # Boolean indexing and np.nonzero both walk the grid in row-major order,
    # i.e. temperature first and vertex second, so the three columns line up.
    is_target = phase_grid == phase_str
    t_index, _vertex_index = np.nonzero(is_target)

    return pd.DataFrame(
        {
            'phase': phase_grid[is_target],
            'cr_conc': cr_grid[is_target].astype(float),
            'T': temperatures[t_index],
        },
        index=t_index,
        columns=columns,
    )

In [3]:
# Load the TDB file at cc10_path and display the symbols it defines.
db10 = Database(cc10_path)
db10.symbols

{'ZERO': Piecewise((0.0, And(T < 6000.0, 298.15 <= T)), (0, True)),
 'UN_ASS': Piecewise((0.0, And(T < 6000.0, 298.15 <= T)), (0, True)),
 'R': Piecewise((8.31451, And(T < 6000.0, 298.15 <= T)), (0, True)),
 'GHSERCO': Piecewise((310.241 + 133.36601*T - 25.0861*T*log(T) + 72527.0*T**(-1.0) - 0.002654739*T**2.0 - 1.7348e-07*T**3.0, And(T < 1768.0, 298.15 <= T)), (-17197.666 + 253.28374*T - 40.5*T*log(T) + 9.3488e+30*T**(-9.0), And(T < 6000.0, 1768.0 <= T)), (0, True)),
 'GLIQCO': Piecewise((15085.037 + GHSERCO - 8.931932*T - 2.19801e-21*T**7.0, And(T < 1768.0, 298.15 <= T)), (-846.61 + 243.599944*T - 40.5*T*log(T), And(T < 6000.0, 1768.0 <= T)), (0, True)),
 'GFCCCO': Piecewise((427.59 + GHSERCO - 0.615248*T, And(T < 6000.0, 298.15 <= T)), (0, True)),
 'GBCCCO': Piecewise((2938.0 + GHSERCO - 0.7138*T, And(T < 6000.0, 298.15 <= T)), (0, True)),
 'GHSERCR': Piecewise((-8856.94 + 157.48*T - 26.908*T*log(T) + 139250.0*T**(-1.0) + 0.00189435*T**2.0 - 1.47721e-06*T**3.0, And(T < 2180.0, 298.1

In [4]:
# Read the two experimental spreadsheets, stack them into one frame, round the
# numeric columns and keep the rows labelled 'sigma_old'.
df_sigma_fcc = pd.read_excel('emp_data/sigma_fcc_allibert.xls')
# df_sigma_bcc = pd.read_excel('emp_data/sigma_bcc_allibert.xls')
df_sigma_hcp = pd.read_excel('emp_data/sigma_hcp_allibert.xls')

# From here on df_sigma_fcc holds the rows of both spreadsheets.
df_sigma_fcc = pd.concat([df_sigma_fcc, df_sigma_hcp])
df_sigma_fcc = df_sigma_fcc.assign(
    T=df_sigma_fcc['T'].round(2),
    cr_conc=df_sigma_fcc['cr_conc'].round(6),
)

# reset_index() keeps the pre-filter row labels in an 'index' column.
is_sigma_old = df_sigma_fcc['phase'] == 'sigma_old'
df_sigma_fcc_sigma_old = df_sigma_fcc.loc[is_sigma_old].reset_index()
df_sigma_fcc_sigma_old

Unnamed: 0,index,cr_conc,T,phase
0,6,0.532019,1321.35,sigma_old
1,7,0.542469,1226.97,sigma_old
2,8,0.547749,1471.91,sigma_old
3,9,0.55405,1370.79,sigma_old
4,10,0.556156,1269.66,sigma_old
5,11,0.559283,1422.47,sigma_old
6,7,0.541762,928.09,sigma_old
7,8,0.545313,1020.22,sigma_old
8,9,0.55196,1069.66,sigma_old
9,10,0.558231,973.03,sigma_old


In [5]:
# Inputs of the equilibrium calculation and the observations it is compared with.
db10 = Database(cc10_path)

# phases10 = list(db10.phases.keys())
press = 101325
elements = ['CR', 'CO', 'VA']
el_cnt = 1

# Sort the observations by temperature so that T and y_obs share one order.
df_sigma_fcc_sigma_old = df_sigma_fcc_sigma_old.sort_values('T')

T = df_sigma_fcc_sigma_old['T'].to_numpy()
phase = 'SIGMA_OLD'
# phases = ['FCC_A1', 'HCP_A3', 'SIGMA_OLD']

y_obs = df_sigma_fcc_sigma_old['cr_conc'].to_numpy()
# The pressure condition reuses `press` instead of repeating the literal.
conditions = {v.X('CR'): 0.5, v.P: press, v.T: T, v.N: el_cnt}
parameters_list = ['SIGMA_OLD_COCRCO_0', 'SIGMA_OLD_COCRCO_1', 'SIGMA_OLD_COCRCR_0', 'SIGMA_OLD_COCRCR_1']

print('T', T)
print('y_obs', y_obs)
# print('phases', phases)
print('phase', phase)

T [ 928.09  973.03 1020.22 1069.66 1121.35 1168.54 1217.98 1226.97 1269.66
 1321.35 1370.79 1422.47 1471.91]
y_obs [0.541762 0.558231 0.545313 0.55196  0.559991 0.565255 0.560713 0.542469
 0.556156 0.532019 0.55405  0.559283 0.547749]
phase SIGMA_OLD


# Class

In [6]:
# PyTensor Op that evaluates the equilibrium model for a vector of parameters
class LogLike(pt.Op):
    """Black-box PyTensor Op returning the equilibrium Cr content of one phase.

    Despite its name the Op does not return a log-likelihood: ``perform``
    returns the ``cr_conc`` values that ``from_xarray_to_pandas`` extracts
    from an ``equilibrium`` call, so the result can serve as the mean of an
    observed distribution.

    Parameters
    ----------
    db : object
        Database passed to ``equilibrium``; its ``phases`` mapping supplies
        the list of phases used in the calculation.
    conditions : dict
        Conditions passed unchanged to ``equilibrium``.
    phase : str
        Name of the phase whose rows are extracted from the result.
    elements : list
        Components passed to ``equilibrium``.
    y_obs : array-like
        Observed values; stored as ``y_obs_nparray`` but not used by
        ``perform``.
    parameter_names : sequence of str, optional
        Names of the database parameters overridden by the input vector, in
        the order of its entries.  Defaults to the four ``SIGMA_OLD_*``
        names in ``default_parameter_names``.

    Attributes
    ----------
    y_eqs : list
        Every array returned by ``perform`` on this instance, in call order.
    likelihoods : list
        Created empty; nothing in this class fills it.
    """

    # declared input and output types
    itypes = [pt.dvector]  # vector of parameter values
    otypes = [pt.dvector]  # float64 vector of equilibrium values, matching what perform stores

    # parameter names used when the caller does not supply any
    default_parameter_names = (
        'SIGMA_OLD_COCRCO_0',
        'SIGMA_OLD_COCRCO_1',
        'SIGMA_OLD_COCRCR_0',
        'SIGMA_OLD_COCRCR_1',
    )

    def __init__(self, db, conditions, phase, elements, y_obs, parameter_names=None):
        self.db_tdb = db
        self.conditions_dict = conditions
        # The phase list depends only on the database, so build it once here.
        self.phases_list = list(db.phases.keys())
        self.phase_str = phase
        self.elements_list = elements
        self.y_obs_nparray = y_obs
        if parameter_names is None:
            parameter_names = self.default_parameter_names
        self.parameter_names = tuple(parameter_names)

        self.y_eqs = []
        self.likelihoods = []

    def perform(self, node, inputs, outputs):
        """Run the equilibrium calculation for one parameter vector.

        ``inputs`` holds a single vector with one value per entry of
        ``parameter_names``.  The extracted ``cr_conc`` values are appended
        to ``y_eqs`` and written to ``outputs[0][0]`` as a float64 array.

        Raises
        ------
        ValueError
            If the vector length differs from the number of parameter names.
        """
        (theta,) = inputs

        if len(theta) != len(self.parameter_names):
            raise ValueError(
                f"expected {len(self.parameter_names)} parameter values, got {len(theta)}"
            )
        new_parameters = dict(zip(self.parameter_names, theta))

        eq_result = equilibrium(
            self.db_tdb,
            self.elements_list,
            self.phases_list,
            self.conditions_dict,
            parameters=new_parameters,
        )
        # TODO: the 'cr_conc' column name is hard-coded and should be abstracted.
        y_eq = from_xarray_to_pandas(eq_result, self.phase_str)['cr_conc'].astype(float).to_numpy()

        self.y_eqs.append(y_eq)

        outputs[0][0] = y_eq
                             
                             

In [None]:
# PyTensor Op that evaluates the equilibrium model for a vector of parameters
class LogLike(pt.Op):
    """Black-box PyTensor Op returning the equilibrium Cr content of one phase.

    Despite its name the Op does not return a log-likelihood: ``perform``
    returns the ``cr_conc`` values that ``from_xarray_to_pandas`` extracts
    from an ``equilibrium`` call, so the result can serve as the mean of an
    observed distribution.

    Parameters
    ----------
    db : object
        Database passed to ``equilibrium``; its ``phases`` mapping supplies
        the list of phases used in the calculation.
    conditions : dict
        Conditions passed unchanged to ``equilibrium``.
    phase : str
        Name of the phase whose rows are extracted from the result.
    elements : list
        Components passed to ``equilibrium``.
    y_obs : array-like
        Observed values; stored as ``y_obs_nparray`` but not used by
        ``perform``.
    parameter_names : sequence of str, optional
        Names of the database parameters overridden by the input vector, in
        the order of its entries.  Defaults to the four ``SIGMA_OLD_*``
        names in ``default_parameter_names``.

    Attributes
    ----------
    y_eqs : list
        Every array returned by ``perform`` on this instance, in call order.
    likelihoods : list
        Created empty; nothing in this class fills it.
    """

    # declared input and output types
    itypes = [pt.dvector]  # vector of parameter values
    otypes = [pt.dvector]  # float64 vector of equilibrium values, matching what perform stores

    # parameter names used when the caller does not supply any
    default_parameter_names = (
        'SIGMA_OLD_COCRCO_0',
        'SIGMA_OLD_COCRCO_1',
        'SIGMA_OLD_COCRCR_0',
        'SIGMA_OLD_COCRCR_1',
    )

    def __init__(self, db, conditions, phase, elements, y_obs, parameter_names=None):
        self.db_tdb = db
        self.conditions_dict = conditions
        # The phase list depends only on the database, so build it once here.
        self.phases_list = list(db.phases.keys())
        self.phase_str = phase
        self.elements_list = elements
        self.y_obs_nparray = y_obs
        if parameter_names is None:
            parameter_names = self.default_parameter_names
        self.parameter_names = tuple(parameter_names)

        self.y_eqs = []
        self.likelihoods = []

    def perform(self, node, inputs, outputs):
        """Run the equilibrium calculation for one parameter vector.

        ``inputs`` holds a single vector with one value per entry of
        ``parameter_names``.  The extracted ``cr_conc`` values are appended
        to ``y_eqs`` and written to ``outputs[0][0]`` as a float64 array.

        Raises
        ------
        ValueError
            If the vector length differs from the number of parameter names.
        """
        (theta,) = inputs

        if len(theta) != len(self.parameter_names):
            raise ValueError(
                f"expected {len(self.parameter_names)} parameter values, got {len(theta)}"
            )
        new_parameters = dict(zip(self.parameter_names, theta))

        eq_result = equilibrium(
            self.db_tdb,
            self.elements_list,
            self.phases_list,
            self.conditions_dict,
            parameters=new_parameters,
        )
        # TODO: the 'cr_conc' column name is hard-coded and should be abstracted.
        y_eq = from_xarray_to_pandas(eq_result, self.phase_str)['cr_conc'].astype(float).to_numpy()

        self.y_eqs.append(y_eq)

        outputs[0][0] = y_eq
                             
                             

# Model

In [7]:
# Ask PyTensor for verbose exception messages while the model is being debugged.
pytensor.config.exception_verbosity = 'high' 

test_model = pm.Model()

# Op instance that turns a parameter vector into equilibrium values; it is
# kept in a module-level name so that its recorded outputs can be inspected later.
logl = LogLike(db10, conditions, phase, elements, y_obs)

with test_model:
    # Normal priors with sigma=1 on the four SIGMA_OLD parameters
    # (NOTE(review): the means are presumably the values currently in the TDB - confirm)
    COCRCO_0 = pm.Normal("SIGMA_OLD_COCRCO_0", mu=-103863.0, sigma=1)
    COCRCO_1 = pm.Normal("SIGMA_OLD_COCRCO_1", mu=47.47, sigma=1)
    COCRCR_0 = pm.Normal("SIGMA_OLD_COCRCR_0", mu=-248108.8, sigma=1)
    COCRCR_1 = pm.Normal("SIGMA_OLD_COCRCR_1", mu=79.12, sigma=1) 
    


    # Pack the four random variables into one tensor, the single input the Op expects.
    # theta = [COCRCO_0, COCRCO_1, COCRCR_0, COCRCR_1]
    theta = pt.as_tensor_variable([COCRCO_0, COCRCO_1, COCRCR_0, COCRCR_1])
    # theta_2 = np.array([COCRCO_0, COCRCO_1, COCRCR_0, COCRCR_1])
    
    # Earlier attempts at wiring the likelihood, kept for reference:
    # likelihood = pm.DensityDist("likelihood", logp = logl_2, observed=y_obs)
    
    # likelihood_2 = pm.Potential("likelihood", logl(theta))
    # pm.HalfNormal("obs", logl(theta), observed=y_obs)#, 
    # The Op output is stored under "obs" and used as the mean of the observed Normal.
    obs = pm.Deterministic("obs", logl(theta))
    pm.Normal("y_obs", mu=obs, sigma = 0.000001, observed=y_obs)
    
    # Draw 5 prior-predictive samples.
    idata = pm.sample_prior_predictive(samples=5)

    
    # trace = pm.sample(draws=2000, tune=500, idata_kwargs={"log_likelihood": True}, progressbar=True)

Sampling: [SIGMA_OLD_COCRCO_0, SIGMA_OLD_COCRCO_1, SIGMA_OLD_COCRCR_0, SIGMA_OLD_COCRCR_1, y_obs]


In [8]:
# Display the result of pm.sample_prior_predictive.
idata

In [10]:
# One prior-predictive y_obs vector: first index 0, second index 4
# (presumably chain 0 and the last of the 5 requested draws - confirm).
idata.prior_predictive['y_obs'][0][4]

In [11]:
# Observed values stored in idata under 'y_obs', for comparison with the draw above.
idata.observed_data['y_obs']

In [12]:
# Very short run (5 draws after 5 tuning steps) to check that sampling starts at all.
with test_model:
    trace = pm.sample(5, tune=5)

Only 5 samples in chain.
Ambiguities exist in dispatched function _unify

The following signatures may result in ambiguous behavior:
	[object, ConstrainedVar, Mapping], [ConstrainedVar, object, Mapping]
	[object, ConstrainedVar, Mapping], [ConstrainedVar, object, Mapping]
	[object, ConstrainedVar, Mapping], [ConstrainedVar, Var, Mapping]
	[ConstrainedVar, Var, Mapping], [object, ConstrainedVar, Mapping]


Consider making the following additions:

@dispatch(ConstrainedVar, ConstrainedVar, Mapping)
def _unify(...)

@dispatch(ConstrainedVar, ConstrainedVar, Mapping)
def _unify(...)

@dispatch(ConstrainedVar, ConstrainedVar, Mapping)
def _unify(...)

@dispatch(ConstrainedVar, ConstrainedVar, Mapping)
def _unify(...)
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Slice: [SIGMA_OLD_COCRCO_0]
>Slice: [SIGMA_OLD_COCRCO_1]
>Slice: [SIGMA_OLD_COCRCR_0]
>Slice: [SIGMA_OLD_COCRCR_1]


# Проверки

In [76]:
# Arrays recorded by LogLike.perform on the logl instance, one per call.
t = logl.y_eqs
t

[array([0.53605749, 0.53611805, 0.53641   , 0.53695533, 0.5377801 ,
        0.53875879, 0.54001898, 0.54027461, 0.54160388, 0.54783302,
        0.55441358, 0.56106662, 0.56725844]),
 array([0.53622697, 0.53632502, 0.53665979, 0.53725434, 0.53813613,
        0.53917273, 0.54050084, 0.54076973, 0.54216644, 0.54809864,
        0.55473688, 0.56145266, 0.56770784]),
 array([0.54013074, 0.54036779, 0.54085521, 0.54161893, 0.54269032,
        0.5439143 , 0.54545717, 0.54576752, 0.54737303, 0.5520355 ,
        0.55869886, 0.56545039, 0.57175084]),
 array([0.53294489, 0.53290845, 0.53309496, 0.53352473, 0.53422155,
        0.53507537, 0.53619435, 0.53642296, 0.53761723, 0.54473935,
        0.55132233, 0.55797181, 0.5641534 ]),
 array([0.53478863, 0.53489449, 0.53524115, 0.53585243, 0.53675683,
        0.53781919, 0.5391803 , 0.53945593, 0.54088814, 0.54682693,
        0.55356242, 0.5603805 , 0.56673526]),
 array([0.5342753 , 0.53431173, 0.53457944, 0.53510035, 0.53590041,
        0.53685705, 0.

In [36]:
# Number of arrays in t.
len(t)

50