In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell runs, so edits to imported code (e.g. the
# `cofactor` package imported below) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from cofactor.model import LatticePredictor, FEATURES, OUTPUTS
from cofactor.calculator import CofactorCalculator as CC
from cofactor.calculator import Lattice

In [4]:
# Create the predictor through the class's `from_features` factory; it is
# fitted in a later cell via `model.fit_df`.
# NOTE(review): presumably this picks up the default FEATURES / OUTPUTS from
# cofactor.model — confirm against the factory's signature.
model = LatticePredictor.from_features()

In [8]:
# Load each phase's table and keep only the rows whose `Zr` column is > 0.
tetragonal = (
    pd.read_csv('data/tetragonal.csv')
    .loc[lambda frame: frame.Zr > 0]
)

monoclinic = (
    pd.read_csv('data/monoclinic.csv')
    .loc[lambda frame: frame.Zr > 0]
)

In [21]:
# Prefix the a / b / c / beta columns with their phase so the two tables can
# later share a single frame without column-name collisions.
tetragonal = tetragonal.rename(
    {'a': 'tetr_a', 'c': 'tetr_c'},
    axis='columns',
)
monoclinic = monoclinic.rename(
    {
        'a': 'mono_a',
        'b': 'mono_b',
        'c': 'mono_c',
        'beta': 'mono_beta',
    },
    axis='columns',
)

In [49]:
# Stack both phases into one frame with a fresh 0..n-1 index. The renamed
# tetr_* / mono_* columns do not overlap, so each row is NaN in the other
# phase's columns; sort=False keeps the columns in first-seen order.
df = pd.concat(
    (tetragonal, monoclinic),
    ignore_index=True,
    sort=False,
)

In [54]:
# Fit the predictor on the combined tetragonal + monoclinic frame.
# NOTE(review): fit_df is assumed to fit each output separately and to ignore
# rows where that output is NaN (each phase only fills its own columns) —
# confirm in cofactor.model.
model.fit_df(df)

In [57]:
# Persist the fitted model; the same path is read back with
# LatticePredictor.from_file in the "Final equation" section.
model.save('models/model.pkl')

### Stats on the model

In [81]:
# Print one line of fit statistics per model output.
# NOTE(review): `df` is the same frame that was passed to model.fit_df, so
# these look like in-sample metrics rather than held-out ones — confirm.
row_template = '{:>10}   RMSE = {:.2e}  R$^2$ = {:.2f}'
for output in model.outputs:
    # Each output is only populated for some rows; drop rows with a missing
    # target and select the feature rows with the same index labels.
    y = df[output].dropna()
    X = df.loc[y.index, model.features]
    stats = model.get_stats(X, y, output)
    print(row_template.format(output, *stats))

    tetr_a   RMSE = 4.80e-03  R$^2$ = 0.98
    tetr_c   RMSE = 5.72e-03  R$^2$ = 0.99
    mono_a   RMSE = 3.69e-03  R$^2$ = 0.99
    mono_b   RMSE = 4.70e-03  R$^2$ = 0.95
    mono_c   RMSE = 4.28e-03  R$^2$ = 0.99
 mono_beta   RMSE = 5.79e-02  R$^2$ = 0.93


### Export the combined dataset

In [94]:
# Write the combined tetragonal + monoclinic frame to CSV.
# NOTE(review): to_csv writes the index by default, and this frame was built
# with ignore_index=True, so the file gets an extra unnamed 0..n-1 column.
# Pass index=False if no downstream reader relies on that column.
df.to_csv('data/lattices.csv')

### Final equation

In [4]:
# Reload the model saved earlier in this notebook instead of refitting it.
# NOTE(review): the .pkl extension suggests a pickle file — only load model
# files that come from a trusted source.
model = LatticePredictor.from_file('models/model.pkl')



In [17]:
# Print each fitted output as a linear combination of the model features:
#   <output> = <coef> <feature> + <coef> <feature> + ...
# NOTE(review): only `coef_` is read here; if the regressors also carry an
# intercept term it is not part of the printed equation — confirm.
for out, r in model._regressors.items():
    terms = ['%.3e %s' % pair for pair in zip(r.coef_, model.features)]
    equation = ' + '.join(terms)
    print('%s = %s' % (out, equation))

tetr_a = 5.082e-05 T + -2.359e-02 en_p + 1.041e-01 ea + -4.797e-03 valence + 1.045e+00 rad_slater + 1.043e-03 rad_clementi
tetr_c = 6.613e-05 T + 8.965e-01 en_p + -1.048e-01 ea + 6.072e-01 valence + 1.660e+00 rad_slater + 7.793e-02 rad_clementi
mono_a = 3.903e-05 T + -2.353e-02 en_p + 1.152e-01 ea + -6.787e-02 valence + 1.643e+00 rad_slater + 6.993e-03 rad_clementi
mono_b = 8.146e-06 T + -2.440e-01 en_p + 2.157e-01 ea + 1.074e-01 valence + 1.033e+00 rad_slater + 5.552e-01 rad_clementi
mono_c = 6.348e-05 T + -1.041e-01 en_p + 7.283e-02 ea + 6.085e-02 valence + 8.917e-01 rad_slater + -5.223e-01 rad_clementi
mono_beta = -4.632e-04 T + -3.007e+00 en_p + 5.013e-01 ea + 2.032e+00 valence + -2.137e+00 rad_slater + 6.040e+00 rad_clementi


In [19]:
# Collect the regression coefficients into a table: one row per output,
# one column per feature.
# NOTE: this rebinds `df`, which earlier in the notebook names the combined
# training frame.
df = pd.DataFrame(
    data=[r.coef_ for r in model._regressors.values()],
    index=model._regressors.keys(),
    columns=model.features,
)

In [20]:
# Display the coefficient table (rows: outputs, columns: features).
df

Unnamed: 0,T,en_p,ea,valence,rad_slater,rad_clementi
tetr_a,5.1e-05,-0.023591,0.104057,-0.004797,1.045149,0.001043
tetr_c,6.6e-05,0.89646,-0.1048,0.607161,1.660068,0.077934
mono_a,3.9e-05,-0.023528,0.115172,-0.067867,1.643379,0.006993
mono_b,8e-06,-0.243959,0.215683,0.107374,1.032506,0.555161
mono_c,6.3e-05,-0.104093,0.072826,0.060852,0.891702,-0.522294
mono_beta,-0.000463,-3.007386,0.501284,2.032031,-2.137021,6.039688
