In [3]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# re-imported before each cell executes, so edits to project code (e.g. the
# `cofactor` package imported below) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from cofactor.model import LatticePredictor, ALL_FEATURES, OUTPUTS
from cofactor.features import gen_non_linear
from cofactor.calculator import CofactorCalculator as CC
from cofactor.calculator import Lattice

#### Data

In [11]:
# Read the two raw lattice tables and keep only the rows whose `Zr` column
# is strictly positive (rows with Zr <= 0 or missing Zr are dropped).
tetragonal = pd.read_csv('data/tetragonal.csv').loc[lambda frame: frame['Zr'] > 0]

monoclinic = pd.read_csv('data/monoclinic.csv').loc[lambda frame: frame['Zr'] > 0]

In [12]:
# Run gen_non_linear on both tables with the same arguments (2, ALL_FEATURES)
# and keep the returned frame in place of the original one.
# NOTE(review): the `2` is presumably the polynomial degree, and `nonlinear`
# presumably the list of generated feature names (it is later appended to
# ALL_FEATURES when building the model) — confirm in cofactor.features.
# `nonlinear` is overwritten by the second call; both calls use identical
# arguments, so the two values are expected to match.
tetragonal, nonlinear = gen_non_linear(tetragonal, 2, ALL_FEATURES)
# Fix: the result was previously bound to the misspelled name `monocinic`,
# leaving `monoclinic` as the un-expanded frame for every later cell.
monoclinic, nonlinear = gen_non_linear(monoclinic, 2, ALL_FEATURES)

In [13]:
# Prefix the lattice-parameter columns with their phase so that the two tables
# no longer share the column names 'a' and 'c' when they are combined below.
tetragonal = tetragonal.rename({'a': 'tetr_a', 'c': 'tetr_c'}, axis='columns')
monoclinic = monoclinic.rename(
    {'a': 'mono_a', 'b': 'mono_b', 'c': 'mono_c', 'beta': 'mono_beta'},
    axis='columns',
)

In [14]:
# Stack the two phase tables into one frame with a fresh 0..n-1 index.
# Columns that exist in only one table are NaN for the other table's rows;
# sort=False keeps the column order as encountered instead of sorting it.
df = pd.concat(
    [tetragonal, monoclinic],
    ignore_index=True,
    sort=False,
)

#### Model

In [15]:
# The model's inputs are the base features followed by the generated
# nonlinear ones.
model_features = ALL_FEATURES + nonlinear
model = LatticePredictor.from_features(model_features)

In [16]:
# Fit the predictor on the combined tetragonal + monoclinic table.
# NOTE(review): every row has NaN in the other phase's output columns, so
# fit_df presumably fits each output only on rows where it is present —
# confirm in LatticePredictor.fit_df.
model.fit_df(df)

In [17]:
# Persist the fitted model to a path relative to the notebook's working directory.
# NOTE(review): the .pkl suffix suggests pickle serialization — confirm in
# LatticePredictor.save; if so, only load this file from trusted sources.
model.save('models/nonlinear_model.pkl')

### Stats on the model

In [18]:
# Print one summary line per model output. The statistics are computed on the
# same frame the model was fitted on, so they describe in-sample fit only.
row_template = '{:>10}   RMSE = {:.2e}  R$^2$ = {:.2f}'
for output in model.outputs:
    # Restrict to the rows where this output is present (the other phase's
    # rows are NaN for it after the concat).
    has_output = df[output].notna()
    y = df.loc[has_output, output]
    X = df.loc[has_output, model.features]
    # NOTE(review): get_stats presumably returns (RMSE, R^2) in that order,
    # matching the labels in the template — confirm in LatticePredictor.
    stats = model.get_stats(X, y, output)
    print(row_template.format(output, *stats))

    tetr_a   RMSE = 1.82e-02  R$^2$ = 0.67
    tetr_c   RMSE = 2.67e-02  R$^2$ = 0.78
    mono_a   RMSE = 1.12e-02  R$^2$ = 0.90
    mono_b   RMSE = 3.16e-03  R$^2$ = 0.98
    mono_c   RMSE = 1.80e-02  R$^2$ = 0.74
 mono_beta   RMSE = 1.40e-01  R$^2$ = 0.43


### Save the combined data

In [94]:
# Write the combined table to CSV, relative to the working directory.
# With pandas' default index=True the row index is written as an unnamed
# first column, so readers should load it with index_col=0.
df.to_csv('data/lattices.csv')