# How to Train a Classifier on Ideal Crystals

This tutorial shows how to create crystals using the `ase` package and then transform the neighbourhood information of each atom into a feature vector. These feature vectors can then be used to classify atoms with the supervised learning algorithms implemented in `sklearn`.

In [2]:
%matplotlib notebook
import numpy as np
from scipy import interpolate, spatial
import itertools, warnings
from ase.lattice.cubic import FaceCenteredCubic, SimpleCubicFactory
from ase.spacegroup import crystal
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import linear_model, utils, preprocessing
import sklearn
import atomtoolbox as atb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

## 1. Generating Structure Database

In [3]:
# Reference cell 1: a single unit cell from the atomtoolbox L12 factory,
# aligned with the cubic axes and periodic in all three directions.
# NOTE(review): presumably the L1_2-ordered gamma-prime (Ni3Al-type) phase -
# confirm against atb.L12Factory.
a0 = 3.52
L12 = atb.L12Factory()
atoms_gamma_prime = L12(
    directions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
    size=(1, 1, 1),
    symbol=["Al", "Ni"],
    pbc=(1, 1, 1),
    latticeconstant=a0,
)

# Reference cells 2 and 3: pure Al with one atom at the origin of a cubic
# cell of edge `a`. Space group 225 (Fm-3m) yields the fcc arrangement and
# space group 229 (Im-3m) the bcc arrangement.
a = 4.05
atoms_fccAl = crystal('Al', [(0, 0, 0)], spacegroup=225,
                      cellpar=[a] * 3 + [90] * 3)
atoms_bccAl = crystal('Al', [(0, 0, 0)], spacegroup=229,
                      cellpar=[a] * 3 + [90] * 3)

## 2. Setting up Design Matrices

In [4]:
def all_filter(s, s_ref):
    """Return a boolean mask that selects every entry of ``s``.

    Parameters
    ----------
    s : numpy.ndarray
        Array whose first axis is masked (presumably the species of the
        neighbouring atoms - confirm against atomtoolbox).
    s_ref : object
        Reference value; accepted for signature compatibility with the
        other filters and otherwise ignored.

    Returns
    -------
    numpy.ndarray
        1-D array of ``True`` with length ``s.shape[0]``. The dtype is
        always ``bool``, including for empty input, so the result can be
        used as a mask in every case.
    """
    return np.ones(s.shape[0], dtype=bool)


def like_filter(s, s_ref):
    """Return the element-wise mask of entries of ``s`` equal to ``s_ref``."""
    return s == s_ref


def unlike_filter(s, s_ref):
    """Return the element-wise mask of entries of ``s`` differing from ``s_ref``."""
    return s != s_ref

# Angle histograms: one design matrix per reference structure, computed with
# identical feature settings and a neighbour cutoff of 4.
# NOTE(review): "k" is presumably the number of histogram bins over "_range"
# (the recorded run yields 20 columns) - confirm against atomtoolbox.
tbaf_params = {
    "k": 20,
    "_range": (0, np.pi),
    "normed": True,
    "element_filter": all_filter,
}
Phi_gamma_prime_angles, Phi_fccAl_angles, Phi_bccAl_angles = [
    atb.get_crystal_design_matrix(
        atoms=structure,
        r_cut=4.,
        features_class=atb.ThreeBodyAngleFeatures,
        params_features=tbaf_params,
    )
    for structure in (atoms_gamma_prime, atoms_fccAl, atoms_bccAl)
]
# Stack the per-structure rows in the order gamma prime, fcc Al, bcc Al.
Phi_angles = np.vstack(
    [Phi_gamma_prime_angles, Phi_fccAl_angles, Phi_bccAl_angles]
)

# Bond order parameters: same structures and cutoff, evaluated for the
# entries listed under "k".
bopf_params = {
    "k": [4, 6, 8],
    "element_filter": all_filter,
}
Phi_gamma_prime_bop, Phi_fccAl_bop, Phi_bccAl_bop = [
    atb.get_crystal_design_matrix(
        atoms=structure,
        r_cut=4.,
        features_class=atb.BondOrderParameterFeatures,
        params_features=bopf_params,
    )
    for structure in (atoms_gamma_prime, atoms_fccAl, atoms_bccAl)
]
Phi_bop = np.vstack([Phi_gamma_prime_bop, Phi_fccAl_bop, Phi_bccAl_bop])

print("Phis: bop = {} angles = {}".format(Phi_bop.shape, Phi_angles.shape))

# Full design matrix: angle features first, bond order features appended
# column-wise.
Phi = np.hstack([Phi_angles, Phi_bop])
print("Phi: all = {}".format(Phi.shape))

Phis: bop = (10, 3) angles = (10, 20)
Phi: all = (10, 23)


## 3. Generating Labels

In [5]:
# Integer class label for every row of Phi. Rows are ordered gamma prime,
# fcc Al, bcc Al, so each label is repeated once per atom of its structure.
label_map = {"gp": 0, "fccAl": 1, "bccAl": 2}
t = np.repeat(
    np.array(
        [label_map["gp"], label_map["fccAl"], label_map["bccAl"]],
        dtype=int,
    ),
    [
        Phi_gamma_prime_angles.shape[0],
        Phi_fccAl_angles.shape[0],
        Phi_bccAl_angles.shape[0],
    ],
)
print(t, Phi.shape)

[0 0 0 0 1 1 1 1 2 2] (10, 23)


## 4. Training Classifiers

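For an example that compares various classifiers, including standardization and train/test splits, see: http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html

Note that the cell below fits and scores every classifier on the same data, so the reported scores are training accuracies rather than estimates of generalization performance.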

In [6]:
# Candidate classifiers keyed by display name.
classifiers = {
    "Nearest Neighbors": KNeighborsClassifier(3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(max_depth=5, n_estimators=10,
                                            max_features=1),
    "Neural Net": MLPClassifier(alpha=1),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
    "QDA": QuadraticDiscriminantAnalysis(),
}

# Process the classifiers in alphabetical order of their names.
classifier_names = sorted(classifiers)

for clf_name in classifier_names:
    clf = classifiers[clf_name]
    print("\nprocessing: {}".format(clf_name))
    # Fit and score on the same data: the printed value is the accuracy on
    # the training set.
    clf.fit(Phi, t)
    print("score = ", clf.score(Phi, t))


processing: AdaBoost
score =  1.0

processing: Decision Tree
score =  1.0

processing: Gaussian Process
score =  1.0

processing: Linear SVM
score =  1.0

processing: Naive Bayes
score =  1.0

processing: Nearest Neighbors
score =  1.0

processing: Neural Net
score =  1.0

processing: QDA
score =  0.4

processing: RBF SVM
score =  1.0

processing: Random Forest
score =  1.0


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
