In [2]:
import pandas as pd
import numpy as np
from pymatgen.core import Composition

# Turn off pandas' chained-assignment check (the "value set on a copy of a
# slice" warning) by setting the option to None.
pd.set_option('mode.chained_assignment', None)

# Let a displayed dataframe show up to 60 columns
pd.options.display.max_columns = 60

# Matplotlib visualization
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns

# Splitting data into training and testing
from sklearn.model_selection import train_test_split
import warnings

# Ignore every warning from here on. This is a blanket filter, so library
# deprecation notices and similar messages are hidden as well.
warnings.filterwarnings('ignore')

# Global font family and size. Setting the math-text font set here is enough
# for formulas; to change the tick-label font, change the global font below.
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'serif',
    "font.serif": ['Times New Roman'],
    "font.size": 24,  # base font size; adjust as needed
    'axes.unicode_minus': False,  # minus-sign handling, so "-" renders correctly
}
rcParams.update(config)

# Named font sizes shared by the per-element settings below.
large = 22
med = 16
small = 12

# The original literal listed 'axes.titlesize' twice (large, then med); a
# repeated key in a dict literal keeps only the last value, so the effective
# setting was always `med`. The dead first entry is removed here.
params = {'legend.fontsize': med,
          'figure.figsize': (8, 6),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.rcParams['figure.dpi'] = 300  # figure resolution

E:\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
E:\anaconda3\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll


In [3]:
# Read the CSV from the working directory, using its first column as the index.
csv_path = 'spinels_clf.csv'
data = pd.read_csv(csv_path, index_col=0)

# Bare expression: the notebook renders the dataframe as this cell's output.
data

Unnamed: 0,A_Density,B_Density,A_dipole Polarizability,B_dipole Polarizability,A_covalent Radius,A_FirstIonization,B_number of Valence Electrons,B_number,A_Period,A_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,A_Mulliken EN,B_Mulliken EN,A-B_Density,(A+B)-O_Density,A-B_dipole Polarizability,(A+B)-O_dipole Polarizability,(A+B)-O_covalent Radius,A-B_FirstIonization,A-B_number of Valence Electrons,(A+B)-O_number of Valence Electrons,A-B_number,(A+B)-O_number,A-B_Electronegativity,(A+B)-O_Electronegativity,A-B_number of s+p Electrons,(A+B)-O_number of s+p Electrons,...,Ag,Cd,In,Sn,Sb,Cs,Ba,La,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu,W,Ir,Hg,Bi,compound possible,max ionic char,avg ionic char,is_doping,is_metal
0,0.971,7.44,162.7,68.00,155.0,495.8,7.0,25.0,3.0,0.93,1.0,2.0,0.0,5.0,2.85,3.72,6.469,8.40957,94.70,225.40,211.0,221.5,6.0,2.0,14.0,28.0,0.62,0.96,1.0,3.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.792998,0.164896,0,1
1,8.690,12.40,46.0,66.00,136.0,867.8,9.0,45.0,5.0,1.69,2.0,1.0,10.0,8.0,4.33,4.30,3.710,21.08857,20.00,106.70,198.0,148.1,3.0,15.0,3.0,85.0,0.59,0.53,1.0,3.0,...,0.0,0.142857,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,1,0.534957,0.093711,0,0
2,1.540,7.31,160.8,65.00,171.0,589.8,3.0,49.0,4.0,1.00,2.0,3.0,0.0,10.0,2.20,3.10,5.770,8.84857,95.80,220.50,250.0,31.5,1.0,1.0,29.0,61.0,0.78,0.66,1.0,1.0,...,0.0,0.000000,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,1,0.774266,0.150250,0,0
3,2.330,8.91,37.3,49.00,116.0,786.5,10.0,28.0,3.0,1.90,4.0,2.0,0.0,8.0,4.77,4.40,6.580,11.23857,11.70,81.00,163.0,49.4,6.0,8.0,14.0,34.0,0.01,0.37,2.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,1,0.447278,0.108843,0,0
4,7.870,1.74,62.0,71.20,116.0,762.5,2.0,12.0,4.0,1.83,2.0,2.0,6.0,0.0,4.06,3.75,6.130,9.60857,9.20,127.90,192.0,24.8,6.0,4.0,14.0,30.0,0.52,0.30,0.0,2.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.678329,0.152348,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113718,6.110,5.24,87.0,184.00,134.0,650.9,3.0,63.0,4.0,1.63,2.0,2.0,3.0,0.0,3.60,3.10,0.870,11.34857,97.00,265.70,239.0,103.8,2.0,2.0,40.0,78.0,0.43,0.61,0.0,2.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.285714,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.714753,0.164182,0,1
113719,6.110,8.80,87.0,156.00,134.0,650.9,3.0,67.0,4.0,1.63,2.0,2.0,3.0,0.0,3.60,3.10,2.690,14.90857,69.00,237.70,237.0,69.9,2.0,2.0,44.0,82.0,0.40,0.58,0.0,2.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.285714,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.705072,0.162358,0,1
113720,6.110,12.00,87.0,26.14,134.0,650.9,10.0,46.0,4.0,1.63,2.0,0.0,3.0,10.0,3.60,4.45,5.890,18.10857,60.86,107.84,191.0,153.5,5.0,9.0,23.0,61.0,0.57,0.39,2.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.559139,0.100933,0,1
113721,6.110,7.26,87.0,200.00,134.0,650.9,3.0,61.0,4.0,1.63,2.0,2.0,3.0,0.0,3.60,3.10,1.150,13.36857,113.00,281.70,244.0,110.9,2.0,2.0,38.0,76.0,0.50,0.68,0.0,2.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.736585,0.168376,0,1


In [4]:
from sklearn.model_selection import train_test_split

# Target is the `is_metal` column; the features are every other column.
# NOTE(review): the displayed frame appears to list an 'Unnamed: 0' column.
# If that is a leftover row id, it is kept in X as a feature - confirm.
y = data['is_metal']
X = data.drop('is_metal', axis=1)

# Shuffled hold-out split: 20 % of the rows go to the test set, with a fixed
# seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)
print(X_train.shape, X_test.shape)

(90978, 90) (22745, 90)


In [5]:
from interpret.data import ClassHistogram
from interpret import show

# Build a ClassHistogram data explanation from the training split, labelled
# 'Train Data', and hand it to interpret's show() for rendering.
histogram_explainer = ClassHistogram()
hist = histogram_explainer.explain_data(X_train, y_train, name='Train Data')
show(hist)

In [None]:
from interpret.glassbox import (
    ExplainableBoostingClassifier,
    LogisticRegression,
    ClassificationTree,
)

# Create the Explainable Boosting classifier; n_jobs=-1 is passed through
# unchanged (presumably "use all available cores" - confirm against the
# interpret documentation).
ebm = ExplainableBoostingClassifier(n_jobs=-1)

# Fit on the training split. Left as a bare expression so the notebook shows
# whatever fit() returns as the cell output.
ebm.fit(X=X_train, y=y_train)