In [6]:
import collections

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from pymatgen.core import Composition

# Plot styling shared by seaborn and matplotlib: larger fonts and a 12x8 figure.
rcparams = {
    "legend.fontsize": 20,
    "figure.figsize": (12, 8),
    "axes.labelsize": 24,
    "axes.titlesize": 28,
    # Both tick axes use the same label size.
    **{f"{axis}tick.labelsize": 20 for axis in ("x", "y")},
}
# Apply through seaborn first, then push the same values into matplotlib's
# global rcParams.
sns.set(rc=rcparams)
mpl.rcParams.update(rcparams)

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for 'retina' (high-DPI) figure output.
%config InlineBackend.figure_format = 'retina'

from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV, KFold

In [7]:
# Download the lab dataset; the first CSV column becomes the index.
data_url = 'https://raw.githubusercontent.com/materialsvirtuallab/nano281/master/labs/lab2/data2022.csv'
data = pd.read_csv(
    data_url,
    index_col=0,
    # NA detection is switched off so every cell is kept as read.
    # NOTE(review): presumably so formula strings such as "NaN" are not
    # turned into missing values - confirm.
    na_filter=False,
)

# Parse each formula string into a pymatgen Composition object.
data['composition'] = list(map(Composition, data['formula']))

In [13]:
# Download the per-element property table (first CSV column is the index) and
# keep only the three properties used for feature construction.
el_data_url = "https://raw.githubusercontent.com/materialsvirtuallab/nano281/master/labs/lab2/element_properties.csv"
el_data = pd.read_csv(el_data_url, index_col=0).loc[
    :, ["AtomicRadius", "AtomicWeight", "Electronegativity"]
]

In [15]:
# Build composition-based features. For every elemental property column in
# `el_data`, each compound gets three descriptors:
#   <Prop>Mean - amount-weighted mean over the elements in the composition
#   <Prop>Min  - smallest elemental value among the elements present
#   <Prop>Max  - largest elemental value among the elements present
props = collections.defaultdict(list)

for comp in data['composition']:
    for c in el_data.columns:
        column = el_data[c]
        # (elemental property value, amount in the formula) for each element.
        pairs = [(column[el.symbol], amt) for el, amt in comp.items()]
        # Only the mean is weighted by the amounts.
        weighted_sum = sum(val * amt for val, amt in pairs)
        props['%sMean' % c].append(weighted_sum / comp.num_atoms)
        # Min/Max are taken over the raw elemental values. Taking them over
        # value * amount (as before) mixed stoichiometry into the extrema,
        # e.g. reporting 3 * r(O) instead of r(O) for Fe2O3.
        raw_vals = [val for val, _ in pairs]
        props['%sMin' % c].append(min(raw_vals))
        props['%sMax' % c].append(max(raw_vals))

data = data.assign(**props)
# A missing elemental property makes the weighted mean NaN, so the row is
# removed here regardless of how min()/max() ordered the NaN.
data = data.dropna()
print(data.shape)

(681, 16)
