# Deep Learning for silicon isotopes

In [1]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [3]:
# Read the csv file into a pandas DataFrame
silicon = pd.read_csv('presolargrains_Si_for_SVM.csv')
silicon.head()

Unnamed: 0,Type,silicon_29_28,silicon_30_28
0,X,-662.0,-770.0
1,X,-451.0,-719.0
2,X,-594.0,-717.0
3,X,-499.0,-709.0
4,X,-366.0,-705.0


## Data Preprocessing

In [4]:
# Drop Type from the X values, use it as our dependent variable y
X = silicon.drop("Type", axis=1)
y = silicon["Type"]
print(X.shape, y.shape)

(14679, 2) (14679,)


## Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [6]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [7]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [8]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)

In [9]:
y_train_categorical.shape

(11009, 8)

## Create a Deep Learning Model

In [10]:
# Create model and add layers
model = Sequential()

In [11]:

model.add(Dense(units=100, activation='relu', input_dim=2))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=8, activation='softmax'))


In [None]:
'''
number_inputs = 2
number_hidden_nodes = 6
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))
'''

In [None]:
'''
number_classes = 8
model.add(Dense(units=number_classes, activation='softmax'))
'''

## Compile and train the model

In [12]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               300       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 808       
Total params: 11,208
Trainable params: 11,208
Non-trainable params: 0
_________________________________________________________________


In [14]:
X_train_scaled.shape

(11009, 2)

In [15]:
# Train the model
# Set early stopping as callback
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 11009 samples
Epoch 1/100
11009/11009 - 1s - loss: 0.8100 - accuracy: 0.8309
Epoch 2/100
11009/11009 - 0s - loss: 0.6181 - accuracy: 0.8459
Epoch 3/100
11009/11009 - 0s - loss: 0.5556 - accuracy: 0.8682
Epoch 4/100
11009/11009 - 0s - loss: 0.5334 - accuracy: 0.8707
Epoch 5/100
11009/11009 - 0s - loss: 0.5206 - accuracy: 0.8713
Epoch 6/100
11009/11009 - 0s - loss: 0.5090 - accuracy: 0.8714
Epoch 7/100
11009/11009 - 0s - loss: 0.4955 - accuracy: 0.8742
Epoch 8/100
11009/11009 - 0s - loss: 0.4814 - accuracy: 0.8766
Epoch 9/100
11009/11009 - 0s - loss: 0.4701 - accuracy: 0.8792
Epoch 10/100
11009/11009 - 0s - loss: 0.4644 - accuracy: 0.8801
Epoch 11/100
11009/11009 - 0s - loss: 0.4568 - accuracy: 0.8814
Epoch 12/100
11009/11009 - 0s - loss: 0.4508 - accuracy: 0.8823
Epoch 13/100
11009/11009 - 0s - loss: 0.4467 - accuracy: 0.8836
Epoch 14/100
11009/11009 - 0s - loss: 0.4419 - accuracy: 0.8846
Epoch 15/100
11009/11009 - 0s - loss: 0.4372 - accuracy: 0.8858
Epoch 16/100
11009/11009 -

<tensorflow.python.keras.callbacks.History at 0x10b247860>

## Quantify the trained model

In [16]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

3670/3670 - 0s - loss: 0.4446 - accuracy: 0.8875
Deep Learning - Loss: 0.44456485172058646, Accuracy: 0.8874659538269043


## Predict

In [17]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [18]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,Z,Z
1,M,AB
2,M,M
3,M,M
4,M,M
5,M,M
6,M,M
7,M,M
8,M,M
9,M,M


## Save the model

In [19]:
# Save model to file
#filename = 'deeplearning_C_Si.sav'
#joblib.dump(model, filename)
model.save('deeplearning_Si', save_format='tf')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: deeplearning_Si/assets


## Summary:

### Deep Learning silicon: 88.7%