# Deep Learning carbon, silicon, nitrogen, and aluminum isotopes

In [36]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [37]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [38]:
# Read the csv file into a pandas DataFrame
C_Si_N_Al = pd.read_csv('presolargrains_C_Si_N_Al.csv')
C_Si_N_Al.head()

Unnamed: 0,Type,carbon_12_13,nitrogen_14_15,aluminum_26_27,silicon_29_28,silicon_30_28
0,X,1581.0,116.0,0.0095,-684.0,-490.0
1,X,234.75,187.2,0.3327,-683.0,-501.0
2,X,140.0,97.0,0.017,-653.0,-446.0
3,X,223.0,102.0,0.114,-600.0,-459.0
4,X,1693.89,63.2,0.2364,-588.0,-605.0


## Data Preprocessing

In [39]:
# Drop Type from the X values, use it as our dependent variable y
X = C_Si_N_Al.drop("Type", axis=1)
y = C_Si_N_Al["Type"]
print(X.shape, y.shape)

(328, 5) (328,)


## Train Test Split

In [40]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [41]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [42]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [43]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)

## Create a Deep Learning Model

In [44]:
# Create model and add layers
model = Sequential()

In [45]:
number_inputs = 5
number_hidden_nodes = 15
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

In [50]:
number_classes = 7
model.add(Dense(units=number_classes, activation='softmax'))

## Compile and train the model

In [51]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [52]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 15)                90        
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 128       
_________________________________________________________________
dense_6 (Dense)              (None, 7)                 63        
Total params: 281
Trainable params: 281
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Train the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=300,
    shuffle=True,
    verbose=2
)

Train on 246 samples
Epoch 1/300
246/246 - 0s - loss: 1.9925 - accuracy: 0.0081
Epoch 2/300
246/246 - 0s - loss: 1.9736 - accuracy: 0.0081
Epoch 3/300
246/246 - 0s - loss: 1.9552 - accuracy: 0.0122
Epoch 4/300
246/246 - 0s - loss: 1.9378 - accuracy: 0.0772
Epoch 5/300
246/246 - 0s - loss: 1.9204 - accuracy: 0.2236
Epoch 6/300
246/246 - 0s - loss: 1.9033 - accuracy: 0.2520
Epoch 7/300
246/246 - 0s - loss: 1.8866 - accuracy: 0.2520
Epoch 8/300
246/246 - 0s - loss: 1.8698 - accuracy: 0.2520
Epoch 9/300
246/246 - 0s - loss: 1.8538 - accuracy: 0.2520
Epoch 10/300
246/246 - 0s - loss: 1.8380 - accuracy: 0.2520
Epoch 11/300
246/246 - 0s - loss: 1.8222 - accuracy: 0.2520
Epoch 12/300
246/246 - 0s - loss: 1.8068 - accuracy: 0.2520
Epoch 13/300
246/246 - 0s - loss: 1.7917 - accuracy: 0.2561
Epoch 14/300
246/246 - 0s - loss: 1.7767 - accuracy: 0.2642
Epoch 15/300
246/246 - 0s - loss: 1.7625 - accuracy: 0.3293
Epoch 16/300
246/246 - 0s - loss: 1.7479 - accuracy: 0.4837
Epoch 17/300
246/246 - 0s - 

Epoch 137/300
246/246 - 0s - loss: 0.8829 - accuracy: 0.7602
Epoch 138/300
246/246 - 0s - loss: 0.8810 - accuracy: 0.7602
Epoch 139/300
246/246 - 0s - loss: 0.8785 - accuracy: 0.7683
Epoch 140/300
246/246 - 0s - loss: 0.8760 - accuracy: 0.7724
Epoch 141/300
246/246 - 0s - loss: 0.8740 - accuracy: 0.7683
Epoch 142/300
246/246 - 0s - loss: 0.8717 - accuracy: 0.7724
Epoch 143/300
246/246 - 0s - loss: 0.8694 - accuracy: 0.7724
Epoch 144/300
246/246 - 0s - loss: 0.8675 - accuracy: 0.7724
Epoch 145/300
246/246 - 0s - loss: 0.8654 - accuracy: 0.7724
Epoch 146/300
246/246 - 0s - loss: 0.8633 - accuracy: 0.7805
Epoch 147/300
246/246 - 0s - loss: 0.8612 - accuracy: 0.7886
Epoch 148/300
246/246 - 0s - loss: 0.8591 - accuracy: 0.7886
Epoch 149/300
246/246 - 0s - loss: 0.8569 - accuracy: 0.7886
Epoch 150/300
246/246 - 0s - loss: 0.8550 - accuracy: 0.7886
Epoch 151/300
246/246 - 0s - loss: 0.8535 - accuracy: 0.7764
Epoch 152/300
246/246 - 0s - loss: 0.8524 - accuracy: 0.7805
Epoch 153/300
246/246 - 

Epoch 272/300
246/246 - 0s - loss: 0.7244 - accuracy: 0.7886
Epoch 273/300
246/246 - 0s - loss: 0.7235 - accuracy: 0.7886
Epoch 274/300
246/246 - 0s - loss: 0.7229 - accuracy: 0.7886
Epoch 275/300
246/246 - 0s - loss: 0.7224 - accuracy: 0.7846
Epoch 276/300
246/246 - 0s - loss: 0.7218 - accuracy: 0.7846
Epoch 277/300
246/246 - 0s - loss: 0.7215 - accuracy: 0.7846
Epoch 278/300
246/246 - 0s - loss: 0.7203 - accuracy: 0.7927
Epoch 279/300
246/246 - 0s - loss: 0.7200 - accuracy: 0.7886
Epoch 280/300
246/246 - 0s - loss: 0.7192 - accuracy: 0.7886
Epoch 281/300
246/246 - 0s - loss: 0.7185 - accuracy: 0.7886
Epoch 282/300
246/246 - 0s - loss: 0.7179 - accuracy: 0.7886
Epoch 283/300
246/246 - 0s - loss: 0.7175 - accuracy: 0.7886
Epoch 284/300
246/246 - 0s - loss: 0.7171 - accuracy: 0.7846
Epoch 285/300
246/246 - 0s - loss: 0.7161 - accuracy: 0.7886
Epoch 286/300
246/246 - 0s - loss: 0.7156 - accuracy: 0.7927
Epoch 287/300
246/246 - 0s - loss: 0.7151 - accuracy: 0.7927
Epoch 288/300
246/246 - 

<tensorflow.python.keras.callbacks.History at 0x14a393358>

## Quantify the trained model

In [54]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

82/82 - 0s - loss: 0.7307 - accuracy: 0.7683
Deep Learning - Loss: 0.7307439868043109, Accuracy: 0.7682926654815674


## Predict

In [55]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [56]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,AB,AB
1,M,M
2,M,M
3,X,X
4,M,M
5,M,Y
6,M,M
7,M,M
8,X,X
9,M,AB


## Save the model

In [57]:
# Save model to file
filename = 'deeplearning_C_Si_N_Al.sav'
joblib.dump(model, filename)

TypeError: can't pickle _thread.RLock objects

## Summary:

### Neural Network/Deep Learning for carbon, silicon, nitrogen, and aluminum isotopes: 76.8%