# Deep Learning carbon, silicon, and nitrogen isotopes

In [18]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [19]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [20]:
# Read the csv file into a pandas DataFrame
C_Si_N = pd.read_csv('presolargrains_C_Si_N.csv')
C_Si_N.head()

Unnamed: 0,Type,carbon_12_13,nitrogen_14_15,silicon_29_28,silicon_30_28
0,X,74.3,207.4,-685.0,-520.0
1,X,1581.0,116.0,-684.0,-490.0
2,X,234.75,187.2,-683.0,-501.0
3,X,455.0,140.0,-662.0,-770.0
4,X,823.0,44.0,-658.0,-234.0


## Data Preprocessing

In [21]:
# Drop Type from the X values, use it as our dependent variable y
X = C_Si_N.drop("Type", axis=1)
y = C_Si_N["Type"]
print(X.shape, y.shape)

(1301, 4) (1301,)


## Train Test Split

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [23]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [24]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [25]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)

## Create a Deep Learning Model

In [26]:
# Create model and add layers
model = Sequential()

In [27]:
number_inputs = 4
number_hidden_nodes = 12
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

In [28]:
number_classes = 8
model.add(Dense(units=number_classes, activation='softmax'))

## Compile and train the model

In [29]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [30]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 12)                60        
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 104       
Total params: 164
Trainable params: 164
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Train the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=300,
    shuffle=True,
    verbose=2
)

Train on 975 samples
Epoch 1/300
975/975 - 0s - loss: 2.0535 - accuracy: 0.1292
Epoch 2/300
975/975 - 0s - loss: 1.9934 - accuracy: 0.5692
Epoch 3/300
975/975 - 0s - loss: 1.9280 - accuracy: 0.5692
Epoch 4/300
975/975 - 0s - loss: 1.8537 - accuracy: 0.5692
Epoch 5/300
975/975 - 0s - loss: 1.7699 - accuracy: 0.5692
Epoch 6/300
975/975 - 0s - loss: 1.6755 - accuracy: 0.5692
Epoch 7/300
975/975 - 0s - loss: 1.5772 - accuracy: 0.5692
Epoch 8/300
975/975 - 0s - loss: 1.4769 - accuracy: 0.5692
Epoch 9/300
975/975 - 0s - loss: 1.3916 - accuracy: 0.5692
Epoch 10/300
975/975 - 0s - loss: 1.3294 - accuracy: 0.5692
Epoch 11/300
975/975 - 0s - loss: 1.2873 - accuracy: 0.5692
Epoch 12/300
975/975 - 0s - loss: 1.2578 - accuracy: 0.5692
Epoch 13/300
975/975 - 0s - loss: 1.2361 - accuracy: 0.5692
Epoch 14/300
975/975 - 0s - loss: 1.2190 - accuracy: 0.5703
Epoch 15/300
975/975 - 0s - loss: 1.2050 - accuracy: 0.5713
Epoch 16/300
975/975 - 0s - loss: 1.1921 - accuracy: 0.5723
Epoch 17/300
975/975 - 0s - 

Epoch 137/300
975/975 - 0s - loss: 0.7899 - accuracy: 0.7477
Epoch 138/300
975/975 - 0s - loss: 0.7895 - accuracy: 0.7477
Epoch 139/300
975/975 - 0s - loss: 0.7884 - accuracy: 0.7477
Epoch 140/300
975/975 - 0s - loss: 0.7877 - accuracy: 0.7477
Epoch 141/300
975/975 - 0s - loss: 0.7872 - accuracy: 0.7477
Epoch 142/300
975/975 - 0s - loss: 0.7864 - accuracy: 0.7477
Epoch 143/300
975/975 - 0s - loss: 0.7858 - accuracy: 0.7477
Epoch 144/300
975/975 - 0s - loss: 0.7855 - accuracy: 0.7477
Epoch 145/300
975/975 - 0s - loss: 0.7841 - accuracy: 0.7477
Epoch 146/300
975/975 - 0s - loss: 0.7835 - accuracy: 0.7477
Epoch 147/300
975/975 - 0s - loss: 0.7828 - accuracy: 0.7477
Epoch 148/300
975/975 - 0s - loss: 0.7823 - accuracy: 0.7477
Epoch 149/300
975/975 - 0s - loss: 0.7817 - accuracy: 0.7477
Epoch 150/300
975/975 - 0s - loss: 0.7809 - accuracy: 0.7477
Epoch 151/300
975/975 - 0s - loss: 0.7803 - accuracy: 0.7477
Epoch 152/300
975/975 - 0s - loss: 0.7799 - accuracy: 0.7477
Epoch 153/300
975/975 - 

Epoch 272/300
975/975 - 0s - loss: 0.7165 - accuracy: 0.7723
Epoch 273/300
975/975 - 0s - loss: 0.7162 - accuracy: 0.7754
Epoch 274/300
975/975 - 0s - loss: 0.7158 - accuracy: 0.7754
Epoch 275/300
975/975 - 0s - loss: 0.7156 - accuracy: 0.7713
Epoch 276/300
975/975 - 0s - loss: 0.7152 - accuracy: 0.7785
Epoch 277/300
975/975 - 0s - loss: 0.7148 - accuracy: 0.7733
Epoch 278/300
975/975 - 0s - loss: 0.7143 - accuracy: 0.7733
Epoch 279/300
975/975 - 0s - loss: 0.7138 - accuracy: 0.7744
Epoch 280/300
975/975 - 0s - loss: 0.7138 - accuracy: 0.7723
Epoch 281/300
975/975 - 0s - loss: 0.7139 - accuracy: 0.7723
Epoch 282/300
975/975 - 0s - loss: 0.7128 - accuracy: 0.7764
Epoch 283/300
975/975 - 0s - loss: 0.7120 - accuracy: 0.7764
Epoch 284/300
975/975 - 0s - loss: 0.7116 - accuracy: 0.7744
Epoch 285/300
975/975 - 0s - loss: 0.7112 - accuracy: 0.7774
Epoch 286/300
975/975 - 0s - loss: 0.7111 - accuracy: 0.7774
Epoch 287/300
975/975 - 0s - loss: 0.7104 - accuracy: 0.7754
Epoch 288/300
975/975 - 

<tensorflow.python.keras.callbacks.History at 0x14a1857f0>

## Quantify the trained model

In [32]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

326/326 - 0s - loss: 0.7066 - accuracy: 0.7822
Deep Learning - Loss: 0.706584806639724, Accuracy: 0.7822085618972778


## Predict

In [33]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [34]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,X,X
1,X,X
2,M,M
3,M,Y
4,M,M
5,M,M
6,M,AB
7,M,M
8,M,AB
9,M,AB


## Save the model

In [35]:
# Save model to file
filename = 'deeplearning_C_Si_N.sav'
joblib.dump(model, filename)

TypeError: can't pickle _thread.RLock objects

## Summary:

### Neural Network/Deep Learning for carbon, silicon, and nitrogen isotopes: 78.2%