# Deep Learning for carbon and silicon isotopes

In [1]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [3]:
# Read the csv file into a pandas DataFrame
carbon_silicon = pd.read_csv('presolargrains_C_Si.csv')
carbon_silicon.head()

Unnamed: 0,Type,carbon_isotopes,silicon_isotopes
0,C,1.3,313.0
1,AB,1.42,-59.0
2,AB,1.854,8.0
3,AB,1.88,20.0
4,AB,1.91,9.0


## Data Preprocessing

In [4]:
# Drop Type from the X values, use it as our dependent variable y
X = carbon_silicon.drop("Type", axis=1)
y = carbon_silicon["Type"]
print(X.shape, y.shape)

(14426, 2) (14426,)


## Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [6]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [7]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [8]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)

In [9]:
y_train_categorical.shape

(10819, 8)

## Create a Deep Learning Model

In [10]:
# Create model and add layers
model = Sequential()

In [11]:

model.add(Dense(units=100, activation='relu', input_dim=2))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=8, activation='softmax'))


In [12]:
'''
number_inputs = 2
number_hidden_nodes = 6
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))
'''

"\nnumber_inputs = 2\nnumber_hidden_nodes = 6\nmodel.add(Dense(units=number_hidden_nodes,\n                activation='relu', input_dim=number_inputs))\n"

In [13]:
'''
number_classes = 8
model.add(Dense(units=number_classes, activation='softmax'))
'''

"\nnumber_classes = 8\nmodel.add(Dense(units=number_classes, activation='softmax'))\n"

## Compile and train the model

In [14]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               300       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 808       
Total params: 11,208
Trainable params: 11,208
Non-trainable params: 0
_________________________________________________________________


In [16]:
X_train_scaled.shape

(10819, 2)

In [17]:
# Train the model
# Set early stopping as callback
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 10819 samples
Epoch 1/100
10819/10819 - 1s - loss: 0.8124 - accuracy: 0.8398
Epoch 2/100
10819/10819 - 0s - loss: 0.5903 - accuracy: 0.8544
Epoch 3/100
10819/10819 - 0s - loss: 0.5324 - accuracy: 0.8657
Epoch 4/100
10819/10819 - 0s - loss: 0.4985 - accuracy: 0.8697
Epoch 5/100
10819/10819 - 0s - loss: 0.4711 - accuracy: 0.8712
Epoch 6/100
10819/10819 - 0s - loss: 0.4491 - accuracy: 0.8719
Epoch 7/100
10819/10819 - 0s - loss: 0.4288 - accuracy: 0.8743
Epoch 8/100
10819/10819 - 0s - loss: 0.4062 - accuracy: 0.8748
Epoch 9/100
10819/10819 - 0s - loss: 0.3844 - accuracy: 0.8774
Epoch 10/100
10819/10819 - 0s - loss: 0.3676 - accuracy: 0.8822
Epoch 11/100
10819/10819 - 0s - loss: 0.3407 - accuracy: 0.8896
Epoch 12/100
10819/10819 - 0s - loss: 0.3207 - accuracy: 0.8984
Epoch 13/100
10819/10819 - 0s - loss: 0.3062 - accuracy: 0.9054
Epoch 14/100
10819/10819 - 0s - loss: 0.2899 - accuracy: 0.9137
Epoch 15/100
10819/10819 - 0s - loss: 0.2773 - accuracy: 0.9191
Epoch 16/100
10819/10819 -

<tensorflow.python.keras.callbacks.History at 0x10ed97278>

## Quantify the trained model

In [23]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

3607/3607 - 0s - loss: 0.1875 - accuracy: 0.9312
Deep Learning - Loss: 0.18751426216184944, Accuracy: 0.9312447905540466


## Predict

In [19]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [20]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,M,M
1,M,M
2,M,M
3,M,M
4,M,Z
5,M,M
6,M,M
7,M,M
8,M,M
9,M,M


## Save the model

In [24]:
# Save model to file
#filename = 'deeplearning_C_Si.sav'
#joblib.dump(model, filename)
model.save('deeplearning_C_Si', save_format='tf')

INFO:tensorflow:Assets written to: deeplearning_C_Si/assets


## Summary:

### Deep Learning carbon and silicon: 93.1%