# Deep Learning carbon and nitrogen isotopes

In [1]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [3]:
# Read the csv file into a pandas DataFrame
carbon_nitrogen = pd.read_csv('presolargrains_C_N.csv')
carbon_nitrogen.head()

Unnamed: 0,Type,carbon_12_13,nitrogen_14_15
0,AB,1.57,59.0
1,AB,1.88,67.0
2,AB,1.94,78.0
3,AB,1.96,80.0
4,AB,2.15,118.0


## Data Preprocessing

In [4]:
# Drop Type from the X values, use it as our dependent variable y
X = carbon_nitrogen.drop("Type", axis=1)
y = carbon_nitrogen["Type"]
print(X.shape, y.shape)

(2189, 2) (2189,)


## Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [6]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [7]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [8]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

## Create a Deep Learning Model

In [9]:
# Create model and add layers
model = Sequential()

In [10]:
number_inputs = 2
number_hidden_nodes = 6
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

In [11]:
number_classes = 8
model.add(Dense(units=number_classes, activation='softmax'))

## Compile and train the model

In [12]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 3)                 6         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 32        
Total params: 38
Trainable params: 38
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Train the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=300,
    shuffle=True,
    verbose=2
)

Train on 1641 samples
Epoch 1/300
1641/1641 - 0s - loss: 2.0550 - accuracy: 0.4278
Epoch 2/300
1641/1641 - 0s - loss: 1.9556 - accuracy: 0.6868
Epoch 3/300
1641/1641 - 0s - loss: 1.8455 - accuracy: 0.6990
Epoch 4/300
1641/1641 - 0s - loss: 1.7190 - accuracy: 0.6990
Epoch 5/300
1641/1641 - 0s - loss: 1.5715 - accuracy: 0.6990
Epoch 6/300
1641/1641 - 0s - loss: 1.4089 - accuracy: 0.6990
Epoch 7/300
1641/1641 - 0s - loss: 1.2688 - accuracy: 0.6990
Epoch 8/300
1641/1641 - 0s - loss: 1.1700 - accuracy: 0.6990
Epoch 9/300
1641/1641 - 0s - loss: 1.1081 - accuracy: 0.6990
Epoch 10/300
1641/1641 - 0s - loss: 1.0721 - accuracy: 0.6990
Epoch 11/300
1641/1641 - 0s - loss: 1.0503 - accuracy: 0.6990
Epoch 12/300
1641/1641 - 0s - loss: 1.0361 - accuracy: 0.6990
Epoch 13/300
1641/1641 - 0s - loss: 1.0262 - accuracy: 0.6990
Epoch 14/300
1641/1641 - 0s - loss: 1.0185 - accuracy: 0.6990
Epoch 15/300
1641/1641 - 0s - loss: 1.0120 - accuracy: 0.6990
Epoch 16/300
1641/1641 - 0s - loss: 1.0067 - accuracy: 0.

Epoch 133/300
1641/1641 - 0s - loss: 0.7349 - accuracy: 0.7861
Epoch 134/300
1641/1641 - 0s - loss: 0.7341 - accuracy: 0.7861
Epoch 135/300
1641/1641 - 0s - loss: 0.7331 - accuracy: 0.7861
Epoch 136/300
1641/1641 - 0s - loss: 0.7320 - accuracy: 0.7892
Epoch 137/300
1641/1641 - 0s - loss: 0.7313 - accuracy: 0.7904
Epoch 138/300
1641/1641 - 0s - loss: 0.7304 - accuracy: 0.7855
Epoch 139/300
1641/1641 - 0s - loss: 0.7297 - accuracy: 0.7904
Epoch 140/300
1641/1641 - 0s - loss: 0.7287 - accuracy: 0.7885
Epoch 141/300
1641/1641 - 0s - loss: 0.7280 - accuracy: 0.7885
Epoch 142/300
1641/1641 - 0s - loss: 0.7271 - accuracy: 0.7885
Epoch 143/300
1641/1641 - 0s - loss: 0.7262 - accuracy: 0.7904
Epoch 144/300
1641/1641 - 0s - loss: 0.7253 - accuracy: 0.7904
Epoch 145/300
1641/1641 - 0s - loss: 0.7245 - accuracy: 0.7885
Epoch 146/300
1641/1641 - 0s - loss: 0.7237 - accuracy: 0.7916
Epoch 147/300
1641/1641 - 0s - loss: 0.7229 - accuracy: 0.7904
Epoch 148/300
1641/1641 - 0s - loss: 0.7225 - accuracy:

1641/1641 - 0s - loss: 0.6621 - accuracy: 0.8062
Epoch 264/300
1641/1641 - 0s - loss: 0.6618 - accuracy: 0.8050
Epoch 265/300
1641/1641 - 0s - loss: 0.6614 - accuracy: 0.8068
Epoch 266/300
1641/1641 - 0s - loss: 0.6608 - accuracy: 0.8062
Epoch 267/300
1641/1641 - 0s - loss: 0.6603 - accuracy: 0.8062
Epoch 268/300
1641/1641 - 0s - loss: 0.6598 - accuracy: 0.8062
Epoch 269/300
1641/1641 - 0s - loss: 0.6603 - accuracy: 0.8062
Epoch 270/300
1641/1641 - 0s - loss: 0.6595 - accuracy: 0.8062
Epoch 271/300
1641/1641 - 0s - loss: 0.6589 - accuracy: 0.8056
Epoch 272/300
1641/1641 - 0s - loss: 0.6581 - accuracy: 0.8062
Epoch 273/300
1641/1641 - 0s - loss: 0.6580 - accuracy: 0.8074
Epoch 274/300
1641/1641 - 0s - loss: 0.6573 - accuracy: 0.8062
Epoch 275/300
1641/1641 - 0s - loss: 0.6572 - accuracy: 0.8056
Epoch 276/300
1641/1641 - 0s - loss: 0.6564 - accuracy: 0.8062
Epoch 277/300
1641/1641 - 0s - loss: 0.6562 - accuracy: 0.8062
Epoch 278/300
1641/1641 - 0s - loss: 0.6556 - accuracy: 0.8056
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1496d15c0>

## Quantify the trained model

In [14]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

548/548 - 0s - loss: 0.7427 - accuracy: 0.8011
Deep Learning - Loss: 0.7427292165965059, Accuracy: 0.8010948896408081


## Predict

In [15]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [16]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,M,AB
1,M,Z
2,M,M
3,M,AB
4,M,M
5,M,M
6,M,AB
7,M,M
8,M,AB
9,M,Y


## Save the model

In [17]:
# Save model to file
filename = 'deeplearning_C_N.sav'
joblib.dump(model, filename)

TypeError: can't pickle _thread.RLock objects

## Summary:

### Neural Network/Deep Learning for carbon and nitrogen isotopes: 80.1%