# Deep Learning for carbon and silicon isotopes

In [47]:
# Imports
# %matplotlib inline
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [48]:
# Set the seed value for the notebook so the results are reproducible
seed(1)

In [49]:
# Read the csv file into a pandas DataFrame
carbon_silicon = pd.read_csv('presolargrains_C_Si.csv')
carbon_silicon.head()

Unnamed: 0,Type,carbon_isotopes,silicon_isotopes
0,C,1.3,313.0
1,AB,1.42,-59.0
2,AB,1.854,8.0
3,AB,1.88,20.0
4,AB,1.91,9.0


## Data Preprocessing

In [50]:
# Drop Type from the X values, use it as our dependent variable y
X = carbon_silicon.drop("Type", axis=1)
y = carbon_silicon["Type"]
print(X.shape, y.shape)

(14426, 2) (14426,)


## Train Test Split

In [51]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## Scale the data using MinMaxScalar

In [52]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Label encode the data set

In [53]:
# Use LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [54]:
# Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)

In [55]:
y_train_categorical.shape

(10819, 8)

## Create a Deep Learning Model

In [56]:
# Create model and add layers
model = Sequential()

In [57]:
model.add(Dense(units=100, activation='relu', input_dim=40))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=8, activation='softmax'))

In [37]:
'''
number_inputs = 2
number_hidden_nodes = 3
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))
'''

In [38]:
'''
number_classes = 8
model.add(Dense(units=number_classes, activation='softmax'))
'''

## Compile and train the model

In [58]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [59]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 100)               4100      
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_11 (Dense)             (None, 8)                 808       
Total params: 15,008
Trainable params: 15,008
Non-trainable params: 0
_________________________________________________________________


In [60]:
x_train_scaled.shape

NameError: name 'x_train_scaled' is not defined

In [61]:
# Train the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

ValueError: Error when checking input: expected dense_9_input to have shape (40,) but got array with shape (2,)

## Quantify the trained model

In [34]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Learning - Loss: {model_loss}, Accuracy: {model_accuracy}")

ValueError: Error when checking input: expected dense_2_input to have shape (1,) but got array with shape (2,)

## Predict

In [35]:
# Predict classes (Types)
encoded_predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

ValueError: Error when checking input: expected dense_2_input to have shape (1,) but got array with shape (2,)

In [36]:
# Create and print dataframe with predicted and actual types
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).reset_index(drop=True)

ValueError: array length 3906 does not match index length 3607

## Save the model

In [18]:
# Save model to file
filename = 'deeplearning_C_Si.sav'
joblib.dump(model, filename)

TypeError: can't pickle _thread.RLock objects

## Summary:

### Deep Learning carbon and silicon: %