In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# Location of the full dataset.
# NOTE(review): this is an absolute, machine-specific path; consider moving the
# file next to the notebook so the analysis can be re-run elsewhere.
DATASET_PATH = 'K:/Google Drive/DOUTORADO/Tese 2.0/Chapter I/KELLOGs/dataset.csv'
data = pd.read_csv(DATASET_PATH)

# Columns of `data` that are modelled, one binary classifier per entry.
Elements = ['As', 'Ba', 'Cd', 'Co', 'Cr', 'Cu', 'Pb', 'Zn', 'Mo']

# Keep only the rows whose layer id is listed in filter.csv.
# (Named `layer_filter` so the builtin `filter` is not shadowed.)
layer_filter = pd.read_csv('filter.csv')
data = data[data['id.layer_uuid_c'].isin(layer_filter['id.layer_uuid_c'])]

# Seed every random number generator used downstream: NumPy (used by
# scikit-learn when no random_state is given) and TensorFlow (weight
# initialisation, batch shuffling). GPU kernels may still be non-deterministic.
SEED = 255
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Header-less, single-column lists of layer ids; column 0 holds the ids.
subsoil = pd.read_csv('subsoil.csv', header=None)
topsoil = pd.read_csv('topsoil.csv', header=None)

topsoil_data = data.loc[data['id.layer_uuid_c'].isin(topsoil[0])]
subsoil_data = data.loc[data['id.layer_uuid_c'].isin(subsoil[0])]

# The rest of the notebook works on the subsoil subset only.
data = subsoil_data

In [3]:
# Maps '<element>_x_train' / '<element>_x_test' / '<element>_y_train' /
# '<element>_y_test' to the corresponding train/test partition.
data_splits = {}

# Predictor columns: everything from column position 10 onwards.
# NOTE(review): assumes the element columns themselves sit before position 10,
# otherwise the targets leak into the features; confirm against the dataset.
features = data.iloc[:, 10:]

for element in Elements:
    concentration = data[element]

    # Binary target: 1 when the value is at or above the 75th percentile of
    # this element, 0 otherwise. A plain threshold comparison keeps the
    # maximum sample (half-open binning up to the maximum would leave it
    # unlabelled) and does not fail when the minimum equals the 75th
    # percentile. Rows with a missing concentration stay NaN so that they are
    # dropped below.
    threshold = concentration.quantile(0.75)
    labels = (concentration >= threshold).astype(float).where(concentration.notna())

    # Drop incomplete rows ONCE, on the combined frame, so that x and y always
    # contain exactly the same rows in the same order.
    labelled = pd.concat([labels.rename('Class'), features], axis=1).dropna()
    x = labelled.drop(columns=['Class'])
    y = labelled['Class'].astype(int)

    # Fixed random_state makes the split identical every time the cell runs.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, shuffle=True, stratify=y, random_state=SEED)

    # Store the splits in the dictionary
    data_splits[f'{element}_x_train'] = x_train
    data_splits[f'{element}_x_test'] = x_test
    data_splits[f'{element}_y_train'] = y_train
    data_splits[f'{element}_y_test'] = y_test

In [4]:
def create_custom_cnn(input_shape=(1701, 1)):
    """Build an uncompiled 1-D separable-convolution binary classifier.

    The network is a stack of eight SeparableConv1D + BatchNormalization
    stages (ReLU activations, 'same' padding), most of them followed by a
    MaxPooling1D(2), then Flatten -> Dense(16, relu) -> Dense(1, sigmoid).

    Args:
        input_shape: Shape of one sample, excluding the batch dimension, as
            (sequence_length, channels). Defaults to (1701, 1), the shape the
            model was originally hard-coded for. The two strided convolutions
            and six pooling layers shrink the sequence repeatedly, so very
            short sequences are not supported.

    Returns:
        A `keras.Sequential` model that still has to be compiled. Its single
        sigmoid output is intended for binary classification.
    """
    # One row per convolutional stage, in network order:
    # (filters, kernel_size, strides, add MaxPooling1D(2) afterwards?)
    conv_stages = [
        (16, 5, 2, False),
        (16, 5, 2, True),
        (32, 3, 1, True),
        (32, 3, 1, True),
        (32, 3, 1, True),
        (64, 3, 1, True),
        (64, 3, 1, True),
        (128, 1, 1, False),
    ]

    stack = [layers.Input(shape=input_shape)]
    for filters, kernel_size, strides, pool_after in conv_stages:
        stack.append(layers.SeparableConv1D(filters, kernel_size, strides=strides,
                                            activation='relu', padding='same'))
        stack.append(layers.BatchNormalization())
        if pool_after:
            stack.append(layers.MaxPooling1D(2))

    # Classification head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(16, activation='relu'))
    stack.append(layers.Dense(1, activation='sigmoid'))  # Assuming binary classification

    model = keras.Sequential(stack)

    return model

# Build one instance with the default settings, purely to inspect it here
model = create_custom_cnn()

# Show the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 separable_conv1d (Separable  (None, 851, 16)          37        
 Conv1D)                                                         
                                                                 
 batch_normalization (BatchN  (None, 851, 16)          64        
 ormalization)                                                   
                                                                 
 separable_conv1d_1 (Separab  (None, 426, 16)          352       
 leConv1D)                                                       
                                                                 
 batch_normalization_1 (Batc  (None, 426, 16)          64        
 hNormalization)                                                 
                                                                 
 max_pooling1d (MaxPooling1D  (None, 213, 16)          0

In [5]:
# One dict of test-set metrics per element; turned into a DataFrame at the end.
cnn_results = []

# Training hyper-parameters, identical for every element.
batch_size = 5
num_epochs = 200

for element in Elements:
    # A fresh model is built on every iteration. Clearing the Keras session
    # first releases the global state left behind by the previous model, which
    # would otherwise keep accumulating in memory across the loop.
    keras.backend.clear_session()

    # Features get an explicit trailing channel axis, (samples, length) ->
    # (samples, length, 1), to match the (length, 1) input the network declares.
    X_train = np.asarray(data_splits[f'{element}_x_train'], dtype='float32')[..., np.newaxis]
    y_train = np.asarray(data_splits[f'{element}_y_train'], dtype=int)
    X_test = np.asarray(data_splits[f'{element}_x_test'], dtype='float32')[..., np.newaxis]
    y_test = np.asarray(data_splits[f'{element}_y_test'], dtype=int)

    model = create_custom_cnn()

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['Accuracy','Recall'])

    model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs, verbose=2)

    predictions = model.predict(X_test)

    # The model returns one sigmoid probability per sample with shape (n, 1);
    # threshold at 0.5 and flatten to the 1-D label vector the metrics expect.
    y_pred = (predictions > 0.5).astype(int).ravel()

    # zero_division=0 scores a metric as 0 (without a warning) when its
    # denominator is empty, e.g. when no positive class is predicted.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    print(f'Accuracy for {element}: {accuracy:.4f}')
    print(f'Precision for {element}: {precision:.4f}')
    print(f'Recall for {element}: {recall:.4f}')
    print(f'F1 Score for {element}: {f1:.4f}')
    print('______________________________')

    cnn_results.append({
        'Element': element,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    })

# Persist the per-element test metrics.
cnn_results_df = pd.DataFrame(cnn_results)
cnn_results_df.to_csv('subsoil_cnn_results.csv', index=False)

Epoch 1/200
82/82 - 6s - loss: 0.4275 - Accuracy: 0.8122 - recall: 0.5098 - 6s/epoch - 77ms/step
Epoch 2/200
82/82 - 1s - loss: 0.2836 - Accuracy: 0.8780 - recall: 0.7451 - 565ms/epoch - 7ms/step
Epoch 3/200
82/82 - 1s - loss: 0.2768 - Accuracy: 0.8780 - recall: 0.7059 - 664ms/epoch - 8ms/step
Epoch 4/200
82/82 - 1s - loss: 0.2566 - Accuracy: 0.8976 - recall: 0.7549 - 648ms/epoch - 8ms/step
Epoch 5/200
82/82 - 1s - loss: 0.3069 - Accuracy: 0.8902 - recall: 0.6863 - 642ms/epoch - 8ms/step
Epoch 6/200
82/82 - 1s - loss: 0.2241 - Accuracy: 0.9024 - recall: 0.7941 - 652ms/epoch - 8ms/step
Epoch 7/200
82/82 - 1s - loss: 0.3055 - Accuracy: 0.8829 - recall: 0.6569 - 639ms/epoch - 8ms/step
Epoch 8/200
82/82 - 1s - loss: 0.2049 - Accuracy: 0.9220 - recall: 0.8725 - 651ms/epoch - 8ms/step
Epoch 9/200
82/82 - 1s - loss: 0.2543 - Accuracy: 0.8756 - recall: 0.7451 - 653ms/epoch - 8ms/step
Epoch 10/200
82/82 - 1s - loss: 0.2075 - Accuracy: 0.9195 - recall: 0.7843 - 646ms/epoch - 8ms/step
Epoch 11/20

In [6]:
# X_train = np.array(data_splits['As_x_train'])
# y_train = np.array(data_splits['As_y_train'])
# X_test = np.array(data_splits['As_x_test'])
# y_test = np.array(data_splits['As_y_test'])

# model = create_custom_cnn()

# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['Accuracy','Recall'])

# batch_size = 5
# num_epochs = 200

# model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs)

# predictions = model.predict(X_test)

In [7]:
# # Assuming you have true labels in y_test and predictions from your model
# y_pred = (predictions > 0.5).astype(int)  # Assuming binary classification and a threshold of 0.5

# # Accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print(f'Accuracy: {accuracy:.4f}')

# # Confusion Matrix
# confusion = confusion_matrix(y_test, y_pred)
# print('Confusion Matrix:\n', confusion)

# # Precision
# precision = precision_score(y_test, y_pred)
# print(f'Precision: {precision:.4f}')

# # Recall
# recall = recall_score(y_test, y_pred)
# print(f'Recall: {recall:.4f}')

# # F1-score
# f1 = f1_score(y_test, y_pred)
# print(f'F1 Score: {f1:.4f}')