<a href="https://colab.research.google.com/github/asengardeon/rede-neural-com-pytorch/blob/main/CRISTOPHER_RedesNeurais2021_06_25_17_59.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# TODO: delete this cell before publishing (useful links)
# https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
# https://towardsdatascience.com/your-first-neural-network-in-pytorch-725631ae0fc
# https://medium.com/@nutanbhogendrasharma/build-neural-network-with-pytorch-52ee7074660

In [2]:
# Importando as bibliotecas
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the lithology dataset from a semicolon-separated CSV that uses '.' as the decimal mark
df_lithology = pd.read_csv(
    'lithology.csv',
    sep=';',
    decimal='.',
)

In [4]:
# Print the lithology dataframe summary (columns, non-null counts and dtypes)
df_lithology.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39117 entries, 0 to 39116
Data columns (total 31 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   DEPTH_MD                          39117 non-null  float64
 1   X_LOC                             39117 non-null  float64
 2   Y_LOC                             39117 non-null  float64
 3   Z_LOC                             39117 non-null  float64
 4   CALI                              39117 non-null  float64
 5   RSHA                              39117 non-null  float64
 6   RMED                              39117 non-null  float64
 7   RDEP                              39117 non-null  float64
 8   RHOB                              39117 non-null  float64
 9   GR                                39117 non-null  float64
 10  NPHI                              39117 non-null  float64
 11  PEF                               39117 non-null  float64
 12  DTC 

In [5]:
# Rank every column by the absolute value of its Pearson correlation with the
# FORCE_2020_LITHOFACIES_LITHOLOGY column (weakest first).
target_column = 'FORCE_2020_LITHOFACIES_LITHOLOGY'
corr = df_lithology.corr(method='pearson')
force_2020_lithofacies_lithology_corr = corr.loc[:, [target_column]]
force_2020_lithofacies_lithology_corr[target_column].abs().sort_values()

RSHA                                0.004514
Delta_DEPTH_MD                      0.004955
Delta_DTC                           0.005740
Delta_GR                            0.014424
DRHO                                0.015981
RHOB                                0.022883
Normalized_RHOB                     0.023248
Delta_RHOB                          0.024108
Delta_Carbon_Index                  0.024417
MUDWEIGHT                           0.025657
CALI                                0.031974
Y_LOC                               0.058606
X_LOC                               0.063437
Carbon_Index                        0.068956
DCAL                                0.075241
PEF                                 0.083927
Z_LOC                               0.126443
DEPTH_MD                            0.126468
Normalized_GR                       0.127538
RDEP                                0.128699
DTC                                 0.130576
ROP                                 0.132193
FORMATION_

In [6]:
# Convert the FORCE_2020_LITHOFACIES_LITHOLOGY codes into class indices 0-11
mapping = {
    30000: 0,
    65030: 1,
    65000: 2,
    80000: 3,
    74000: 4,
    70000: 5,
    70032: 6,
    88000: 7,
    86000: 8,
    99000: 9,
    90000: 10,
    93000: 11
}

_lithology_codes = df_lithology['FORCE_2020_LITHOFACIES_LITHOLOGY']
# The column is overwritten in place, so a second run of this cell would see
# indices instead of codes. Skip the conversion in that case rather than fail.
if not _lithology_codes.isin(list(mapping.values())).all():
    # Series.map turns unknown keys into NaN; keep the original behaviour of
    # failing loudly when a code has no index assigned.
    _unmapped_codes = set(_lithology_codes.unique()) - set(mapping)
    if _unmapped_codes:
        raise KeyError(f'Lithology codes without a class index: {_unmapped_codes}')
    df_lithology['FORCE_2020_LITHOFACIES_LITHOLOGY'] = _lithology_codes.map(mapping)

In [7]:
# Split the lithology dataframe into features (X) and target (y), one pair per model.
# The target column and the two *_encoded columns are excluded from the features.
_non_feature_columns = ['FORCE_2020_LITHOFACIES_LITHOLOGY', 'GROUP_encoded', 'FORMATION_encoded']

# Each generator evaluates its expression three times, once per model,
# exactly like three separate assignment statements would.
X1, X2, X3 = (
    df_lithology.drop(columns=_non_feature_columns).values
    for _ in range(3)
)
y1, y2, y3 = (
    df_lithology['FORCE_2020_LITHOFACIES_LITHOLOGY'].values
    for _ in range(3)
)

In [8]:
# Split the features and the target into train and test sets (90% / 10%)
# and convert every part to a torch tensor.
SEED = 42

# Seed torch here as well: this cell runs before the models are instantiated,
# so their initial weights become reproducible too.
torch.manual_seed(SEED)


def _split_as_tensors(features, target, test_size=0.1, seed=SEED):
    """Split one (features, target) pair and return torch tensors.

    Args:
        features: 2-D array of feature rows.
        target: 1-D array of integer class indices, one per feature row.
        test_size: fraction of rows held out for testing.
        seed: random_state passed to train_test_split so the split is repeatable.

    Returns:
        (X_train, X_test, y_train, y_test) with float32 features and int64 targets.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=seed
    )
    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
        torch.tensor(y_test, dtype=torch.long),
    )


# All three calls share the same random_state, so inputs with the same number
# of rows are split on the same row positions and the models are compared on
# the same held-out rows.
X1_train, X1_test, y1_train, y1_test = _split_as_tensors(X1, y1)
X2_train, X2_test, y2_train, y2_test = _split_as_tensors(X2, y2)
X3_train, X3_test, y3_train, y3_test = _split_as_tensors(X3, y3)

In [9]:
# Definindo os modelos
class FirstModel(nn.Module):
    """Fully connected network with seven Linear layers and ReLU activations.

    Layer widths: 28 -> 56 -> 84 -> 112 -> 84 -> 56 -> 28 -> 12.
    """

    def __init__(self):
        super().__init__()
        # Six hidden layers followed by the output layer, declared in forward order.
        self.l1 = nn.Linear(28, 56)
        self.l2 = nn.Linear(56, 84)
        self.l3 = nn.Linear(84, 112)
        self.l4 = nn.Linear(112, 84)
        self.l5 = nn.Linear(84, 56)
        self.l6 = nn.Linear(56, 28)
        self.output = nn.Linear(28, 12)

    def forward(self, x):
        """Apply ReLU after each hidden layer and return the raw output-layer values."""
        hidden_layers = (self.l1, self.l2, self.l3, self.l4, self.l5, self.l6)
        for layer in hidden_layers:
            x = F.relu(layer(x))
        return self.output(x)

class SecondModel(nn.Module):
    """Fully connected network with five Linear layers and softmax activations.

    Layer widths: 28 -> 42 -> 70 -> 42 -> 28 -> 12.
    """

    def __init__(self):
        super().__init__()
        # Four hidden layers followed by the output layer.
        self.l1 = nn.Linear(in_features = 28, out_features = 42)
        self.l2 = nn.Linear(in_features = 42, out_features = 70)
        self.l3 = nn.Linear(in_features = 70, out_features = 42)
        self.l4 = nn.Linear(in_features = 42, out_features = 28)
        self.output = nn.Linear(in_features = 28, out_features = 12)

    def forward(self, x):
        """Apply softmax after each hidden layer and return the raw output-layer values.

        The softmax dimension is given explicitly as the last (feature) axis.
        Calling F.softmax without `dim` relies on a deprecated implicit choice
        and emits a warning on every call.
        """
        x = F.softmax(self.l1(x), dim = -1)
        x = F.softmax(self.l2(x), dim = -1)
        x = F.softmax(self.l3(x), dim = -1)
        x = F.softmax(self.l4(x), dim = -1)
        x = self.output(x)
        return x

class ThirdModel(nn.Module):
    """Fully connected network with three Linear layers and tanh activations.

    Layer widths: 28 -> 35 -> 28 -> 12.
    """

    def __init__(self):
        super().__init__()
        # Two hidden layers followed by the output layer.
        self.l1 = nn.Linear(28, 35)
        self.l2 = nn.Linear(35, 28)
        self.output = nn.Linear(28, 12)

    def forward(self, x):
        """Apply tanh after each hidden layer and return the raw output-layer values."""
        first_hidden = torch.tanh(self.l1(x))
        second_hidden = torch.tanh(self.l2(first_hidden))
        return self.output(second_hidden)

In [10]:
# Instantiate the first model; the bare name on the last line displays its layers
first_model = FirstModel()
first_model

FirstModel(
  (l1): Linear(in_features=28, out_features=56, bias=True)
  (l2): Linear(in_features=56, out_features=84, bias=True)
  (l3): Linear(in_features=84, out_features=112, bias=True)
  (l4): Linear(in_features=112, out_features=84, bias=True)
  (l5): Linear(in_features=84, out_features=56, bias=True)
  (l6): Linear(in_features=56, out_features=28, bias=True)
  (output): Linear(in_features=28, out_features=12, bias=True)
)

In [11]:
# Instantiate the second model; the bare name on the last line displays its layers
second_model = SecondModel()
second_model

SecondModel(
  (l1): Linear(in_features=28, out_features=42, bias=True)
  (l2): Linear(in_features=42, out_features=70, bias=True)
  (l3): Linear(in_features=70, out_features=42, bias=True)
  (l4): Linear(in_features=42, out_features=28, bias=True)
  (output): Linear(in_features=28, out_features=12, bias=True)
)

In [12]:
# Instantiate the third model; the bare name on the last line displays its layers
third_model = ThirdModel()
third_model

ThirdModel(
  (l1): Linear(in_features=28, out_features=35, bias=True)
  (l2): Linear(in_features=35, out_features=28, bias=True)
  (output): Linear(in_features=28, out_features=12, bias=True)
)

In [13]:
# Loss function and optimizer for the first model:
# cross-entropy loss, and Adam over the model's parameters with learning rate 0.01
first_criterion = nn.CrossEntropyLoss()
first_optimizer = torch.optim.Adam(first_model.parameters(), lr = 0.01)

In [14]:
# Loss function and optimizer for the second model:
# cross-entropy loss, and Adam over the model's parameters with learning rate 0.01
second_criterion = nn.CrossEntropyLoss()
second_optimizer = torch.optim.Adam(second_model.parameters(), lr = 0.01)

In [15]:
# Loss function and optimizer for the third model:
# cross-entropy loss, and Adam over the model's parameters with learning rate 0.01
third_criterion = nn.CrossEntropyLoss()
third_optimizer = torch.optim.Adam(third_model.parameters(), lr = 0.01)

In [16]:
# Executando os modelos na base de treino
def print_current_iteration_data(model, epoch, loss):
    """Print the model label, epoch and loss, but only when epoch is a multiple of 10.

    Args:
        model: label identifying the model in the printed line.
        epoch: current epoch number.
        loss: loss value to print.
    """
    is_reporting_epoch = epoch % 10 == 0
    if not is_reporting_epoch:
        return
    print(f'Model: {model} - Epoch: {epoch} - Loss: {loss}')

epochs = 100
first_model_losses = []
second_model_losses = []
third_model_losses = []

# One entry per model:
# (label, model, loss function, optimizer, train features, train targets, loss history)
training_runs = [
    ('First model', first_model, first_criterion, first_optimizer, X1_train, y1_train, first_model_losses),
    ('Second model', second_model, second_criterion, second_optimizer, X2_train, y2_train, second_model_losses),
    ('Third model', third_model, third_criterion, third_optimizer, X3_train, y3_train, third_model_losses),
]

# Each epoch runs one full-batch optimisation step per model, in the order
# first, second, third.
for epoch in range(epochs):
    for label, model, criterion, optimizer, X_train, y_train, losses in training_runs:
        # Call the module itself rather than .forward() so nn.Module's
        # call machinery (hooks) is not bypassed.
        y_hat = model(X_train)
        loss = criterion(y_hat, y_train)

        # Record a plain Python float instead of a tensor that is still
        # attached to the autograd graph.
        loss_value = loss.item()
        losses.append(loss_value)

        print_current_iteration_data(label, epoch, loss_value)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Model: First model - Epoch: 0 - Loss: 4738.6943359375
Model: Second model - Epoch: 0 - Loss: 2.5248208045959473
Model: Third model - Epoch: 0 - Loss: 2.3100221157073975
Model: First model - Epoch: 10 - Loss: 1403.642578125
Model: Second model - Epoch: 10 - Loss: 2.2659919261932373
Model: Third model - Epoch: 10 - Loss: 1.1658735275268555
Model: First model - Epoch: 20 - Loss: 83.95203399658203
Model: Second model - Epoch: 20 - Loss: 2.039792776107788
Model: Third model - Epoch: 20 - Loss: 1.1475228071212769
Model: First model - Epoch: 30 - Loss: 169.88507080078125
Model: Second model - Epoch: 30 - Loss: 1.8438239097595215
Model: Third model - Epoch: 30 - Loss: 1.134674072265625
Model: First model - Epoch: 40 - Loss: 5.862580299377441
Model: Second model - Epoch: 40 - Loss: 1.6741023063659668
Model: Third model - Epoch: 40 - Loss: 1.1346585750579834
Model: First model - Epoch: 50 - Loss: 2.2572851181030273
Model: Second model - Epoch: 50 - Loss: 1.535702109336853
Model: Third model - Ep

In [17]:
# Run the models on the test set.
# Each model scores its whole test tensor in one batched forward pass; the
# predicted class of a row is the index of its largest output value.
with torch.no_grad():
    first_model_preds = first_model(X1_test).argmax(dim = 1).tolist()
    second_model_preds = second_model(X2_test).argmax(dim = 1).tolist()
    third_model_preds = third_model(X3_test).argmax(dim = 1).tolist()

In [18]:
# Build one evaluation dataframe per model
def _build_evaluation_frame(y_true, y_pred):
    """Return a dataframe comparing true and predicted class indices.

    Args:
        y_true: true class indices (tensor or array-like), one per test row.
        y_pred: predicted class indices, same length as y_true.

    Returns:
        DataFrame with columns 'Y' (true), 'YHat' (predicted) and
        'Correct' (1 where they match, 0 otherwise).
    """
    # Convert explicitly to a NumPy array so the 'Y' column gets a numeric
    # dtype regardless of how pandas treats tensor inputs.
    frame = pd.DataFrame({'Y': np.asarray(y_true), 'YHat': y_pred})
    frame['Correct'] = (frame['Y'] == frame['YHat']).astype(int)
    return frame


df_first_model_preds = _build_evaluation_frame(y1_test, first_model_preds)
df_second_model_preds = _build_evaluation_frame(y2_test, second_model_preds)
df_third_model_preds = _build_evaluation_frame(y3_test, third_model_preds)

In [19]:
# Accuracy of each model: number of correct predictions divided by the number of test rows
first_model_accuracy, second_model_accuracy, third_model_accuracy = (
    frame.Correct.sum() / len(frame)
    for frame in (df_first_model_preds, df_second_model_preds, df_third_model_preds)
)

print(f'Model: First model - Accuracy: {first_model_accuracy}')
print(f'Model: Second model - Accuracy: {second_model_accuracy}')
print(f'Model: Third model - Accuracy: {third_model_accuracy}')

Model: First model - Accuracy: 0.6641104294478528
Model: Second model - Accuracy: 0.6789366053169734
Model: Third model - Accuracy: 0.6625766871165644


In [21]:
# Load the hidden dataset from a semicolon-separated CSV that uses '.' as the decimal mark
df_hidden = pd.read_csv(
    'hidden.csv',
    sep=';',
    decimal='.',
)

In [23]:
# Print the hidden dataframe summary (columns, non-null counts and dtypes)
df_hidden.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2642 entries, 0 to 2641
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   DEPTH_MD            2642 non-null   float64
 1   X_LOC               2642 non-null   float64
 2   Y_LOC               2642 non-null   float64
 3   Z_LOC               2642 non-null   float64
 4   CALI                2642 non-null   float64
 5   RSHA                2642 non-null   float64
 6   RMED                2642 non-null   float64
 7   RDEP                2642 non-null   float64
 8   RHOB                2642 non-null   float64
 9   GR                  2642 non-null   float64
 10  NPHI                2642 non-null   float64
 11  PEF                 2642 non-null   float64
 12  DTC                 2641 non-null   float64
 13  SP                  2641 non-null   float64
 14  BS                  2641 non-null   float64
 15  ROP                 2641 non-null   float64
 16  DCAL  

In [24]:
# Drop the GROUP_encoded and FORMATION_encoded columns from the hidden dataframe
hidden_features = df_hidden.drop(['GROUP_encoded', 'FORMATION_encoded'], axis = 1)

# df_hidden.info() reports fewer non-null values than rows for some columns
# (e.g. DTC, SP, BS, ROP). A NaN input makes every network output NaN, so fill
# missing values with the column median of the hidden set before predicting.
hidden_features = hidden_features.fillna(hidden_features.median())

X = hidden_features.values

In [25]:
# Convert the hidden features to a float32 tensor. torch.as_tensor also accepts
# an input that is already a tensor, so this cell can be re-run safely.
X = torch.as_tensor(X, dtype = torch.float32)

In [26]:
# Run the models on the hidden dataset.
# Each model scores all rows in one batched forward pass; the predicted class
# of a row is the index of its largest output value.
with torch.no_grad():
    first_model_preds = first_model(X).argmax(dim = 1).tolist()
    second_model_preds = second_model(X).argmax(dim = 1).tolist()
    third_model_preds = third_model(X).argmax(dim = 1).tolist()

In [27]:
# Wrap each model's predictions in a dataframe with a single 'lithology' column.
# NOTE: these names are rebound here; earlier they held the test-set evaluation frames.
df_first_model_preds = pd.DataFrame({'lithology': first_model_preds})
df_second_model_preds = pd.DataFrame({'lithology': second_model_preds})
df_third_model_preds = pd.DataFrame({'lithology': third_model_preds})

In [28]:
# Convert the class indices in the lithology column back to
# FORCE_2020_LITHOFACIES_LITHOLOGY codes
mapping = {
    0: 30000,
    1: 65030,
    2: 65000,
    3: 80000,
    4: 74000,
    5: 70000,
    6: 70032,
    7: 88000,
    8: 86000,
    9: 99000,
    10: 90000,
    11: 93000
}

for _preds_frame in (df_first_model_preds, df_second_model_preds, df_third_model_preds):
    _class_indices = _preds_frame['lithology']
    # The column is overwritten in place, so a second run of this cell would
    # see codes instead of indices. Skip frames that are already converted.
    if _class_indices.isin(list(mapping.values())).all():
        continue
    # Series.map turns unknown keys into NaN; keep the original behaviour of
    # failing loudly when an index has no code assigned.
    _unmapped_indices = set(_class_indices.unique()) - set(mapping)
    if _unmapped_indices:
        raise KeyError(f'Class indices without a lithology code: {_unmapped_indices}')
    _preds_frame['lithology'] = _class_indices.map(mapping)

In [29]:
# Write each model's predictions to its own CSV file, without the index column
exports = (
    (df_first_model_preds, 'cristopher_resultado_rede_neural_1.csv'),
    (df_second_model_preds, 'cristopher_resultado_rede_neural_2.csv'),
    (df_third_model_preds, 'cristopher_resultado_rede_neural_3.csv'),
)
for frame, filename in exports:
    frame.to_csv(filename, index=False)