## MODELAGEM COM REDES NEURAIS

### IMPORTAÇÃO, DIVISÃO E PRÉ-PROCESSAMENTO DOS DADOS

In [1]:
import pandas as pd
import numpy as np

# NOTE(review): absolute, machine-specific location of the workbook; the
# notebook only runs as-is where this file exists.
path = r"C:\Users\Graziela Santana\Desktop\TCC\Solubilidade\TCC_Solubilidade.xlsx"

# Read the 'Dados' worksheet into a DataFrame.
df = pd.read_excel(io=path, sheet_name='Dados')

# Preview the first five rows.
df.head(5)

Unnamed: 0,Temp,Press,Mol,CO2
0,323.2,5.0,0.0,1.473
1,323.2,10.0,0.0,2.137
2,323.2,15.0,0.0,2.206
3,323.2,20.0,0.0,2.285
4,323.2,30.0,0.0,2.492


In [2]:
# Build a column that classifies each operating range against the range
# treated as ideal for CCS projects.
def ccs(df):
    """Return 0 when a row lies inside the CCS operating window, else 1.

    The window is 'Temp' strictly below 373 together with 'Press' between
    20 and 40, both bounds included.

    Parameters
    ----------
    df : mapping-like
        A single row (it is used with ``DataFrame.apply(..., axis=1)``)
        exposing the keys 'Temp' and 'Press'.

    Returns
    -------
    int
        0 for rows inside the window, 1 for every other row.
    """
    # 'Press' is only looked up once the temperature condition holds.
    if df['Temp'] < 373:
        pressao = df['Press']
        if 20 <= pressao <= 40:
            return 0
    return 1

# Label every row with its operating-range class (0 or 1) ...
df['ccs'] = df.apply(ccs, axis='columns')

# ... and display how many rows fall in each class.
df['ccs'].value_counts()

1    860
0    118
Name: ccs, dtype: int64

In [3]:
# divisão dos dados em treino e teste
from sklearn.model_selection import train_test_split

# Select columns by name instead of by position, so the split does not
# silently pick different columns if the frame gains or reorders columns.
previsores = df[['Temp', 'Press', 'Mol']]  # predictor columns
alvo = df['CO2']                           # regression target
strat = df['ccs']                          # class label used only for stratification

# 80/20 split with a fixed seed; stratifying on 'ccs' keeps the class
# proportions the same in the training and test partitions.
x_treino, x_teste, y_treino, y_teste = train_test_split(
    previsores,
    alvo,
    test_size=0.2,
    random_state=0,
    stratify=strat,
)

In [4]:
# Independent copy of the test predictors with the test target attached as
# the 'CO2' column (values are aligned on the row index).
teste = x_teste.assign(CO2=y_teste)

def ccs(teste):
    """Classify one row as inside (0) or outside (1) the CCS operating window.

    A row is inside the window when 'Temp' is strictly below 373 and 'Press'
    lies between 20 and 40, both bounds included.

    Parameters
    ----------
    teste : mapping-like
        A single row exposing the keys 'Temp' and 'Press'.

    Returns
    -------
    int
        0 when the row is inside the window, 1 otherwise.
    """
    # `and` short-circuits: 'Press' is not read when the temperature test fails.
    dentro_da_faixa = teste['Temp'] < 373 and 20 <= teste['Press'] <= 40
    return 0 if dentro_da_faixa else 1

# Recompute the operating-range class for the test rows ...
teste['ccs'] = teste.apply(ccs, axis='columns')

# ... and display the class balance inside the test partition.
teste['ccs'].value_counts()

1    172
0     24
Name: ccs, dtype: int64

In [5]:
# Test rows inside the CCS operating window (class 0), with the two ratio
# features added. `.assign` returns a new DataFrame, so the columns are not
# written onto the result of `query` and pandas no longer emits
# SettingWithCopyWarning here.
ccs_dados = teste.query('ccs != 1').assign(
    mp=lambda d: d['Mol'] / d['Press'],
    mt=lambda d: d['Mol'] / d['Temp'],
)

# Predictors (everything except the target and the class label) and target
# for the CCS-only evaluation.
x_ccs = ccs_dados.drop(columns=['CO2', 'ccs'])
y_ccs = ccs_dados['CO2']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ccs_dados['mp'] = ccs_dados['Mol']/ccs_dados['Press']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ccs_dados['mt'] = ccs_dados['Mol']/ccs_dados['Temp']


In [6]:
# Add the two ratio features, 'mp' (Mol/Press) and 'mt' (Mol/Temp), to the
# training and test predictors; each frame is modified in place.
for particao in (x_treino, x_teste):
    particao['mp'] = particao['Mol'] / particao['Press']
    particao['mt'] = particao['Mol'] / particao['Temp']

In [7]:
# Convert every split to a plain NumPy array. `np.asarray` accepts both pandas
# objects and arrays, so re-running this cell is harmless; the previous
# `.values` access failed on a second run because ndarrays have no `.values`.
x_treino = np.asarray(x_treino)
x_teste = np.asarray(x_teste)
y_teste = np.asarray(y_teste)
y_treino = np.asarray(y_treino)
x_ccs = np.asarray(x_ccs)
y_ccs = np.asarray(y_ccs)

In [8]:
# pré-processamento dos dados
from sklearn.preprocessing import MinMaxScaler

# Learn the min-max scaling from the training predictors only ...
scaler = MinMaxScaler().fit(x_treino)

# ... then apply that same fitted scaling to the training, test and CCS-subset
# predictors.
x_treino, x_teste, x_ccs = (
    scaler.transform(matriz) for matriz in (x_treino, x_teste, x_ccs)
)

### MODELAGEM

In [None]:
import optuna
from tensorflow import keras
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

def model(trial):
    """Build and compile a dense regression network from an Optuna trial.

    Hyperparameters requested from ``trial`` (in this order):

    * ``n_layers``: number of hidden layers, integer in [1, 8];
    * ``n_units_l{i}``: units of hidden layer ``i``, integer in [8, 512],
      suggested with ``log=True``;
    * ``activation_l{i}``: one of 'relu', 'tanh', 'sigmoid';
    * ``optimizer``: one of 'adam', 'sgd', 'rmsprop';
    * ``learning_rate``: float in [1e-5, 0.1], suggested with ``log=True``.

    Parameters
    ----------
    trial : optuna trial
        Object providing ``suggest_int``, ``suggest_categorical`` and
        ``suggest_float``.

    Returns
    -------
    keras.Sequential
        Model ending in a single linear unit, compiled with mean squared
        error as the loss.
    """
    n_layers = trial.suggest_int('n_layers', 1, 8)

    camadas = []
    for i in range(n_layers):
        num_units = trial.suggest_int(f'n_units_l{i}', 8, 512, log=True)
        activation = trial.suggest_categorical(f'activation_l{i}', ['relu', 'tanh', 'sigmoid'])
        camadas.append(keras.layers.Dense(num_units, activation=activation))

    # Single linear output unit for the regression target.
    camadas.append(keras.layers.Dense(1, activation='linear'))
    modelo = keras.Sequential(camadas)

    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'sgd', 'rmsprop'])
    lr = trial.suggest_float('learning_rate', 1e-5, 0.1, log=True)

    # Any name other than 'adam' or 'sgd' falls back to RMSprop.
    classes_conhecidas = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
    }
    optimizer_cls = classes_conhecidas.get(optimizer_name, keras.optimizers.RMSprop)

    modelo.compile(optimizer=optimizer_cls(learning_rate=lr), loss='mean_squared_error')
    return modelo

def objective(trial):
    """Optuna objective: mean 10-fold cross-validated RMSE on the training set.

    For each fold a fresh network is built with ``model(trial)``, trained on
    the fold's training part of the module-level ``x_treino`` / ``y_treino``
    arrays and scored on the held-out part.

    Hyperparameters requested here, in addition to those requested by
    ``model``:

    * ``epochs``: integer in [10, 100];
    * ``batch_size``: integer in [16, 128].

    Parameters
    ----------
    trial : optuna trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Average RMSE over the 10 validation folds (lower is better).
    """
    # Training settings are constant for the whole trial, so request them once
    # instead of once per fold.
    epochs = trial.suggest_int('epochs', 10, 100)
    batch_size = trial.suggest_int('batch_size', 16, 128)

    kf = KFold(n_splits=10, shuffle=True, random_state=10)
    rmses = []

    for train_idx, val_idx in kf.split(x_treino):
        x_train, x_val = x_treino[train_idx], x_treino[val_idx]
        y_train, y_val = y_treino[train_idx], y_treino[val_idx]

        # Many models are created over a study (one per fold per trial);
        # reset Keras' global state so it does not accumulate between them.
        keras.backend.clear_session()

        modelo = model(trial)
        modelo.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

        # verbose=0 keeps the per-fold progress bars out of the cell output.
        y_pred = modelo.predict(x_val, verbose=0)

        rmses.append(np.sqrt(mean_squared_error(y_val, y_pred)))

    return np.mean(rmses)

# Search for the hyperparameters that minimise the cross-validated RMSE.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=100)

# Report the best score found and the hyperparameters that produced it.
for rotulo, valor in (
    ('Valor mínimo médio do RMSE:', study.best_value),
    ('Melhores hiperparâmetros:', study.best_params),
):
    print(rotulo, valor)

Valor mínimo médio do RMSE: 0.4540925735632981
Melhores hiperparâmetros: {'n_layers': 5, 'n_units_l0': 264, 'activation_l0': 'relu', 'n_units_l1': 116, 'activation_l1': 'relu', 'n_units_l2': 226, 'activation_l2': 'tanh', 'n_units_l3': 10, 'activation_l3': 'relu', 'n_units_l4': 121, 'activation_l4': 'tanh', 'optimizer': 'adam', 'learning_rate': 0.001360925706792783, 'epochs': 91, 'batch_size': 74}

In [9]:
# Best hyperparameters copied from the Optuna search output, so the final
# model can be rebuilt without re-running the search.
study1 = {
    'n_layers': 5,
    'n_units_l0': 264,
    'activation_l0': 'relu',
    'n_units_l1': 116,
    'activation_l1': 'relu',
    'n_units_l2': 226,
    'activation_l2': 'tanh',
    'n_units_l3': 10,
    'activation_l3': 'relu',
    'n_units_l4': 121,
    'activation_l4': 'tanh',
    'optimizer': 'adam',
    'learning_rate': 0.001360925706792783,
    'epochs': 91,
    'batch_size': 74,
}

### AVALIAÇÃO DO MODELO

In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from tensorflow import keras

def build_model(params):
    """Rebuild and compile the dense network described by ``params``.

    Parameters
    ----------
    params : dict
        Hyperparameters with the keys ``n_layers``, ``n_units_l{i}`` and
        ``activation_l{i}`` for every hidden layer ``i``, ``optimizer``
        ('adam', 'sgd', anything else selects RMSprop) and ``learning_rate``.

    Returns
    -------
    keras.Sequential
        Model ending in a single linear unit, compiled with mean squared
        error as the loss.
    """
    camadas_ocultas = [
        keras.layers.Dense(params[f'n_units_l{i}'], activation=params[f'activation_l{i}'])
        for i in range(params['n_layers'])
    ]
    # Single linear output unit for the regression target.
    saida = keras.layers.Dense(1, activation='linear')
    modelo_ofci = keras.Sequential(camadas_ocultas + [saida])

    # Any optimizer name other than 'adam' or 'sgd' falls back to RMSprop.
    classes_conhecidas = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
    }
    optimizer_cls = classes_conhecidas.get(params['optimizer'], keras.optimizers.RMSprop)
    optimizer = optimizer_cls(learning_rate=params['learning_rate'])

    modelo_ofci.compile(optimizer=optimizer, loss='mean_squared_error')
    return modelo_ofci

# Rebuild the network from the stored best hyperparameters.
best_params = study1
modelo_ofc = build_model(best_params)

# Train on the full training split with the tuned epochs and batch size.
modelo_ofc.fit(
    x_treino,
    y_treino,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
)

# Score the trained model on the held-out test split.
test_predictions = modelo_ofc.predict(x_teste)
test_rmse = np.sqrt(mean_squared_error(y_teste, test_predictions))

for rotulo, valor in (
    ('MAE:', mean_absolute_error(y_teste, test_predictions)),
    ('RMSE:', test_rmse),
    ('MAPE:', mean_absolute_percentage_error(y_teste, test_predictions)),
):
    print(rotulo, valor)

# Metric values kept from a recorded run:
# NOTE(review): the MAPE is enormous next to MAE/RMSE - presumably some test
# targets are zero or very close to it; confirm before reporting MAPE.
#MAE: 0.2408618890845769
#RMSE: 0.40449226198808363
#MAPE: 633312641030.9989

Epoch 1/91
Epoch 2/91
Epoch 3/91
Epoch 4/91
Epoch 5/91
Epoch 6/91
Epoch 7/91
Epoch 8/91
Epoch 9/91
Epoch 10/91
Epoch 11/91
Epoch 12/91
Epoch 13/91
Epoch 14/91
Epoch 15/91
Epoch 16/91
Epoch 17/91
Epoch 18/91
Epoch 19/91
Epoch 20/91
Epoch 21/91
Epoch 22/91
Epoch 23/91
Epoch 24/91
Epoch 25/91
Epoch 26/91
Epoch 27/91
Epoch 28/91
Epoch 29/91
Epoch 30/91
Epoch 31/91
Epoch 32/91
Epoch 33/91
Epoch 34/91
Epoch 35/91
Epoch 36/91
Epoch 37/91
Epoch 38/91
Epoch 39/91
Epoch 40/91
Epoch 41/91
Epoch 42/91
Epoch 43/91
Epoch 44/91
Epoch 45/91
Epoch 46/91
Epoch 47/91
Epoch 48/91
Epoch 49/91
Epoch 50/91
Epoch 51/91
Epoch 52/91
Epoch 53/91
Epoch 54/91
Epoch 55/91
Epoch 56/91
Epoch 57/91
Epoch 58/91
Epoch 59/91
Epoch 60/91
Epoch 61/91
Epoch 62/91
Epoch 63/91
Epoch 64/91
Epoch 65/91
Epoch 66/91
Epoch 67/91
Epoch 68/91
Epoch 69/91
Epoch 70/91
Epoch 71/91
Epoch 72/91
Epoch 73/91
Epoch 74/91
Epoch 75/91
Epoch 76/91
Epoch 77/91
Epoch 78/91
Epoch 79/91
Epoch 80/91
Epoch 81/91
Epoch 82/91
Epoch 83/91
Epoch 84/91
E

In [11]:
# Predict on the scaled CCS-window subset of the test split.
previsoes_ccs = modelo_ofc.predict(x_ccs)

# Print MAE, RMSE and MAPE for that subset, in this order.
for metrica in (
    mean_absolute_error(y_ccs, previsoes_ccs),
    np.sqrt(mean_squared_error(y_ccs, previsoes_ccs)),
    mean_absolute_percentage_error(y_ccs, previsoes_ccs),
):
    print(metrica)

# Values kept from a recorded run (MAE, RMSE, MAPE):
#0.18447341677973003
#0.36838611144576117
#10.10852254400089

0.18447341677973003
0.36838611144576117
10.10852254400089


In [None]:
# Re-evaluate the trained model on the test rows inside the CCS operating
# window (class 0).
ccs_dados = teste.query('ccs != 1')

# `modelo_ofc` was trained on five min-max-scaled predictors
# (Temp, Press, Mol, mp, mt). Rebuild exactly that input here: add the two
# ratio features, fix the column order, and apply the scaler fitted on the
# training data. Feeding the raw three-column frame does not match the
# model's training input.
x_ccs = ccs_dados.drop(columns=['CO2', 'ccs']).assign(
    mp=lambda d: d['Mol'] / d['Press'],
    mt=lambda d: d['Mol'] / d['Temp'],
)
x_ccs = scaler.transform(x_ccs[['Temp', 'Press', 'Mol', 'mp', 'mt']].values)
y_ccs = ccs_dados['CO2'].values

previsoes_ccs = modelo_ofc.predict(x_ccs)

# MAE, RMSE and MAPE on the CCS subset, in this order.
print(mean_absolute_error(y_ccs, previsoes_ccs))
print(np.sqrt(mean_squared_error(y_ccs, previsoes_ccs)))
print(mean_absolute_percentage_error(y_ccs, previsoes_ccs))

# NOTE(review): the values below were recorded before the preprocessing above
# was applied in this cell (presumably from an earlier version of the model);
# re-run the cell to refresh them.
#1.4645316188795425
#1.6134244245417122
#72.43227522699698