In [None]:
# Feature matrices of each split -> float32 tensors.
X_train_nn = torch.from_numpy(X_train.to_numpy()).float()
X_val_nn = torch.from_numpy(X_val.to_numpy()).float()

# Targets -> float32 tensors with every size-1 dimension squeezed out.
y_train_nn = torch.from_numpy(y_train.to_numpy()).float().squeeze()
y_val_nn = torch.from_numpy(y_val.to_numpy()).float().squeeze()

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (A recorded run displayed: device(type='cuda'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Sanity check: (features, target) shapes of the training and validation splits.
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_val.shape} {y_val.shape}")

In [None]:
# Network / optimisation hyperparameters.
batch_size = 64          # NOTE(review): not referenced by the training cells visible here
num_classes = 1          # was assigned twice in this cell; one assignment is enough
input_size = X.shape[1]  # number of columns in X
learning_rate = 0.01
hidden_size_1 = 200
hidden_size_2 = 400
dropout_rate = 0.1

In [None]:
class Net(nn.Module):
    """Feed-forward network: two ReLU hidden layers, each followed by dropout,
    and a single sigmoid output unit.

    Args:
        input_size: number of input features.
        hidden_size_1: width of the first hidden layer.
        hidden_size_2: width of the second hidden layer.
        dropout_rate: dropout probability used after each hidden layer.
    """

    def __init__(self, input_size, hidden_size_1, hidden_size_2, dropout_rate):
        super().__init__()
        # Attribute names (including the "hiden" spelling) are kept unchanged
        # because they determine the state_dict keys.
        self.hiden_layer1 = nn.Linear(in_features=input_size, out_features=hidden_size_1)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.hiden_layer2 = nn.Linear(in_features=hidden_size_1, out_features=hidden_size_2)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.output = nn.Linear(in_features=hidden_size_2, out_features=1)

    def forward(self, x):
        """Map input features to sigmoid outputs (last dimension of size 1)."""
        first_hidden = self.dropout1(torch.relu(self.hiden_layer1(x)))
        second_hidden = self.dropout2(torch.relu(self.hiden_layer2(first_hidden)))
        return torch.sigmoid(self.output(second_hidden))

In [None]:
# Build the model from the configured hyperparameters, then the Adam optimiser
# over its parameters and the binary cross-entropy loss.
net = Net(
    input_size=input_size,
    hidden_size_1=hidden_size_1,
    hidden_size_2=hidden_size_2,
    dropout_rate=dropout_rate,
)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
criterion = nn.BCELoss()

In [None]:
# Pick the compute device, then move the data tensors, the loss and the model onto it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

X_train_nn = X_train_nn.to(device)
y_train_nn = y_train_nn.to(device)
X_val_nn = X_val_nn.to(device)
y_val_nn = y_val_nn.to(device)

criterion = criterion.to(device)
net = net.to(device)

In [None]:
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of samples whose thresholded prediction equals the label.

    Args:
        y_true: tensor of labels (compared element-wise with the 0/1 predictions).
        y_pred: tensor of scores; values >= 0.5 are counted as the positive class.

    Returns:
        0-dim float tensor holding the accuracy.
    """
    predicted = y_pred.ge(.5).reshape(-1)
    # Flatten the labels as well so an (n, 1) target cannot broadcast against (n,).
    labels = y_true.reshape(-1)
    # The original computed this value but never returned it.
    return (labels == predicted).sum().float() / len(labels)
    
def calculate_f1(y_true, y_pred):
    """Micro-averaged F1 score of scores thresholded at 0.5.

    Args:
        y_true: tensor of labels; cast to integers before scoring.
        y_pred: tensor of scores; values >= 0.5 are counted as the positive class.

    Returns:
        Whatever ``f1_score(..., average='micro')`` returns for the two label vectors.
    """
    # Detach, flatten and copy to host memory before handing the data to
    # scikit-learn: tensors that live on a GPU cannot be converted to NumPy
    # implicitly, and reshape(-1) keeps a one-sample batch 1-D (squeeze() would
    # collapse it to 0-d).
    predicted = y_pred.detach().ge(.5).reshape(-1).long().cpu().numpy()
    labels = y_true.detach().reshape(-1).long().cpu().numpy()
    # NOTE(review): with average='micro' on single-label data this score
    # coincides with plain accuracy - confirm that is the intended metric.
    return f1_score(labels, predicted, average='micro')

def calculate_precision(y_true, y_pred):
    """Precision of scores thresholded at 0.5.

    Args:
        y_true: tensor of labels; cast to integers before scoring.
        y_pred: tensor of scores; values >= 0.5 are counted as the positive class.

    Returns:
        Whatever ``precision_score`` returns for the two label vectors.
    """
    # Detach, flatten and copy to host memory before handing the data to
    # scikit-learn: tensors that live on a GPU cannot be converted to NumPy
    # implicitly, and reshape(-1) keeps a one-sample batch 1-D (squeeze() would
    # collapse it to 0-d).
    predicted = y_pred.detach().ge(.5).reshape(-1).long().cpu().numpy()
    labels = y_true.detach().reshape(-1).long().cpu().numpy()
    return precision_score(labels, predicted)

def round_tensor(t, decimal_places=3):
    """Round a scalar value to ``decimal_places`` decimals.

    Args:
        t: a one-element tensor, a NumPy scalar, or a plain Python number.
        decimal_places: number of decimals to keep (default 3).

    Returns:
        The rounded Python number.
    """
    # Anything exposing .item() (tensors, NumPy scalars) is unwrapped first;
    # plain Python numbers, which have no .item(), are rounded directly.
    value = t.item() if hasattr(t, "item") else t
    return round(value, decimal_places)

In [None]:
# Training / early-stopping settings
early_stop_epochs = 5  # consecutive validation checks (one every 10 epochs) without improvement before stopping
patience = 0
best_val_loss = float('inf')
num_epoch = 500

predictions = []

for epoch in range(num_epoch):
    # The validation check below switches the model to eval mode, so make sure
    # dropout is active again for every training forward pass.
    net.train()
    y_pred = torch.squeeze(net(X_train_nn))
    train_loss = criterion(y_pred, y_train_nn)
    if epoch % 10 == 0:
        # The metric helper goes through scikit-learn, so give it detached host-side tensors.
        train_score = calculate_f1(y_train_nn.cpu(), y_pred.detach().cpu())
        # Score the validation split with dropout disabled and without building
        # an autograd graph; keep the loss as a plain float so no graph is
        # retained through best_val_loss.
        net.eval()
        with torch.no_grad():
            y_val_pred = torch.squeeze(net(X_val_nn))
            val_loss = criterion(y_val_pred, y_val_nn).item()
        val_score = calculate_f1(y_val_nn.cpu(), y_val_pred.cpu())
        print(f'epoch {epoch} - train loss: {round(train_loss.item(), 3)}, train f1 : {round(float(train_score), 3)} val loss: {round(val_loss, 3)}, val f1: {round(float(val_score), 3)}')
        # Early stopping on the validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = 0
        else:
            patience += 1
            if patience >= early_stop_epochs:
                print('Early Stopping')
                break
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # y_val_pred is refreshed only on validation epochs, so between checks the
    # stored validation predictions are those of the latest check.
    predictions.append({'epoch': epoch, 'y_pred': y_pred.detach().cpu().numpy(), 'y_val_pred': y_val_pred.detach().cpu().numpy()})

In [None]:
# Score the full feature matrix with the trained network.
# The input is placed on whatever device the model's parameters live on.
model_device = next(net.parameters()).device
X_nn = torch.from_numpy(X.to_numpy()).float().to(model_device)

net.eval()  # disable dropout so the predicted probabilities are deterministic
with torch.no_grad():
    previsão = net(X_nn).cpu().numpy()
# Flatten the (n, 1) network output to one probability per row before storing it.
previsão = previsão.round(5).reshape(-1)
results['nn_prob'] = previsão
results['nn_pred'] = (results['nn_prob'] >= 0.5).astype('int64')
CM(data.is_canceled, results.nn_pred)

In [None]:
def objective_ann(trial):
    """Optuna objective: train a ``Net`` with sampled hyperparameters.

    Args:
        trial: Optuna trial used to sample the learning rate, both hidden
            layer sizes and the dropout rate.

    Returns:
        float: the F1 value (as computed by ``calculate_f1``) from the most
        recent validation check of the training run.
    """
    learning_rate = trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True)
    hidden_size_1 = trial.suggest_int('hidden_size_1', 100, 400, step=50)
    hidden_size_2 = trial.suggest_int('hidden_size_2', 100, 400, step=50)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.4, step=0.1)

    # Train on the tensor versions of the splits (the pandas objects cannot be
    # fed to the network or the loss) and keep the model on the same device.
    device = X_train_nn.device
    net = Net(
        input_size=X_train_nn.shape[1],
        hidden_size_1=hidden_size_1,
        hidden_size_2=hidden_size_2,
        dropout_rate=dropout_rate,
    ).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # NOTE(review): patience is counted in validation checks (one every 10
    # epochs). With at most 30 checks in 300 epochs, a threshold of 30 looks
    # like it can never trigger - confirm the intended value.
    early_stop_epochs = 30
    patience = 0
    best_val_loss = float('inf')
    num_epoch = 300
    val_f1 = 0.0

    for epoch in range(num_epoch):
        # Validation switches to eval mode; re-enable dropout for training.
        net.train()
        y_pred = torch.squeeze(net(X_train_nn))
        train_loss = criterion(y_pred, y_train_nn)
        if epoch % 10 == 0:
            # Evaluate with dropout disabled and without tracking gradients.
            net.eval()
            with torch.no_grad():
                y_val_pred = torch.squeeze(net(X_val_nn))
                val_loss = criterion(y_val_pred, y_val_nn).item()
            val_f1 = float(calculate_f1(y_val_nn.cpu(), y_val_pred.cpu()))
            # Check whether the validation loss improved
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience = 0
            else:
                patience += 1
                if patience >= early_stop_epochs:
                    break
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return val_f1

# Search the hyperparameter space, maximising the value returned by the objective.
study = optuna.create_study(direction='maximize')
study.optimize(func=objective_ann, n_trials=100)

# Keep a handle on the best trial found by the search.
trial = study.best_trial

In [None]:
# Report the best objective value and the hyperparameters that produced it.
nn_best_params, nn_best_score = study.best_params, study.best_value
print(f"Best score: {nn_best_score}\n")
print(f"Optimized parameters: {nn_best_params}\n")

In [None]:
# Folder that receives the tuned hyperparameters. exist_ok=True keeps
# makedirs from raising an error when the folder already exists.
params_dir = os.path.join(ROOT_DIR, "config", "params")
os.makedirs(params_dir, exist_ok=True)

# Timestamped file name so successive exports do not share a name.
now = dt.now().strftime('%Y%m%d%H%M%S')
filename_xgb = f'xgb_best_params_{now}_v1.0.json'
params_path = os.path.join(params_dir, filename_xgb)

# NOTE(review): this cell follows the neural-network search but writes
# xgb_best_params - confirm this is the object meant to be saved here.
with open(params_path, 'w') as f:
    f.write(json.dumps(xgb_best_params))

    
# for dirname, _, filenames in os.walk(f'{ROOT_DIR}\config\params'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))    

In [None]:
# Show ten random rows comparing the label with each model's probability and prediction.
results[[
    'is_canceled',
    'lgbm_prob', 'lgbm_pred',
    'xgb_prob', 'xgb_pred',
    'nn_prob', 'nn_pred',
]].sample(10)

In [None]:
# import optuna
# import logging

# logger = logging.getLogger()

# logger.setLevel(logging.INFO)  # Setup the root logger.
# logger.addHandler(logging.FileHandler("foo.log", mode="w"))

# optuna.logging.enable_propagation()  # Propagate logs to the root logger.
# optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

# study = optuna.create_study()

# logger.info("Start optimization.")
# study.optimize(objective, n_trials=10)

# with open("foo.log") as f:
#     assert f.readline().startswith("A new study created")
#     assert f.readline() == "Start optimization.\n"

In [None]:
# train = pd.concat([X,y], axis=1)
# train.loc[train.is_canceled ==1].tail()