In [60]:
import pandas as pd

train_df = pd.read_csv('../data/train_fp.csv', index_col='Unnamed: 0')
print("Train DF shape: {}".format(train_df.shape))


Train DF shape: (514, 4097)


### Test FFNN

In [61]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
import random


In [62]:
X_train, X_test, y_train, y_test = train_test_split(train_df.iloc[:,:-1],
                                                    train_df.label.values,
                                                    test_size=0.2)


In [63]:
# Create dataset class
class BinaryMoleculeDataset(Dataset):
    
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X.astype(bool).astype(float))
        self.y = torch.FloatTensor(y).reshape(-1, 1)

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]


In [64]:

# Create FFNN for binary input
class BinaryFFNN(nn.Module):

    def __init__(self,
                 input_size,
                 hidden_sizes=[2048, 1024, 512, 256],
                 dropout_rate=0.2):
        super(BinaryFFNN, self).__init__()

        layers = []

        # Input layer
        layers.append(nn.Linear(input_size, hidden_sizes[0]))
        layers.append(nn.ReLU())
        layers.append(nn.BatchNorm1d(hidden_sizes[0]))
        layers.append(nn.Dropout(dropout_rate))

        # Hidden layers
        for i in range(len(hidden_sizes)-1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(hidden_sizes[i+1]))
            layers.append(nn.Dropout(dropout_rate))
        
        # Output layer
        layers.append(nn.Linear(hidden_sizes[-1], 1))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)


In [65]:
# Create training function with early stopping and learning rate

def train_binary_model(model,
                       train_loader,
                       val_loader,
                       criterion,
                       optimizer,
                       num_epochs=100,
                       patience=20,
                       device='cuda'):
    
    model = model.to(device)
    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    # Add learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.5,
        patience=5
    )

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        
        for X_batch, y_batch in train_loader:
            
            X_batch, y_batch= X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            ouputs = model(X_batch)
            loss = criterion(ouputs, y_batch)
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                          max_norm=1)
            
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_losses.append(train_loss)
            
        # Validation phase
        model.eval()
        val_loss= 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        # Learning rate scheduling
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # torch.save(model.state_dict(),
            #            'best_binary_model.pt')
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
        #     print(f'Early stopping at epoch {epoch}')
            break

        # if epoch % 10 == 0:
        #     print(f'Epoch {epoch}: Train Loss = {train_loss:.4f}, Val loss = {val_loss:.4f}')

    return train_losses, val_losses

In [66]:
def main(X,
         y,
         hidden_sizes=[2048, 1024, 512],
         dropout_rate=0.2,
         patience=20,
         lr=0.001,
         test_size=0.2,
         batch_size=32):
    
    X = X.astype(bool).astype(float)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    # Create dataloaders
    train_dataset = BinaryMoleculeDataset(X_train, y_train)
    test_dataset = BinaryMoleculeDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size)
    
    # Initialize model
    input_size = X_train.shape[1]
    model = BinaryFFNN(input_size,
                       hidden_sizes,
                       dropout_rate)

    # Initialize loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=1e-5)
    
    # Train model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_losses, val_losses = train_binary_model(model,
                                                  train_loader,
                                                  test_loader,
                                                  criterion,
                                                  optimizer,
                                                  patience=patience,
                                                  device=device)
    return model, train_losses, val_losses

In [67]:
def evaluate_model(model,
                   X_test,
                   y_test,
                   batch_size=32,
                   device='cuda',
                   print=False):
    # Evaluate model and print metrics
    model.eval()
    test_dataset = BinaryMoleculeDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    predictions = []
    actuals = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            predictions.extend(outputs.cpu().numpy())
            actuals.extend(y_batch.numpy())

    predictions = np.array(predictions)
    actuals = np.array(actuals)

    mse = np.mean((predictions - actuals) ** 2)
    rmse = root_mean_squared_error(actuals, predictions)
    mae = np.mean(np.abs(predictions - actuals))
    r2 = 1 - np.sum((actuals - predictions) ** 2) / np.sum((actuals- np.mean(actuals)) ** 2)

    if print:
        print("Test results:")
        print(f"MSE: {mse:.3f}")
        print(f"R²: {r2:.3f}")

    return rmse

In [68]:
#Set seeds
def set_seeds(seed=42):
    """Set all random seeds for reproducibility"""
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # for multi-GPU
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Create train_eval function for hyperparameter optimization
def train_evaluate(parametrization):
    set_seeds()
    fnn_model, _, _ = main(X_train.to_numpy(),
                           y_train,
                           **parametrization)
    return evaluate_model(fnn_model, X_test.to_numpy(), y_test)

In [69]:
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.service.utils.report_utils import exp_to_df
from ax.utils.notebook.plotting import init_notebook_plotting, render

In [70]:
ax_client = AxClient()

[INFO 11-01 18:22:36] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.


In [71]:
# hidden_sizes=[2048, 1024, 512],
# dropout_rate=0.2,
# patience = 20
# lr = 0.001

# Create an experiment with required arguments: name, parameters, and objective_name.
ax_client.create_experiment(
    name="ffnn_hyperparameter_search",  # The name of the experiment.
    parameters=[
        {
            "name": "lr",  # The name of the parameter.
            "type": "range",  # The type of the parameter ("range", "choice" or "fixed").
            "bounds": [1e-5, 1],  # The bounds for range parameters. 
            # "values" The possible values for choice parameters .
            # "value" The fixed value for fixed parameters.
            "value_type": "float",  # Optional, the value type ("int", "float", "bool" or "str"). Defaults to inference from type of "bounds".
            "log_scale": True,  # Optional, whether to use a log scale for range parameters. Defaults to False.
            # "is_ordered" Optional, a flag for choice parameters.
        },
        {
            "name": "patience",  
            "type": "range",  
            "bounds": [5, 20],
            "value_type": "int" 
        },
        {
            "name": "dropout_rate",
            "type": "range",
            "bounds": [1e-2, 0.5],
            "value_type": "float",
        },
    ],
    objectives={"rmse": ObjectiveProperties(minimize=True)},  # The objective name and minimization setting.
    # parameter_constraints: Optional, a list of strings of form "p1 >= p2" or "p1 + p2 <= some_bound".
    # outcome_constraints: Optional, a list of strings of form "constrained_metric <= some_bound".
    overwrite_existing_experiment=True,
)

[INFO 11-01 18:22:36] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[RangeParameter(name='lr', parameter_type=FLOAT, range=[1e-05, 1.0], log_scale=True), RangeParameter(name='patience', parameter_type=INT, range=[5, 20]), RangeParameter(name='dropout_rate', parameter_type=FLOAT, range=[0.01, 0.5])], parameter_constraints=[]).
[INFO 11-01 18:22:36] ax.modelbridge.dispatch_utils: Using Models.BOTORCH_MODULAR since there is at least one ordered parameter and there are no unordered categorical parameters.
[INFO 11-01 18:22:36] ax.modelbridge.dispatch_utils: Calculating the number of remaining initialization trials based on num_initialization_trials=None max_initialization_trials=None num_tunable_parameters=3 num_trials=None use_batch_trials=False
[INFO 11-01 18:22:36] ax.modelbridge.dispatch_utils: calculated num_initialization_trials=6
[INFO 11-01 18:22:36] ax.modelbridge.dispatch_utils: num_completed_initialization_trials=0 num_remaining_initialization_tria

In [72]:
ax_client.attach_trial(
    parameters={"lr":0.01,
                "patience": 10,
                "dropout_rate": 0.2,
    }
)

[INFO 11-01 18:22:36] ax.core.experiment: Attached custom parameterizations [{'lr': 0.01, 'patience': 10, 'dropout_rate': 0.2}] as trial 0.


({'lr': 0.01, 'patience': 10, 'dropout_rate': 0.2}, 0)

In [73]:
baseline_parameters = ax_client.get_trial_parameters(trial_index=0)
ax_client.complete_trial(trial_index=0, raw_data=train_evaluate(baseline_parameters))

[INFO 11-01 18:22:38] ax.service.ax_client: Completed trial 0 with data: {'rmse': (1.994148, None)}.


In [74]:
for i in range(25):
    parameters, trial_index = ax_client.get_next_trial()
    # Local evaluation here can be replaced with deployment to external system.
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters))


Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 18:22:38] ax.service.ax_client: Generated new trial 1 with parameters {'lr': 0.125509, 'patience': 17, 'dropout_rate': 0.294307} using model Sobol.


[INFO 11-01 18:22:41] ax.service.ax_client: Completed trial 1 with data: {'rmse': (2.192219, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 18:22:41] ax.service.ax_client: Generated new trial 2 with parameters {'lr': 0.002835, 'patience': 6, 'dropout_rate': 0.023438} using model Sobol.
[INFO 11-01 18:22:43] ax.service.ax_client: Completed trial 2 with data: {'rmse': (2.269725, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 18:22:43] ax.service.ax_client: Generated new trial 3 with parameters {'lr': 1.8e-05, 'patience': 16, 'dropout_rate': 0.469583} using model Sobol.
[INFO 11-01 18:22:46] ax.service.ax_client: Completed trial 3 with data: {'rmse': (4.541746, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 18:22:46] ax.service.ax_client: Generated new trial 4 with

In [75]:
ax_client.get_trials_data_frame()



Unnamed: 0,trial_index,arm_name,trial_status,generation_method,rmse,lr,patience,dropout_rate
0,0,0_0,COMPLETED,Manual,1.994148,0.01,10,0.2
1,1,1_0,COMPLETED,Sobol,2.192219,0.125509,17,0.294307
2,2,2_0,COMPLETED,Sobol,2.269725,0.002835,6,0.023438
3,3,3_0,COMPLETED,Sobol,4.541746,1.8e-05,16,0.469583
4,4,4_0,COMPLETED,Sobol,1.996731,0.01401,11,0.200625
5,5,5_0,COMPLETED,Sobol,2.022304,0.004602,14,0.119491
6,6,6_0,COMPLETED,Sobol,4.273731,5.1e-05,9,0.33677
7,7,7_0,COMPLETED,BoTorch,2.15148,0.017091,20,0.01
8,8,8_0,COMPLETED,BoTorch,2.085775,0.012174,20,0.5
9,9,9_0,COMPLETED,BoTorch,2.260286,0.007774,15,0.293744


In [76]:
best_parameters, values = ax_client.get_best_parameters()
best_parameters

{'lr': 1.0, 'patience': 20, 'dropout_rate': 0.07219747003552322}

In [77]:
mean, covariance = values
mean

{'rmse': 1.8268846256346438}

In [78]:
render(ax_client.get_feature_importances())

In [82]:
render(ax_client.get_contour_plot(param_x="patience", param_y="lr", metric_name="rmse"))

[INFO 11-01 18:26:23] ax.service.ax_client: Retrieving contour plot with parameter 'patience' on X-axis and 'lr' on Y-axis, for metric 'rmse'. Remaining parameters are affixed to the middle of their range.


In [80]:
render(
    ax_client.get_optimization_trace()
)  

In [81]:
ax_client.get_trials_data_frame().to_csv("../data/optimization_results/FFNN_optimization.csv")

