### Data preparation

In [None]:
from rdkit import Chem
# Open the raw dataset with RDKit and display how many entries the supplier exposes.
# NOTE(review): SDMolSupplier is RDKit's reader for SD (.sdf) files, but it is
# given a .csv file here, and the directory ('../data_files/') differs from the
# '../data_pipeline/data_files/' used by the other cells — confirm the intended
# input file and path.
mols = Chem.SDMolSupplier('../data_files/Final_Dataset.csv')
len(mols)

In [None]:
from FFiNet.data_pipeline.data_generating import DataGenerating

# Configure the data pipeline: the working folder, the raw CSV to read, and the
# file names for (presumably) the cached feature dictionary and the final
# processed dataset — confirm exact semantics in DataGenerating.
data_generate = DataGenerating(
    folder='../data_pipeline/data_files/',
    raw_dataset_name='Final_Dataset.csv',
    feature_dict_name='features_dict_Final_Dataset.npy',
    dataset_name='Final_Dataset.pt',
)

In [None]:
# Run the pipeline's feature-generation step (presumably writes the file given
# as feature_dict_name — confirm in DataGenerating).
data_generate.features_generating()

In [None]:
import pandas as pd
# Flag the feature dictionary as already available before building the dataset.
data_generate.features_dict_exist = True

# Regression target: the 'Density' column of the raw CSV.
target = pd.read_csv(
    '../data_pipeline/data_files/Final_Dataset.csv'
).Density

# NOTE(review): the keyword is called target_name but receives the column's
# values (a pandas Series), not a column name — confirm against dataset_creating.
data_generate.dataset_creating(target_name=target)

### Train the model

In [None]:
from FFiNet.train_evaluate.utils import TrainArgs
from FFiNet.train_evaluate.train_graph import evaluate
from FFiNet.models.FFiNet_model import FFiNetModel
import torch.nn as nn

# Training configuration for the density regression task.
# The save path uses forward slashes so that it resolves on POSIX systems as
# well as on Windows; it is the same directory the test cell later loads
# 'FFiNetModel_(<n>).pt' checkpoints from.
train_args = TrainArgs(
    lr=0.001,
    model_save_path='../train_evaluate/saved_models/',
    batch_size=64,
    patience=100,
    task='regression',
    num_tasks=1,
    normalize=False,
    interval=10,
    task_name=['Final_Dataset'],
    metrics='MAE',
    num_epochs=10000,
    tolerance=0,
    save=True,
    writer=False,
    logs=True,
    # split=[train_idx, valid_idx, test_idx],  # optional explicit split (disabled)
)

# Model hyper-parameters handed to FFiNetModel through evaluate().
# NOTE(review): nn.PReLU() carries a learnable parameter and a single instance
# is created here; if the model does not copy it, that parameter is shared by
# every layer and every repeated run — confirm this is intended.
params = {
    'hidden_dim': 16,
    'hidden_layers': 3,
    'num_heads': 8,
    'activation': nn.PReLU(),
    'dropout': 0.3,
    'prediction_layers': 3,
    'prediction_dropout': 0.3,
    'prediction_hidden_dim': 256,
}

# Train and evaluate on the processed dataset. The leading 3 is presumably the
# number of repeated runs (the test cell loads checkpoints 0..2) — confirm
# against train_graph.evaluate.
evaluate(
    3,
    data_path='../data_pipeline/data_files/Final_Dataset.pt',
    model_class=FFiNetModel,
    model_args=params,
    train_args=train_args,
)


### Using Huang & Massa Dataset to test

In [None]:
# Before testing on the Huang & Massa dataset, molecules that appear in both datasets should be removed from the training set.
import torch
from torch_geometric.loader import DataLoader
from FFiNet.train_evaluate.utils import evaluate_score
from FFiNet.models.FFiNet_model import FFiNetModel

# Rebuild the network with the hyper-parameters used for training so that the
# saved state dicts can be loaded into it. The input feature width is 65 for
# the 'pdbbind' task and 66 otherwise.
input_dim = 65 if train_args.task_name[0] == 'pdbbind' else 66
model1 = FFiNetModel(
    feature_per_layer=[input_dim] + [params['hidden_dim']] * params['hidden_layers'],
    num_heads=params['num_heads'],
    pred_hidden_dim=params['prediction_hidden_dim'],
    pred_dropout=params['prediction_dropout'],
    pred_layers=params['prediction_layers'],
    activation=params['activation'],
    dropout=params['dropout'],
    num_tasks=train_args.num_tasks,
)
# Move the model to the target device once instead of once per batch.
model1.to(train_args.device)

# The external test set is the same for every checkpoint, so load it once.
dataset = torch.load('../data_pipeline/data_files/Huang_&_Massa_density.pt')
dataset_i = DataLoader(dataset, batch_size=128)

for num in range(3):
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    parameter1_dict = torch.load(
        f'../train_evaluate/saved_models/FFiNetModel_({num}).pt',
        map_location=train_args.device,
    )
    model1.load_state_dict(parameter1_dict)
    model1.eval()

    # Inference only: disable autograd so no graph is built for the outputs.
    # NOTE(review): y_hat / y_true are overwritten on every batch and are not
    # used below; this pass looks redundant with evaluate_score and is kept
    # only so the last batch's values stay available for inspection.
    with torch.no_grad():
        for batch in dataset_i:
            batch = batch.to(train_args.device)
            y_hat = model1(batch)
            y_true = batch.y.reshape((-1, train_args.num_tasks))

    val_metric = evaluate_score(model1, dataset_i, train_args)
    print(val_metric)