# Analyze result
モデルの予測性能などを評価するノートブック

In [28]:
%load_ext autoreload
%autoreload 2

In [1]:
import torch
import os
import csv 
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from src.score_prediction_models.docking_score_predictor import DockingScorePredictor
from src.data.dataloader import get_dataloader
from tqdm.notebook import tqdm

In [2]:
with open('../config/filepath.yml') as file:
    path_config = yaml.safe_load(file)

with open('../config/model.yml') as file:
    model_config = yaml.safe_load(file)

with open('../config/data.yml') as file:
    data_config = yaml.safe_load(file)

dude_dir = os.path.join('..', path_config['data']['DUD-E'])
alphafold_dir = os.path.join('..', path_config['data']['alphafold'])
smiles_dir = os.path.join('..', path_config['data']['smiles'])
output_dir = os.path.join('..', path_config['data']['plots'])
hist_dir = os.path.join('..', path_config['data']['hist'])
preprocessed_dir = os.path.join('..', path_config['data']['preprocessed'])
sample_dir = os.path.join('..', path_config['data']['samples'])
model_dir = os.path.join('..', path_config['data']['docking'])



In [3]:
ds_model_config = model_config['docking_score_regression_model']

model = DockingScorePredictor(
    embed_dim=ds_model_config['embed_dim'],
    num_heads=ds_model_config['num_heads'],
    ffn_hidden_dim=ds_model_config['ffn_hidden_dim'],
    num_transformer_blocks=ds_model_config['num_transformer_blocks'],
).cuda()

model_file = os.path.join(model_dir, 'docking_score_regression_model_2024-12-23_14-28-07', 'model_2024-12-23_14-28-07.pth')

model.load_state_dict(torch.load(model_file))
model.eval()

test_file = os.path.join('..', path_config['data']['test'], 'test.csv')

test_dataloader = get_dataloader(
    csv_file=test_file,
    smiles_max_len=data_config['dataset']['smiles_max_len'],
    protein_max_len=data_config['dataset']['protein_max_len'],
    batch_size=1,
    shuffle=False,
)


  model.load_state_dict(torch.load(model_file))


In [4]:
with torch.no_grad():
    y_true = []
    y_pred = []
    train_pbar = tqdm(test_dataloader, desc='Testing')
    for i, batch in enumerate(train_pbar):
        smiles_emb = batch['smiles_embedding'].cuda()
        af2_emb = batch['protein_embedding'].cuda()
        docking_score = batch['docking_score'].cuda()
        output = model(smiles_emb, af2_emb)
        y_true.append(docking_score.cpu().numpy())
        y_pred.append(output.cpu().numpy())

y_true = np.array(y_true).flatten()
y_pred = np.array(y_pred).flatten()

plt.figure(figsize=(10, 10))
plt.scatter(y_true, y_pred, alpha=0.7)
plt.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], color='red', linestyle='--')
plt.xlabel('Actual Docking Score')
plt.ylabel('Predicted Docking Score')
plt.title('Docking Score Prediction')
plt.grid()
plt.show()

mse = mean_squared_error(y_true, y_pred)
print(f'Mean Squared Error: {mse}')

Testing:   0%|          | 0/32092 [00:00<?, ?it/s]

FileNotFoundError: Embedding file not found: data/alphafold/P15121/structure.npy