<a href="https://colab.research.google.com/github/james-yu2005/Affi-NN-ity/blob/main/Copy_of_Protein_Encoded_Base_Model_Affi_NN_ity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
 !pip install PyTDC rdkit-pypi torch-geometric pandas tqdm fair-esm
 !pip install --pre deepchem
 !pip install biopython
# PyTDC: Loading clean DTI datasets (like DAVIS)
# rdkit-pypi: Parsing drug SMILES and converting to molecule graphs
# torch-geometric: Building GNN architecture for drug inputs
# tqdm: Shows real-time progress for preprocessing steps like SMILES parsing)

Collecting fair-esm
  Downloading fair_esm-2.0.0-py3-none-any.whl.metadata (37 kB)
Downloading fair_esm-2.0.0-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.1/93.1 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fair-esm
Successfully installed fair-esm-2.0.0


In [46]:
import torch
import torch_geometric
from torch_geometric.data import Dataset, Data
from torch.nn import MultiheadAttention
import numpy as np
import os
from tqdm import tqdm
from rdkit import Chem
from rdkit.Chem import Draw
from tdc.multi_pred import DTI
import pandas as pd

In [30]:
# Cell B: Load pretrained ESM-2 model
# we're using the 650M‐parameter variant; adjust repr_layers if you pick a different size
esm_model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
batch_converter = alphabet.get_batch_converter()
esm_model = esm_model.eval().cuda()   # move to GPU if available


In [36]:
import deepchem as dc                       # DeepChem for dataset loading
from deepchem.feat import RawFeaturizer     # returns raw file paths (ligand, protein)
from Bio.PDB import PDBParser, PDBList      # parse & download PDB files
from Bio.PDB.Polypeptide import PPBuilder   # build polypeptide sequences
import torch
import itertools                           # to slice the dataset iterator

# 1. Load the “general” PDBBind set (raw file paths) — no heavy featurization yet
tasks, (train, valid, test), transformers = dc.molnet.load_pdbbind(
    featurizer=RawFeaturizer(),  # get raw paths instead of precomputed features
    set_name="general",          # full ~12 800-complex collection
    pocket=False,                # download full PDB files, not just pocket fragments
    splitter="random",
    transformers=["normalization"],
    reload=False                 # use cached data if already downloaded
)
print(f"General set sizes → train: {len(train)}, valid: {len(valid)}, test: {len(test)}")

# 2. Prepare PDB tools
pdb_list = PDBList()            # downloader (will skip existing files)
parser   = PDBParser(QUIET=True)
ppb      = PPBuilder()          # for extracting one‑letter sequences

# 3. Only process the first N complexes (here N=100) to save time
N = 100
sample_iter = itertools.islice(train.itersamples(), N)

# 4. Loop over that small slice and embed proteins
protein_per_res = {}   # maps pdb_id -> per-residue tensor
protein_global  = {}   # maps pdb_id -> pooled tensor

for X, y, w, pdb_id in sample_iter:
    _, protein_file = X  # raw file path for the full PDB

    # 4.1 Download (if missing) & parse the PDB
    pdb_path  = pdb_list.retrieve_pdb_file(
        pdb_id.lower(), pdir="pdb_files/full", file_format="pdb"
    )
    structure = parser.get_structure(pdb_id, pdb_path)

    # 4.2 Extract the complete amino-acid sequence
    seq = ""
    for pp in ppb.build_peptides(structure):
        seq += str(pp.get_sequence())
    if not seq:
        continue  # skip if no standard residues found

    # 4.3 Tokenize & run through ESM-2
    labels, strs, toks = batch_converter([(pdb_id, seq)])
    toks = toks.cuda()
    with torch.no_grad():
        out = esm_model(toks, repr_layers=[33], return_contacts=False)

    # 4.4 Pull out embeddings
    emb_res  = out["representations"][33][0]      # shape (L,1280)
    emb_glob = emb_res.mean(0, keepdim=True)      # shape (1,1280)

    # 4.5 Store CPU‑resident embeddings
    protein_per_res[pdb_id] = emb_res.cpu()
    protein_global[pdb_id]  = emb_glob.cpu()

print(f"Embedded {len(protein_per_res)} proteins (out of {N} sampled)")

General set sizes → train: 14143, valid: 1768, test: 1768
Downloading PDB structure '4qw5'...
Downloading PDB structure '5alk'...
Downloading PDB structure '6ew6'...
Downloading PDB structure '5lwd'...
Downloading PDB structure '3zya'...
Downloading PDB structure '2qf6'...
Downloading PDB structure '4r75'...
Downloading PDB structure '4dfn'...
Downloading PDB structure '3spf'...
Downloading PDB structure '3uyr'...
Downloading PDB structure '5zeq'...
Downloading PDB structure '2itk'...
Downloading PDB structure '4qjw'...
Downloading PDB structure '6fjm'...
Downloading PDB structure '5f61'...
Downloading PDB structure '5u4f'...
Downloading PDB structure '2f70'...
Downloading PDB structure '3uo5'...
Downloading PDB structure '2avi'...
Downloading PDB structure '3i6c'...
Downloading PDB structure '1h35'...
Downloading PDB structure '3eql'...
Downloading PDB structure '3rup'...
Downloading PDB structure '2qbr'...
Downloading PDB structure '3gxt'...
Downloading PDB structure '3g8o'...
Downlo

In [43]:
# Pick one PDB ID
pdb0 = next(iter(protein_global))
emb = protein_global[pdb0].numpy().squeeze()  # (1280,)

# Compute stats
stats = {
    "mean": emb.mean(),
    "std": emb.std(),
    "min": emb.min(),
    "max": emb.max()
}
print(f"Global embedding stats for {pdb0}:", stats)


Global embedding stats for 4qw5: {'mean': -0.001827392, 'std': 0.27268922, 'min': -7.506588, 'max': 1.2055587}


In [44]:
class MoleculeDataset(Dataset):
    def __init__(self, root, dataframe, split='train', test_fraction=0.2, val_fraction=0.1,
                 transform=None, pre_transform=None, random_state=42):
        self.dataframe = dataframe.reset_index()

        # Following defines how the data will be split for train, test and val
        self.split = split
        self.test_fraction = test_fraction
        self.val_fraction = val_fraction
        self.random_state = random_state

        # Initialize empty list for processed data. This will later hold the fully processed dataset
        # Each item is a PyG Data object representing a drug-target pair
        self.molecule_data = []

        # Initliaze protein encoder stuff

        # This method (defined next) actually splits the dataframe for train, test, val
        self._split_data()

        super(MoleculeDataset, self).__init__(root, transform, pre_transform)


    # Split the dataframe into train, validation and test sets
    def _split_data(self):
        from sklearn.model_selection import train_test_split

        # First split off the test set
        train_val_df, test_df = train_test_split(
            self.dataframe,
            test_size=self.test_fraction,
            random_state=self.random_state
        )

        # Then split the train set into train and validation
        if self.val_fraction > 0:
            train_df, val_df = train_test_split(
                train_val_df,
                test_size=self.val_fraction / (1 - self.test_fraction),
                random_state=self.random_state
            )
        else:
            train_df = train_val_df
            val_df = train_val_df.iloc[0:0]  # Empty DataFrame with same columns

        # Assign the appropriate dataframe based on the split parameter
        if self.split == 'train':
            self.dataframe = train_df
        elif self.split == 'val':
            self.dataframe = val_df
        elif self.split == 'test':
            self.dataframe = test_df
        else:
            raise ValueError(f"Split '{self.split}' not recognized. Use 'train', 'val', or 'test'.")


    # Process molecules from SMILES into graph format and convert protein sequences to one-hot encoding
    def process(self):
        for index, row in tqdm(self.dataframe.iterrows(), total=self.dataframe.shape[0]): # Going row-by-row through the dataset, showing a progress bar with tqdm
            # Ensure column names are correct
            smiles = row["Drug"]  # If "Drug", it contains SMILES strings

            # Using RDKit to convert the SMILES string into a molecule object. If it fails (invalid SMILES), skip it.
            mol_obj = Chem.MolFromSmiles(smiles)
            if mol_obj is None:
                continue

            # Creates of tensor where each row represents a single atom in the molecule and its columns represents its features (like hybridization, aromatic ring, etc.)
            node_feats = self._get_node_features(mol_obj)

            # Goes through each bond in the molecule and extracts whether its single or not and whether its part of a ring or not.
            # Each bond is stored twice (once for each direction) so the tensor shape is [num_edges * 2, 2]
            edge_feats = self._get_edge_features(mol_obj)

            # Tensor shape: [2, num_edges]
            # First row is the source node (from where the bond originates). Second row is the destination node
            # NOT an adjacency matrix. Each column is an edge the first row number is the start and second row number is the end
            edge_index = self._get_adjacency_info(mol_obj)

            # Used integer based token indexing for proteins

            # Data is a class that represents a single drug molecule graph
            data = Data(
                x=node_feats,
                edge_index=edge_index,
                edge_attr=edge_feats,
                y=torch.tensor([row["Y"]], dtype=torch.float)
            )

            # Attach the one-hot-encoded proteins to their respective drug graphs
            # assume protein_global and protein_per_res are precomputed dicts
            data.prot_global   = protein_global[pdb_id]    # shape [1,1280]
            data.prot_per_res  = protein_per_res[pdb_id]   # shape [L,1280]

            # Store in list instead of saving to disk
            self.molecule_data.append(data)

    # ISSUE 07.05.25
    # If we return [20000], and batch 32 of them together, PyG tries to stack them as torch.cat([ [20,000], [20,000], ..., [20,000] ]) giving [640,000]
    # Instead of getting: [32, 20000] which is what the Linear layer expects
    # Unsqueeze adds a new first dimension converting [20,000] to [1, 20,000]
    # NOTE: [1, 20000] doesn't mean there's a physical "1" in the beginning of each vector.
    # The 1 is a new dimension that converts each flattened tensor into a row so that when 32 (batch size) such tensors are stacked it becomes a 2D matrix.
    # Without the 1, if 32 tensors were stacked it would become a really long 1D matrix giving [640,000] instead of [32, 64,000].


    # The following 3 functions have been (kind of) explained in the process function. Basically extracts node & edge features.
    def _get_node_features(self, mol):
        all_node_feats = []
        for atom in mol.GetAtoms():
            node_feats = [
                atom.GetAtomicNum(),
                atom.GetDegree(),
                atom.GetFormalCharge(),
                atom.GetHybridization(),
                atom.GetIsAromatic(),
                atom.GetTotalNumHs(),
                atom.GetNumRadicalElectrons(),
                atom.IsInRing(),
                atom.GetChiralTag()
            ]
            all_node_feats.append(node_feats)
        return torch.tensor(np.array(all_node_feats), dtype=torch.float)


    def _get_edge_features(self, mol):
        all_edge_feats = []
        for bond in mol.GetBonds():
            edge_feats = [
                bond.GetBondTypeAsDouble(),
                bond.IsInRing()
            ]
            all_edge_feats += [edge_feats, edge_feats]  # Bidirectional edges
        return torch.tensor(np.array(all_edge_feats), dtype=torch.float)


    def _get_adjacency_info(self, mol):
        edge_indices = []
        for bond in mol.GetBonds():
            i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
            edge_indices += [[i, j], [j, i]]  # Bidirectional edges
        return torch.tensor(edge_indices, dtype=torch.long).t().contiguous()


    def len(self):
        return len(self.dataframe)


    # Returns the processed molecule at index idx
    # Each item in self.molecule_data is a PyG Data object representing one drug–protein pair.
    # This is the empty list we initialized earlier
    def get(self, idx):
        return self.molecule_data[idx]


    # Return processed file names. Since we're storing in memory, we'll return an empty list or a dummy file name. """
    def processed_file_names(self):
        return []

In [45]:
# Create train, validation, and test datasets. The process() method converts each row into a PyTorch Geometric Data object
train_dataset = MoleculeDataset(root='.', dataframe=df_DAVIS, split='train')
train_dataset.process()

val_dataset = MoleculeDataset(root='.', dataframe=df_DAVIS, split='val')
val_dataset.process()

test_dataset = MoleculeDataset(root='.', dataframe=df_DAVIS, split='test')
test_dataset.process()

# Create DataLoaders
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

100%|██████████| 18039/18039 [00:23<00:00, 770.85it/s]
100%|██████████| 18039/18039 [00:23<00:00, 769.95it/s]
100%|██████████| 2578/2578 [00:03<00:00, 784.84it/s]
100%|██████████| 2578/2578 [00:03<00:00, 820.81it/s]
100%|██████████| 5155/5155 [00:06<00:00, 745.58it/s]
100%|██████████| 5155/5155 [00:06<00:00, 817.51it/s]


In [48]:
import torch
import torch.nn.functional as F
from torch.nn import Linear, ReLU, Sequential, Embedding
from torch_geometric.nn import GINConv, global_add_pool

class GINDrugTargetModel(torch.nn.Module):
    def __init__(self, node_feat_dim=9, embedding_dim=64, hidden_dim=128, output_dim=1):
        super(GINDrugTargetModel, self).__init__()

        # Node feature embedding
        self.node_embedding = Sequential(
            Linear(node_feat_dim, hidden_dim),
            ReLU(),
            Linear(hidden_dim, hidden_dim)
        )

        # GIN convolution layers
        nn1 = Sequential(Linear(hidden_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
        self.conv1 = GINConv(nn1)
        self.bn1 = torch.nn.BatchNorm1d(hidden_dim)

        nn2 = Sequential(Linear(hidden_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
        self.conv2 = GINConv(nn2)
        self.bn2 = torch.nn.BatchNorm1d(hidden_dim)

        # Final predictor combining molecule and protein embeddings
        self.predictor = Sequential(
            Linear(2 * hidden_dim, hidden_dim),
            ReLU(),
            torch.nn.Dropout(0.2),
            Linear(hidden_dim, hidden_dim // 2),
            ReLU(),
            Linear(hidden_dim // 2, output_dim)
        )
        # in __init__
        self.prot_proj = torch.nn.Sequential(
          torch.nn.Linear(1280, hidden_dim),
          torch.nn.ReLU(),
          torch.nn.Linear(hidden_dim, hidden_dim)
        )
        self.cross_attn = MultiheadAttention(embed_dim=hidden_dim, num_heads=8)



    def forward(self, batch):
      # — ligand GNN as before —
      x = self.node_embedding(batch.x)
      x = F.relu(self.conv1(x, batch.edge_index))
      x = self.bn1(x)
      x = F.relu(self.conv2(x, batch.edge_index))
      x = self.bn2(x)
      lig_vec = global_add_pool(x, batch.batch)            # [B, hidden_dim]

      # — new protein path —
      prot_glob = batch.prot_global.to(lig_vec.device)     # [B, 1280]
      prot_feat = self.prot_proj(prot_glob)                # [B, hidden_dim]

      # — optional cross‑attention on per‑residue embeddings —
      # res_emb = batch.prot_per_res.transpose(0,1).to(x.device) # [1280, B, L]
      # attn_out, _ = self.cross_attn(res_emb, x.unsqueeze(1), x.unsqueeze(1))
      # prot_feat = attn_out.mean(0)                         # [B, hidden_dim]

      # — fuse & predict —
      combined = torch.cat([lig_vec, prot_feat], dim=1)    # [B, 2*hidden_dim]
      return self.predictor(combined)



In [49]:
import torch
import torch.nn.functional as F

def train_model(model, train_loader, val_loader, num_epochs=100, lr=0.001, device='cuda'):
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        total_train_loss = 0

        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()

            preds = model(batch).squeeze()  # Model predicts pKd scores
            targets = batch.y.squeeze()     # True pKd values from the data

            loss = F.mse_loss(preds, targets) # Compute mean squared error loss
            loss.backward() # Backpropagate the error
            optimizer.step() # Update weights

            total_train_loss += loss.item() * batch.num_graphs # batch.num_graphs is the number of samples in the batch (32)

        avg_train_loss = total_train_loss / len(train_loader.dataset) # This gives average MSE loss over all training samples.

        # Validation Phase
        model.eval()
        total_val_loss = 0

        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)

                preds = model(batch).squeeze()
                targets = batch.y.squeeze()

                loss = F.mse_loss(preds, targets)
                total_val_loss += loss.item() * batch.num_graphs

        avg_val_loss = total_val_loss / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    return model

In [50]:
for batch in train_loader:
    print(f"Node features shape: {batch.x.shape}")          # 1007 (1st number) = total number of nodes (atoms) across all 32 graphs in the batch
                                                            # 9 (2nd number) = number of features per node (like atomic number, valency, etc.)
    print(f"Edge features shape: {batch.edge_attr.shape}")  # 2208 (1st number) = total number of edges (bonds) across the 32 graphs
                                                            # 2 (2nd number) = number of features per edge (e.g., bond type, in ring)
    print(f"Target features shape: {batch.y.shape}")
    print(f"Target features type: {type(batch.y)}")
    print(f"Target features element: {batch.y[0]}")         # Prints pKd of the first sample in the training batch (actual not predicted)
    break

Node features shape: torch.Size([1042, 9])
Edge features shape: torch.Size([2298, 2])
Target features shape: torch.Size([32])
Target features type: <class 'torch.Tensor'>
Target features element: 4.999995708465576


In [51]:
def test_model(model, test_loader, device='cuda'):
    model.eval()
    preds = []
    trues = []

    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            pred = model(batch)
            preds.append(pred.view(-1).cpu())
            trues.append(batch.y.view(-1).cpu())

    preds = torch.cat(preds, dim=0)
    trues = torch.cat(trues, dim=0)

    mse = F.mse_loss(preds, trues)
    print(f'Test MSE: {mse.item():.4f}')

    # Print few predictions vs actual value
    print("\nSample Predictions vs True Binding Affinities:")
    for i in range(min(100, len(preds))):  # Show 10 samples (or fewer if smaller dataset)
        print(f"True: {trues[i].item():.4f}, Predicted: {preds[i].item():.4f}")

    return preds, trues, mse.item()

In [52]:
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.nn import GINConv, global_add_pool
model = GINDrugTargetModel(node_feat_dim=9)

# Train the model
trained_model = train_model(model, train_loader, val_loader, num_epochs=40)

Epoch 1/40 | Train Loss: 1.0953 | Val Loss: 0.7067
Epoch 2/40 | Train Loss: 0.7256 | Val Loss: 0.7191
Epoch 3/40 | Train Loss: 0.7079 | Val Loss: 0.8051
Epoch 4/40 | Train Loss: 0.6679 | Val Loss: 0.9676
Epoch 5/40 | Train Loss: 0.6485 | Val Loss: 19.4012
Epoch 6/40 | Train Loss: 0.6445 | Val Loss: 1.1540
Epoch 7/40 | Train Loss: 0.6139 | Val Loss: 1.0132
Epoch 8/40 | Train Loss: 0.6086 | Val Loss: 1.3872
Epoch 9/40 | Train Loss: 0.5981 | Val Loss: 1.5396
Epoch 10/40 | Train Loss: 0.6035 | Val Loss: 1.3176
Epoch 11/40 | Train Loss: 0.5829 | Val Loss: 0.7803
Epoch 12/40 | Train Loss: 0.5883 | Val Loss: 0.8084
Epoch 13/40 | Train Loss: 0.5978 | Val Loss: 1.1623
Epoch 14/40 | Train Loss: 0.5902 | Val Loss: 0.8522
Epoch 15/40 | Train Loss: 0.5902 | Val Loss: 0.6643
Epoch 16/40 | Train Loss: 0.5854 | Val Loss: 1.5822
Epoch 17/40 | Train Loss: 0.5787 | Val Loss: 2.4989
Epoch 18/40 | Train Loss: 0.5794 | Val Loss: 1.3623
Epoch 19/40 | Train Loss: 0.5743 | Val Loss: 1.4599
Epoch 20/40 | Train 

In [53]:
# Test the model
test_preds, test_targets, test_mse = test_model(trained_model, test_loader)

Test MSE: 0.7702

Sample Predictions vs True Binding Affinities:
True: 5.0000, Predicted: 5.0372
True: 5.0000, Predicted: 5.0787
True: 5.0000, Predicted: 4.9831
True: 5.0000, Predicted: 4.4874
True: 5.0000, Predicted: 4.9491
True: 5.0000, Predicted: 4.6886
True: 5.0000, Predicted: 4.7769
True: 5.6989, Predicted: 5.0578
True: 5.8538, Predicted: 5.1192
True: 5.0000, Predicted: 4.6384
True: 5.0000, Predicted: 4.6384
True: 5.0000, Predicted: 4.4874
True: 7.7670, Predicted: 4.4513
True: 5.0000, Predicted: 5.0578
True: 5.0000, Predicted: 4.9564
True: 5.0000, Predicted: 5.3076
True: 5.0000, Predicted: 4.8256
True: 5.0000, Predicted: 4.9055
True: 5.7695, Predicted: 5.1917
True: 5.0000, Predicted: 4.9719
True: 7.1186, Predicted: 4.8396
True: 5.6383, Predicted: 4.8269
True: 5.0000, Predicted: 4.6658
True: 5.0000, Predicted: 4.9081
True: 7.3862, Predicted: 5.1249
True: 5.0000, Predicted: 4.4874
True: 5.0000, Predicted: 4.7191
True: 5.0000, Predicted: 4.8419
True: 5.0000, Predicted: 4.9796
True: 5

In [54]:
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, mean_absolute_error
import numpy as np

def evaluate_model(model, loader, device='cuda'):
    model.eval()
    preds = []
    trues = []

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            pred = model(batch)
            preds.append(pred.view(-1).cpu().numpy())
            trues.append(batch.y.view(-1).cpu().numpy())

    preds = np.concatenate(preds)
    trues = np.concatenate(trues)

    mse = mean_squared_error(trues, preds)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(trues, preds)
    r2 = r2_score(trues, preds)
    explained_variance = explained_variance_score(trues, preds)

    print(f'--- Evaluation Metrics ---')
    print(f'MSE: {mse:.4f}')
    print(f'RMSE: {rmse:.4f}')
    print(f'MAE: {mae:.4f}')
    print(f'R-squared (R2): {r2:.4f}')
    print(f'Explained Variance: {explained_variance:.4f}')
    print('--------------------------')


    # Print few predictions vs actual value
    print("\nSample Predictions vs True Binding Affinities:")
    for i in range(min(10, len(preds))):  # Show 10 samples (or fewer if smaller dataset)
        print(f"True: {trues[i]:.4f}, Predicted: {preds[i]:.4f}")

    return preds, trues, {'mse': mse, 'rmse': rmse, 'mae': mae, 'r2': r2, 'explained_variance': explained_variance}

# Evaluate the trained model on the test set
test_preds, test_targets, test_metrics = evaluate_model(trained_model, test_loader)


--- Evaluation Metrics ---
MSE: 0.7702
RMSE: 0.8776
MAE: 0.5301
R-squared (R2): -0.1766
Explained Variance: 0.1046
--------------------------

Sample Predictions vs True Binding Affinities:
True: 5.0000, Predicted: 5.0372
True: 5.0000, Predicted: 5.0787
True: 5.0000, Predicted: 4.9831
True: 5.0000, Predicted: 4.4874
True: 5.0000, Predicted: 4.9491
True: 5.0000, Predicted: 4.6886
True: 5.0000, Predicted: 4.7769
True: 5.6989, Predicted: 5.0578
True: 5.8538, Predicted: 5.1192
True: 5.0000, Predicted: 4.6384
