In [7]:
import pandas as pd

In [8]:
# Read the reduced dataset (a CSV in the current working directory) into a DataFrame.
# NOTE(review): `df` is not referenced by any other cell visible here — confirm it is still needed.
df = pd.read_csv(filepath_or_buffer='reduced_data.csv')

In [17]:
import torch
from torch_geometric.data import HeteroData
from torch_geometric.nn import RGCNConv
import torch.nn.functional as F
from torch.nn import Linear
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Build a synthetic patient graph: random features, random labels, self-loops only.
SEED = 42           # fixed so that Restart & Run All reproduces the same tensors
NUM_PATIENTS = 200  # number of 'patient' nodes
NUM_FEATURES = 6    # BMI, Sex, Age, PhysHlth, MentHlth, GenHlth
NUM_LABELS = 2      # Diabetic, HeartDisease

# Seed torch's global RNG before any random tensor is drawn; everything sampled
# from that RNG afterwards in the same run becomes deterministic as well.
torch.manual_seed(SEED)

data = HeteroData()
data['patient'].x = torch.randn(NUM_PATIENTS, NUM_FEATURES)           # float features
data['patient'].y = torch.randint(0, 2, (NUM_PATIENTS, NUM_LABELS))   # 0/1 per label

# Simple example topology: every node is connected only to itself.
# Row 0 holds source indices and row 1 target indices; building the [2, N] tensor
# directly (instead of transposing an [N, 2] one) keeps it contiguous in memory.
edge_index = torch.arange(NUM_PATIENTS, dtype=torch.long).repeat(2, 1)
edge_type = torch.zeros(edge_index.size(1), dtype=torch.long)  # single relation type: id 0
data['patient', 'relates', 'patient'].edge_index = edge_index
data['patient', 'relates', 'patient'].edge_type = edge_type

# RGCN model for a homogeneous-like graph that is fed through the dict-based
# (heterogeneous-style) inputs: only one node type and one relation are read.
class HeteroRGCN(torch.nn.Module):
    """Two RGCNConv layers (each followed by ReLU) and a linear output head.

    Args:
        hidden_channels: Output width of both graph-convolution layers.
        in_channels: Number of input features per 'patient' node. Defaults to 6.
        out_channels: Number of logits produced per node. Defaults to 2.
        num_relations: Number of relation types given to each RGCNConv. Defaults to 1.
    """

    # Key under which the edge index / edge type tensors are looked up in forward().
    EDGE_KEY = ('patient', 'relates', 'patient')

    def __init__(self, hidden_channels, in_channels=6, out_channels=2, num_relations=1):
        super().__init__()
        self.conv1 = RGCNConv(in_channels=in_channels, out_channels=hidden_channels,
                              num_relations=num_relations)
        self.conv2 = RGCNConv(in_channels=hidden_channels, out_channels=hidden_channels,
                              num_relations=num_relations)
        self.out = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict, edge_type_dict):
        """Return one row of raw logits (no sigmoid applied) per 'patient' node.

        Args:
            x_dict: Mapping that contains the node features under 'patient'.
            edge_index_dict: Mapping that contains the edge index under EDGE_KEY.
            edge_type_dict: Mapping that contains the per-edge relation ids under EDGE_KEY.

        Raises:
            KeyError: If 'patient' or EDGE_KEY is missing from the respective mapping.
        """
        edge_index = edge_index_dict[self.EDGE_KEY]
        edge_type = edge_type_dict[self.EDGE_KEY]
        x = F.relu(self.conv1(x_dict['patient'], edge_index, edge_type))
        x = F.relu(self.conv2(x, edge_index, edge_type))
        return self.out(x)

# Instantiate the network (16 hidden units), an Adam optimiser over its
# parameters (learning rate 5e-3), and a BCE-with-logits loss for the 0/1 labels.
model = HeteroRGCN(16)
optimizer = torch.optim.Adam(params=model.parameters(), lr=5e-3)
criterion = torch.nn.BCEWithLogitsLoss()

# One full-graph optimisation step.
def train():
    """Run a single forward/backward/update pass over `data` and return the loss.

    Uses the module-level `model`, `optimizer`, `criterion` and `data`.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    # The integer labels are cast to float because the loss is fed float targets.
    targets = data['patient'].y.float()
    logits = model(data.x_dict, data.edge_index_dict, data.edge_type_dict)

    step_loss = criterion(logits, targets)
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

# Extended evaluation function including multiple metrics
def evaluate(threshold=0.5):
    """Score the module-level `model` on the whole `data` graph.

    NOTE: this runs on the same nodes and labels that train() optimises, so the
    returned numbers are training-set metrics, not a held-out estimate.

    Args:
        threshold: Probability above which a label is predicted positive.
            Defaults to 0.5, the cut-off previously hard-coded here.

    Returns:
        dict with float entries 'accuracy', 'precision', 'recall', 'f1' and
        'auc_roc'. Precision, recall, F1 and AUC are macro-averaged.
        For the two-column 0/1 label matrix built in this notebook,
        accuracy_score counts a node as correct only if every label matches.
    """
    model.eval()
    with torch.no_grad():
        logits = model(data.x_dict, data.edge_index_dict, data.edge_type_dict)
        # .cpu() keeps the NumPy conversion valid if model/data live on another device.
        y_pred_prob = torch.sigmoid(logits).cpu().numpy()
        y_true = data['patient'].y.cpu().numpy()

    y_pred = y_pred_prob > threshold

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='macro'),
        'recall': recall_score(y_true, y_pred, average='macro'),
        'f1': f1_score(y_true, y_pred, average='macro'),
        # AUC is computed from the probabilities, not the thresholded predictions.
        # NOTE(review): multi_class only matters for multiclass targets; kept unchanged.
        'auc_roc': roc_auc_score(y_true, y_pred_prob, average='macro', multi_class='ovr'),
    }
# Optimise for 200 epochs; a metrics line is printed on every 10th epoch
# (epochs 0, 10, 20, ...), all other epochs only run the training step.
for epoch in range(200):
    loss = train()
    if epoch % 10 != 0:
        continue
    metrics = evaluate()
    print(f"Epoch: {epoch}, Loss: {loss:.4f}, Metrics: Acc: {metrics['accuracy']:.4f}, "
          f"Prec: {metrics['precision']:.4f}, Recall: {metrics['recall']:.4f}, "
          f"F1: {metrics['f1']:.4f}, AUC-ROC: {metrics['auc_roc']:.4f}")


# Metrics of the model after the last epoch
final_metrics = evaluate()
print(f'Final Evaluation Metrics: {final_metrics}')


Epoch: 0, Loss: 0.7242, Metrics: Acc: 0.2550, Prec: 0.4851, Recall: 0.6333, F1: 0.4976, AUC-ROC: 0.5086
Epoch: 10, Loss: 0.6672, Metrics: Acc: 0.3600, Prec: 0.6581, Recall: 0.3886, F1: 0.4632, AUC-ROC: 0.6424
Epoch: 20, Loss: 0.6441, Metrics: Acc: 0.3850, Prec: 0.6461, Recall: 0.5002, F1: 0.5496, AUC-ROC: 0.6838
Epoch: 30, Loss: 0.6193, Metrics: Acc: 0.4200, Prec: 0.6643, Recall: 0.5438, F1: 0.5885, AUC-ROC: 0.7261
Epoch: 40, Loss: 0.5895, Metrics: Acc: 0.4900, Prec: 0.7254, Recall: 0.5603, F1: 0.6285, AUC-ROC: 0.7635
Epoch: 50, Loss: 0.5563, Metrics: Acc: 0.5650, Prec: 0.7479, Recall: 0.6843, F1: 0.7142, AUC-ROC: 0.7974
Epoch: 60, Loss: 0.5200, Metrics: Acc: 0.6050, Prec: 0.7871, Recall: 0.7163, F1: 0.7498, AUC-ROC: 0.8291
Epoch: 70, Loss: 0.4838, Metrics: Acc: 0.6400, Prec: 0.7923, Recall: 0.7589, F1: 0.7751, AUC-ROC: 0.8554
Epoch: 80, Loss: 0.4463, Metrics: Acc: 0.6600, Prec: 0.8160, Recall: 0.7805, F1: 0.7974, AUC-ROC: 0.8855
Epoch: 90, Loss: 0.4089, Metrics: Acc: 0.7150, Prec: 0.8

In [20]:
# New patient data (assuming the same feature size as the training nodes).
# The features are random placeholders for the example, not a real patient record.
new_patient_features = torch.randn(1, 6)

# The single node gets one self-connection, written directly in [2, num_edges]
# layout: row 0 is the source index, row 1 the target index.
new_edge_index = torch.tensor([[0], [0]], dtype=torch.long)
new_edge_type = torch.tensor([0], dtype=torch.long)  # Assuming the same edge type as others

# Wrap the tensors in the dict-based input format the model's forward() expects.
x_dict = {'patient': new_patient_features}
edge_index_dict = {('patient', 'relates', 'patient'): new_edge_index}
edge_type_dict = {('patient', 'relates', 'patient'): new_edge_type}

# Prediction: switch to eval mode explicitly instead of relying on whichever
# mode an earlier cell happened to leave the model in.
model.eval()
with torch.no_grad():
    output = model(x_dict, edge_index_dict, edge_type_dict)
    predicted_probabilities = torch.sigmoid(output)   # independent probability per label
    predicted_classes = (predicted_probabilities > 0.5).int()

print("Predicted probabilities:", predicted_probabilities.numpy())
print("Predicted classes (0: No, 1: Yes):", predicted_classes.numpy())

Predicted probabilities: [[0.99999976 0.69459677]]
Predicted classes (0: No, 1: Yes): [[1 1]]
