## Load data

In [1]:
from dataAnalysis.DataAnalysis import DataAnalysis
import pandas as pd

# Read the raw table; row 0 of the CSV supplies the column names.
data = pd.read_csv(
    r"extdata/sbcdata.csv",
    header=0,
)

# Wrap the frame in the project's analysis helper. The cohort summaries and
# pandas warnings shown under this cell appear to be emitted while it is built.
data_analysis = DataAnalysis(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Training: 
Assessable data are 528101 cases and 1015074 CBCs
Control data are 527038 cases and 1013548 CBCs
Sepsis data are 1488 cases and 1526 CBCs
$$$$$$$$$$$$$$$$$$$$
Testing: 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Controls: 365794, Sepsis: 490
Assessable data are 180494 cases and 366284 CBCs
Control data are 180157 cases and 365794 CBCs
Sepsis data are 472 cases and 490 CBCs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Controls: 437629, Sepsis: 448
Assessable data are 157922 cases and 438077 CBCs
Control data are 180157 cases and 437629 CBCs
Sepsis data are 438 cases and 448 CBCs


In [2]:
import torch

# Training targets as 64-bit integers (torch.long is an alias of torch.int64).
y_train = torch.tensor(
    data_analysis.get_y_train(),
    dtype=torch.int64,
)

# Training features as 32-bit floats (torch.float is an alias of torch.float32).
X_train = torch.tensor(
    data_analysis.get_X_train(),
    dtype=torch.float32,
)

## Define model

In [3]:
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
import torch
from dataAnalysis.Constants import FEATURES
from torch_geometric.loader import NeighborLoader

class GraphNeuralNetwork(torch.nn.Module):
    """Two-layer GraphSAGE model that produces one raw logit per node.

    The first ``SAGEConv`` maps ``len(FEATURES)`` input features to
    ``hidden_dim`` channels; after a ReLU the second ``SAGEConv`` maps them to
    ``out_channels`` logits. ``predict_proba`` / ``predict`` turn those logits
    into binary class probabilities / hard labels.

    Args:
        hidden_dim: Width of the hidden representation.
        out_channels: Number of logits per node. The probability helpers
            are written for the default of 1 (binary classification).
    """

    def __init__(self, hidden_dim = 128, out_channels = 1):
        super().__init__()

        input_dim = len(FEATURES)
        # Attribute names (conv1 / conv_end) are part of the checkpoint's
        # state-dict keys and must not be renamed.
        # NOTE(review): `dropout` is not a documented SAGEConv argument; it is
        # forwarded via **kwargs and looks like a no-op here - confirm against
        # the installed torch_geometric version.
        self.conv1 = SAGEConv(input_dim, hidden_dim, normalize=True, project=True, aggr="mean", root_weight=True, dropout=0.0)
        self.conv_end = SAGEConv(hidden_dim, out_channels, aggr="mean", root_weight=True)

    def forward(self, graph):
        """Return raw logits for every node contained in ``graph``.

        Args:
            graph: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor with one row of ``out_channels`` logits per node in ``graph``.
        """
        x, edge_index = graph.x, graph.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        x = self.conv_end(x, edge_index)
        return x

    def predict_proba(self, graph):
        """Return class probabilities for every node of ``graph``.

        The graph is processed in mini-batches of up to 100000 seed nodes with
        their complete two-hop neighbourhoods. The model is switched to eval
        mode (and left there) and no gradients are tracked.

        Args:
            graph: Graph accepted by ``NeighborLoader``.

        Returns:
            Float tensor of shape ``(num_nodes, 2)`` whose columns are
            ``P(class 0)`` and ``P(class 1)``, in node order. An empty
            ``(0, 2)`` tensor is returned when the loader yields no batches.
        """
        loader = NeighborLoader(graph, num_neighbors=[-1, -1], batch_size=100000)
        per_batch = []
        with torch.inference_mode():
            self.eval()
            for batch in loader:
                # A NeighborLoader batch holds the seed nodes first, followed
                # by the sampled neighbours. Only the first `batch.batch_size`
                # rows are predictions for this batch's own nodes; keeping the
                # rest would add rows that do not line up with the graph.
                logits = self.forward(batch)[:batch.batch_size]
                # squeeze(-1) drops only the channel axis, so a batch with a
                # single node stays 1-D instead of collapsing to a scalar.
                scores = torch.sigmoid(logits.squeeze(-1))
                per_batch.append(torch.stack((1 - scores, scores), dim=1))
            if not per_batch:
                return torch.empty((0, 2))
            return torch.cat(per_batch, dim=0)

    def predict(self, graph):
        """Return hard 0/1 labels (as floats) by rounding ``P(class 1)``."""
        return torch.round(self.predict_proba(graph)[:, 1])

## Load trained model

In [4]:
# The model is kept on the CPU (no .to(device) call).
model = GraphNeuralNetwork()
# map_location="cpu" lets a checkpoint that was saved from a CUDA device be
# deserialised on a machine without a GPU; the weights are copied into the
# CPU-resident model either way.
model.load_state_dict(torch.load("model_homognn_88.pt", map_location="cpu"))
# Switch to inference mode; as the cell's last expression this also displays
# the module summary.
model.eval()

GraphNeuralNetwork(
  (conv1): SAGEConv(7, 128, aggr=mean)
  (conv_end): SAGEConv(128, 1, aggr=mean)
)

## Create graph from synthetic dataset

In [None]:
from torch_geometric.nn import knn_graph
from dataAnalysis.FeatureImportance import FeatureImportance

def normalize(tensor):
    """Standardise each column to zero mean and unit (sample) standard deviation.

    Args:
        tensor: 2-D ``torch.Tensor`` or NumPy array, rows = samples,
            columns = features. NumPy input is converted to ``float32``;
            integer/bool tensors are cast to ``float32`` because the mean and
            standard deviation are undefined for them.

    Returns:
        Tensor of the same shape holding ``(value - column_mean) / column_std``.
        Columns whose standard deviation is zero or undefined (constant
        column, single row) are only centred, so the result contains zeros
        there rather than NaN/inf.
    """
    if not torch.is_tensor(tensor):
        tensor = torch.from_numpy(tensor).type(torch.float)
    elif not (tensor.is_floating_point() or tensor.is_complex()):
        tensor = tensor.type(torch.float)
    mean = torch.mean(tensor, dim=0)
    std = torch.std(tensor, dim=0)
    # `std > 0` is False for both 0 and NaN, so either case falls back to a
    # divisor of 1 and keeps the output finite.
    safe_std = torch.where(std > 0, std, torch.ones_like(std))
    return (tensor - mean) / safe_std


# Build the feature-importance helper from the training features and
# standardise the feature-variation matrix it exposes as `X_all_fv`.
feature_importance = FeatureImportance(X_train)
X_all_fv = normalize(feature_importance.X_all_fv)

# Connect every row to its single nearest neighbour, self-loops allowed.
# NOTE(review): with k=1 and loop=True each node's nearest neighbour is
# presumably itself, i.e. a self-loop-only graph - confirm this is intended.
edge_index = knn_graph(
    X_all_fv,
    k=1,
    loop=True,
    num_workers=-1,
)

In [None]:
from torch_geometric.data import Data

# Bundle node features and connectivity into one graph object; the edge
# indices are stored as int64.
graph = Data(
    x=X_all_fv,
    edge_index=edge_index.long(),
)

## Feature importance

In [None]:
# Register the synthetic graph, wrapped in a one-element list, as the model
# input used by the feature-importance helper.
feature_importance.set_model_input([graph])

In [None]:
# Plot the feature importance of the loaded GNN; `title` is presumably used
# as the figure title - confirm in FeatureImportance.
feature_importance.plot_feature_importance(model, title= "Homogeneous GNN")