# D2C with GNNs

In this notebook we explore the use of Graph Neural Networks for Causal Inference with D2C. Specifically, we will perform an edge classification task, where each edge will have as features the descriptors previously computed with D2C.

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.data import Batch


  from .autonotebook import tqdm as notebook_tqdm


Here is the Graph Neural Network: a sequence of two Graph Convolutional Layers, with Relu and dropout

In [2]:
class GNN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(81, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, 2) # 2 classes (causal, non-causal)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x


We need a function to transform the descriptors dataframe to proper edge tensors.

In [3]:
import torch
import pandas as pd

def create_graph_data(df):
    # Extracting edge indices from the DataFrame
    edge_index = torch.tensor([df['edge_source'].values, df['edge_dest'].values], dtype=torch.long)

    # Creating edge features using selected columns (effca, effef, comcau, delta)
    edge_features = torch.tensor(df[df.columns[3:-1]].values, dtype=torch.float)

    # Assuming 'is_causal' column contains the labels for edges
    edge_labels = torch.tensor(df['is_causal'].values, dtype=torch.long)

    return edge_features, edge_index, edge_labels


In [4]:
#loading descriptors 
import pandas as pd 
data = pd.read_csv('../csv/timeseries_training.csv')
test = data.loc[data['graph_id'] == 8]
train = data.loc[data['graph_id'] < 8]

In [5]:
graphs = []

# Iterate over unique graph_ids
for graph_id in train['graph_id'].unique():
    # Filter the DataFrame for rows corresponding to the current graph_id
    graph_df = train[train['graph_id'] == graph_id]

    # Create edge_features, edge_index, and edge_labels for this graph
    edge_features, edge_index, edge_labels = create_graph_data(graph_df)

    # Create a Data object for this graph
    graph_data = Data(x=edge_features, edge_index=edge_index, y=edge_labels)

    # Add to the list of graphs
    graphs.append(graph_data)

train_data = Batch.from_data_list(graphs)

  edge_index = torch.tensor([df['edge_source'].values, df['edge_dest'].values], dtype=torch.long)


We train our network here

In [6]:
# Define model and optimizer
model = GNN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(train_data.x, train_data.edge_index)
    loss = F.cross_entropy(out, train_data.y)
    loss.backward()
    optimizer.step()


In [7]:
eval_edge_features, eval_edge_index, eval_edge_labels = create_graph_data(test)

# Create a Data object for this graph
eval_graph_data = Data(x=eval_edge_features, edge_index=eval_edge_index, y=eval_edge_labels)



We test our network on 1 unseen graph

In [8]:
# Set the model to evaluation mode
model.eval()

# Make predictions using the evaluation data
with torch.no_grad():
    predictions = model(eval_graph_data.x, eval_graph_data.edge_index)

# Convert the predictions to probabilities using the softmax function
probs = F.softmax(predictions, dim=1)

# Get the class with the highest probability
predicted_labels = torch.argmax(probs, dim=1)

# Print the Predicted Labels
print("Predicted Labels:")
print(predicted_labels)
print("Real Labels:")
print(eval_edge_labels)

Predicted Labels:
tensor([1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0])
Real Labels:
tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
