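Implementing a feedforward neural network (FNN) as a first step: a baseline classifier that predicts whether an ordered pair of variables is a causal edge, using the raw observations of both variables and of their Markov blankets.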

In [2]:
from d2c.descriptors import D2C, DataLoader
from d2c.descriptors.utils import return_mb
import numpy as np

from tqdm import tqdm

In [3]:
# Import the d2c loader under an explicit alias: a later cell imports
# torch.utils.data.DataLoader under the same bare name, so re-running this
# cell afterwards would otherwise instantiate the wrong class.
from d2c.descriptors import DataLoader as D2CDataLoader

# NOTE(review): from_pickle presumably unpickles the file; only point it at
# trusted data, since unpickling can execute arbitrary code.
dataloader = D2CDataLoader()
dataloader.from_pickle('../example/synthetic_data.pkl')
observations = dataloader.get_observations()  # indexed per DAG further down
dags = dataloader.get_dags()


In [4]:
# Number of observation sets returned by the loader.
len(observations)

7000

In [5]:
def pad_mb(before_padding_MB, size = 3):
    """Force a 2-D array to have exactly ``size`` columns.

    Arrays with fewer than ``size`` columns are right-padded with zero
    columns; arrays with more are truncated to their first ``size`` columns.
    Arrays that already have ``size`` columns are returned unchanged.

    Parameters
    ----------
    before_padding_MB : np.ndarray
        2-D array of shape (n_rows, n_cols).
    size : int, optional
        Target number of columns (default 3).

    Returns
    -------
    np.ndarray
        Array of shape (n_rows, size).
    """
    n_rows, n_cols = before_padding_MB.shape[0], before_padding_MB.shape[1]

    if n_cols < size:
        # Zero-fill the missing columns on the right.
        padding = np.zeros((n_rows, size - n_cols))
        return np.hstack([before_padding_MB, padding])

    if n_cols > size:
        # Truncate to the requested width (previously hard-coded to 3,
        # which ignored the ``size`` argument).
        return before_padding_MB[:, :size]

    return before_padding_MB

In [6]:
def prepare_data_for_couple(observations, dag, source_name, target_name, is_causal):
    """Build one labelled sample for the ordered pair (source, target).

    The feature matrix is the column-wise concatenation of the source
    observations, the target observations, and the padded Markov-blanket
    observations of each of them (as returned by ``return_mb`` and
    ``pad_mb``).

    Parameters
    ----------
    observations : indexable by column name / list of names, exposing ``.values``
        Presumably a pandas DataFrame -- confirm against the loader.
    dag : graph object accepted by ``return_mb``.
    source_name, target_name : column / node identifiers.
    is_causal : label stored with the sample.

    Returns
    -------
    dict
        Keys ``'features'``, ``'is_causal'``, ``'edge_source'``, ``'edge_dest'``.
    """
    # Markov blankets of the two endpoints.
    source_blanket = return_mb(dag, source_name)
    target_blanket = return_mb(dag, target_name)

    # Fixed-width blocks for each blanket.
    source_blanket_block = pad_mb(observations[source_blanket].values)
    target_blanket_block = pad_mb(observations[target_blanket].values)

    # Single-column blocks for the two variables themselves.
    source_column = observations[source_name].values.reshape(-1, 1)
    target_column = observations[target_name].values.reshape(-1, 1)

    feature_blocks = [
        source_column,
        target_column,
        source_blanket_block,
        target_blanket_block,
    ]

    return {
        'features': np.hstack(feature_blocks),
        'is_causal': is_causal,
        'edge_source': source_name,
        'edge_dest': target_name,
    }

In [7]:
from multiprocessing import Pool, cpu_count

def process_dag(index):
    """Return the samples for the DAG at position ``index``.

    For every edge of ``dags[index]`` two samples are produced from
    ``observations[index]``: the edge as given (label 1) followed by the
    same pair reversed (label 0).
    """
    graph = dags[index]
    graph_observations = observations[index]

    collected = []
    for edge in graph.edges:
        tail, head = edge[0], edge[1]
        forward = prepare_data_for_couple(graph_observations, graph, tail, head, 1)
        backward = prepare_data_for_couple(graph_observations, graph, head, tail, 0)
        collected.extend((forward, backward))
    return collected

if __name__ == '__main__':
    # Fan the per-DAG work out over one worker per CPU core.
    # NOTE(review): process_dag reads the module-level `dags` and
    # `observations`; confirm the workers can see them on your platform.
    dag_indices = range(len(dags))
    with Pool(cpu_count()) as pool:
        results = pool.map(process_dag, dag_indices)

    # Concatenate the per-DAG lists into one flat list of samples.
    samples = []
    for sublist in results:
        samples.extend(sublist)

In [8]:
import torch
from torch.utils.data import Dataset

class CausalDataset(Dataset):
    """In-memory dataset of (features, label) pairs.

    Each element of ``samples`` is a mapping with a ``'features'`` array and
    an ``'is_causal'`` label. All samples are stacked once at construction
    time into a float32 feature tensor and an int64 label tensor.
    """

    def __init__(self, samples):
        stacked_features = np.array([entry['features'] for entry in samples])
        stacked_labels = np.array([entry['is_causal'] for entry in samples])
        self.features = torch.tensor(stacked_features, dtype=torch.float32)
        self.labels = torch.tensor(stacked_labels, dtype=torch.long)

    def __len__(self):
        # One label per sample.
        return len(self.labels)

    def __getitem__(self, idx):
        item_features = self.features[idx]
        item_label = self.labels[idx]
        return item_features, item_label


In [9]:
import torch.nn as nn

class CausalInferenceNN(nn.Module):
    """Two-hidden-layer feedforward classifier.

    Layout: ``input_dim -> hidden_dim -> hidden_dim // 2 -> num_classes``
    with a ReLU after each hidden layer. The output is raw logits (no
    softmax is applied).
    """

    def __init__(self, input_dim, hidden_dim=128, num_classes=2):
        super().__init__()
        half_hidden = hidden_dim // 2
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, half_hidden)
        self.relu2 = nn.ReLU()
        self.output_layer = nn.Linear(half_hidden, num_classes)

    def forward(self, x):
        # Collapse everything after the batch dimension into one vector.
        flat = x.view(x.size(0), -1)

        hidden = self.relu1(self.fc1(flat))
        hidden = self.relu2(self.fc2(hidden))
        return self.output_layer(hidden)


In [10]:
from torch.utils.data import DataLoader, random_split

# Stack all samples into the feature and label tensors held by CausalDataset.
dataset = CausalDataset(samples)


  self.features = torch.tensor(self.features, dtype=torch.float32)


In [11]:

# Split dataset (80% train / 20% validation).
# The split is seeded so the same partition is obtained on every run.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Data loaders
# The torch loader is referenced by its qualified name because the bare name
# `DataLoader` is also imported from d2c.descriptors earlier in the notebook.
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [12]:
# Shape of the stacked feature tensor held by the dataset.
dataset.features.shape

torch.Size([72024, 250, 8])

In [13]:
# The network flattens each sample, so its input width is the product of the
# two per-sample dimensions of the feature tensor.
input_dim = dataset.features.size(1) * dataset.features.size(2)
model = CausalInferenceNN(input_dim)

# Cross-entropy over the class logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [14]:
def train(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader``.

    Puts the model in training mode, performs one optimizer step per batch,
    and returns the mean of the per-batch loss values (sum of batch losses
    divided by ``len(loader)``).
    """
    model.train()
    running_loss = 0
    for batch_features, batch_labels in loader:
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_features), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(loader)


In [15]:
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating it.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier returning one row of class logits per sample.
    loader : iterable of (features, labels) batches supporting ``len()``.
    criterion : callable(outputs, labels) -> scalar loss tensor.
    device : torch.device
        Device the batches are moved to before the forward pass.

    Returns
    -------
    (float, float)
        Mean of the per-batch losses, and the fraction of evaluated samples
        whose arg-max prediction equals the label.

    Raises
    ------
    ZeroDivisionError
        If ``loader`` yields no batches.
    """
    model.eval()
    total_loss = 0
    correct = 0
    evaluated = 0
    with torch.no_grad():
        for features, labels in loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            total_loss += criterion(outputs, labels).item()

            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            # Count what was actually seen: len(loader.dataset) over-counts
            # when the loader skips samples (e.g. drop_last or a sampler).
            evaluated += labels.size(0)
    accuracy = correct / evaluated
    return total_loss / len(loader), accuracy


In [16]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model.to(device)

num_epochs = 20

# One training pass followed by one validation pass per epoch.
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_metrics = validate(model, val_loader, criterion, device)
    val_loss, val_accuracy = val_metrics

    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f}')
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')


KeyboardInterrupt: 