In [None]:
import sys

# Relative prefix pointing one directory up; it is also reused below as a prefix for dataset paths.
path_append = "../" # Go up one directory from where you are.
# Make the parent directory importable so the project-local packages below resolve.
sys.path.append(path_append) 

# Project-local modules, located through the path entry added above.
from tools.setting.ml_params import MLParameters
from tools.setting.data_config import DataConfig
from nn.utils.init import set_random_seed
# Fixed seed of 0 for repeatable runs.
# NOTE(review): which RNGs set_random_seed covers is not visible here — confirm.
set_random_seed(0)

import warnings
# NOTE: this silences every warning for the rest of the session, including
# deprecation notices that may point at real problems.
warnings.filterwarnings("ignore")

In [None]:
import torch
import torchvision.datasets as dset
from torchvision import transforms
# import albumentations
n_img_sz = 128
attribute_indices = torch.tensor([20, 31]) # Male, Smiling

# Root directory that holds the CelebA files (relative to the notebook location).
celeba_root = path_append + '../data/celeba'


def _make_celeba_transform(image_size):
    """Build the preprocessing pipeline applied to every CelebA image.

    Steps, in order: resize to `image_size`, center-crop to the same size,
    convert to a tensor, then normalize each of the three channels with
    mean 0.5 and standard deviation 0.5.
    """
    steps = [
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    return transforms.Compose(steps)


# Both splits read from the same root and are preprocessed identically; the
# data must already be on disk because downloading is disabled.
trainset = dset.CelebA(
    root=celeba_root,
    split="train",
    transform=_make_celeba_transform(n_img_sz),
    download=False,
)

testset = dset.CelebA(
    root=celeba_root,
    split="test",
    transform=_make_celeba_transform(n_img_sz),
    download=False,
)


In [None]:
# Custom dataset class for CelebA dataset
class CelebA(torch.utils.data.Dataset):
    """Wrap a dataset of (image, attribute_vector) pairs and keep only selected attributes.

    Args:
        dataset: Indexable dataset whose items are ``(X, y)`` with ``y`` a
            1-D attribute tensor.
        indices: Optional positions of the attributes to keep (tensor or
            sequence of ints). When omitted, the notebook-level
            ``attribute_indices`` is used, which keeps ``CelebA(dataset)``
            working exactly as before.

    The indices are resolved once at construction, so later rebinding of the
    notebook global cannot silently change what an existing wrapper returns.
    """

    def __init__(self, dataset, indices=None):
        self.dataset = dataset
        if indices is None:
            # Backward-compatible default: the module-level selection (Male, Smiling).
            indices = attribute_indices
        self.indices = torch.as_tensor(indices, dtype=torch.long)

    def __getitem__(self, index):
        """Return ``(X, y_selected)`` for the sample at ``index``."""
        X, y = self.dataset[index]  # image and full attribute vector
        # Select directly along the only axis of the 1-D label vector.
        y = y.index_select(0, self.indices)
        return X, y  # image and the selected attributes

    def __len__(self):
        """Number of samples, identical to the wrapped dataset."""
        return len(self.dataset)
        
# Rebind both names to wrappers that yield only the selected attributes.
# NOTE(review): this cell is not idempotent — running it a second time wraps the
# already-wrapped datasets. ExtendedCelebA below reaches the underlying
# torchvision dataset through exactly one `.dataset` hop, so run this cell once.
trainset = CelebA(trainset)
testset = CelebA(testset)

In [None]:
# Describe the data to the trainer. The observation shape and label count are
# derived from the values used when the datasets were built, so the two cannot
# drift apart.
data_config = DataConfig(
    dataset_name='celebA',
    task_type='multi_label_classification',
    obs_shape=[3, n_img_sz, n_img_sz],
    label_size=int(attribute_indices.numel()),
    show_image_indices=[737, 1518, 390, 607],
)

# Build the training configuration via MLParameters and override selected fields.
ml_params = MLParameters(ccnet_network = 'resnet', encoder_network = 'none')
# NOTE(review): the original assigned d_model = 256 and immediately overwrote it
# with 512. Only the effective value is kept; if 256 was meant for a different
# field, set that field explicitly here.
ml_params.model.ccnet_config.d_model = 512
ml_params.training.num_epoch = 1

# Sanity check: look at one sample and the dataset size before training.
first_data = trainset[0]
X, y = first_data

print(f"Input shape: {X.shape}")
print(f"Label shape: {y.shape}")

print(f"Total number of samples in trainset: {len(trainset)}")

In [None]:
from trainer_hub import TrainerHub

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create the trainer from the training and data configurations on the chosen
# device, with console printing enabled and Weights & Biases logging disabled.
trainer_hub = TrainerHub(
    ml_params,
    data_config,
    device,
    use_print=True,
    use_wandb=False,
)

In [None]:
# Train on the wrapped train split; the test split is handed over as the second argument.
# NOTE(review): how TrainerHub.train uses the second dataset is not visible here — confirm.
trainer_hub.train(trainset, testset)    

In [None]:
# Names of the 40 CelebA attributes. A name's position in this list is used as
# its index into the attribute vector (Male -> 20, Smiling -> 31).
label_list = [
    '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes',
    'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
    'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair',
    'Bushy_Eyebrows', 'Chubby', 'Double_Chin', 'Eyeglasses',
    'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones',
    'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes',
    'No_Beard', 'Oval_Face', 'Pale_Skin', 'Pointy_Nose',
    'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns', 'Smiling',
    'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat',
    'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie', 'Young',
]


def _label_indices(*names):
    """Return the positions of `names` in `label_list` as an integer tensor."""
    return torch.tensor([label_list.index(name) for name in names])


# Attributes the network was trained on.
original_labels = _label_indices('Male', 'Smiling')
# Additional attributes that were not part of the training labels.
selected_labels = _label_indices('Bald', 'Eyeglasses')

In [None]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset, TensorDataset

class ExtendedCelebA(Dataset):
    """Append extra attributes, looked up by file name, to each sample's label.

    Args:
        dataset: Wrapper dataset yielding ``(X, y)`` whose ``.dataset``
            attribute exposes a ``filename`` sequence aligned with the sample
            indices.
        attributes_path: Whitespace-separated attribute table. Row 0 is
            skipped and row 1 is used as the header (``header=1``); rows are
            then looked up by image file name.
        extra_attrs_indices: Column positions (tensor or sequence of ints) of
            the attributes to append.

    Each item is ``(X, y_extended)`` where ``y_extended`` is the original label
    followed by the extra attributes mapped from {-1, 1} to {0, 1}, as float32.
    """

    def __init__(self, dataset, attributes_path, extra_attrs_indices):
        self.dataset = dataset
        # `sep=r"\s+"` is the supported spelling of the deprecated
        # `delim_whitespace=True`; parsing is otherwise unchanged.
        self.attrs = pd.read_csv(attributes_path, sep=r"\s+", header=1)
        self.extra_attrs_indices = extra_attrs_indices
        # Plain Python ints for positional column selection, so `.iloc` does
        # not depend on how pandas coerces a torch tensor.
        self._extra_columns = torch.as_tensor(extra_attrs_indices).tolist()

    def __getitem__(self, index):
        """Return the image and its label extended with the extra attributes."""
        X, y = self.dataset[index]
        # File names live on the dataset wrapped by `self.dataset`.
        img_name = self.dataset.dataset.filename[index]
        extra_attrs = self.attrs.loc[img_name].iloc[self._extra_columns].to_numpy()
        extra_attrs = (extra_attrs + 1) // 2  # Convert -1, 1 to 0, 1
        extra_attrs = torch.tensor(extra_attrs, dtype=torch.float32)
        # Cast explicitly so the concatenation does not rely on implicit
        # integer/float type promotion.
        y = torch.cat((y.to(extra_attrs.dtype), extra_attrs))

        return X, y

    def __len__(self):
        """Number of samples, identical to the wrapped dataset."""
        return len(self.dataset)
    

# Column positions of the attributes appended to each label vector.
extra_attrs_indices = torch.tensor([label_list.index('Bald'), label_list.index('Eyeglasses')]) # Bald, Eyeglasses

# Attribute table shared by both splits.
attributes_path = path_append + '../data/celeba/celeba/list_attr_celeba.txt'

# Fix: the extended training set must wrap `trainset`; it was previously built
# from `testset`, which made both extended datasets identical.
extended_trainset = ExtendedCelebA(trainset, attributes_path, extra_attrs_indices)
extended_testset = ExtendedCelebA(testset, attributes_path, extra_attrs_indices)

In [None]:
# Peek at the first sample only, to confirm the image and extended-label shapes.
for first_sample in extended_testset:
    images, labels = first_sample
    print(images.shape, labels.shape)
    break

In [None]:
import tqdm

test_loader = torch.utils.data.DataLoader(dataset=extended_testset, batch_size=64, shuffle=False, drop_last=False)

ccnet = trainer_hub.ccnet
explanation = None

# Per-batch accumulators, concatenated into full tensors after the loop.
explanation_dataset = []
original_labels_gender_dataset = []
original_labels_smile_dataset = []
extra_labels_bald_dataset = []
extra_labels_glasses_dataset = []

# This loop only runs inference, so autograd is disabled to avoid building a
# graph for every batch.
with torch.no_grad():
    for data, labels in tqdm.tqdm(test_loader):
        data = data.to(device)
        labels = labels.to(device)

        # Label columns follow the order in which the datasets were composed:
        # the two trained attributes first, then the two appended ones.
        original_labels_gender = labels[:, 0]  # Gender
        original_labels_smile = labels[:, 1]   # Smile
        extra_labels_bald = labels[:, 2]       # Bald
        extra_labels_glasses = labels[:, 3]    # Glasses

        # Use CCNet to produce the explanation for each input image.
        explanations = ccnet.explain(data)

        explanation_dataset.append(explanations.detach())

        original_labels_gender_dataset.append(original_labels_gender)
        original_labels_smile_dataset.append(original_labels_smile)

        extra_labels_bald_dataset.append(extra_labels_bald)
        extra_labels_glasses_dataset.append(extra_labels_glasses)

# Stack the per-batch pieces into one tensor per quantity.
explanation_tensor = torch.cat(explanation_dataset, dim=0)

original_labels_gender_tensor = torch.cat(original_labels_gender_dataset, dim=0)
original_labels_smile_tensor = torch.cat(original_labels_smile_dataset, dim=0)

extra_labels_bald_tensor = torch.cat(extra_labels_bald_dataset, dim=0)
extra_labels_glasses_tensor = torch.cat(extra_labels_glasses_dataset, dim=0)

# One (explanation, label) dataset per attribute; all share the same explanations.
dataset_with_gender_labels = TensorDataset(explanation_tensor, original_labels_gender_tensor)
dataset_with_smile_labels = TensorDataset(explanation_tensor, original_labels_smile_tensor)

dataset_with_bald_labels = TensorDataset(explanation_tensor, extra_labels_bald_tensor)
dataset_with_glasses_labels = TensorDataset(explanation_tensor, extra_labels_glasses_tensor)

In [None]:
import torch
import torch.nn.functional as F
import tqdm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Width of one explanation vector, read from the extracted explanations rather
# than hard-coded, so the probe's input layer always matches the data.
input_size = explanation_tensor.shape[-1]
num_classes = 1  # Binary classification

def train_supervised_model(model, dataset, num_epoch=5, device=None):
    """Train a binary classifier on ``dataset`` and report accuracy after each epoch.

    Args:
        model: Module whose forward pass returns probabilities in [0, 1] with
            shape ``(batch, 1)``, i.e. the sigmoid is already applied.
        dataset: Dataset yielding ``(features, label)`` pairs with 0/1 labels.
        num_epoch: Number of passes over the data. ``0`` is a no-op.
        device: Target device; defaults to CUDA when available, else CPU.

    The loss is plain binary cross-entropy on probabilities. The previous
    ``binary_cross_entropy_with_logits`` applied a second sigmoid on top of the
    model's own, which distorted the loss and its gradients.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

    for epoch in tqdm.tqdm(range(num_epoch)):
        # Evaluation below switches to eval mode, so re-enable training mode each epoch.
        model.train()
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            targets = labels.unsqueeze(1).float()  # match the (batch, 1) output shape
            loss = torch.nn.functional.binary_cross_entropy(outputs, targets)
            loss.backward()
            optimizer.step()

        # Per-epoch metrics. This previously sat outside the loop, so it ran
        # once and raised NameError when num_epoch was 0.
        print(f'\nEpoch {epoch + 1} / {num_epoch}')
        evaluate_model(model, train_loader, device)

def evaluate_model(model, data_loader, device):
    """Compute and print the binary accuracy of ``model``.

    Args:
        model: Module returning probabilities in [0, 1]; predictions are
            obtained by rounding them.
        data_loader: Iterable of ``(features, labels)`` batches. A raw
            ``torch.utils.data.Dataset`` is also accepted and is batched
            internally instead of being iterated one sample at a time.
        device: Device on which to run the model.

    Returns:
        The accuracy as a float, or ``nan`` when no samples were seen.
    """
    if isinstance(data_loader, torch.utils.data.Dataset):
        data_loader = torch.utils.data.DataLoader(data_loader, batch_size=64, shuffle=False)

    model.eval()
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for data, labels in data_loader:
            data, labels = data.to(device), labels.to(device, dtype=torch.float32)
            outputs = model(data)

            # Flatten instead of squeeze(): squeeze() turns a (1, 1) output
            # (batch of one) into a 0-d tensor, which broke the accumulation.
            predicted = outputs.reshape(-1).round()
            targets = labels.reshape(-1)

            num_correct += (predicted == targets).sum().item()
            num_seen += targets.numel()

    accuracy = num_correct / num_seen if num_seen else float("nan")
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

In [None]:
import torch.nn as nn

class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        input_size: Number of input features.
        num_classes: Number of outputs produced by the linear layer.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=num_classes)

    def forward(self, x):
        """Return sigmoid-activated outputs (values in [0, 1]) for ``x``."""
        logits = self.linear(x)
        return logits.sigmoid()


In [None]:
# Linear-probe experiments: fit a logistic regression on the explanations for
# each attribute separately.
probe_tasks = [
    ("Training with original labels:", dataset_with_gender_labels),
    ("Training with original labels:", dataset_with_smile_labels),
    ("Training with extra labels:", dataset_with_bald_labels),
    ("Training with extra labels:", dataset_with_glasses_labels),
]

for task_index, (banner, probe_dataset) in enumerate(probe_tasks):
    if task_index > 0:
        print('='*20)

    # A fresh model per attribute: reusing one instance would start each probe
    # from weights fitted to the previous, unrelated target.
    model = LogisticRegressionModel(input_size, num_classes).to(device)

    print(banner)
    train_supervised_model(model, probe_dataset, device=device)

    # Evaluate in batches through a DataLoader rather than iterating the raw
    # dataset one sample at a time.
    # NOTE(review): this scores the same data the probe was trained on; use a
    # held-out split for an unbiased estimate.
    eval_loader = torch.utils.data.DataLoader(probe_dataset, batch_size=64, shuffle=False)
    evaluate_model(model, eval_loader, device)
