In [None]:
#@title Imports
%reset -f 

In [None]:
import pandas as pd

In [None]:
import pylab
import scipy.io

In [None]:

import shutil
import numpy as np
from itertools import product as cartesian_prod

import matplotlib.pyplot as plt

import urllib.request
from scipy.io import arff
from copy import deepcopy
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import os
import argparse
import sys

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

# Notebook-wide display setting: print NumPy floats with 2 digits of precision.
np.set_printoptions(precision=2)

def set_npseed(seed):
    """Seed NumPy's global (legacy) random number generator with ``seed``."""
    np.random.seed(seed=seed)
def set_torchseed(seed):
    """Seed PyTorch's CPU and CUDA generators and force deterministic cuDNN.

    Calls ``torch.manual_seed``, ``torch.cuda.manual_seed`` and
    ``torch.cuda.manual_seed_all`` with ``seed``, then turns cuDNN's
    ``deterministic`` flag on and its ``benchmark`` flag off.
    """
    seeders = (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
def sigmoid(u):
    """Logistic function ``1 / (1 + exp(-u))`` with the input clamped to [-100, 100].

    The clamp keeps ``np.exp`` from overflowing for very large magnitudes.
    """
    bounded = np.minimum(np.maximum(u, -100), 100)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import pairwise_distances

In [None]:
class Args:
    """Plain container holding four default hyper-parameter values.

    Attributes set on every instance: ``numlayer`` (3), ``numnodes`` (5),
    ``beta`` (5.0) and ``lr`` (0.1).
    """

    def __init__(self):
        # Same four instance attributes, assigned in one shot.
        self.__dict__.update(numlayer=3, numnodes=5, beta=5., lr=.1)
        

In [None]:
# Pick the compute device once for the notebook: CUDA when available, else CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
class DLGN_FC(nn.Module):
    """Gated network with one bias-free linear gating layer per hidden layer.

    Every gating layer maps the raw input to ``num_hidden_nodes[i]`` scores,
    which are squashed by ``sigmoid(beta * score)``.  The output for a sample
    is the sum, over every combination of one node per layer, of the product
    of the chosen gates times the matching entry of ``value_layers`` (a tensor
    of shape ``[1] + num_hidden_nodes``).

    Args:
        input_dim: number of input features.
        output_dim: stored in ``num_nodes`` only; not otherwise used here.
        num_hidden_nodes: sequence with the width of each hidden layer.
        beta: soft gating sharpness multiplying the gate pre-activations.
        mode: stored on the instance; not used by the code in this class.
    """

    def __init__(self, input_dim=None, output_dim=None, num_hidden_nodes=[], beta=30, mode='pwc'):
        super(DLGN_FC, self).__init__()
        # Private copy: never alias the caller's (or the shared default) list.
        num_hidden_nodes = list(num_hidden_nodes)
        self.num_hidden_layers = len(num_hidden_nodes)
        self.beta=beta  # Soft gating parameter
        self.mode = mode
        self.num_nodes=[input_dim]+num_hidden_nodes+[output_dim]
        self.gating_layers=nn.ModuleList()
        # One value per path (one node picked in each layer), small random init.
        # Created before the gating layers so seeded runs draw in the same order.
        self.value_layers=nn.Parameter(torch.randn([1]+num_hidden_nodes)/100.)
        self.num_layer = len(num_hidden_nodes)
        self.num_hidden_nodes = num_hidden_nodes
        for i in range(self.num_hidden_layers):
            # Each gating layer reads the raw input (num_nodes[0]), not the previous layer.
            self.gating_layers.append(
                nn.Linear(self.num_nodes[0], self.num_nodes[i+1], bias=False))

    def set_parameters_with_mask(self, to_copy, parameter_masks):
        """Copy parameters from ``to_copy`` into this model, optionally masked.

        Args:
            to_copy: a ``DLGN_FC`` with the same architecture as ``self``.
            parameter_masks: dict keyed by parameter name.  For a listed
                parameter only entries whose mask value is > 0 are copied;
                a parameter without a mask is copied in full.
        """
        own_state = self.state_dict()  # tensors share storage with the live parameters
        for name, copy_param in to_copy.named_parameters():
            orig_param = own_state[name]
            copy_param = copy_param.detach().to(orig_param.device)
            if name in parameter_masks:
                param_mask = torch.as_tensor(parameter_masks[name]).to(orig_param.device) > 0
                orig_param[param_mask] = copy_param[param_mask]
            else:
                # In-place copy; rebinding the local name would leave self untouched.
                orig_param.copy_(copy_param)

    def return_gating_functions(self):
        """Return detached clones of the gating weight matrices, one per hidden layer."""
        return [self.gating_layers[i].weight.detach().clone()
                for i in range(self.num_hidden_layers)]

    def forward(self, x):
        """Compute one scalar per input row.

        ``x`` is multiplied by each gating weight matrix, so it must be
        ``(batch, input_dim)``; the result has shape ``(batch,)``.

        Raises:
            ValueError: if the model was built without hidden layers.
        """
        if self.num_hidden_layers == 0:
            raise ValueError("DLGN_FC.forward requires at least one hidden layer")
        cp = None
        for i in range(self.num_hidden_layers):
            # Place layer i's gates on their own axis so the running product
            # broadcasts to shape (batch, n_1, ..., n_L).
            fiber = [len(x)]+[1]*self.num_layer
            fiber[i+1] = self.num_hidden_nodes[i]
            gate_score = torch.sigmoid(self.beta*(x@self.gating_layers[i].weight.T))
            gate_score = gate_score.reshape(tuple(fiber))
            cp = gate_score if cp is None else cp*gate_score
        # Sum over every hidden-layer axis, however many layers there are.
        path_dims = tuple(range(1, self.num_hidden_layers+1))
        return torch.sum(cp*self.value_layers, dim=path_dims)

In [None]:
#@title Train DLGN model
def train_dlgn (DLGN_obj, train_data_curr,vali_data_curr,test_data_curr,
                train_labels_curr,test_labels_curr,vali_labels_curr,
                parameter_mask=dict()):
    """Train the gating layers of a DLGN with SGD, periodically refitting its value tensor.

    Besides its arguments this function reads the following names from the
    surrounding notebook namespace: ``lr``, ``no_of_batches``, ``saved_epochs``,
    ``update_value_epochs``, ``beta``, ``num_neuron`` and ``num_layer``.

    NOTE(review): the value refit is written for exactly three hidden layers
    of ``num_neuron`` nodes each and uses the global ``beta`` rather than
    ``DLGN_obj.beta`` -- confirm these agree with the model being trained.

    Args:
        DLGN_obj: the initial network; moved to CUDA when available.
        train_data_curr, vali_data_curr, test_data_curr: feature arrays.
        train_labels_curr, test_labels_curr, vali_labels_curr: labels, used as
            class indices (0/1) against the two-column logits ``(-f, f)``.
        parameter_mask: not used by this function; kept for interface
            compatibility.

    Returns:
        ``(train_losses, DLGN_obj_return, DLGN_obj_store, losses, debug_models)``.
        ``DLGN_obj_return`` is a CPU copy of the model with the lowest
        validation error (the initial model if no epoch improved on it),
        ``losses`` holds the full-train loss per completed epoch, and the
        other three lists are returned empty.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    DLGN_obj.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(DLGN_obj.parameters(), lr=lr)

    train_data_torch = torch.Tensor(train_data_curr)
    vali_data_torch = torch.Tensor(vali_data_curr)
    test_data_torch = torch.Tensor(test_data_curr)

    train_labels_torch = torch.tensor(train_labels_curr, dtype=torch.int64)
    test_labels_torch = torch.tensor(test_labels_curr, dtype=torch.int64)
    vali_labels_torch = torch.tensor(vali_labels_curr, dtype=torch.int64)

    num_batches = no_of_batches
    # Never let the batch size hit 0 (range() rejects a zero step).
    batch_size = max(1, len(train_data_curr)//num_batches)
    losses=[]
    DLGN_obj_store = []
    best_vali_error = len(vali_labels_curr)
    # Fallback so a model is always returned, even when training stops before
    # the first validation pass or validation never improves.
    DLGN_obj_return = deepcopy(DLGN_obj)

    debug_models= []
    train_losses = []

    def _full_train_loss():
        # Cross-entropy over the whole training set; no autograd graph is needed.
        with torch.no_grad():
            preds = DLGN_obj(train_data_torch.to(device)).reshape((-1,1))
            logits = torch.cat((-1*preds,preds), dim=1)
            return criterion(logits, train_labels_torch.to(device))

    tepoch = tqdm(range(saved_epochs[-1]+1))
    for epoch in tepoch:  # loop over the dataset multiple times
        if epoch in update_value_epochs:
            # Refit the value tensor with an L1 logistic regression on the path features.
            train_loss = _full_train_loss()
            print("Loss before updating value_net at epoch", epoch, " is ", train_loss)
            print("Total path abs value", torch.abs(DLGN_obj.value_layers.cpu().detach()).sum().numpy())

            ew = DLGN_obj.return_gating_functions()
            # One soft-gate tensor per layer, each on its own axis so the
            # product enumerates every (node1, node2, node3) path.
            cp_feat1 = sigmoid(beta*np.dot(train_data_curr,ew[0].cpu().T).reshape(-1,num_neuron,1,1))
            cp_feat2 = sigmoid(beta*np.dot(train_data_curr,ew[1].cpu().T).reshape(-1,1,num_neuron,1))
            cp_feat3 = sigmoid(beta*np.dot(train_data_curr,ew[2].cpu().T).reshape(-1,1,1,num_neuron))
            cp_feat = cp_feat1 * cp_feat2 * cp_feat3
            cp_feat_vec = cp_feat.reshape((len(cp_feat),-1))

            clf = LogisticRegression(C=0.03, fit_intercept=False,max_iter=1000, penalty="l1", solver='liblinear')
            clf.fit(2*cp_feat_vec, train_labels_curr)
            # With no intercept, scoring the identity matrix reads back one coefficient per path.
            value_wts  = clf.decision_function(np.eye(num_neuron**num_layer)).reshape(1,num_neuron,num_neuron,num_neuron)
            # Write through a detached view so the parameter is updated in place.
            A= DLGN_obj.value_layers.detach()
            A[:] = torch.Tensor(value_wts)

            train_loss = _full_train_loss()
            print("Loss after updating value_net at epoch", epoch, " is ", train_loss)
            print("Total path abs value", torch.abs(DLGN_obj.value_layers.cpu().detach()).sum().numpy())

        for batch_start in range(0,len(train_data_curr),batch_size):
            # Drop the trailing partial batch.
            if (batch_start+batch_size)>len(train_data_curr):
                break
            optimizer.zero_grad()
            inputs = train_data_torch[batch_start:batch_start+batch_size].to(device)
            targets = train_labels_torch[batch_start:batch_start+batch_size].reshape(batch_size).to(device)
            preds = DLGN_obj(inputs).reshape(-1,1)
            outputs = torch.cat((-1*preds, preds), dim=1)
            loss = criterion(outputs, targets)
            loss.backward()
            for name,param in DLGN_obj.named_parameters():
                if "val" in name:
                    # Value tensor is frozen for SGD; it only changes in the refit above.
                    param.grad *= 0.0
                if "gat" in name:
                    # Scale knob for gating gradients (1.0 = unchanged).
                    param.grad *= 1.0
            optimizer.step()

        train_loss = _full_train_loss()
        if epoch%5 == 0:
            print("Loss after updating at epoch ", epoch, " is ", train_loss)
            with torch.no_grad():
                test_preds =DLGN_obj(test_data_torch.to(device)).reshape(-1,1)
            test_preds = test_preds.detach().cpu().numpy()
            print("Test error=",np.sum(test_labels_curr != (np.sign(test_preds[:,0])+1)//2 ))
        if train_loss < 0.005:
            break
        if np.isnan(train_loss.detach().cpu().numpy()):
            break

        losses.append(train_loss.cpu().detach().clone().numpy())
        targets = vali_labels_torch.to(device)
        with torch.no_grad():
            preds =DLGN_obj(vali_data_torch.to(device)).reshape(-1,1)
        vali_preds = torch.cat((-1*preds, preds), dim=1)
        vali_preds = torch.argmax(vali_preds, dim=1)
        vali_error= torch.sum(targets!=vali_preds)
        if vali_error < best_vali_error:
            DLGN_obj_return = deepcopy(DLGN_obj)
            best_vali_error = vali_error
    plt.figure()
    plt.title("DLGN loss vs epoch")
    plt.plot(losses)
    DLGN_obj_return.to(torch.device('cpu'))
    return train_losses, DLGN_obj_return, DLGN_obj_store, losses, debug_models

In [None]:
def preprocess_data_adult(data_path):
    """Load an Adult-format CSV and return a fully numeric DataFrame.

    Rows with any missing value (``" ?"`` in the raw file) are dropped, the
    eight categorical columns are label-encoded, and ``income`` becomes 1 for
    ">50K" and 0 otherwise.

    Args:
        data_path: path to a headerless CSV with the 15 Adult columns.

    Returns:
        pandas.DataFrame with the columns listed in ``columns`` below.
    """
    columns = [
        "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
        "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
        "hours-per-week", "native-country", "income"
    ]
    df = pd.read_csv(data_path, names=columns, na_values=[" ?"])

    # Drop rows with missing values (new frame, no in-place mutation).
    df = df.dropna()

    # Convert categorical features using Label Encoding
    categorical_columns = ["workclass", "education", "marital-status", "occupation", "relationship", "race", "sex", "native-country"]
    for col in categorical_columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Encode the target.  Surrounding whitespace and a trailing "." are ignored,
    # so both " >50K" and " >50K." (the variant used by the UCI adult.test
    # file) map to 1 instead of silently becoming 0.
    income_text = df["income"].astype(str).str.strip().str.rstrip(".")
    df["income"] = (income_text == ">50K").astype("int64")

    return df

def preprocess_data_bank_marketing(data):
    """Label-encode every object-dtype column of ``data`` in place and return it.

    Each such column is replaced by the integer codes produced by a fresh
    ``LabelEncoder``; the same DataFrame object that was passed in is returned.
    """
    object_columns = data.select_dtypes(include=['object']).columns
    for column in object_columns:
        encoder = LabelEncoder()
        data[column] = encoder.fit_transform(data[column])
    return data

def preprocess_data_credit_card_defaults(data):
    """One-hot encode SEX/EDUCATION/MARRIAGE and standardize the numeric columns.

    Returns a new DataFrame: dummy columns (first level dropped) replace the
    three categorical columns, and the twenty columns listed below are
    rescaled with a ``StandardScaler``.
    """
    encoded = pd.get_dummies(data, columns=["SEX", "EDUCATION", "MARRIAGE"], drop_first=True)

    # Columns that get standardized.
    numeric_columns = (
        ["LIMIT_BAL", "AGE"]
        + ["PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6"]
        + ["BILL_AMT1", "BILL_AMT2", "BILL_AMT3", "BILL_AMT4", "BILL_AMT5", "BILL_AMT6"]
        + ["PAY_AMT1", "PAY_AMT2", "PAY_AMT3", "PAY_AMT4", "PAY_AMT5", "PAY_AMT6"]
    )
    standardizer = StandardScaler()
    encoded[numeric_columns] = standardizer.fit_transform(encoded[numeric_columns])

    return encoded


def fetch_ADULT(data_dir="./ADULT_DATA"):
    """Download the Adult archive, preprocess ``adult.data`` and split it 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64 (1 where
        ``income`` equals 1).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------ADULT--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/2/adult.zip"
    zip_file_path = os.path.join(data_dir, "adult.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        df_train = preprocess_data_adult(os.path.join(data_dir, "adult.data"))
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = df_train.drop("income", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = df_train["income"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    # Here the first half of the hold-out is the test set, the second the validation set.
    X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_val = (y_val.values.reshape(-1) == 1).astype('int64')

    return dict(
        X_train=X_train.astype('float32'), y_train=y_train, X_valid=X_val.astype('float32'), y_valid=y_val, X_test=X_test.astype('float32'), y_test=y_test
    )

def fetch_bank_marketing(data_dir="./BANK"):
    """Download the Bank Marketing archive and split ``bank-additional-full.csv`` 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_test``, ``y_test``,
        ``X_valid``, ``y_valid``; features are float32, labels int64 (1 where
        the label-encoded ``y`` column equals 1).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------BANK--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
    zip_file_path = os.path.join(data_dir, "bank_marketing.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)

        # The outer archive contains a second archive with the full dataset.
        zip_file_path_bank_add = os.path.join(data_dir, "bank-additional.zip")
        with zipfile.ZipFile(zip_file_path_bank_add, "r") as zip_ref:
            zip_ref.extractall(data_dir)

        extracted_dir = os.path.join(data_dir, "bank-additional")
        data = pd.read_csv(os.path.join(extracted_dir, "bank-additional-full.csv"), sep=';')
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    data = preprocess_data_bank_marketing(data)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("y", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["y"]
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_val = (y_val.values.reshape(-1) == 1).astype('int64')

    return dict(
        X_train=X_train.astype('float32'), y_train=y_train,X_test=X_test.astype('float32'), y_test=y_test, X_valid = X_val.astype('float32'), y_valid = y_val
    )

def fetch_credit_card_defaults(data_dir="./CREDIT"):
    """Download the credit-card default archive, preprocess the ``.xls`` sheet and split 60/20/20.

    ``xlrd`` (needed by pandas to read ``.xls``) is installed into the running
    interpreter's environment only when it is not already importable.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64 (1 where
        "default payment next month" equals 1).
    """
    import importlib
    import importlib.util
    import subprocess
    import sys
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------CREDIT--------------------------------------")
    if importlib.util.find_spec("xlrd") is None:
        # Plain-Python replacement for the `!pip install xlrd` shell escape:
        # targets this kernel's interpreter and runs only when needed.
        # Best-effort like the shell escape; read_excel reports a missing engine.
        subprocess.run([sys.executable, "-m", "pip", "install", "xlrd"], check=False)
        importlib.invalidate_caches()

    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/350/default+of+credit+card+clients.zip"
    zip_file_path = os.path.join(data_dir, "credit_card_defaults.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)

        # The first spreadsheet row is skipped; the second holds the column names.
        data = pd.read_excel(os.path.join(data_dir, "default of credit card clients.xls"), skiprows=1)
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    data = preprocess_data_credit_card_defaults(data)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("default payment next month", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["default payment next month"]
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_val = (y_val.values.reshape(-1) == 1).astype('int64')

    return dict(
        X_train=X_train.astype('float32'), y_train=y_train, X_valid=X_val.astype('float32'), y_valid=y_val , X_test=X_test.astype('float32'), y_test=y_test
    )


def fetch_gamma_telescope(data_dir="./TELESCOPE"):
    """Download the MAGIC gamma telescope archive and split ``magic04.data`` 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64
        (1 for class "g", 0 for class "h").
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------TELESCOPE--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/159/magic+gamma+telescope.zip"
    zip_file_path = os.path.join(data_dir, "magic_gamma_telescope.zip")
    columns = [
        "fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long",
        "fM3Trans", "fAlpha", "fDist", "class"
    ]
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        data = pd.read_csv(os.path.join(data_dir, "magic04.data"), header=None, names=columns)
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # Convert the class labels to binary format (g = gamma, h = hadron)
    data["class"] = data["class"].map({"g": 1, "h": 0})

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("class", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["class"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits

def fetch_rice_dataset(data_dir="./RICE"):
    """Download the Rice (Cammeo/Osmancik) archive and split the ARFF data 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64
        (1 for ``b'Cammeo'``, 0 for ``b'Osmancik'``).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------RICE--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/545/rice+cammeo+and+osmancik.zip"
    zip_file_path = os.path.join(data_dir, "rice_dataset.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)

        # Load the ARFF file using SciPy
        arff_file_name = os.path.join(data_dir, "Rice_Cammeo_Osmancik.arff")
        data, meta = arff.loadarff(arff_file_name)
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    df = pd.DataFrame(data)
    print("df",df)
    # The class values are mapped from their byte-string form.
    df["Class"] = df["Class"].map({b'Cammeo': 1, b'Osmancik': 0})

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = df.drop("Class", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = df["Class"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits

def fetch_german_credit_data(data_dir="./GERMAN"):
    """Download the Statlog German credit archive and split ``german.data`` 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64
        (raw class 1 -> 1, raw class 2 -> 0).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------GERMAN--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "http://archive.ics.uci.edu/static/public/144/statlog+german+credit+data.zip"
    zip_file_path = os.path.join(data_dir, "german_credit_data.zip")
    columns = [
        "checking_account_status", "duration_months", "credit_history", "purpose",
        "credit_amount", "savings_account_bonds", "employment", "installment_rate",
        "personal_status_sex", "other_debtors_guarantors", "present_residence",
        "property", "age", "other_installment_plans", "housing", "existing_credits",
        "job", "num_dependents", "own_telephone", "foreign_worker", "class"
    ]
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        data = pd.read_csv(os.path.join(data_dir, "german.data"), sep=' ', header=None, names=columns)
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # Convert the class labels to binary format (1 = Good, 2 = Bad)
    data["class"] = data["class"].map({1: 1, 2: 0})

    # Forward-fill nulls; DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill') spelling.
    data = data.ffill()

    # Convert categorical variables to dummy variables
    categorical_columns = [
        "checking_account_status", "credit_history", "purpose", "savings_account_bonds",
        "employment", "personal_status_sex", "other_debtors_guarantors", "property",
        "other_installment_plans", "housing", "job", "own_telephone", "foreign_worker"
    ]
    data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("class", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["class"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits

def fetch_spambase_dataset(data_dir="./SPAM"):
    """Download the Spambase archive and split ``spambase.data`` 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64 (1 where
        the last column, named "spam" here, equals 1).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------SPAM--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "http://archive.ics.uci.edu/static/public/94/spambase.zip"
    zip_file_path = os.path.join(data_dir, "spambase.zip")
    # 57 generically named feature columns followed by the label.
    columns = [f"f{i}" for i in range(57)] + ["spam"]
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        data = pd.read_csv(os.path.join(data_dir, "spambase.data"), header=None, names=columns)
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("spam", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["spam"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits

def fetch_accelerometer_gyro_dataset(data_dir="./GYRO"):
    """Download the accelerometer/gyro mobile phone archive and split its CSV 60/20/20.

    The ``timestamp`` column is replaced by integer category codes before
    scaling.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64 (1 where
        ``Activity`` equals 1).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------GYRO--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/755/accelerometer+gyro+mobile+phone+dataset.zip"
    zip_file_path = os.path.join(data_dir, "accelerometer_gyro_dataset.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        data = pd.read_csv(os.path.join(data_dir, "accelerometer_gyro_mobile_phone_dataset.csv"))
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # Convert categorical column to numeric (e.g., label encoding)
    data["timestamp"] = data["timestamp"].astype("category").cat.codes

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("Activity", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["Activity"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits

def fetch_swarm_behaviour(data_dir="./SWARM"):
    """Download the Swarm Behaviour archive and split ``Grouped.csv`` 60/20/20.

    Args:
        data_dir: scratch directory for the download; it is deleted (with all
            of its contents) before the function returns or raises.

    Returns:
        dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32, labels int64 (1 where
        ``Class`` equals 1).
    """
    # Not imported by the notebook's import cells, so import it here.
    import zipfile

    print("---------------------SWARM--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)

    url = "https://archive.ics.uci.edu/static/public/524/swarm+behaviour.zip"
    zip_file_path = os.path.join(data_dir, "swarm_behaviour.zip")
    try:
        urllib.request.urlretrieve(url, zip_file_path)
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        data = pd.read_csv(os.path.join(data_dir, "Swarm Behavior Data/Grouped.csv"))
    finally:
        # Always remove the scratch directory, even if download/parsing failed.
        shutil.rmtree(data_dir)

    # NOTE(review): the scaler is fit on the full dataset before splitting.
    X = data.drop("Class", axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data["Class"]

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    y_train = (y_train.values.reshape(-1) == 1).astype('int64')
    y_test = (y_test.values.reshape(-1) == 1).astype('int64')
    y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')

    data_splits = {
        "X_train": X_train.astype('float32'), "y_train": y_train,
        "X_valid": X_valid.astype('float32'), "y_valid": y_valid,
        "X_test": X_test.astype('float32'), "y_test": y_test
    }

    return data_splits


def _fetch_openml_binary(name, data_url, data_dir, label_map=None):
    """Download one OpenML ARFF file and return standardized train/valid/test splits.

    Shared implementation behind all ``fetch_openml_*_data`` functions.

    :param name: dataset name shown in the progress banner.
    :param data_url: URL of the ARFF file; its last path component is used as
        the local file name inside ``data_dir``.
    :param data_dir: scratch directory for the download. It is created if
        missing and removed *entirely* before returning (also on failure).
    :param label_map: optional mapping from raw (bytes) labels of the last
        column to integers. Labels missing from the mapping become NaN and
        therefore end up as class 0. When ``None`` the last column is cast
        with ``astype(int)`` instead.
    :return: dict with keys ``X_train``, ``y_train``, ``X_valid``, ``y_valid``,
        ``X_test``, ``y_test``; features are float32 arrays, targets are int64
        arrays holding 1 where the decoded label equals 1 and 0 otherwise.
    """
    print(f"---------------------{name} DATASET--------------------------------------")
    os.makedirs(data_dir, exist_ok=True)
    arff_file_path = os.path.join(data_dir, data_url.rsplit("/", 1)[-1])

    try:
        # Download the ARFF file and load it fully into memory.
        urllib.request.urlretrieve(data_url, arff_file_path)
        data, _meta = arff.loadarff(arff_file_path)
    finally:
        # Removing the directory also removes the ARFF file; doing it here
        # guarantees no partial download is left behind when a step above raises.
        shutil.rmtree(data_dir, ignore_errors=True)

    df = pd.DataFrame(data)

    # The target is always the last column of the ARFF file.
    target_col = df.columns[-1]
    if label_map is None:
        df[target_col] = df[target_col].astype(int)
    else:
        df[target_col] = df[target_col].map(label_map)

    # NOTE(review): the scaler is fitted on the full data set before splitting,
    # so validation/test statistics influence the scaling of the training data.
    X = StandardScaler().fit_transform(df.drop(target_col, axis=1))
    y = df[target_col]

    # 70% train; the remaining 30% is split again 70/30, which yields
    # 21% test and 9% validation of the full data set.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)

    def _binarize(labels):
        # 1 where the label equals 1, 0 for every other value (including NaN).
        return (labels.values.reshape(-1) == 1).astype('int64')

    return dict(
        X_train=X_train.astype('float32'), y_train=_binarize(y_train),
        X_valid=X_val.astype('float32'), y_valid=_binarize(y_val),
        X_test=X_test.astype('float32'), y_test=_binarize(y_test)
    )


def fetch_openml_credit_data(data_dir="./OpenML_Credit"):
    """Fetch OpenML ``credit.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Credit",
        "https://api.openml.org/data/v1/download/22103185/credit.arff",
        data_dir,
    )


def fetch_openml_electricity_data(data_dir="./OpenML_Electricity"):
    """Fetch OpenML ``electricity.arff``; labels ``DOWN``/``UP`` map to 0/1."""
    return _fetch_openml_binary(
        "OpenML_Electricity",
        "https://api.openml.org/data/v1/download/22103245/electricity.arff",
        data_dir,
        label_map={b'DOWN': 0, b'UP': 1},
    )


def fetch_openml_covertype_data(data_dir="./OpenML_Covertype"):
    """Fetch OpenML ``covertype.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Covertype",
        "https://api.openml.org/data/v1/download/22103246/covertype.arff",
        data_dir,
    )


def fetch_openml_pol_data(data_dir="./OpenML_Pol"):
    """Fetch OpenML ``pol.arff``; labels ``N``/``P`` map to 0/1."""
    return _fetch_openml_binary(
        "OpenML_Pol",
        "https://api.openml.org/data/v1/download/22103247/pol.arff",
        data_dir,
        label_map={b'N': 0, b'P': 1},
    )


def fetch_openml_house_16H_data(data_dir="./OpenML_House_16H"):
    """Fetch OpenML ``house_16H.arff``; labels ``N``/``P`` map to 0/1."""
    return _fetch_openml_binary(
        "OpenML_House_16H",
        "https://api.openml.org/data/v1/download/22103248/house_16H.arff",
        data_dir,
        label_map={b'N': 0, b'P': 1},
    )


def fetch_openml_MiniBooNE_data(data_dir="./OpenML_MiniBooNE"):
    """Fetch OpenML ``MiniBooNE.arff``; labels ``False``/``True`` map to 0/1."""
    return _fetch_openml_binary(
        "OpenML_MiniBooNE",
        "https://api.openml.org/data/v1/download/22103253/MiniBooNE.arff",
        data_dir,
        label_map={b'False': 0, b'True': 1},
    )


def fetch_openml_eye_movements_data(data_dir="./OpenML_Eye_movements"):
    """Fetch OpenML ``eye_movements.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Eye_movements",
        "https://api.openml.org/data/v1/download/22103255/eye_movements.arff",
        data_dir,
    )


def fetch_openml_Diabetes130US_data(data_dir="./OpenML_Diabetes130US"):
    """Fetch OpenML ``Diabetes130US.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Diabetes130US",
        "https://api.openml.org/data/v1/download/22111908/Diabetes130US.arff",
        data_dir,
    )


def fetch_openml_jannis_data(data_dir="./OpenML_Jannis"):
    """Fetch OpenML ``jannis.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Jannis",
        "https://api.openml.org/data/v1/download/22111907/jannis.arff",
        data_dir,
    )


def fetch_openml_Bioresponse_data(data_dir="./OpenML_Bioresponse"):
    """Fetch OpenML ``Bioresponse.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Bioresponse",
        "https://api.openml.org/data/v1/download/22111905/Bioresponse.arff",
        data_dir,
    )


def fetch_openml_california_data(data_dir="./OpenML_California"):
    """Fetch OpenML ``california.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_California",
        "https://api.openml.org/data/v1/download/22111914/california.arff",
        data_dir,
    )


def fetch_openml_heloc_data(data_dir="./OpenML_Heloc"):
    """Fetch OpenML ``heloc.arff``; the last column is cast to int as the target."""
    return _fetch_openml_binary(
        "OpenML_Heloc",
        "https://api.openml.org/data/v1/download/22111912/heloc.arff",
        data_dir,
    )


#**class Dataset:**

# Registry mapping a dataset name to the function that downloads it and
# returns its train/valid/test splits. `Dataset` looks names up here.
REAL_DATASETS = dict(
    # ---- UCI datasets ----
    ADULT=fetch_ADULT,
    bank_marketing=fetch_bank_marketing,
    credit_card_defaults=fetch_credit_card_defaults,
    gamma_telescope=fetch_gamma_telescope,
    rice_dataset=fetch_rice_dataset,
    german_credit_data=fetch_german_credit_data,
    spambase_dataset=fetch_spambase_dataset,
    accelerometer_gyro_dataset=fetch_accelerometer_gyro_dataset,
    swarm_behaviour=fetch_swarm_behaviour,
    # ---- OpenML tabular datasets ----
    OpenML_Credit=fetch_openml_credit_data,
    OpenML_Electricity=fetch_openml_electricity_data,
    OpenML_Covertype=fetch_openml_covertype_data,
    OpenML_Pol=fetch_openml_pol_data,
    OpenML_House_16H=fetch_openml_house_16H_data,
    OpenML_MiniBooNE=fetch_openml_MiniBooNE_data,
    OpenML_Eye_movements=fetch_openml_eye_movements_data,
    OpenML_Diabetes130US=fetch_openml_Diabetes130US_data,
    OpenML_Jannis=fetch_openml_jannis_data,
    OpenML_Bioresponse=fetch_openml_Bioresponse_data,
    OpenML_California=fetch_openml_california_data,
    OpenML_Heloc=fetch_openml_heloc_data,
)

class Dataset:
    def __init__(self, dataset, data_path='./DATA', normalize=False, normalize_target=False, quantile_transform=False, quantile_noise=1e-3, in_features=None, out_features=None, flatten=False, **kwargs):
        """
        Container holding all training and evaluation data required for an experiment.

        If ``dataset`` is a key of REAL_DATASETS, the registered fetcher is called with
        ``Path(data_path) / dataset`` and the attributes X_train/y_train, X_valid/y_valid,
        X_test/y_test are populated, then optionally post-processed in this order:
        flatten, normalize, quantile_transform, normalize_target, feature selection.
        If ``dataset`` is in TOY_DATASETS, ``toy_dataset`` is called and the attributes
        X, Y (and labels, when provided) are set instead.
        For any other name no data attributes are set.

        :param dataset: name of a dataset registered in REAL_DATASETS or TOY_DATASETS
        :param data_path: shared data folder; the fetcher receives {data_path}/{dataset}
        :param normalize: standardize features using the training mean and std
            (stored as self.mean / self.std; zero stds are replaced by 1)
        :param normalize_target: standardize targets using the training mean and std
            (stored as self.mean_y / self.std_y)
        :param quantile_transform: whether to transform the feature distributions into
            normals, using a quantile transform fitted on (noised) training features
        :param quantile_noise: magnitude of the noise added before fitting the quantile
            transform; a falsy value disables the noise
        :param in_features: column indices stored as X_*_in for each split
        :param out_features: column indices stored as X_*_out for each split
        :param flatten: whether to reshape every instance to a vector
        :param kwargs: forwarded unchanged to the dataset fetcher / toy generator
        """

        if dataset in REAL_DATASETS:
            data_dict = REAL_DATASETS[dataset](Path(data_path) / dataset, **kwargs)

            self.X_train = data_dict['X_train']
            self.y_train = data_dict['y_train']
            self.X_valid = data_dict['X_valid']
            self.y_valid = data_dict['y_valid']
            self.X_test = data_dict['X_test']
            self.y_test = data_dict['y_test']

            if flatten:
                self.X_train = self.X_train.reshape(len(self.X_train), -1)
                self.X_valid = self.X_valid.reshape(len(self.X_valid), -1)
                self.X_test = self.X_test.reshape(len(self.X_test), -1)

            if normalize:

                print("Normalize dataset")
                # Reduce over the sample axis and every axis from index 2 on,
                # keeping axis 1 so statistics are computed per entry of that axis.
                axis = [0] + [i + 2 for i in range(self.X_train.ndim - 2)]
                self.mean = np.mean(self.X_train, axis=tuple(axis), dtype=np.float32)
                self.std = np.std(self.X_train, axis=tuple(axis), dtype=np.float32)

                # if constants, set std to 1
                self.std[self.std == 0.] = 1.

                # For 'ALOI' the statistics are stored but not applied.
                if dataset not in ['ALOI']:
                    self.X_train = (self.X_train - self.mean) / self.std
                    self.X_valid = (self.X_valid - self.mean) / self.std
                    self.X_test = (self.X_test - self.mean) / self.std

            if quantile_transform:
                # Imported here because the notebook's import cell only pulls
                # StandardScaler and LabelEncoder from sklearn.preprocessing.
                from sklearn.preprocessing import QuantileTransformer

                # Fit on a noised copy so the training features themselves stay untouched.
                quantile_train = np.copy(self.X_train)
                if quantile_noise:
                    stds = np.std(quantile_train, axis=0, keepdims=True)
                    noise_std = quantile_noise / np.maximum(stds, quantile_noise)
                    quantile_train += noise_std * np.random.randn(*quantile_train.shape)

                qt = QuantileTransformer(output_distribution='normal').fit(quantile_train)
                self.X_train = qt.transform(self.X_train)
                self.X_valid = qt.transform(self.X_valid)
                self.X_test = qt.transform(self.X_test)

            if normalize_target:

                print("Normalize target value")
                self.mean_y = np.mean(self.y_train, axis=0, dtype=np.float32)
                self.std_y = np.std(self.y_train, axis=0, dtype=np.float32)

                # if constants, set std to 1
                if self.std_y == 0.:
                    self.std_y = 1.

                self.y_train = (self.y_train - self.mean_y) / self.std_y
                self.y_valid = (self.y_valid - self.mean_y) / self.std_y
                self.y_test = (self.y_test - self.mean_y) / self.std_y

            if in_features is not None:
                self.X_train_in = self.X_train[:, in_features]
                self.X_valid_in = self.X_valid[:, in_features]
                self.X_test_in = self.X_test[:, in_features]

            if out_features is not None:
                self.X_train_out = self.X_train[:, out_features]
                self.X_valid_out = self.X_valid[:, out_features]
                self.X_test_out = self.X_test[:, out_features]

        elif dataset in TOY_DATASETS:
            data_dict = toy_dataset(distr=dataset, **kwargs)

            self.X = data_dict['X']
            self.Y = data_dict['Y']
            if 'labels' in data_dict:
                self.labels = data_dict['labels']

        self.data_path = data_path
        self.dataset = dataset

class TorchDataset(torch.utils.data.Dataset):
    """Map-style dataset over one or more parallel, equally indexed collections.

    Positional arguments are the collections ("sets"); item ``idx`` of the
    dataset is the list ``[s[idx] for s in sets]``, optionally normalized and
    transformed.

    Keyword options:
        means, stds: per-set normalization constants, paired with the sets in
            order. Each set ``d`` is mapped to ``(d - m) / s``. Both must be
            given together.
        transform: optional callable applied to every element of an item.
            Its result is concatenated onto a list, so it must return a list.
            In test mode ``transform.test_transform`` is used instead.
        test: when true, every element is passed through
            ``transform.test_transform`` and the result is emitted twice.

    Raises:
        ValueError: if no set is given, or only one of ``means``/``stds`` is given.
        TypeError: if an unknown keyword option is passed.
    """

    def __init__(self, *data, **options):
        if len(data) == 0:
            raise ValueError("At least one set required as input")

        self.data = data
        self.means = options.pop('means', None)
        self.stds = options.pop('stds', None)
        self.transform = options.pop('transform', None)
        self.test = options.pop('test', False)

        if options:
            raise TypeError("Invalid parameters passed: %s" % str(options))

        # Validate in both directions with a real exception: an `assert` is
        # stripped under `python -O`, and a lone `stds` used to be ignored silently.
        if (self.means is None) != (self.stds is None):
            raise ValueError("must specify both <means> and <stds>")

    def normalize(self, data):
        """Normalize one item (a list with one element per set).

        Returns ``data`` unchanged when no normalization constants were given.
        Implemented as a method rather than a lambda stored on the instance so
        that the dataset stays picklable (needed by DataLoader workers that are
        started with the ``spawn`` method).
        """
        if self.means is None:
            return data
        return [(d - m) / s for d, m, s in zip(data, self.means, self.stds)]

    def __len__(self):
        # All sets are indexed in parallel; the first one defines the length.
        return len(self.data[0])

    def __getitem__(self, idx):
        data = self.normalize([s[idx] for s in self.data])
        if self.transform:
            if self.test:
                # Each element is transformed once and the result is repeated twice.
                data = sum([[self.transform.test_transform(d)] * 2 for d in data], [])
            else:
                # transform(d) returns a list; the per-element lists are concatenated.
                data = sum([self.transform(d) for d in data], [])

        return data

**Training a DLGN model**

In [None]:
# --- Hyper-parameters -------------------------------------------------------
# NOTE(review): train_dlgn() is called below with data arguments only, so it
# presumably reads the remaining settings (lr, no_of_batches, saved_epochs, ...)
# from module globals; the global names in this section are kept unchanged for
# that reason -- confirm against train_dlgn before renaming any of them.
args = Args()

num_layer = args.numlayer
num_neuron = args.numnodes
beta = args.beta
lr = args.lr

saved_epochs = list(range(0, 300, 10)) + list(range(300, 10001, 50))
update_value_epochs = list(range(0, 10001, 100))

seed = 365        # numpy seed, set once per dataset
init_seed = 365   # torch seed in effect while the initial model is built
train_seed = 8    # torch seed in effect when training starts
no_of_batches = 10  # [1,10,100]
weight_decay = 0.0
num_hidden_nodes = [num_neuron] * num_layer

optimizer_name = 'Adam'
modep = 'pwc'
output_dim = 1

# DATA_NAME=["OpenML_Credit","OpenML_Electricity","OpenML_Pol","OpenML_House_16H","OpenML_MiniBooNE","OpenML_Eye_movements","OpenML_Diabetes130US","OpenML_Jannis","OpenML_Bioresponse","OpenML_California","OpenML_Heloc"]#"OpenML_Covertype"]#,"bank_marketing","credit_card_defaults","gamma_telescope","rice_dataset","german_credit_data","spambase_dataset","accelerometer_gyro_dataset","swarm_behaviour"]#,"HIGGS"]
DATA_NAME = ["OpenML_Covertype"]

# The evaluation step below rebinds the global `device` to the CPU. Remember the
# device selected for training so that every dataset in DATA_NAME is trained on
# it, not only the first one. (Re-running this cell without re-running the
# device-selection cell will still see the CPU here.)
train_device = device

for data_name in DATA_NAME:
    device = train_device

    data = Dataset(data_name, normalize=True)
    print('classes', np.unique(data.y_test))
    set_npseed(seed)
    input_dim = data.X_train.shape[1]

    train_data = data.X_train
    train_data_labels = data.y_train

    vali_data = data.X_valid
    vali_data_labels = data.y_valid

    test_data = data.X_test
    test_data_labels = data.y_test

    print("train_data:", train_data.shape, "vali_data:", vali_data.shape, "test_data:", test_data.shape)

    print("---" * 30)
    # Earlier re-seeds that were overwritten before any torch random number was
    # drawn have been dropped; the RNG state at model construction is unchanged.
    set_torchseed(init_seed)
    DLGN_init = DLGN_FC(input_dim=input_dim, output_dim=output_dim, num_hidden_nodes=num_hidden_nodes, beta=beta)

    # All-ones masks: every value ("val") and gating ("gat") parameter is handed
    # to train_dlgn with a mask of ones.
    # Tensor.to() returns a new tensor instead of moving in place, so the result
    # must be stored; previously it was discarded and the masks stayed on the CPU.
    train_parameter_masks = dict()
    for name, parameter in DLGN_init.named_parameters():
        if "val" in name or "gat" in name:
            train_parameter_masks[name] = torch.ones_like(parameter).to(device)
        # Parameters matching neither pattern get no mask (this used to raise KeyError).

    set_torchseed(train_seed)
    train_losses, DLGN_obj_final, DLGN_obj_store, losses, debug_models = train_dlgn(
        train_data_curr=train_data,
        vali_data_curr=vali_data,
        test_data_curr=test_data,
        train_labels_curr=train_data_labels,
        vali_labels_curr=vali_data_labels,
        test_labels_curr=test_data_labels,
        DLGN_obj=deepcopy(DLGN_init),
        parameter_mask=train_parameter_masks,
    )

    torch.cuda.empty_cache()
    losses = np.array(losses)

    # --- Evaluation on the CPU ----------------------------------------------
    # NOTE(review): the global `device` is rebound on purpose, as in the
    # original cell; code outside this cell may read it -- confirm before
    # switching to a local name.
    device = torch.device('cpu')
    DLGN_obj_final = DLGN_obj_final.to(device)  # inputs below are CPU tensors
    criterion = nn.CrossEntropyLoss()

    # No gradients are needed for evaluation; skipping the autograd graph keeps
    # memory bounded on large datasets.
    with torch.no_grad():
        train_preds = DLGN_obj_final(torch.Tensor(train_data).to(device)).reshape(-1, 1)
        # Two-class logits (-s, s) built from the single output score s.
        outputs = torch.cat((-1 * train_preds, train_preds), dim=1)
        targets = torch.tensor(train_data_labels, dtype=torch.int64)
        train_loss = criterion(outputs, targets)
        test_preds = DLGN_obj_final(torch.Tensor(test_data).to(device)).reshape(-1, 1)

    train_preds = train_preds.cpu().numpy()
    test_preds = test_preds.cpu().numpy()

    # Predicted class is 1 for a positive score and 0 otherwise: (sign(s) + 1) // 2.
    Train_error = np.sum(train_data_labels != (np.sign(train_preds[:, 0]) + 1) // 2)
    Num_train_data = len(train_data_labels)
    print("Train error=", Train_error)
    print("Num_train_data=", Num_train_data)
    print("Train_acc:", 1 - Train_error / Num_train_data)

    Test_error = np.sum(test_data_labels != (np.sign(test_preds[:, 0]) + 1) // 2)
    Num_test_data = len(test_data_labels)
    print("Test error=", Test_error)
    print("Num_test_data=", Num_test_data)
    print("Test_acc:", 1 - Test_error / Num_test_data)

# print(DLGN_obj_store[-1].beta)
    