In [2]:
import numpy as np
import torch
import random
import torch.nn as nn
from scipy.stats import logistic
import sympy

## Dataset Generation

In [58]:
random.seed(123)
n_train = 2000
n_eval = 100
n_test = 100
p = 100#00
sparse_p = 3

sparsity_index = [ _ < sparse_p for _ in range(p)]
random.shuffle(sparsity_index)

beta = np.zeros(p)
for _ in range(p):
    if sparsity_index[_]:
        beta[_] = 2
X_train = [np.random.normal(size = p) for _ in range(n_train)]
X_eval = [np.random.normal(size = p) for _ in range(n_eval)]
X_test = [np.random.normal(size = p) for _ in range(n_test)]
train_dataset = []
eval_dataset = []
test_dataset = []
noise_ratio = []
for _ in X_train:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta) + np.random.normal(scale = 1)) > 0.5)
    noise_ratio.append((np.dot(_, beta) + np.random.normal(scale = 1))*np.dot(_, beta) >0 )
    train_dataset.append((_,y))
print(np.mean(noise_ratio))
for _ in X_eval:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta)+ np.random.normal(scale = 1)) > 0.5)
    eval_dataset.append((_,y))
for _ in X_test:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta)+ np.random.normal(scale = 1)) > 0.5)
    test_dataset.append((_,y))
print(train_dataset[:3][:2])
print(f"beta: {beta}")

0.902
[(array([ 0.12976129, -0.78798991, -0.05701262,  1.71557933, -0.2259525 ,
       -0.31758014, -0.43481237,  0.84160706, -1.82509361, -1.03462898,
        0.69238431,  0.21645693,  0.3241297 ,  0.55201787, -0.76750723,
        1.23622276, -0.56373276,  0.54551368, -0.47309837, -1.03562485,
        1.49680449, -0.71719001,  1.62195236,  1.46231323,  1.90513124,
        0.5364317 , -0.46854214,  0.13101486, -0.91163672, -1.49117535,
        0.11615425, -1.14930316, -2.0305153 ,  0.08900907,  0.8645204 ,
       -0.16461894,  1.6441051 ,  1.07971974,  1.66513849,  0.84967694,
        0.40691173,  0.65040831,  1.99305442, -0.06850417,  1.67250944,
        0.8455447 ,  0.95454406,  0.62288154,  1.15587714,  2.89329238,
       -1.0182851 , -0.18325103, -1.63310774,  0.57522459, -0.99291546,
       -1.34567733,  0.25890868,  0.89838251,  3.01664906,  1.03254649,
        1.2839966 ,  1.99787481, -0.13836703,  1.62051279, -0.64380245,
       -1.83876504, -0.49839213,  0.58410724, -0.1524597

In [51]:
from torch.utils.data import Dataset, DataLoader


class TrainDataset(Dataset):
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, ind):
        return self.data[ind]

train_set = TrainDataset(train_dataset)
test_set  = TrainDataset(test_dataset)
eval_set  = TrainDataset(eval_dataset)
batch_size = 32
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_set,  batch_size=n_eval, shuffle=False)
eval_loader  = DataLoader(eval_set,  batch_size=n_test, shuffle=False)


## Model

In [90]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(p, 25 ,bias=False),
            nn.ReLU(),
            nn.Linear(25, 1,bias=False)
        )
    def forward(self, x):
        out = self.mlp(x)
        return out

model = MLP().to(device)
#optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
#torch.optim.lr_scheduler()

##
print(model)

class single_layer(nn.Module):
    def __init__(self):
        super(single_layer, self).__init__()
        self.mlp = nn.Linear( (p + 0) * 25  + (25 + 0) * 1, 1, bias= False)
    def forward(self, x):
        out = self.mlp(x)
        return out

model = MLP().to(device)
var_Z = single_layer().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
optimizer2 = torch.optim.Adam(var_Z.parameters(), lr=1e-1)
print(("Z", var_Z))

MLP(
  (mlp): Sequential(
    (0): Linear(in_features=100, out_features=25, bias=False)
    (1): ReLU()
    (2): Linear(in_features=25, out_features=1, bias=False)
  )
)
('Z', single_layer(
  (mlp): Linear(in_features=2525, out_features=1, bias=False)
))


In [43]:
for name, param in model.named_parameters():
    #if  name.endswith("weight"):
        print((name,param))

('mlp.0.weight', Parameter containing:
tensor([[ 0.0714,  0.0970,  0.0101,  ...,  0.0272,  0.0412,  0.0658],
        [ 0.0362,  0.0809, -0.0111,  ..., -0.0771, -0.0668,  0.0631],
        [-0.0277, -0.0024,  0.0064,  ..., -0.0718, -0.0456, -0.0703],
        ...,
        [-0.0799, -0.0725,  0.0139,  ...,  0.0834,  0.0476,  0.0047],
        [-0.0130,  0.0360, -0.0968,  ...,  0.0997, -0.0399,  0.0508],
        [ 0.0477,  0.0881,  0.0644,  ...,  0.0953, -0.0890,  0.0803]],
       requires_grad=True))
('mlp.2.weight', Parameter containing:
tensor([[ 0.1626, -0.1530, -0.1347, -0.1431,  0.0070,  0.0260, -0.1813,  0.0238,
          0.0480,  0.1058,  0.1559, -0.0744,  0.0649,  0.0119, -0.0398,  0.0520,
         -0.1185, -0.0989, -0.1112, -0.0340,  0.0425,  0.1464, -0.1987,  0.1808,
         -0.1774],
        [ 0.1566,  0.0417,  0.1807,  0.1085, -0.1328,  0.0312,  0.0943, -0.0281,
          0.0542, -0.1553,  0.0917,  0.1734, -0.0248,  0.0563, -0.0291, -0.1520,
         -0.0812,  0.1498, -0.0498, 

In [39]:
torch.cat([param.view(-1)  for param in model.named_parameters()])

AttributeError: 'tuple' object has no attribute 'view'

In [48]:
optimizer2.zero_grad()
#print([_ for _ in model.parameters()])
torch.cat([param.view(-1)  for param in model.parameters()])
#[torch.cat([param[0].view(-1), param[1].view(-1)])  for param in model.parameters()]

tensor([0.0714, 0.0970, 0.0101,  ..., 0.0258, 0.0179, 0.1544],
       grad_fn=<CatBackward0>)

## Training

In [93]:
epochs = 40
lambda_2 = 0.1
lambda_3 = 100
criterion = nn.BCEWithLogitsLoss()

# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc
def projection_on_z(z_network):
    with torch.no_grad():
        for param in z_network.parameters():
            param.data.clamp_(min = 0, max = 1)
for param in var_Z.parameters():
    param.data.clamp_(min = 1, max = 1)
for epoch in range(epochs):
    model.train()
    losses = []
    for batch_num, input_data in enumerate(train_loader):
        projection_on_z(var_Z)
        optimizer.zero_grad()
        optimizer2.zero_grad()
        x, y = input_data
        x = x.to(device).float()
        y = y.to(device).float()

        output = model(x).squeeze() 
        
        loss1 = criterion(output, y)
        vec_z = torch.cat([param.view(-1)  for param in var_Z.parameters()]) 
        vec_weight = torch.cat([param.view(-1)  for param in model.parameters()])
        loss2 = lambda_2 * torch.sum(vec_z)
        loss3 = lambda_3 * torch.norm((torch.ones_like(vec_z) - vec_z) * vec_weight, 1)
        loss = loss1 +  loss2 + loss3
        loss.backward()
        losses.append(loss.item())
        print(("grad",var_Z.mlp.weight.grad))
        print(("value",var_Z.mlp.weight ))
        
        optimizer2.step()
        optimizer.step()
        print(("value_updated",var_Z.mlp.weight ))
        #if batch_num % (len(train_loader)//1) == 0:
        
    model.eval()
    val_acc = 0
    with torch.inference_mode():
        for ind, batch in enumerate(eval_loader):
            x, y = input_data
            x = x.to(device).float()
            y = y.to(device).float()
            test_logits = model(x).squeeze() 
            test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Caculate loss/accuracy
        #test_loss = loss_fn(test_logits,
        #                    y_test)
            val_acc += accuracy_fn(y_true=y,
                               y_pred=test_pred)
    val_acc /= ind + 1    
    print('Epoch %d | Batch %d | train Loss %6.2f, loss1 %6.2f, loss2 % 6.2f, loss3 %6.2f' % (epoch, batch_num, loss.item(), loss1.item(), loss2.item(), loss3.item()))
    print(' non-zero weights %d | val accuracy %6.2f ' % (sum(torch.nonzero(vec_weight).size()), val_acc) )


('grad', tensor([[0.0100, 0.0100, 0.0100,  ..., 0.0100, 0.0100, 0.0100]]))
('value', Parameter containing:
tensor([[1., 1., 1.,  ..., 1., 1., 1.]], requires_grad=True))
('value_updated', Parameter containing:
tensor([[1.0019, 1.0057, 1.0009,  ..., 1.0000, 1.0004, 1.0000]],
       requires_grad=True))
('grad', tensor([[ 0.0100,  0.0100,  0.0100,  ...,  0.0100,  0.0100, -1.3495]]))
('value', Parameter containing:
tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       requires_grad=True))
('value_updated', Parameter containing:
tensor([[1.0017, 1.0051, 1.0008,  ..., 1.0000, 1.0004, 1.0001]],
       requires_grad=True))
('grad', tensor([[ 1.0000e-02,  1.0000e-02,  1.0000e-02,  ..., -1.1406e+01,
          1.0000e-02,  1.0000e-02]]))
('value', Parameter containing:
tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       requires_grad=True))
('value_updated', Parameter containing:
tensor([[1.0016, 1.0046, 1.0007,  ..., 1.0001, 1.0003, 1.0001]],
       requires

## Eval

In [None]:
model.eval()
accuracy = 0
for batch_num, input_data in enumerate(train_loader):
    x, y = input_data
    x = x.to(device).float()
    y = y.to(device)
    y_pred = model(x)
