In [1]:
import numpy as np
import torch
import random
import torch.nn as nn
from scipy.stats import logistic
import sympy

## Dataset Generation

In [9]:
random.seed(123)
torch.manual_seed(123)
n_train = 2000
n_eval = 100
n_test = 100
p = 100#00
sparse_p = 10

sparsity_index = [ _ < sparse_p for _ in range(p)]
random.shuffle(sparsity_index)

beta = np.zeros(p)
for _ in range(p):
    if sparsity_index[_]:
        beta[_] = 2
X_train = [np.random.normal(size = p) for _ in range(n_train)]
X_eval = [np.random.normal(size = p) for _ in range(n_eval)]
X_test = [np.random.normal(size = p) for _ in range(n_test)]
train_dataset = []
eval_dataset = []
test_dataset = []
noise_ratio = []
for _ in X_train:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta) + np.random.normal(scale = 1)) > 0.5)
    noise_ratio.append((np.dot(_, beta) + np.random.normal(scale = 1))*np.dot(_, beta) >0 )
    train_dataset.append((_,y))
print(np.mean(noise_ratio))
for _ in X_eval:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta)+ np.random.normal(scale = 1)) > 0.5)
    eval_dataset.append((_,y))
for _ in X_test:
    #y = np.random.binomial(n = 1, p = logistic.cdf(np.dot(_, beta)))
    y = int(logistic.cdf(np.dot(_, beta)+ np.random.normal(scale = 1)) > 0.5)
    test_dataset.append((_,y))
print(train_dataset[:3][:2])
print(f"beta: {beta}")

0.9415
[(array([-0.99255664, -0.09850755,  1.28068487, -0.21883622, -0.68517474,
        1.47696556, -0.69625027,  0.20211889, -0.40431657,  0.39336439,
       -0.44355195,  2.05617479, -0.54803995,  0.84628832, -0.73711619,
        1.98127596,  0.41923591,  0.893298  ,  0.56276857, -0.22347953,
       -0.17147022,  1.62890066,  1.01785961, -2.20311793,  0.73077756,
        0.43549215,  0.52516738,  0.72431454,  0.14734245,  0.17624503,
       -0.01190663, -1.34815375, -1.47993372, -0.62495667, -1.75821009,
        1.46175899,  0.9440697 ,  0.30296945,  0.11635087, -1.50477573,
        0.60928502, -0.36456007,  0.75549871,  0.04874346, -0.66939099,
       -0.15267852, -0.79950737,  0.73294968, -1.71637004, -0.64090541,
       -1.74597354,  0.30217295, -1.42545454,  0.11945793, -0.10902602,
        0.82596859,  0.03332325,  0.99495543,  1.98529061,  2.47761857,
       -0.99467733, -1.35298519, -1.4023889 , -0.25696657, -1.3336469 ,
        0.34520775,  1.17876415, -0.84555489,  0.123494

In [3]:
from torch.utils.data import Dataset, DataLoader


class TrainDataset(Dataset):
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, ind):
        return self.data[ind]

train_set = TrainDataset(train_dataset)
test_set  = TrainDataset(test_dataset)
eval_set  = TrainDataset(eval_dataset)
batch_size = 32
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_set,  batch_size=n_eval, shuffle=False)
eval_loader  = DataLoader(eval_set,  batch_size=n_test, shuffle=False)


## Model

In [24]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(p, 25 ,bias=False),
            nn.ReLU(),
            nn.Linear(25, 1,bias=False)
        )
    def forward(self, x):
        out = self.mlp(x)
        return out

model = MLP().to(device)
#optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
#torch.optim.lr_scheduler()

##
print(model)

class single_layer(nn.Module):
    def __init__(self):
        super(single_layer, self).__init__()
        self.mlp = nn.Linear( (p + 0) * 25  + (25 + 0) * 1, 1, bias= False)
    def forward(self, x):
        out = self.mlp(x)
        return out

model = MLP().to(device)
var_Z = single_layer().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
optimizer2 = torch.optim.SGD(var_Z.parameters(), lr=1e-1)
print(("Z", var_Z))

MLP(
  (mlp): Sequential(
    (0): Linear(in_features=100, out_features=25, bias=False)
    (1): ReLU()
    (2): Linear(in_features=25, out_features=1, bias=False)
  )
)
('Z', single_layer(
  (mlp): Linear(in_features=2525, out_features=1, bias=False)
))


In [5]:
for name, param in model.named_parameters():
    #if  name.endswith("weight"):
        print((name,param))

('mlp.0.weight', Parameter containing:
tensor([[-0.0650, -0.0010, -0.0521,  ...,  0.0245,  0.0350,  0.0087],
        [-0.0716,  0.0811, -0.0584,  ...,  0.0306,  0.0548,  0.0138],
        [-0.0274, -0.0257, -0.0638,  ...,  0.0615, -0.0323,  0.0153],
        ...,
        [ 0.0328,  0.0557,  0.0139,  ..., -0.0046, -0.0082,  0.0477],
        [ 0.0883, -0.0404, -0.0180,  ...,  0.0324,  0.0954,  0.0755],
        [-0.0217,  0.0082, -0.0038,  ..., -0.0872,  0.0482, -0.0065]],
       requires_grad=True))
('mlp.2.weight', Parameter containing:
tensor([[-0.0493, -0.0503,  0.0279,  0.0233, -0.1056, -0.0151,  0.1458, -0.1005,
         -0.0736, -0.0118, -0.0963,  0.1630,  0.1574,  0.1516, -0.0310,  0.0708,
          0.1207, -0.1783,  0.0906, -0.1006,  0.1613,  0.1243, -0.0337, -0.0112,
         -0.1828]], requires_grad=True))


In [6]:
optimizer2.zero_grad()
#print([_ for _ in model.parameters()])
torch.cat([param.view(-1)  for param in model.parameters()])
#[torch.cat([param[0].view(-1), param[1].view(-1)])  for param in model.parameters()]


tensor([-0.0650, -0.0010, -0.0521,  ..., -0.0337, -0.0112, -0.1828],
       grad_fn=<CatBackward0>)

## Training

In [51]:
random.seed(123)
torch.manual_seed(123)
epochs = 20#20
lambda_2 = 0.01
lambda_3 = 10
criterion = nn.BCEWithLogitsLoss()
model = MLP().to(device)
var_Z = single_layer().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
optimizer2 = torch.optim.SGD(var_Z.parameters(), lr=1e-1)
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc
def projection_on_z(z_network):
    with torch.no_grad():
        for param in z_network.parameters():
            param.data.clamp_(min = 0, max = 1)


for param in var_Z.parameters():
    #param.data.clamp_(min = 1 - 0.1/lambda_3, max = 1 - 0.1/lambda_3)
    param.data.clamp_(min = 0.5, max = 0.5)
for epoch in range(epochs):
    model.train()
    losses = []
    if epoch % 2 == 0:
        projection_on_z(var_Z)
        optimizer2.zero_grad()
        vec_z = torch.cat([param.view(-1)  for param in var_Z.parameters()]) 
        vec_weight = torch.cat([param.view(-1)  for param in model.parameters()])
        #vec_z = #torch.tensor([ lambda_2 /lambda_3 < torch.abs(vec_weight[_]) for _ in range()])
        #vec_z = lambda_2 /lambda_3 * torch.ones_like(vec_weight) < torch.abs(vec_weight)
        #vec_z = vec_z.float()
        loss2 = lambda_2 * torch.sum(vec_z)
        loss3 = lambda_3 * torch.norm((torch.ones_like(vec_z) - vec_z) * vec_weight, 1)
        loss4 = 0.01 * torch.norm(vec_weight, 1)
        loss = loss2 + loss3
        loss.backward()
        print(("grad",var_Z.mlp.weight.grad))
        #print(("z value",var_Z.mlp.weight ))
        print(("mean_value",torch.mean(var_Z.mlp.weight)))
        optimizer2.step()
    for batch_num, input_data in enumerate(train_loader):
        projection_on_z(var_Z)
        optimizer.zero_grad()
        #optimizer2.zero_grad()
        x, y = input_data
        x = x.to(device).float()
        y = y.to(device).float()

        output = model(x).squeeze() 
        
        loss1 = criterion(output, y)
        vec_z = torch.cat([param.view(-1)  for param in var_Z.parameters()]) 
        vec_weight = torch.cat([param.view(-1)  for param in model.parameters()])
        #vec_z = #torch.tensor([ lambda_2 /lambda_3 < torch.abs(vec_weight[_]) for _ in range()])
        #vec_z = lambda_2 /lambda_3 * torch.ones_like(vec_weight) < torch.abs(vec_weight)
        #vec_z = vec_z.float()
        loss2 = lambda_2 * torch.sum(vec_z)
        loss3 = lambda_3 * torch.norm((torch.ones_like(vec_z) - vec_z) * vec_weight, 1)
        loss4 = 0.01 * torch.norm(vec_weight, 1)
        loss = loss1.mean() + loss2 + loss3
        loss.backward()
        losses.append(loss.item())
        #print(('z', torch.mean(vec_z), vec_z))
        #print(("grad",var_Z.mlp.weight.grad))
        #print(("z value",var_Z.mlp.weight ))
        #print(("mean_value",torch.mean(var_Z.mlp.weight)))
        #optimizer2.step()
        optimizer.step()
        #print(("z value_updated",var_Z.mlp.weight ))
       
        
    model.eval()
    val_acc = 0
    with torch.inference_mode():
        for ind, batch in enumerate(eval_loader):
            x, y = input_data
            x = x.to(device).float()
            y = y.to(device).float()
            test_logits = model(x).squeeze() 
            test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Caculate loss/accuracy
        #test_loss = loss_fn(test_logits,
        #                    y_test)
            val_acc += accuracy_fn(y_true=y,
                               y_pred=test_pred)
    
    val_acc /= ind + 1    
    if epoch > 2:
        print('Epoch %d | Batch %d | train Loss %6.2f, loss1 %6.2f, loss2 % 6.2f, loss3 %6.2f, loss4 %6.2f' % (epoch, batch_num, loss.item(), loss1.item(), loss2.item(), loss3.item(), loss4.item()))
        print(' non-zero weights %d | val accuracy %6.2f ' % (sum(torch.nonzero(vec_weight).size()), val_acc) )
    print(f"weight:{vec_weight}")

('grad', tensor([[-0.3978, -0.0231, -0.4867,  ..., -1.5970, -0.7254, -1.0626]]))
('mean_value', tensor(0.9900, grad_fn=<MeanBackward0>))
weight:tensor([-4.0798e-02, -5.0233e-05, -4.8865e-02,  ..., -1.6177e-01,
         7.4795e-02,  1.0718e-01], grad_fn=<CatBackward0>)
weight:tensor([-4.1120e-02,  1.2928e-05, -4.7404e-02,  ..., -1.6373e-01,
         7.5836e-02,  1.0798e-01], grad_fn=<CatBackward0>)
('grad', tensor([[0.0100, 0.0099, 0.0100,  ..., 0.0100, 0.0100, 0.0100]]))
('mean_value', tensor(0.9994, grad_fn=<MeanBackward0>))
weight:tensor([-3.3763e-02, -3.1542e-06, -3.9978e-02,  ..., -1.6433e-01,
         7.6908e-02,  1.0809e-01], grad_fn=<CatBackward0>)
Epoch 3 | Batch 62 | train Loss  26.92, loss1   0.72, loss2  25.21, loss3   0.99, loss4   0.99
 non-zero weights 2526 | val accuracy  43.75 
weight:tensor([-2.6469e-02, -3.3988e-07, -3.2543e-02,  ..., -1.6480e-01,
         7.7754e-02,  1.0857e-01], grad_fn=<CatBackward0>)
('grad', tensor([[-0.2535,  0.0100, -0.3143,  ..., -1.6378, -0.

## Eval

In [8]:
model.eval()
accuracy = 0
for batch_num, input_data in enumerate(train_loader):
    x, y = input_data
    x = x.to(device).float()
    y = y.to(device)
    y_pred = model(x)
