In [1]:
import sklearn
from sklearn.linear_model import LogisticRegression
import numpy as np
import torch



In [2]:
p = np.random.uniform(0,1,1)
print(p)

[0.72143965]


## Here, we consider two ways of generating logistic regression data.


In the first method, we randomly sample features $(X)$ and labels $(y)$ independently. The "simulator" then solves the logistic regression problem to get the weights $W$ given the $(X,y)$ dataset.

In [3]:
def prior_datasets(num_features = 20):
    p = np.random.uniform(0,1,1)
    X_prior = torch.distributions.MultivariateNormal(torch.zeros(num_features),torch.diag(torch.ones(num_features)))
    y_prior = torch.distributions.bernoulli.Bernoulli(probs=torch.Tensor([p]))
    return X_prior,y_prior
X_prior, y_prior = prior_datasets()

def simulate(X,y):
    X = X.numpy()
    y = y.numpy()
    model = LogisticRegression()
    model.fit(X,y)
    return torch.Tensor([model.coef_])

  y_prior = torch.distributions.bernoulli.Bernoulli(probs=torch.Tensor([p]))


In [4]:
X = X_prior.sample((100,))
y = y_prior.sample((100,)).squeeze()
print(X.shape,y.shape)
weights = simulate(X,y)
print(weights.shape)

torch.Size([100, 20]) torch.Size([100])
torch.Size([1, 1, 20])


### In the second way, we independently generate feature vectors $X$ and weights $W$.

Given these, we can calculate the probabilities of the logistic regression model, given by:
\begin{equation*}
p = \sigma(\mathbf{W}\cdot \mathbf{X})
\end{equation*}

which we can then use to "simulate" the labels $y$ by sampling $y\sim \textit{Bernoulli}(p)$

In [5]:
def prior_features(num_features=20):
    X_prior = torch.distributions.MultivariateNormal(torch.zeros(num_features),torch.diag(torch.ones(num_features)))
    return X_prior

def prior_weights(num_features=20):
    W_prior = torch.distributions.MultivariateNormal(torch.zeros(num_features),torch.diag(torch.ones(num_features)))
    return W_prior

def generate_labels(features,weights):
    p = torch.sigmoid(features@weights.T)
    y_dist = torch.distributions.bernoulli.Bernoulli(probs=p)
    y = y_dist.sample()
    return  y

In [6]:
X_prior = prior_features()
X = X_prior.sample((100,))
W_prior = prior_weights()
W = W_prior.sample()
print(X.shape)
print(W.shape)

torch.Size([100, 20])
torch.Size([20])


In [7]:
y = generate_labels(X,W)
print(y)

tensor([0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1.,
        1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 1.,
        0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1.,
        0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0.,
        0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1.,
        1., 1., 0., 0., 1., 0., 1., 1., 1., 1.])


  p = torch.sigmoid(features@weights.T)
