In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [2]:
# Set the coefficient and intercept
coef = 0.6
intercept = 0

# Generate random integer data
np.random.seed(0)
X = np.random.randint(0, 2, size=500_000).reshape(-1, 1)  # Generate 1000 random integer samples between -10 and 10
y_prob = 1 / (1 + np.exp(-(intercept + coef * X)))  # Calculate probability using logistic function
y = np.random.binomial(1, y_prob).reshape(-1, 1)  # Convert probabilities to binary labels

In [3]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=False)
model.fit(X, y.ravel())
print(model.coef_[0])
print(model.intercept_)

[0.60512214]
[0.]


In [4]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=True)
model.fit(X, y.ravel())
print(model.coef_[0])
print(model.intercept_)

[0.59789361]
[0.0072281]


In [5]:
X[:5]

array([[0],
       [1],
       [1],
       [0],
       [1]])

In [6]:
# Convert X to X_new with one-hot encoding
X_new = np.array([[1, 0] if x == 0 else [0, 1] for x in X.flatten()])

In [7]:
X_new[:5]

array([[1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1]])

In [8]:
model2 = LogisticRegression(fit_intercept=False)
model2.fit(X_new, y.ravel())
print(model2.coef_[0])
print(model2.intercept_)

[0.00721868 0.60512217]
[0.]


In [9]:
model2 = LogisticRegression(fit_intercept=True)
model2.fit(X_new, y.ravel())
print(model2.coef_[0])
print(model2.intercept_)

[-0.19689737  0.40100697]
[0.20411982]


In [10]:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Set the coefficients and intercept
coef = np.array([0.6, -0.4])  # Now using two features
intercept = 0

# Seed the random number generator for reproducibility
np.random.seed(42)

# Generate random integer data for two features
X = np.random.randint(0, 2, size=(100_000, 2))  # Generate data with two features

# Calculate probability using logistic function for two features
logit_model = intercept + np.dot(X, coef)
y_prob = 1 / (1 + np.exp(-logit_model))  # Apply logistic function

# Convert probabilities to binary labels
y = np.random.binomial(1, y_prob).reshape(-1, 1)

In [11]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=False)
model.fit(X, y.ravel())
model.coef_, model.intercept_

(array([[ 0.58187533, -0.38746504]]), array([0.]))

In [12]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=True)
model.fit(X, y.ravel())
model.coef_, model.intercept_

(array([[ 0.58514762, -0.38412112]]), array([-0.00500463]))

In [13]:
import random

def convert_X_probabilistic(X):
    X_new = []
    for sublist in X:
        ones_count = sum(sublist)
        if ones_count == 2:
            new_sublist = sublist.tolist() + [0]  # If exactly two 1s, append 0
        else:
            # Append 1 with a 50% probability, otherwise append 0
            new_sublist = sublist.tolist() + [random.choice([0, 1])]
        X_new.append(new_sublist)
    return np.array(X_new)

In [14]:
np.random.seed(42)
X_new = convert_X_probabilistic(X)
X_new[:5]

array([[0, 1, 0],
       [0, 0, 1],
       [0, 1, 1],
       [0, 0, 1],
       [0, 1, 0]])

In [15]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=False)
model.fit(X_new, y.ravel())
model.coef_, model.intercept_

(array([[ 0.58213459, -0.38718893, -0.00164992]]), array([0.]))

In [16]:
# Fit logistic regression model
model = LogisticRegression(random_state=42, fit_intercept=True)
model.fit(X_new, y.ravel())
model.coef_, model.intercept_

(array([[ 0.58578425, -0.38349128,  0.00253458]]), array([-0.00658511]))