## Basics

In [1]:
import torch

from torchonometrics.linear import LinearRegression
from torchonometrics.mle import LogisticRegression, PoissonRegression

In [2]:
# Choose the compute device once: CUDA when PyTorch reports it usable, else CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")

# Bare expression so the notebook echoes the selected device.
device

device(type='cuda')

## OLS

In [3]:
# Simulate a small linear model: y = X @ true_coef + Gaussian noise scaled by 0.1.
torch.manual_seed(42)
n, p = 100, 5
X = torch.randn(n, p)
true_coef = torch.randn(p)
noise = 0.1 * torch.randn(n)
y = X @ true_coef + noise

# Fit the linear regression with the se="HC1" standard-error option.
model = LinearRegression()
model.fit(X, y, se="HC1")

# Last expression of the cell: display the fitted results.
model.summary()

LinearRegression Results
coef: tensor([-0.1935,  0.0301, -1.3814, -0.3106,  0.9807])
se: tensor([0.0097, 0.0082, 0.0124, 0.0111, 0.0123])


In [5]:
# Panel data setup: n_firms firms, each observed for n_years years.
# Seed locally (as the OLS and Poisson sections do) so this cell gives the same
# draws no matter which cells ran before it.
torch.manual_seed(42)
n_firms, n_years = 100, 10
n_obs = n_firms * n_years

# Generate regressors plus integer group labels, one row per (firm, year).
# Rows are firm-major: firm_ids repeats each firm n_years times in a row,
# year_ids cycles 0..n_years-1 once per firm.
X = torch.randn(n_obs, 3)
firm_ids = torch.repeat_interleave(torch.arange(n_firms), n_years)
year_ids = torch.tile(torch.arange(n_years), (n_firms,))

# No intercept column is added: X holds only the three regressors.

# True coefficients and effects: one draw per firm / per year, expanded to rows
# by indexing with the group labels.
true_coef = torch.tensor([1.5, -0.8, 0.3])
firm_effects = torch.randn(n_firms)[firm_ids]
year_effects = torch.randn(n_years)[year_ids]

y = X @ true_coef + firm_effects + year_effects + 0.1 * torch.randn(n_obs)

# Fit with two-way fixed effects (firm and year labels passed via `fe`)
model = LinearRegression()
model.fit(X, y, fe=[firm_ids, year_ids])
print(f"Coefficients: {model.params['coef']}")

Coefficients: tensor([ 1.4962, -0.8031,  0.3018], dtype=torch.float64)


## Logit

In [3]:
# Binary classification data
# Seed locally (as the OLS and Poisson sections do) so the simulated sample,
# and therefore the fitted table below, is reproducible on a fresh kernel.
torch.manual_seed(42)
n, p = 10_000_000, 4
X = torch.randn(n, p)
# Prepend a column of ones so the first coefficient acts as the intercept.
X_with_intercept = torch.cat([torch.ones(n, 1), X], dim=1)

# Generate binary outcomes: Bernoulli draws with success probability
# sigmoid(X_with_intercept @ true_coef). true_coef has p + 1 entries
# (intercept first).
true_coef = torch.tensor([0.5, 1.0, -0.8, 0.3, 0.2])
logits = X_with_intercept @ true_coef
probs = torch.sigmoid(logits)
y = torch.bernoulli(probs)

# Only construct the estimator here; the fit itself runs (and is timed) in the
# next cell.
model = LogisticRegression(maxiter=100)


In [4]:
%%time
# Fit on the tensors built in the previous cell (created without an explicit
# device), then show the results table. Both calls fall inside this cell's
# %%time measurement.
model.fit(X_with_intercept, y)
model.summary()

LogisticRegression Results
Optimizer: LBFGS
Optimization: 12/100 iterations
Final Log-Likelihood: -5380863.0000
No. Observations: 10000000

Variable     Coef.      Std.Err.   t        P>|t|    [95.0%   Conf. Interval]
----------------------------------------------------------------------
const        0.5006     0.0008     659.540  0.000    0.499    0.502   
x1           1.0008     0.0009     1136.440 0.000    0.999    1.003   
x2           -0.7996    0.0008     -959.589 0.000    -0.801   -0.798  
x3           0.2990     0.0008     395.213  0.000    0.297    0.300   
x4           0.2002     0.0007     267.239  0.000    0.199    0.202   
CPU times: user 1min 2s, sys: 15.2 s, total: 1min 17s
Wall time: 7.77 s


The same logistic regression, with the data moved to `device` (the GPU when CUDA is available):

In [7]:
# Copies of the design matrix and outcomes on the selected device.
Xg = X_with_intercept.to(device)
yg = y.to(device)
# Second, still unfitted estimator with the same settings; it is fitted in the
# next cell.
model2 = LogisticRegression(maxiter=100)

In [8]:
%%time
# Fit the second estimator on Xg / yg (the copies moved to `device`) and show
# its results table; both calls are covered by this cell's %%time measurement.
model2.fit(Xg, yg)
model2.summary()

LogisticRegression Results
Optimizer: LBFGS
Optimization: 12/100 iterations
Final Log-Likelihood: -5380863.0000
No. Observations: 10000000

Variable     Coef.      Std.Err.   t        P>|t|    [95.0%   Conf. Interval]
----------------------------------------------------------------------
const        0.5006     0.0008     659.541  0.000    0.499    0.502   
x1           1.0008     0.0009     1136.438 0.000    0.999    1.003   
x2           -0.7996    0.0008     -959.586 0.000    -0.801   -0.798  
x3           0.2990     0.0008     395.212  0.000    0.297    0.300   
x4           0.2002     0.0007     267.239  0.000    0.199    0.202   
CPU times: user 882 ms, sys: 1.93 ms, total: 884 ms
Wall time: 885 ms


In [9]:
# Predictions from `model` on the same design matrix it was fitted on.
y_pred_proba = model.predict_proba(X_with_intercept)
y_pred = model.predict(X_with_intercept)

# Distinct predicted values and how many rows received each one.
class_values, class_counts = torch.unique(y_pred, return_counts=True)
(class_values, class_counts)

(tensor([0, 1], dtype=torch.int32), tensor([3535193, 6464807]))

## Poisson

In [10]:
# Simulate count data: y ~ Poisson(exp(X_with_intercept @ true_coef)).
torch.manual_seed(42)
n, p = 10_000_000, 2
X = torch.randn(n, p)
# Column of ones first, so true_coef[0] acts as the intercept.
intercept_col = torch.ones(n, 1)
X_with_intercept = torch.cat([intercept_col, X], dim=1)
true_coef = torch.tensor([1.0, 0.5, -0.3])

# Generate Poisson counts
linear_pred = X_with_intercept @ true_coef
lambda_true = linear_pred.exp()
y = torch.poisson(lambda_true)

In [11]:
%%time
# Build the Poisson estimator, fit it on the simulated counts from the previous
# cell, and show the results table. Construction, fit and summary are all
# inside this cell's %%time measurement. Note that `model` is rebound here and
# no longer refers to the logistic model used earlier.
model = PoissonRegression(maxiter=50)
model.fit(X_with_intercept, y)
model.summary()

PoissonRegression Results
Optimizer: LBFGS
Optimization: 12/50 iterations
Final Log-Likelihood: 10960921.0000
No. Observations: 10000000

Variable     Coef.      Std.Err.   t        P>|t|    [95.0%   Conf. Interval]
----------------------------------------------------------------------
const        1.0000     0.0002     4903.631 0.000    1.000    1.000   
x1           0.5000     0.0002     2837.858 0.000    0.500    0.500   
x2           -0.3000    0.0002     -1702.947 0.000    -0.300   -0.300  
CPU times: user 44.7 s, sys: 7.75 s, total: 52.4 s
Wall time: 4.96 s


In [12]:
# Copies of the Poisson design matrix and counts on the selected device
# (rebinds Xg / yg from the logit section).
Xg = X_with_intercept.to(device)
yg = y.to(device)

In [13]:
%%time
# Same Poisson fit as the previous timed cell, but on Xg / yg (the copies moved
# to `device`). Construction, fit and summary are all inside this cell's
# %%time measurement.
model2 = PoissonRegression(maxiter=50)
model2.fit(Xg, yg)
model2.summary()

PoissonRegression Results
Optimizer: LBFGS
Optimization: 12/50 iterations
Final Log-Likelihood: 10960920.0000
No. Observations: 10000000

Variable     Coef.      Std.Err.   t        P>|t|    [95.0%   Conf. Interval]
----------------------------------------------------------------------
const        1.0000     0.0002     4903.634 0.000    1.000    1.000   
x1           0.5000     0.0002     2837.848 0.000    0.500    0.500   
x2           -0.3000    0.0002     -1702.948 0.000    -0.300   -0.300  
CPU times: user 858 ms, sys: 6.87 ms, total: 865 ms
Wall time: 867 ms
