In [0]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

import fastreg.linear as frl
%matplotlib inline

### Generate Data

In [0]:
# Seeded generator so the simulated dataset is identical on every run.
st = np.random.RandomState(seed=89320432)

# Sample size, and the exclusive upper bounds used when drawing the
# integer `id1` / `id2` columns.
N = 1_000_000
K1 = 10
K2 = 100

In [0]:
# Simulate the regressors: two integer group ids (drawn uniformly from
# [0, K1) and [0, K2)) and two standard-normal covariates.
df = pd.DataFrame({
    'id1': st.randint(K1, size=N),
    'id2': st.randint(K2, size=N),
    'x1': st.randn(N),
    'x2': st.randn(N)
})

# Linear outcome: slopes 3 and 2 on x1/x2, effects proportional to the ids,
# plus standard-normal noise.
df['y'] = 3*df['x1'] + 2*df['x2'] + 0.1*df['id1'] + 0.01*df['id2'] + st.randn(N)

# Count outcome: `lEp` is the log of the Poisson mean, `Ep` the mean itself.
df['lEp'] = 0.6*df['x1'] + 0.2*df['x2'] + 0.2*df['id1']/100 + 0.5*df['id2']/100
df['Ep'] = np.exp(df['lEp'])

# Draw from the seeded generator `st` rather than the global `np.random`
# state, so `p` is reproducible like every other simulated column.
df['p'] = st.poisson(df['Ep'])
df.head()

### Normal OLS

In [0]:
# Baseline: time a statsmodels formula-API OLS fit of y on x1 and x2 and show the fitted parameters.
%time smf.ols('y ~ x1 + x2', data=df).fit().params

In [0]:
# Time fastreg's OLS with the same outcome and regressors, for comparison with the statsmodels fit.
%time frl.ols(y='y', x=['x1', 'x2'], data=df)

### Sparse OLS

In [0]:
# Time fastreg's OLS with id1 and id2 passed through `fe`.
# NOTE(review): presumably `fe` adds each column as categorical (fixed-effect) terms — confirm against fastreg docs.
%time frl.ols(y='y', x=['x1', 'x2'], fe=['id1', 'id2'], data=df)

In [0]:
# Time the fit with (id1, id2) passed to `fe` as a single tuple; the result goes to `_` so only the timing is shown.
# NOTE(review): a tuple in `fe` presumably means an id1-by-id2 interaction — TODO confirm.
%time _ = frl.ols(y='y', x=['x1', 'x2'], fe=[('id1', 'id2')], data=df)

In [0]:
# Same (id1, id2) tuple specification, but with `drop=None` and `intercept=False`; result discarded into `_`.
# NOTE(review): presumably this keeps every category level and omits the constant term — verify against fastreg docs.
%time _ = frl.ols(y='y', x=['x1', 'x2'], fe=[('id1', 'id2')], data=df, drop=None, intercept=False)

### Poisson

In [0]:
import tensorflow as tf
import fastreg.general as frg

In [0]:
# Turn on GPU memory growth so TensorFlow allocates device memory as needed.
# The flag is set on every visible GPU, not just the first: TensorFlow
# requires memory growth to be configured identically across GPUs, so
# enabling it on a single device can fail on multi-GPU hosts.
# With no GPU present the loop body simply never runs.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [0]:
# Time fastreg's Poisson fit of the count `p` on x1, x2 with id1/id2 passed through `fe`, run for 3 epochs.
# The result is kept in `table` for inspection in the cells below.
%time table = frg.poisson(y='p', x=['x1', 'x2'], fe=['id1', 'id2'], data=df, epochs=3)

In [0]:
# Display the result of the Poisson fit (the object whose 'coeff' column is used below).
table

In [0]:
# Pull out the estimated coefficients whose labels mention 'id2' into a tidy
# frame with columns `id2` (the original label) and `beta1` (the estimate).
coeff = table['coeff'].filter(regex='id2').rename('beta1').rename_axis('id2').reset_index()

# Drop the 4-character label prefix (presumably 'id2=' — confirm against
# fastreg's naming) and parse the remaining level as an integer.
# Built-in `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
coeff['id2'] = coeff['id2'].str[4:].astype(int)

# True id2 effect, matching the 0.5*id2/100 term used to build `lEp`.
coeff['beta0'] = 0.5*coeff['id2']/100

# Shift the estimates by the fitted intercept before comparing with beta0.
# NOTE(review): presumably the omitted baseline level is absorbed into the intercept — confirm.
coeff['beta1'] += table.loc['intercept', 'coeff']

In [0]:
# Scatter the estimated id2 effects (beta1) against the true ones (beta0).
fig, ax = plt.subplots(figsize=(6, 5))
coeff.plot.scatter(x='beta0', y='beta1', ax=ax, alpha=0.5);

# Red y = x reference line over [0, 0.5*K2/100]; points lying on it are
# estimates that match the true effect exactly.
bmax = 0.5*K2/100
beta = np.linspace(0, bmax, num=1000)
ax.plot(beta, beta, color='r', lw=1, zorder=1);