In [None]:
# --- Imports: everything the notebook needs, in one place ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import patsy

# Seed NumPy's global random generator.
np.random.seed(12345)

# Figure size applied to plots by default.
plt.rc('figure', figsize=(10, 6))

# Record the row limit that was active before it is overridden below.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows

# Display limits for pandas output.
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80

# Print NumPy floats with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

In [None]:
# Small example frame with columns x0, x1 and y.
data = pd.DataFrame(dict(
    x0=[1, 2, 3, 4, 5],
    x1=[0.01, -0.01, 0.25, -4.1, 0.],
    y=[-1.5, 0., 3.6, 1.3, -2.],
))
data  # last expression of the cell, shown as its output

In [None]:
# Show the column labels of `data`.
data.columns

In [None]:
# Convert the whole frame to a NumPy array.
data.to_numpy()

In [None]:
# Wrap the array form of `data` in a new frame, then give it its own labels.
df2 = pd.DataFrame(data.to_numpy())
df2.columns = ['one', 'two', 'three']
df2

In [None]:
# Work on an explicit deep copy so that `data` itself is left untouched,
# then add a column of strings next to the numeric ones.
df3 = data.copy(deep=True)
df3['strings'] = ['a', 'b', 'c', 'd', 'e']

In [None]:
# Display the copy that now carries the extra 'strings' column.
df3

In [None]:
# Array form of a frame that mixes numeric and string columns.
df3.to_numpy()

In [None]:
# Select only the listed columns before converting to an array.
model_cols = ['x0', 'x1']
model_inputs = data.loc[:, model_cols]
model_inputs.to_numpy()

In [None]:
# Build the categorical values first, with the allowed levels spelled out ...
category_values = pd.Categorical(
    values=['a', 'b', 'a', 'a', 'b'],
    categories=['a', 'b'],
)
# ... then attach them to `data` as a new column (modifies `data` in place).
data['category'] = category_values
data

In [None]:
# Indicator (dummy) columns for the categorical column, prefixed 'category'.
dummies = pd.get_dummies(data['category'], prefix='category')

# Replace the categorical column with its indicator columns.
without_category = data.drop(columns='category')
data_with_dummies = without_category.join(dummies)
data_with_dummies

In [None]:
# Rebuild `data` from scratch with only x0, x1 and y.  This also discards the
# 'category' column that was added to the earlier frame.
#
# The stray bare `data` expression that used to follow was removed: it was not
# the last expression of the cell, so it never produced any output.
data = pd.DataFrame({
    'x0': [1, 2, 3, 4, 5],
    'x1': [0.01, -0.01, 0.25, -4.1, 0.],
    'y': [-1.5, 0., 3.6, 1.3, -2.]})
# Build the two matrices described by the formula: `y` from the left-hand side
# of `~`, `X` from the terms on the right-hand side.
y, X = patsy.dmatrices('y ~ x0 + x1', data)

In [None]:
# Text form of both patsy results, one after the other.
print(y, X, sep='\n')

In [None]:
# The same two results viewed as plain NumPy arrays.
for design_matrix in (y, X):
    print(np.asarray(design_matrix))

In [None]:
# Same model terms with `+ 0` appended (patsy's syntax for dropping the
# intercept column); `[1]` picks the right-hand-side matrix of the pair.
patsy.dmatrices('y ~ x0 + x1 + 0', data)[1]

In [None]:
# Least-squares fit of y on the design matrix X.
# lstsq returns (solution, residuals, rank, singular values); only the first
# two are kept.  The tuple is sliced rather than unpacked into `_`, because
# assigning to `_` rebinds the name IPython uses for the most recent cell
# output and stops it from being updated afterwards.
coef, resid = np.linalg.lstsq(X, y, rcond=None)[:2]

In [None]:
print(coef)  # solution exactly as returned by lstsq

# Label each coefficient with the name of its design-matrix column.
column_labels = X.design_info.column_names
coef = pd.Series(coef.squeeze(), index=column_labels)
print(coef)

In [None]:
# A formula term may be a Python expression: here a NumPy transform of x1.
# The name `np` inside the formula string is the notebook's NumPy import.
y, X = patsy.dmatrices('y ~ x0 + np.log(np.abs(x1) + 1)', data)
X

In [None]:
# Apply patsy's `standardize` and `center` transforms to x0 and x1 inside the
# formula.  The resulting X (and its design_info) is what later cells reuse.
y, X = patsy.dmatrices('y ~ standardize(x0) + center(x1)', data)
X

In [None]:
# A second frame with the same column names as `data`.
new_data = pd.DataFrame(dict(
    x0=[6, 7, 8, 9],
    x1=[3.1, -0.5, 0, 2.3],
    y=[1, 2, 3, 4],
))
# Reuse the design information stored on X to build the matching matrix for
# `new_data`.  A one-element list of design infos is passed in, so `new_X` is
# whatever build_design_matrices returns for that list (presumably a list with
# one matrix -- confirm against the patsy docs).
new_X = patsy.build_design_matrices([X.design_info], new_data)
new_X

In [None]:
# Wrapping `x0 + x1` in I(...) asks patsy to evaluate it as ordinary Python
# arithmetic, giving one summed column instead of two separate terms.
y, X = patsy.dmatrices('y ~ I(x0 + x1)', data)
X

In [None]:
# New example frame (replaces the previous `data`): two key columns
# (key1 strings, key2 integer codes) and two value columns.
data = pd.DataFrame(dict(
    key1=['a', 'a', 'b', 'b', 'a', 'b', 'a', 'b'],
    key2=[0, 1, 0, 1, 0, 1, 0, 0],
    v1=[1, 2, 3, 4, 5, 6, 7, 8],
    v2=[-1, 0, 2.5, -0.5, 4.0, -1.2, 0.2, -1.7],
))
# Model v2 on key1.  key1 holds strings, so the resulting X shows how patsy
# encodes a non-numeric term.
y, X = patsy.dmatrices('v2 ~ key1', data)
X

In [None]:
# Same term with `+ 0` (patsy's syntax for omitting the intercept); compare
# the columns of X with the previous result.
y, X = patsy.dmatrices('v2 ~ key1 + 0', data)
X

In [None]:
# key2 holds integers at this point; wrapping it in C(...) asks patsy to treat
# it as categorical rather than numeric.
y, X = patsy.dmatrices('v2 ~ C(key2)', data)
X

In [None]:
# Relabel key2's integer codes as strings.
#
# Mapping with the bare dict would turn every value into NaN if this cell were
# executed a second time, because the strings written by the first run are not
# keys of the dict.  Values without an entry are therefore passed through
# unchanged, which makes the cell safe to re-run and leaves the first-run
# result the same.
key2_labels = {0: 'zero', 1: 'one'}
data['key2'] = data['key2'].map(lambda code: key2_labels.get(code, code))
data

In [None]:
# Two terms, key1 and key2; key2 holds the string labels assigned in the
# previous cell.
y, X = patsy.dmatrices('v2 ~ key1 + key2', data)
X

In [None]:
# Adds `key1:key2` (patsy's interaction syntax) on top of the two main terms.
y, X = patsy.dmatrices('v2 ~ key1 + key2 + key1:key2', data)
X