In [17]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
import statsmodels.formula.api as smf

# Shared estimator for the comparisons below. The intercept is switched off
# because the regressions that use this model include one dummy column for
# every category ("a", "b", "c"); those dummies sum to 1 on each row, so they
# already play the role of the constant term and an extra intercept would be
# perfectly collinear with them.
model = LinearRegression(fit_intercept=False)

#### Example dataframe

In [7]:
# Toy data set: one categorical column ("cat") and two numeric columns.
# The draws come from a locally seeded generator so that the frame -- and
# every coefficient computed from it further down -- is identical on each
# Restart & Run All, without touching NumPy's global random state.
rng = np.random.default_rng(seed=0)

# Built in a single constructor call; "x" is drawn before "y".
df = pd.DataFrame(
    {
        "cat": ["a", "b", "a", "c", "b", "c", "a", "c", "b", "b"],
        "x": rng.normal(size=10),
        "y": rng.normal(size=10),
    }
)
df

Unnamed: 0,cat,x,y
0,a,1.509802,-0.936015
1,b,0.849288,-0.444733
2,a,-2.67385,0.910589
3,c,2.161368,0.531697
4,b,-1.509027,-0.263009
5,c,-0.940353,0.566537
6,a,-1.911003,-0.906917
7,c,-0.24288,-1.446628
8,b,1.762018,-0.709754
9,b,-0.433713,0.041794


#### Method 1: pandas `get_dummies`

In [20]:
# Expand "cat" into one indicator column per level ("a", "b", "c") and attach
# those columns to the original frame.
cat_dummies = pd.get_dummies(df["cat"])
d2 = df.join(cat_dummies)

# Regress y on the three indicators plus x. The coefficients come back in the
# same order as the feature columns listed here.
design = d2[["a", "b", "c", "x"]]
target = d2["y"]
model.fit(design, target)
model.coef_

array([-0.4548642 , -0.32043103, -0.07030063, -0.14056664])

#### Method 2: scikit-learn `OneHotEncoder` in a pipeline

In [21]:
# Same regression as Method 1, with the encoding done inside the pipeline:
# "cat" is one-hot encoded and the remaining column ("x") is passed through
# untouched, after the encoded columns.
transform = make_column_transformer(
    (OneHotEncoder(), ["cat"]), remainder="passthrough"
)
pipe = make_pipeline(transform, model)

pipe.fit(df[["x", "cat"]], df["y"])

# The last pipeline step is the very same `model` object (the pipeline fits it
# in place), so these are the coefficients of the shared estimator, read
# through the pipeline that just fitted it.
pipe[-1].coef_

array([-0.4548642 , -0.32043103, -0.07030063, -0.14056664])