In [None]:
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Binarizer, OneHotEncoder, StandardScaler

In [None]:
# Columns grouped by the preprocessing each group receives; any column not
# listed here is passed through to the regressor unchanged.
binarized_cols = ['size']
one_hot_cols = ['sex', 'smoker', 'day', 'time']

preproc = make_column_transformer(
    (Binarizer(threshold=2), binarized_cols),
    (OneHotEncoder(), one_hot_cols),
    remainder='passthrough',
)

# make_column_transformer / make_pipeline generate the step names
# automatically, so none are given explicitly here.
pl = make_pipeline(preproc, LinearRegression())

# Display the (unfitted) pipeline as the cell output.
pl

In [None]:
# Column being predicted.
target_col = 'review_scores_rating'

# Every review-score column is kept out of the feature matrix: the target
# itself and its sibling sub-scores (presumably so the rating cannot leak into
# the features -- confirm).
review_score_cols = [
    'review_scores_rating',
    'review_scores_accuracy',
    'review_scores_cleanliness',
    'review_scores_checkin',
    'review_scores_communication',
    'review_scores_location',
    'review_scores_value',
]

# Rows without a rating cannot serve as regression targets (scikit-learn
# estimators and metrics reject NaN in y), so they are dropped before the
# split.  When no rating is missing this is a no-op and the split is unchanged.
rated = df.dropna(subset=[target_col])

# 80/20 split of the numeric, non-review-score features; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    rated.select_dtypes(np.number).drop(columns=review_score_cols),
    rated[target_col],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline model: fill missing feature values with the per-column mean, then
# fit an ordinary least-squares regression on the training split.
mean_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
model = make_pipeline(mean_imputer, LinearRegression()).fit(X_train, y_train)

# Predictions for the held-out rows, used by the metric cells that follow.
y_pred = model.predict(X_test)

In [None]:
# Mean absolute error of the predictions against the held-out targets.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Mean squared error of the predictions against the held-out targets.
mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Pull the fitted regressor out of the pipeline by its auto-generated step
# name and show its learned coefficients.
fitted_regressor = model.named_steps['linearregression']
fitted_regressor.coef_

In [None]:
# Pair each learned coefficient with the feature it belongs to.
#
# The names are taken from the fitted preprocessing steps (everything in the
# pipeline before the regressor) instead of being rebuilt from `df`:
# SimpleImputer discards columns that were entirely missing during fit, so a
# column list recomputed from the raw frame can be longer than `coef_` and
# would either fail to build or mislabel coefficients.
fitted_regressor = model.named_steps['linearregression']
coefs = pd.DataFrame({
    "Features": model[:-1].get_feature_names_out(),
    "Coefficients": fitted_regressor.coef_,
}).sort_values(by='Coefficients')

# One point per feature, ordered from most negative to most positive weight.
ax = coefs.plot(kind='scatter', x='Features', y='Coefficients')

# Rotate the tick labels matplotlib already placed rather than overwriting
# them with set_xticklabels: replacing labels without pinning tick positions
# ties the text to tick order and can trigger matplotlib's
# FixedFormatter/FixedLocator warning.
ax.tick_params(axis='x', labelrotation=90)

...