In [1]:
import altair as alt
from sklearn.linear_model import LinearRegression
import palmerpenguins

In [2]:
# Load the Palmer penguins table, discard rows with any missing value,
# and keep only the rows whose species is Adelie.
penguins = (
    palmerpenguins.load_penguins()
    .dropna()
    .loc[lambda df_: df_['species'] == 'Adelie']
)

# Show the first rows as the cell's rich output.
penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,male,2007


## Linear model

In [7]:
# Ordinary least-squares regressor from scikit-learn, created with default
# settings; it is not fitted yet at this point.
mod = LinearRegression()

# Bare expression so the notebook displays the estimator's repr.
mod

In [8]:
# Feature matrix: bill depth reshaped to a single-column 2-D array,
# since scikit-learn estimators expect X with shape (n_samples, n_features).
X = penguins['bill_depth_mm'].values.reshape(-1, 1)

# Target vector: bill length as a 1-D array.
y = penguins['bill_length_mm'].values

In [12]:
# Fit the regression of bill_length_mm (y) on bill_depth_mm (X) for the Adelie subset.
mod.fit(X, y)

In [13]:
# Fitted slope: change in predicted bill_length_mm per 1 mm of bill_depth_mm
# (one entry because X has a single feature column).
mod.coef_

array([0.8424775])

In [14]:
# Fitted intercept: predicted bill_length_mm at bill_depth_mm = 0.
mod.intercept_

23.36681859556319

## Altair visualization

In [18]:
# Sanity-check the fitted model on the training feature matrix.
# X is the same single-column bill_depth_mm array the model was fitted on, so it
# is reused rather than rebuilt; only the first five predictions are shown to
# keep the cell output short instead of dumping the whole array.
mod.predict(X)[:5]

array([39.12114789, 38.02592713, 38.53141363, 39.62663439, 40.72185514])

In [21]:
# Build a new frame carrying the model's fitted values in a `predictions`
# column; `assign` returns a new object, so `penguins` is left unchanged.
# Double-bracket selection yields a one-column frame whose `.values` is
# already the (n_samples, 1) array that `predict` needs.
penguins_w_pred = penguins.assign(
    predictions=lambda df_: mod.predict(df_[['bill_depth_mm']].values)
)

# Show the first rows, now including the predictions column.
penguins_w_pred.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year,predictions
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007,39.121148
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007,38.025927
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007,38.531414
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007,39.626634
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,male,2007,40.721855


In [23]:
# Axis styling shared by every encoded channel in the figure.
axis_kws = {'labelFontSize': 12, 'titleFontSize': 14, 'titlePadding': 20}

# Common x channel: bill depth, with the scale not forced to include zero.
x_depth = alt.X(
    'bill_depth_mm',
    scale=alt.Scale(zero=False),
    axis=alt.Axis(**axis_kws),
)
base = alt.Chart(penguins_w_pred).encode(x=x_depth)

# Layer 1: observed bill lengths drawn as dark, half-transparent points.
y_observed = alt.Y(
    'bill_length_mm',
    scale=alt.Scale(zero=False),
    axis=alt.Axis(**axis_kws),
)
points = base.mark_point(
    color='#151515',
    fill='#151515',
    fillOpacity=0.5,
    size=50,
).encode(y=y_observed)

# Layer 2: the model's predictions drawn as a thick line. The axis title is set
# to 'bill_length_mm' explicitly -- presumably so the shared y-axis is labelled
# with the observed variable rather than 'predictions'.
y_fitted = alt.Y(
    'predictions',
    axis=alt.Axis(title='bill_length_mm', **axis_kws),
)
line = base.mark_line(strokeWidth=4).encode(y=y_fitted)

# Overlay the fitted line on the scatter; the layered chart is the cell output.
points + line