## Python versus R

### Pandas

```R
library("tidymodels")

linear_reg()
```

In [None]:
import pandas as pd

In [None]:
# from pandas import read_csv

In [None]:
# Namespaced call: the function is reached through the module alias (pd.<name>).
# NOTE(review): read_csv() requires a file path/buffer argument; called with no
# arguments as written, it raises TypeError. Presumably this cell only
# illustrates the call syntax — pass a path to actually run it.
pd.read_csv()

In [None]:
# pd.read_csv()

### Plotnine

https://plotnine.readthedocs.io/en/latest/

In [None]:
from plotnine import *  # frowned upon in Python
from plotnine.data import mtcars

In [None]:
# Display the mtcars example dataset imported from plotnine.data.
mtcars

In [None]:
# Scatter plot of weight ("wt") against "mpg", coloured by gear treated as a
# factor, with a linear-model smoother and one facet panel per gear value.
(
    ggplot(
        data=mtcars,
        mapping=aes(x="wt", y="mpg", color="factor(gear)"),
    )
    + geom_point()
    + stat_smooth(method="lm")
    + facet_wrap("~gear")
)

### Altair

https://altair-viz.github.io/

In [None]:
from palmerpenguins import load_penguins

In [None]:
import altair as alt

In [None]:
# Load the penguins dataset through palmerpenguins.load_penguins().
penguins = load_penguins()

# Preview the first rows.
penguins.head()

In [None]:
# Method chaining with "." plays the role that the |> pipe plays in R.
#
# Scatter plot of bill depth (x) against bill length (y). Neither axis scale
# is forced to include zero, points are coloured by species, and the tooltip
# lists species and sex.
(
    alt.Chart(penguins)
    .mark_point()
    .encode(
        x=alt.X("bill_depth_mm", scale=alt.Scale(zero=False)),
        y=alt.Y("bill_length_mm", scale=alt.Scale(zero=False)),
        color=alt.Color("species"),
        tooltip=[alt.Tooltip("species"), alt.Tooltip("sex")],
    )
)

## scikit-learn

In [None]:
# Load the penguins dataset again so this section starts from a fresh copy.
penguins = load_penguins()

# Preview the first rows.
penguins.head()

In [None]:
# Keep only rows without missing values, then restrict to the Adelie species.
# The callable passed to .loc receives the already-filtered frame, so the
# boolean mask is built on the result of dropna().
penguins2 = (
    penguins
    .dropna()
    .loc[lambda df_: df_["species"] == "Adelie"]
)

penguins2.head()

```R
mod = linear_reg() |> set_engine("lm")

mod_fit = mod |> fit(bill_length_mm ~ bill_depth_mm, data = penguins)
```

In [None]:
# A Python list literal: square brackets with comma-separated items.
list_of_stuff = [1, 2, 5, 6]
list_of_stuff

In [None]:
# Predictor column name(s), kept in a list so they can be used to select
# columns from the DataFrame.
features = ['bill_depth_mm']
features

In [None]:
# Outcome (target) column name, kept in a list like `features`.
outcome = ['bill_length_mm']

In [None]:
# in R: X = penguins2 |> select(bill_depth_mm)

# Indexing a DataFrame with a list of column names returns a DataFrame.
# By scikit-learn convention the feature matrix is a capitalized X and the
# target a lowercase y.
X = penguins2[features]
# Reuse the `outcome` list defined earlier instead of repeating the column name.
y = penguins2[outcome]

X.head()

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create the estimator and fit it on the feature matrix X and target y.
# fit() returns the estimator itself, so the fitted model can be bound directly.
mod = LinearRegression().fit(X, y)

# Display the fitted estimator.
mod

In [None]:
# in R: tidy(mod_fit)
# Only the fitted intercept is shown here; in scikit-learn the slope
# coefficients are stored separately in `mod.coef_`.

mod.intercept_

In [None]:
# One-row DataFrame holding the new observation to predict for; the column
# name matches the feature the model was fitted on.
X_to_predict = pd.DataFrame(data={"bill_depth_mm": [19]})

X_to_predict

In [None]:
# R counterparts: augment(), predict()
# in R: mod |> predict(new_data = penguins)

# Predict the outcome for the new observation(s) in X_to_predict.
mod.predict(X_to_predict)

In [None]:
# Import only the plotnine names this cell uses instead of the whole namespace.
from plotnine import aes, geom_point, ggplot

# Scatter plot of bill depth against bill length for the filtered penguins.
p = (
    ggplot(penguins2, aes(x="bill_depth_mm", y="bill_length_mm"))
    + geom_point()
)

# Render the plot explicitly rather than relying on the notebook's
# automatic display of the last expression.
p.draw()

### Transforming input data

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Rebind `features`: now one numeric column plus the `species` column,
# which is one-hot encoded further below.
features = ['bill_depth_mm', 'species']
features

In [None]:
# Drop rows with missing values but keep every species this time.
penguins_no_missing = penguins.dropna()

# Feature matrix (capitalized X) and target (lowercase y), both selected with
# lists of column names so they stay DataFrames.
X = penguins_no_missing[features]
# Reuse the `outcome` list defined earlier instead of repeating the column name.
y = penguins_no_missing[outcome]

X.head()

In [None]:
# Column-wise preprocessing, given as (transformer, columns) tuples — the form
# documented for make_column_transformer:
#   - bill_depth_mm is passed through unchanged;
#   - species is one-hot encoded, with the "Adelie" category dropped.
ct = make_column_transformer(
    ("passthrough", ["bill_depth_mm"]),
    (OneHotEncoder(drop=["Adelie"]), ["species"]),
)

In [None]:
# Display the (not yet fitted) column transformer.
ct

In [None]:
# Fit the transformer on X and transform it in one step; show the first 5 rows.
ct.fit_transform(X)[:5]

# Presumably an exercise: try calling fit() and then transform() as two
# separate steps instead of fit_transform().

### Pipelines

In [None]:
from sklearn.pipeline import make_pipeline

In [None]:
# Chain the preprocessing (column transformer) and the regression model into
# a single estimator; steps run in the order given.
pl = make_pipeline(ct, LinearRegression())

In [None]:
# Fit the whole pipeline (preprocessing, then regression) on X and y.
pl.fit(X, y)

#### Extract steps from pipeline

In [None]:
# Show the pipeline's steps keyed by their names.
pl.named_steps

In [None]:
# Pull the regression step out of the pipeline by its step name.
# Note: this rebinds `mod`, replacing the single-feature model fitted earlier.
mod = pl.named_steps["linearregression"]

In [None]:
# Intercept of the regression step extracted from the pipeline.
mod.intercept_

In [None]:
# One-row DataFrame for prediction; it carries both columns the pipeline was
# fitted on (the numeric feature and the species label).
X_to_predict = pd.DataFrame(
    data={
        "bill_depth_mm": [19],
        "species": ["Adelie"],
    }
)

In [None]:
# Predict through the pipeline: the raw input is preprocessed by the column
# transformer before reaching the regression step.
pl.predict(X_to_predict)

In [None]:
# Look up the regression step by name and show its fitted intercept.
mod_from_pipeline = pl.named_steps["linearregression"]
mod_from_pipeline.intercept_