# Recipipe Features - Snippets

A collection of short examples showing the strengths of Recipipe.

In [1]:
import numpy as np

import pandas as pd

import recipipe as r

# Named output columns

After applying a transformer, the output columns have descriptive names.
In this example, the output columns are "color=blue" and "color=red" instead of "color_0" and "color_1".

In [2]:
# Toy frame with a single string column.
df = pd.DataFrame(dict(color=["red", "blue"]))
df

Unnamed: 0,color
0,red
1,blue


In [3]:
# One-hot encode the frame. As the output shows, the new columns are named
# "<column>=<category>" (here "color=blue" and "color=red").
r.onehot().fit_transform(df)

Unnamed: 0,color=blue,color=red
0,0.0,1.0
1,1.0,0.0


# Selecting by dtype

In [4]:
# One string column plus one integer column.
df = pd.DataFrame(dict(color=["red", "blue"], value=[1, 2]))
df

Unnamed: 0,color,value
0,red,1
1,blue,2


In [5]:
# Restrict the encoder to object-dtype columns: in the output, "color" is
# encoded while the integer "value" column comes through unchanged.
r.onehot(dtype=object).fit_transform(df)

Unnamed: 0,color=blue,color=red,value
0,0.0,1.0,1
1,1.0,0.0,2


# All numbers dtype

In [6]:
# Mixed frame: one string column, one integer column, one float column.
df = pd.DataFrame(
    dict(color=["red", "blue"], value_int=[1, 2], value_float=[0.1, 0.2])
)
df

Unnamed: 0,color,value_int,value_float
0,red,1,0.1
1,blue,2,0.2


In [7]:
# Select columns by the abstract dtype np.number: in the output both the
# integer and the float column are scaled, and "color" is left as-is.
r.scale(dtype=np.number).fit_transform(df)

Unnamed: 0,color,value_int,value_float
0,red,-1.0,-1.0
1,blue,1.0,1.0


# Exclude by dtype

In [8]:
# Same mixed frame as in the previous section, rebuilt so this snippet is
# self-contained.
df = pd.DataFrame(
    dict(
        color=["red", "blue"],
        value_int=[1, 2],
        value_float=[0.1, 0.2],
    )
)
df

Unnamed: 0,color,value_int,value_float
0,red,1,0.1
1,blue,2,0.2


In [9]:
# Inverse selection: scale every column whose dtype is NOT object. In the
# output the two numeric columns are scaled and "color" is untouched.
r.scale(dtype=dict(exclude=object)).fit_transform(df)

Unnamed: 0,color,value_int,value_float
0,red,-1.0,-1.0
1,blue,1.0,1.0


# Select using fnmatch

You can use fnmatch patterns (Unix shell-style wildcards such as `feature*`) to select columns in any Recipipe transformer.

In [10]:
# Two columns sharing the "feature" prefix and one unrelated "id" column.
df = pd.DataFrame(dict(feature1=[1, 2], feature2=[3, 4], id=["a", "b"]))
df

Unnamed: 0,feature1,feature2,id
0,1,3,a
1,2,4,b


In [11]:
# "feature*" matches feature1 and feature2; "id" does not match and is
# therefore absent from the output.
r.select("feature*").fit_transform(df)

Unnamed: 0,feature1,feature2
0,1,3
1,2,4


# Missing indicator

In [12]:
# Single numeric column in which every other value is missing.
df = pd.DataFrame(dict(feature=[1, np.nan, 2, np.nan]))
df

Unnamed: 0,feature
0,1.0
1,
2,2.0
3,


In [13]:
# Produces one boolean column, named "INDICATOR(feature)" in the output,
# that is True on the rows where the input value is missing.
r.indicator().fit_transform(df)

Unnamed: 0,INDICATOR(feature)
0,False
1,True
2,False
3,True


# Extract: text match indicator in several columns

In [14]:
# Short words to search for substrings in.
df = pd.DataFrame({"c": ["tone", "one", "none", "lone", "all", "al"]})
df

Unnamed: 0,c
0,tone
1,one
2,none
3,lone
4,all
5,al


In [15]:
# One 0/1 column per pattern, flagging the rows of "c" that contain it.
# col_format is the naming template for those columns; the output shows
# {column} and {value} filled in, e.g. "CONTAINS(c,one)".
r.extract(
    pattern=["one", "ll"],
    indicator=True,
    col_format="CONTAINS({column},{value})",
).fit_transform(df)

Unnamed: 0,"CONTAINS(c,one)","CONTAINS(c,ll)"
0,1,0
1,1,0
2,1,0
3,1,0
4,0,1
5,0,0


# Extract: date

In [16]:
# Dates stored as "YYYY-MM" strings.
df = pd.DataFrame({"date": ["2012-02", "2013-03"]})
df

Unnamed: 0,date
0,2012-02
1,2013-03


In [17]:
# Split "YYYY-MM" strings into integer "year" and "month" columns.
# The patterns are raw strings so that "\d" reaches the regex engine verbatim
# instead of being parsed as an (invalid) Python string escape, which raises a
# warning on recent Python versions.
r.recipipe([
    # Digits before the dash -> "year".
    # NOTE(review): keep_original=True presumably keeps "date" available for
    # the next step; confirm against the recipipe documentation.
    r.extract(pattern=[r"(\d*)-"], col_format="year", keep_original=True),
    # Digits after the dash -> "month", read from the "date" column.
    r.extract("date", pattern=[r"-(\d*)"], col_format="month"),
    # Cast to int: the output shows month as 2 and 3 rather than "02" and "03".
    r.astype(dtypes=int),
]).fit_transform(df)

Unnamed: 0,month,year
0,2,2012
1,3,2013


# Apply transformer by group

In [18]:
# Two groups ("a" and "b") with different value ranges.
df = pd.DataFrame(
    {
        "name": ["a", "a", "a", "b", "b"],
        "value": [0, 1, 2, 0, 1],
    }
)
df

Unnamed: 0,name,value
0,a,0
1,a,1
2,a,2
3,b,0
4,b,1


In [19]:
# Apply the min-max scaler separately within each "name" group: in the
# output, "value" spans 0..1 inside group "a" and again inside group "b".
r.groupby("name", r.minmax("value")).fit_transform(df)

Unnamed: 0,name,value
0,a,0.0
1,a,0.5
2,a,1.0
3,b,0.0
4,b,1.0


# Concat columns

In [20]:
# Date parts kept in separate integer columns.
df = pd.DataFrame({"year": [2020, 2020], "month": [1, 2]})
df

Unnamed: 0,year,month
0,2020,1
1,2020,2


In [21]:
# Join the columns row by row with "-" as separator. In the output the
# result ("2020-1", "2020-2") is stored under the first column's name, "year".
r.concat(separator="-").fit_transform(df)

Unnamed: 0,year
0,2020-1
1,2020-2


# Sum columns

In [22]:
# Points per match in separate columns, plus an identifier column.
df = pd.DataFrame(
    {
        "points_match_1": [1, 2, 3],
        "points_match_2": [3, 4, 5],
        "match_id": ["a", "b", "c"],
    }
)
df

Unnamed: 0,points_match_1,points_match_2,match_id
0,1,3,a
1,2,4,b
2,3,5,c


In [23]:
# Add up, row by row, the columns matching "points_match_*" into a single
# column named by col_format ("points"); "match_id" is kept in the output.
r.sum("points_match_*", col_format="points").fit_transform(df)

Unnamed: 0,points,match_id
0,4,a
1,6,b
2,8,c


# Type casting

In [24]:
# Years stored as strings rather than numbers.
df = pd.DataFrame({"year": ["2012", "2013"]})
df

Unnamed: 0,year
0,2012
1,2013


In [25]:
# Cast the string column to int. The rendered table looks the same as the
# input because the printed values are unchanged; the column dtype is what differs.
r.astype(dtypes="int").fit_transform(df)

Unnamed: 0,year
0,2012
1,2013
