# 第5章 関数を適用(apply)する

## 5.1 関数の初歩

In [1]:
def my_function():
    """Placeholder function: takes no arguments and does nothing."""
    return None

In [2]:
def my_sq(x):
    """Return ``x`` raised to the power of two."""
    squared = x ** 2
    return squared

def avg_2(x, y):
    """Return the mean of ``x`` and ``y`` (their sum divided by 2)."""
    total = x + y
    return total / 2

In [3]:
# Evaluate both helper functions first, then show the results in order.
my_calc_1 = my_sq(4)
my_calc_2 = avg_2(10, 20)

print(my_calc_1)
print(my_calc_2)

16
15.0


## 5.2 applyの基本

In [5]:
import polars as pl
# Small two-column integer frame used by the examples that follow.
df = pl.DataFrame(
    {"a": [10, 20, 30], "b": [20, 30, 40]}
)
print(df)

shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 20  │
│ 20  ┆ 30  │
│ 30  ┆ 40  │
└─────┴─────┘


In [6]:
# Square every element of column "a" with the Series power operator.
a_squared = df["a"] ** 2
print(a_squared)

shape: (3,)
Series: 'a' [f64]
[
	100.0
	400.0
	900.0
]


### 5.2.1 関数をSeriesに適用する

In [7]:
# Show the type of a single-column selection and of a single-row selection.
for selection in (df["a"], df[0, :]):
    print(type(selection))

<class 'polars.series.series.Series'>
<class 'polars.dataframe.frame.DataFrame'>


In [8]:
# Apply my_sq to each element of column "a".
# Series.apply is deprecated since polars 0.19.0 (renamed to
# Series.map_elements), so the current method name is used here.
sq = df["a"].map_elements(my_sq)
print(sq)

shape: (3,)
Series: 'a' [i64]
[
	100
	400
	900
]


  sq = df["a"].apply(my_sq)
Series.map_elements is significantly slower than the native series API.
Only use if you absolutely CANNOT implement your logic otherwise.
In this case, you can replace your `map_elements` with the following:
  - s.map_elements(my_sq)
  + s ** 2

  sq = df["a"].apply(my_sq)


In [9]:
def my_exp(x, e):
    """Return ``x`` raised to the power ``e``."""
    result = x ** e
    return result

In [10]:
# 2 raised to the 3rd power.
cubed = my_exp(x=2, e=3)
print(cubed)

8


In [14]:
# polars.Series.apply
# Deprecated since version 0.19.0: This method has been renamed to Series.map_elements().
# Raise every element of column "a" to the 2nd power, then to the 3rd power;
# the lambda fixes the exponent because the callable is given only the element.
for exponent in (2, 3):
    ex = df["a"].map_elements(lambda x: my_exp(x, exponent))
    print(ex)

shape: (3,)
Series: 'a' [i64]
[
	100
	400
	900
]
shape: (3,)
Series: 'a' [i64]
[
	1000
	8000
	27000
]


### 5.2.2 関数をDataFrameに適用する

In [15]:
# Recreate the two-column example frame for the DataFrame-level examples.
df = pl.DataFrame(
    {"a": [10, 20, 30], "b": [20, 30, 40]}
)
print(df)

shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 20  │
│ 20  ┆ 30  │
│ 30  ┆ 40  │
└─────┴─────┘


In [16]:
def print_me(x):
    """Write ``x`` to standard output and return nothing."""
    print(x)
    return None

#### 5.2.2.1 列ごとの演算
polarsのapplyやmap_rowsには列ごとの処理が無いようなので、この節は省略

In [17]:
# polars.DataFrame.apply
# Deprecated since version 0.19.0: This method has been renamed to DataFrame.map_rows().
# Behaves differently from pandas' apply: the callable is handed each row,
# so this prints the first two values of every row in turn.
df.map_rows(lambda row: tuple(map(print_me, row[:2])))

10
20
20
30
30
40


column_0,column_1
null,null
,
,
,


#### 5.2.2.2 行ごとの演算

In [21]:
def avg_2_apply(row):
    """Return the mean of the first two entries of ``row``."""
    first, second = row[0], row[1]
    return (first + second) / 2

In [25]:
# Row-wise mean of the first two values of each row.
# DataFrame.apply is deprecated since polars 0.19.0 (renamed to
# DataFrame.map_rows), so map_rows is called here.
print(df.map_rows(lambda row: avg_2_apply(row)))

shape: (3, 1)
┌──────┐
│ map  │
│ ---  │
│ f64  │
╞══════╡
│ 15.0 │
│ 25.0 │
│ 35.0 │
└──────┘


  print(df.apply(lambda row: avg_2_apply(row)))


In [26]:
# Row-wise mean via map_rows: each row is handed to avg_2_apply.
row_means = df.map_rows(lambda row: avg_2_apply(row))
print(row_means)

shape: (3, 1)
┌──────┐
│ map  │
│ ---  │
│ f64  │
╞══════╡
│ 15.0 │
│ 25.0 │
│ 35.0 │
└──────┘


## 5.3 関数のベクトル化

In [28]:
# Fresh copy of the two-column example frame for the vectorization section.
df = pl.DataFrame(
    {"a": [10, 20, 30], "b": [20, 30, 40]}
)
print(df)

shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 20  │
│ 20  ┆ 30  │
│ 30  ┆ 40  │
└─────┴─────┘


In [27]:
def avg_2(x, y):
    """Return the mean of ``x`` and ``y`` (their sum divided by 2)."""
    summed = x + y
    return summed / 2

In [29]:
# Call avg_2 on the two whole columns at once instead of element by element.
col_means = avg_2(df["a"], df["b"])
print(col_means)

shape: (3,)
Series: 'a' [f64]
[
	15.0
	25.0
	35.0
]


In [31]:
# Row-wise mean computed by passing the first two row values to avg_2.
# DataFrame.apply is deprecated since polars 0.19.0 (renamed to
# DataFrame.map_rows), so map_rows is called here.
print(df.map_rows(lambda row: avg_2(row[0], row[1])))

shape: (3, 1)
┌──────┐
│ map  │
│ ---  │
│ f64  │
╞══════╡
│ 15.0 │
│ 25.0 │
│ 35.0 │
└──────┘


  print(df.apply(lambda row: avg_2(row[0], row[1])))


In [30]:
# Row-wise mean via map_rows, unpacking the first two values of each row.
row_means = df.map_rows(lambda row: avg_2(row[0], row[1]))
print(row_means)

shape: (3, 1)
┌──────┐
│ map  │
│ ---  │
│ f64  │
╞══════╡
│ 15.0 │
│ 25.0 │
│ 35.0 │
└──────┘


In [34]:
import numpy as np

def avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    The ``if`` needs a single truth value, so this function is meant to be
    called with scalar ``x``.
    """
    if x == 20:
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        return np.nan
    return (x + y) / 2

In [36]:
# これはpandas, polarsともにエラーになる
#print(avg_2_mod(df["a"], df["b"]))

In [37]:
# Scalar calls: the first pair takes the averaging branch, the second (x == 20) the NaN branch.
for pair in ((10, 20), (20, 30)):
    print(avg_2_mod(*pair))

15.0
nan


### 5.3.1 NumPyを使ったベクトル化

In [39]:
# Wrap the scalar-only function with np.vectorize so it can be called on whole columns.
avg_mod_vec = np.vectorize(avg_2_mod)

vectorized_result = avg_mod_vec(df["a"], df["b"])
print(vectorized_result)

[15. nan 35.]


In [40]:
@np.vectorize
def v_avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    Wrapped with ``np.vectorize`` so it can be called on array-like inputs.
    """
    if x == 20:
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        return np.nan
    return (x + y) / 2

# Call the decorated function directly on the two columns.
decorated_result = v_avg_2_mod(df["a"], df["b"])
print(decorated_result)

[15. nan 35.]


### 5.3.2 numbaを使ったベクトル化

In [41]:
import numba

@numba.vectorize
def v_avg_2_numba(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``int(x)`` equals 20.

    Decorated with ``numba.vectorize``; the call in this notebook passes
    NumPy arrays obtained via ``Series.to_numpy()``.
    """
    if int(x) == 20:
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        return np.nan
    else:
        return (x + y) / 2

In [42]:
# Convert both columns to NumPy arrays before calling the numba-vectorized function.
a_values = df["a"].to_numpy()
b_values = df["b"].to_numpy()
print(v_avg_2_numba(a_values, b_values))

[15. nan 35.]


## 5.4 ラムダ関数

In [44]:
# Fresh copy of the two-column example frame for the lambda examples.
df = pl.DataFrame(
    {"a": [10, 20, 30], "b": [20, 30, 40]}
)
print(df)

def my_sq(x):
    """Return ``x`` raised to the power of two."""
    squared = x ** 2
    return squared

# Author's note: in pandas the function alone can be passed as the argument,
# whereas with polars an expression has to be written like a closure, stating
# explicitly what should be done.
# NOTE(review): passing the function object directly also appears to work in
# polars (see `df["a"].apply(my_sq)` in section 5.2.1) - confirm.
# Series.apply is deprecated since polars 0.19.0 (renamed to
# Series.map_elements), so map_elements is used; the "a_apply" column name
# is kept unchanged.
df = df.with_columns([
    df["a"].map_elements(lambda x: my_sq(x)).alias("a_apply")
    ])
print(df)

# Add the squared values of "a" as a new column, computed with Series.map_elements.
a_squared_col = df["a"].map_elements(lambda x: my_sq(x)).alias("a_map_elements")
df = df.with_columns([a_squared_col])
print(df)


shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 20  │
│ 20  ┆ 30  │
│ 30  ┆ 40  │
└─────┴─────┘
shape: (3, 3)
┌─────┬─────┬─────────┐
│ a   ┆ b   ┆ a_apply │
│ --- ┆ --- ┆ ---     │
│ i64 ┆ i64 ┆ i64     │
╞═════╪═════╪═════════╡
│ 10  ┆ 20  ┆ 100     │
│ 20  ┆ 30  ┆ 400     │
│ 30  ┆ 40  ┆ 900     │
└─────┴─────┴─────────┘
shape: (3, 4)
┌─────┬─────┬─────────┬────────────────┐
│ a   ┆ b   ┆ a_apply ┆ a_map_elements │
│ --- ┆ --- ┆ ---     ┆ ---            │
│ i64 ┆ i64 ┆ i64     ┆ i64            │
╞═════╪═════╪═════════╪════════════════╡
│ 10  ┆ 20  ┆ 100     ┆ 100            │
│ 20  ┆ 30  ┆ 400     ┆ 400            │
│ 30  ┆ 40  ┆ 900     ┆ 900            │
└─────┴─────┴─────────┴────────────────┘


  df["a"].apply(lambda x: my_sq(x)).alias("a_apply")
