In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

# Keep rendered output short: frames/Series longer than 5 rows are shown truncated.
pd.set_option("display.max_rows", 5)

データの読み込みと列指定（メソッドチェーン）

In [4]:
# Load seaborn's "diamonds" sample dataset and keep only the three columns
# used in this notebook. The chain is wrapped in parentheses instead of
# backslash continuations, which break on any trailing whitespace.
df = (
    sns.load_dataset("diamonds")
    .filter(["depth", "table", "color"])
)

In [5]:
df  # inspect the frame: only depth, table and color remain

Unnamed: 0,depth,table,color
0,61.5,55.0,E
1,59.8,61.0,E
...,...,...,...
53938,61.0,58.0,H
53939,62.2,55.0,D


color列を更新、new_1列とnew_2列を追加

In [7]:
# A scalar passed to assign is broadcast to every row. An existing name
# (color) overwrites that column; new names (new_1, new_2) are appended.
# The result is a new DataFrame; df itself is not modified.
df.assign(color="color", new_1=1, new_2=2)

Unnamed: 0,depth,table,color,new_1,new_2
0,61.5,55.0,color,1,2
1,59.8,61.0,color,1,2
...,...,...,...,...,...
53938,61.0,58.0,color,1,2
53939,62.2,55.0,color,1,2


普通の関数

2 * depth + 5 * table を計算する関数の定義

In [8]:
def func_1(x):
    """Return ``2 * depth + 5 * table`` computed from ``x``.

    ``x`` is anything indexable by the keys ``"depth"`` and ``"table"``
    (for example a DataFrame); the arithmetic is applied to whatever those
    lookups return, so a DataFrame yields an element-wise Series.
    """
    depth_term = 2 * x["depth"]
    table_term = 5 * x["table"]
    return depth_term + table_term

関数による出力

In [9]:
func_1(df)  # element-wise result: a float64 Series with one value per row

0        398.0
1        424.6
         ...  
53938    412.0
53939    399.4
Length: 53940, dtype: float64

関数をassignに渡し、その出力（Series）をnew列として追加

In [10]:
# assign accepts a callable: it is called with the DataFrame and its return
# value becomes the new column.
df.assign(new=func_1)

Unnamed: 0,depth,table,color,new
0,61.5,55.0,E,398.0
1,59.8,61.0,E,424.6
...,...,...,...,...
53938,61.0,58.0,H,412.0
53939,62.2,55.0,D,399.4


lambda式

2 * depth + 5 * table を計算するlambda式

In [11]:
# Anonymous equivalent of func_1. Evaluating the bare lambda only displays the
# function object; nothing is computed until it is called.
lambda x: 2 * x["depth"] + 5 * x["table"]

<function __main__.<lambda>(x)>

new列にlambda式で2 * depth + 5 * tableを追加

In [12]:
# assign passes the DataFrame itself to the lambda, so the new column can be
# derived from existing columns inline without defining a named function.
df.assign(
    new=lambda x: 2 * x["depth"] + 5 * x["table"],
)

Unnamed: 0,depth,table,color,new
0,61.5,55.0,E,398.0
1,59.8,61.0,E,424.6
...,...,...,...,...
53938,61.0,58.0,H,412.0
53939,62.2,55.0,D,399.4


条件による分岐

In [13]:
df  # unchanged: the assign calls above returned new frames and left df as is

Unnamed: 0,depth,table,color
0,61.5,55.0,E
1,59.8,61.0,E
...,...,...,...
53938,61.0,58.0,H
53939,62.2,55.0,D


color列が"E"→"E"をnew列に追加<br>
それ以外→"not_E"をnew列に追加

In [17]:
# Vectorised if/else: np.where picks "E" where the condition holds and
# "not_E" everywhere else, and the resulting array becomes the new column.
df.assign(new=lambda x: np.where(x["color"] == "E", "E", "not_E"))

Unnamed: 0,depth,table,color,new
0,61.5,55.0,E,E
1,59.8,61.0,E,E
...,...,...,...,...
53938,61.0,58.0,H,not_E
53939,62.2,55.0,D,not_E


In [18]:
df["color"]  # the column compared against "E" above; it is a categorical Series

0        E
1        E
        ..
53938    H
53939    D
Name: color, Length: 53940, dtype: category
Categories (7, object): ['D', 'E', 'F', 'G', 'H', 'I', 'J']

In [19]:
# Called on its own, np.where returns a plain NumPy string array (not a Series).
np.where(df["color"] == "E", "E", "not_E")

array(['E', 'E', 'E', ..., 'not_E', 'not_E', 'not_E'], dtype='<U5')

goodとbadの比較

In [20]:
# Reload the dataset so the good/bad comparison below starts from a fresh df.
# Parentheses replace the backslash continuations, which break on any
# trailing whitespace.
df = (
    sns.load_dataset("diamonds")
    .filter(["depth", "table", "color"])
)

In [22]:
df  # inspect the reloaded frame used as the baseline for the comparison

Unnamed: 0,depth,table,color
0,61.5,55.0,E
1,59.8,61.0,E
...,...,...,...
53938,61.0,58.0,H
53939,62.2,55.0,D


good：assignは新しいDataFrameを返すだけで、元のdfは変更されない（何度実行しても結果は同じ）

In [21]:
# Returns a new DataFrame with table + 1; df is not rebound or mutated, so
# re-running this cell always shows the same result.
df.assign(table=lambda x: x["table"] + 1)

Unnamed: 0,depth,table,color
0,61.5,56.0,E
1,59.8,62.0,E
...,...,...,...
53938,61.0,59.0,H
53939,62.2,56.0,D


bad：dfを直接書き換えるため、セルを実行するたびにtable列が1ずつ増えていく

In [24]:
# Deliberate counter-example: column assignment mutates df in place, so every
# re-run of this cell adds another 1 to table.
df["table"] = df["table"] + 1

In [25]:
df  # table now holds the incremented values inside df itself

Unnamed: 0,depth,table,color
0,61.5,56.0,E
1,59.8,62.0,E
...,...,...,...
53938,61.0,59.0,H
53939,62.2,56.0,D


In [26]:
# Rebinding df to the assign result persists the change, so this cell is not
# idempotent either: each run adds another 1 to table.
df = df.assign(table=lambda x: x["table"] + 1)

In [27]:
df  # table has been incremented a second time

Unnamed: 0,depth,table,color
0,61.5,57.0,E
1,59.8,63.0,E
...,...,...,...
53938,61.0,60.0,H
53939,62.2,57.0,D
