## knock 057 カテゴリ同士を組合わせた新たなカテゴリを作成する

In [1]:
import polars as pl
# Configure polars table rendering so that no columns are omitted from the output.
pl.Config.set_tbl_cols(-1)

polars.config.Config

### データを読み込む

In [2]:
# Read the customer CSV into a polars DataFrame, then preview its first five rows.
df_customer = pl.read_csv(source="../docker/work/data/customer.csv")
display(df_customer.head(n=5))

customer_id,customer_name,gender_cd,gender,birth_day,age,postal_cd,address,application_store_cd,application_date,status_cd
str,str,i64,str,str,i64,str,str,str,i64,str
"""CS021313000114…","""大野 あや子""",1,"""女性""","""1981-04-29""",37,"""259-1113""","""神奈川県伊勢原市粟窪****…","""S14021""",20150905,"""0-00000000-0"""
"""CS037613000071…","""六角 雅彦""",9,"""不明""","""1952-04-01""",66,"""136-0076""","""東京都江東区南砂******…","""S13037""",20150414,"""0-00000000-0"""
"""CS031415000172…","""宇多田 貴美子""",1,"""女性""","""1976-10-04""",42,"""151-0053""","""東京都渋谷区代々木*****…","""S13031""",20150529,"""D-20100325-C"""
"""CS028811000001…","""堀井 かおり""",1,"""女性""","""1933-03-27""",86,"""245-0016""","""神奈川県横浜市泉区和泉町**…","""S14028""",20160115,"""0-00000000-0"""
"""CS001215000145…","""田崎 美紀""",1,"""女性""","""1995-03-29""",24,"""144-0055""","""東京都大田区仲六郷*****…","""S13001""",20170605,"""6-20090929-2"""


### ノック

In [4]:
# Knock 057: build a combined "gender x age bracket" category for each customer.
(
    df_customer
    # Keep only the columns this step needs (reduces memory use on large data).
    .select(["customer_id", "birth_day", "gender_cd", "age"])
    # Age bracket in steps of 10 years. Integer floor division keeps the
    # integer dtype, so no float round-trip / cast is needed.
    # Everything from 60 upward is folded into the 60s bracket; clipping covers
    # every age (including 110+) instead of relying on an explicit lookup table.
    .with_columns(
        ((pl.col("age") // 10) * 10).clip(upper_bound=60).alias("era")
    )
    # Concatenate the gender code and the age bracket (both as strings)
    # into the new category column.
    .with_columns(
        (pl.col("gender_cd").cast(pl.Utf8) + pl.col("era").cast(pl.Utf8)).alias("gender_era")
    )
    # The raw gender code and age are no longer needed.
    # Column names are passed positionally: this form is accepted by both older
    # and current polars releases, whereas the `columns=` keyword is not.
    .drop(["gender_cd", "age"])
    # Show the first 10 rows.
    .head(10)
)

customer_id,birth_day,era,gender_era
str,str,i64,str
"""CS021313000114…","""1981-04-29""",30,"""130"""
"""CS037613000071…","""1952-04-01""",60,"""960"""
"""CS031415000172…","""1976-10-04""",40,"""140"""
"""CS028811000001…","""1933-03-27""",60,"""160"""
"""CS001215000145…","""1995-03-29""",20,"""120"""
"""CS020401000016…","""1974-09-15""",40,"""040"""
"""CS015414000103…","""1977-08-09""",40,"""140"""
"""CS029403000008…","""1973-08-17""",40,"""040"""
"""CS015804000004…","""1931-05-02""",60,"""060"""
"""CS033513000180…","""1962-07-11""",50,"""150"""
