## knock 069 集計結果から割合を計算する

In [7]:
import polars as pl
pl.Config.set_tbl_cols(-1)# 列の表示が省略されないようにする
import polars.selectors as cs# 抽出条件のプリセット

### データを読み込む

In [8]:
df_receipt = pl.read_csv("../docker/work/data/receipt.csv")
display(df_receipt.head())

df_product = pl.read_csv("../docker/work/data/product.csv")
display(df_product.head())

sales_ymd,sales_epoch,store_cd,receipt_no,receipt_sub_no,customer_id,product_cd,quantity,amount
i64,i64,str,i64,i64,str,str,i64,i64
20181103,1541203200,"""S14006""",112,1,"""CS006214000001…","""P070305012""",1,158
20181118,1542499200,"""S13008""",1132,2,"""CS008415000097…","""P070701017""",1,81
20170712,1499817600,"""S14028""",1102,1,"""CS028414000014…","""P060101005""",1,170
20190205,1549324800,"""S14042""",1132,1,"""ZZ000000000000…","""P050301001""",1,25
20180821,1534809600,"""S14025""",1102,2,"""CS025415000050…","""P060102007""",1,90


product_cd,category_major_cd,category_medium_cd,category_small_cd,unit_price,unit_cost
str,i64,i64,i64,i64,i64
"""P040101001""",4,401,40101,198,149
"""P040101002""",4,401,40101,218,164
"""P040101003""",4,401,40101,230,173
"""P040101004""",4,401,40101,248,186
"""P040101005""",4,401,40101,268,201


### ノック

In [9]:
(
    df_receipt
    # 使用する列を抽出する(大規模データの場合メモリ使用量を節約する効果がある)
    .select(
        pl.col("customer_id"),
        pl.col("product_cd"),
        pl.col("amount")
    )
    # 売上明細に商品カテゴリ大区分を追加する
    .join(
        other = df_product[["product_cd", "category_major_cd"]],
        on = "product_cd",
        how = "left"
    )
    # 顧客別の売上高と商品カテゴリ大区分7(瓶詰め缶詰)の売上高を計算する
    .group_by(by = "customer_id")
    .agg([
        pl.col("amount").sum().alias("sum_all"),
        pl.col("amount").filter( pl.col("category_major_cd") == 7 ).sum().alias("sum_a7")
    ])
    # 商品カテゴリ大区分7(瓶詰め缶詰)の売上高比率を計算する
    .with_columns(
        (pl.col("sum_a7") / pl.col("sum_all")).alias("sales_rate")
    )
    # 先頭10行
    .head(n = 10)
)

customer_id,sum_all,sum_a7
str,i64,i64
"""CS002413000071…",198,198
"""CS004415000607…",200,200
"""CS007515000232…",5605,2136
"""CS023515000126…",2079,218
"""CS040513000111…",968,0
"""CS038515000114…",3164,1076
"""CS029214000018…",1685,1087
"""CS015415000018…",8218,5245
"""CS019214000005…",4396,3692
"""CS004115000010…",4830,2650
