# 基本設定

In [1]:
import setting

import pandas as pd
import numpy as np
import math
import pathlib as Path

import scipy as sc
import sklearn
import pickle
import matplotlib.pyplot as plt
import japanize_matplotlib
import seaborn as sb
# Apply seaborn's default theme, passing the IPAexGothic font explicitly
# (presumably so Japanese labels render in seaborn-styled plots — confirm).
sb.set(font='IPAexGothic')

# Prefix used to name the artifacts written by this notebook
prefix = 'ana302'
# Directory that holds the input and output data files
# (`Path` is the alias given to the pathlib module in the imports)
data_path = Path.Path('../data')

# データ抽出
> 軸データ（ana103_base_smpl.pkl）\
> 顧客加工情報（ana202_fix_cus.pkl）

In [2]:
# NOTE: unpickling can execute arbitrary code, so only load trusted files here.
# Axis (base) sample table
base = pd.read_pickle(data_path.joinpath('ana103_base_smpl.pkl'))
# Processed customer information
df = pd.read_pickle(data_path.joinpath('ana202_fix_cus.pkl'))

In [3]:
# Sanity check: (rows, columns) of the axis table that drives the left join below.
base.shape

(3364, 3)

# データ結合

In [4]:
# Left (outer) join the customer information onto the axis table so that every
# row of `base` is kept, whether or not a matching customer record exists.
# `reset_index()` exposes df's index as ordinary columns before the join.
# validate='many_to_one' makes pandas raise MergeError if `customer_id_nys` is
# duplicated on the customer side; without it such duplicates would silently
# multiply rows and later produce a non-unique feature index.
_df = base.merge(
    df.reset_index(),
    on='customer_id_nys',
    how='left',
    validate='many_to_one',
)

In [5]:
# Number of distinct customer IDs after the join; compare with the row count of
# `base`. Note this confirms no ID was lost, but does not by itself detect
# duplicated rows.
_df['customer_id_nys'].nunique()

3364

# 特徴量作成

In [6]:
# Build the customer feature table: one-hot encode gender and age bracket,
# indexed by customer ID.
ftr_cus = (
    pd.concat(
        [
            # Customer ID (becomes the index below)
            _df['customer_id_nys'],
            # Dummy-encode gender and age bracket. The dtype is pinned to uint8
            # so the features are 0/1 integers on every pandas version
            # (pandas < 2.0 defaulted to uint8, pandas >= 2.0 defaults to bool).
            # Rows whose value is missing get all-zero dummies, because
            # dummy_na is left at its default.
            pd.get_dummies(_df[['gender', 'kbn_age']], dtype=np.uint8),
        ],
        axis=1,
    )
    .set_index('customer_id_nys')
    # Mark every explanatory variable with the "FTR_" prefix
    .add_prefix('FTR_')
)
ftr_cus

Unnamed: 0_level_0,FTR_gender_不明,FTR_gender_女性,FTR_gender_男性,FTR_kbn_age_10代,FTR_kbn_age_20代,FTR_kbn_age_30代,FTR_kbn_age_40代,FTR_kbn_age_50代,FTR_kbn_age_60代,FTR_kbn_age_70代,FTR_kbn_age_80代,FTR_kbn_age_90代
customer_id_nys,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CS005415000212,0,1,0,0,0,0,1,0,0,0,0,0
CS038314000063,1,0,0,0,0,0,1,0,0,0,0,0
CS028415000181,0,1,0,0,0,0,1,0,0,0,0,0
CS014514000027,0,1,0,0,0,0,0,1,0,0,0,0
CS025215000003,0,1,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
CS022514000068,0,1,0,0,0,0,0,1,0,0,0,0
CS012414000086,0,1,0,0,0,0,1,0,0,0,0,0
CS023514000029,0,1,0,0,0,0,0,1,0,0,0,0
CS025515000053,0,1,0,0,0,0,0,1,0,0,0,0


# データ保存

In [7]:
# Persist the customer feature table as "<prefix>_ftr_cus.pkl" in the data directory.
ftr_cus.to_pickle(data_path.joinpath('%s_ftr_cus.pkl' % prefix))