# 基本設定

In [1]:
import setting

import pandas as pd
import numpy as np
import math
import pathlib as Path

import scipy as sc
import sklearn
import pickle
import matplotlib.pyplot as plt
import japanize_matplotlib
import seaborn as sb
# Apply seaborn's plot settings with IPAexGothic as the font
# (presumably so Japanese text renders in figures -- confirm the font is installed).
sb.set(font='IPAexGothic')

# Notebook-wide settings: the data directory (a relative path) and the
# prefix placed in front of the file name this notebook writes.
data_path, prefix = Path.Path('../data'), 'ana101'

# データ抽出
> 顧客情報（df_customer.pkl）
> 決済情報（df_receipt.pkl）

In [2]:
# Load the two input tables from the data directory.
# NOTE: read_pickle can execute arbitrary code; only load files from a trusted source.
# Customer master
df_customer = pd.read_pickle(data_path.joinpath('df_customer.pkl'))
# Payment (receipt) records
df_receipt = pd.read_pickle(data_path.joinpath('df_receipt.pkl'))

# 名寄せ処理
> 顧客名・住所（昇順）、合計金額（降順）、顧客ID（昇順）の順にソートし、同一の顧客名・住所のうち先頭の1件を代表（名寄せ先）として残す

In [3]:
# Attach each customer's total purchase amount, then keep a single
# representative row per (customer_name, address) pair.
key = ['customer_id']

_df = (
    df_customer[['customer_name', 'address', 'customer_id']]
    .merge(
        # Total receipt amount per customer; customer_id stays as the index
        # name, which merge() matches against via on=key.
        df_receipt.groupby(key)['amount'].sum().to_frame('sum_amt'),
        on=key,
        how='left',
    )
    # Customers without any receipt get NaN from the left join; count them as 0.
    .fillna(value={'sum_amt': 0})
    # Within each (name, address) group the highest total comes first,
    # ties broken by the smallest customer_id.
    .sort_values(
        by=['customer_name', 'address', 'sum_amt', 'customer_id'],
        ascending=[True, True, False, True],
    )
    # Remove duplicates: only the first (top-ranked) row of each group survives.
    .drop_duplicates(subset=['customer_name', 'address'], keep='first')
)

In [4]:
# Shapes of the full customer table and of the de-duplicated table, side by side.
print(*(frame.shape for frame in (df_customer, _df)))

(21971, 11) (21941, 4)


In [5]:
# Attach the representative (name-matched) customer ID to every customer row.
key = ['customer_name', 'address']

_cus = (
    df_customer
    .merge(
        # Lookup table: the join keys plus the surviving customer_id,
        # exposed under the name customer_id_nys.
        _df[key].assign(customer_id_nys=_df['customer_id']),
        on=key,
        how='left',
    )
    # Column order: customer_id_nys first, then the original columns as they were.
    .loc[:, ['customer_id_nys', *df_customer.columns]]
)

In [6]:
# Preview the first five rows of the result.
_cus.head(n=5)

Unnamed: 0,customer_id_nys,customer_id,customer_name,gender_cd,gender,birth_day,age,postal_cd,address,application_store_cd,application_date,status_cd
0,CS021313000114,CS021313000114,大野 あや子,1,女性,1981-04-29,37,259-1113,神奈川県伊勢原市粟窪**********,S14021,20150905,0-00000000-0
1,CS037613000071,CS037613000071,六角 雅彦,9,不明,1952-04-01,66,136-0076,東京都江東区南砂**********,S13037,20150414,0-00000000-0
2,CS031415000172,CS031415000172,宇多田 貴美子,1,女性,1976-10-04,42,151-0053,東京都渋谷区代々木**********,S13031,20150529,D-20100325-C
3,CS028811000001,CS028811000001,堀井 かおり,1,女性,1933-03-27,86,245-0016,神奈川県横浜市泉区和泉町**********,S14028,20160115,0-00000000-0
4,CS001215000145,CS001215000145,田崎 美紀,1,女性,1995-03-29,24,144-0055,東京都大田区仲六郷**********,S13001,20170605,6-20090929-2


# データ保存

In [7]:
# Persist the customer table with the name-matched IDs as "<prefix>_df_customer_nys.pkl".
_cus.to_pickle(data_path.joinpath('%s_df_customer_nys.pkl' % prefix))