# 載入需要套件

In [4]:
import os
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset
from evaluation import Evaluation
from mlaas_tools.config_build import config_set
from db_connection.utils import get_conn
from utils import recommendation_all, load_w103, load_w106, load_cust_pop, create_all_feature_pairs, build_feature_tuples

# 確認環境設定

In [None]:
# Bootstrap the environment: when no 'config.ini' file exists in the working
# directory, call config_set() (presumably it generates one — confirm in mlaas_tools).
_config_missing = not os.path.isfile('config.ini')
if _config_missing:
    config_set()

# 連接後端資料庫

In [5]:
# Open the connection handle that is passed to the load_* helpers in later cells.
# 'edu' is the name given to get_conn (presumably a database/profile alias — confirm).
rawdata_conn = get_conn('edu')

read key file
login as edu-ding


# 設定參數

In [None]:
# Run parameters.
today = '2019-02-28'   # reference date handed to the loaders and to Evaluation
eval_duration = '1m'   # NOTE(review): not referenced by any later cell shown in this notebook
dimension = 128        # embedding size, used as LightFM(no_components=...)
epoch = 10             # number of training passes, used as model.fit(epochs=...)

# Query W103 顧客交易紀錄

In [6]:
# Fetch the W103 customer transaction records through load_w103, passing the
# reference date and the open connection, then display the resulting frame.
w103_df = load_w103(today, rawdata_conn)
w103_df

Unnamed: 0,cust_no,wm_prod_code,txn_dt,txn_amt,dta_src,deduct_cnt,etl_dt
0,0016boFyZaRofUFAYfXYTA==,2535,2018-04-16,5800000.0,,0.0,2018-11-12
1,008d2RbDuEfP8n5Dc/t20Q==,5906,2017-12-11,525944.0,,0.0,2018-09-25
2,00CC0bunwJX3jgJwycvHvQ==,2446,2017-09-08,11600000.0,,0.0,2018-09-25
3,00EEcEgHkpb6NGcolnAEog==,UF56,2017-09-28,1392000.0,,0.0,2018-09-25
4,00EEcEgHkpb6NGcolnAEog==,UF57,2017-09-28,1793012.0,,0.0,2018-09-25
...,...,...,...,...,...,...,...
1938646,ZzzqJZTdev3aK41clf5i1w==,FCA3,2018-06-21,876670.0,,0.0,2018-11-12
1938647,ZzzqJZTdev3aK41clf5i1w==,NN83,2017-09-12,402636.0,,0.0,2018-09-25
1938648,ZzzqJZTdev3aK41clf5i1w==,FE41,2018-04-12,928000.0,,0.0,2018-09-25
1938649,ZzzqJZTdev3aK41clf5i1w==,FE41,2018-04-12,928000.0,,0.0,2018-09-25


# Load cm_customer_m 顧客Features

In [7]:
# Load the cm_customer_m customer-feature table for the reference date.
# NOTE(review): the saved output shows a DataFrame, yet this cell has no trailing
# display expression — the output is presumably left over from an earlier version.
cm_customer_m_df = load_cust_pop(today, rawdata_conn)

Unnamed: 0,cust_no,data_dt,age,gender_code,gender_code1,gender_code2,gender_code0,cust_vintage,income_range_code,income_range_code1,income_range_code2,income_range_code3,income_range_code4,income_range_code0
0,0016boFyZaRofUFAYfXYTA==,2019-08-01,54.0,F,0.0,1.0,0.0,0.005474,2,0.0,1.0,0.0,0.0,0.0
1,008d2RbDuEfP8n5Dc/t20Q==,2019-01-02,53.0,F,0.0,1.0,0.0,0.005474,1,1.0,0.0,0.0,0.0,0.0
2,00CC0bunwJX3jgJwycvHvQ==,2019-08-01,48.0,M,1.0,0.0,0.0,0.005474,1,1.0,0.0,0.0,0.0,0.0
3,00EEcEgHkpb6NGcolnAEog==,2019-08-01,54.0,F,0.0,1.0,0.0,0.005474,3,0.0,0.0,1.0,0.0,0.0
4,00FmcV87beY8s7OI0Xjtkg==,2019-06-01,47.0,F,0.0,1.0,0.0,0.005474,2,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66301,zZWE1Zhl7DBlGFGBfXSSog==,2019-07-01,50.0,F,0.0,1.0,0.0,0.005474,1,1.0,0.0,0.0,0.0,0.0
66302,ZZXAkIEU4xzGE95pBRb6sg==,2017-07-01,69.0,F,0.0,1.0,0.0,0.005474,1,1.0,0.0,0.0,0.0,0.0
66303,ZzyAUxhNwdD/5F84b/+4ww==,2019-02-02,50.0,F,0.0,1.0,0.0,0.005474,1,1.0,0.0,0.0,0.0,0.0
66304,ZzYD6HPVyldQFpsaCR5/Hg==,2019-07-01,52.0,M,1.0,0.0,0.0,0.005474,2,0.0,1.0,0.0,0.0,0.0


# Intersection of w103 & cm_customer_m wrt cust_no

In [None]:
# Keep only the customers of cm_customer_m that also occur in the W103
# transactions, restricted to the columns used as user features.
# - isin() receives the de-duplicated cust_no values directly, so no Python list
#   with one entry per transaction row has to be built first.
# - one .loc[rows, cols] selection yields the same frame as slicing rows and
#   then re-assigning with the column subset.
_filter = cm_customer_m_df['cust_no'].isin(w103_df['cust_no'].unique())
_selected_col = ['cust_no', 'age', 'gender_code', 'cust_vintage', 'income_range_code']
cust_df_filter = cm_customer_m_df.loc[_filter, _selected_col]
cust_df_filter

# Load W106 基金Features

In [8]:
# Load the W106 fund (product) feature table; only the connection is passed,
# no reference date.
w106_df = load_w106(rawdata_conn)

# Intersection of w103 & w106 wrt wm_prod_code

In [12]:
# Keep only the W106 products that were actually traded in W103, restricted to
# the columns used as item features.
# - isin() receives the de-duplicated product codes directly, so no Python list
#   with one entry per transaction row has to be built first.
# - one .loc[rows, cols] selection yields the same frame as slicing rows and
#   then re-assigning with the column subset.
_filter = w106_df['wm_prod_code'].isin(w103_df['wm_prod_code'].unique())
_selected_col = ['wm_prod_code', 'prod_detail_type_code', 'prod_ccy', 'prod_risk_code', 'can_rcmd_ind']
w106_df_filter = w106_df.loc[_filter, _selected_col]
w106_df_filter

Unnamed: 0,wm_prod_code,prod_detail_type_code,prod_ccy,prod_risk_code,can_rcmd_ind
0,AAC4,FNDF,AUD,RR3,1
2,AF46,FNDF,EUR,RR4,0
3,AO14,FNDF,USD,RR2,1
5,GG29,FNDF,EUR,RR3,1
6,GG48,FNDF,USD,RR4,0
...,...,...,...,...,...
4160,3041,FNDD,TWD,RR3,1
4161,3305,FNDD,TWD,RR4,1
4162,3808,FNDD,TWD,RR4,1
4164,4826,FNDD,CNY,RR3,1


# 建立User features pairs

In [13]:

# Build the list of "column:value" user-feature strings from the filtered
# customer frame and display it.
# NOTE(review): the saved output lists data_dt:* pairs, but cust_df_filter as
# built in this notebook has no data_dt column — the output looks stale; re-run to confirm.
user_fts = create_all_feature_pairs(cust_df_filter)
user_fts

['data_dt:2019-08-01',
 'data_dt:2019-01-02',
 'data_dt:2019-06-01',
 'data_dt:2019-07-01',
 'data_dt:2019-04-01',
 'data_dt:2018-12-01',
 'data_dt:2019-05-01',
 'data_dt:2019-03-01',
 'data_dt:2018-11-02',
 'data_dt:2019-02-02',
 'data_dt:2018-10-02',
 'data_dt:2018-09-01',
 'data_dt:2018-03-01',
 'data_dt:2018-02-01',
 'data_dt:2018-08-01',
 'data_dt:2016-11-01',
 'data_dt:2017-02-02',
 'data_dt:2017-08-01',
 'data_dt:2018-01-02',
 'data_dt:2018-05-02',
 'data_dt:2016-12-01',
 'data_dt:2017-11-01',
 'data_dt:2018-06-05',
 'data_dt:2017-07-01',
 'data_dt:2017-01-01',
 'data_dt:2016-10-03',
 'data_dt:2017-10-01',
 'data_dt:2017-09-01',
 'data_dt:2017-12-01',
 'data_dt:2017-05-02',
 'data_dt:2017-06-01',
 'data_dt:2017-04-03',
 'data_dt:2018-04-01',
 'data_dt:2017-03-01',
 'data_dt:2018-07-01',
 'age:54.0',
 'age:53.0',
 'age:48.0',
 'age:47.0',
 'age:43.0',
 'age:25.0',
 'age:36.0',
 'age:52.0',
 'age:49.0',
 'age:39.0',
 'age:40.0',
 'age:27.0',
 'age:44.0',
 'age:50.0',
 'age:56.0',


# 建立Item features pairs

In [14]:
# Build the list of "column:value" item-feature strings from the filtered W106
# frame and display it.
# The lines below are sample strings of the wm_prod_code form kept from the
# original notes. NOTE(review): no wm_prod_code:* entry appears in the saved
# output — presumably create_all_feature_pairs skips the id column; confirm.
# wm_prod_code:2535
# wm_prod_code:JJ15
# wm_prod_code:5704
# wm_prod_code:KK41
# wm_prod_code:ED64

item_fts = create_all_feature_pairs(w106_df_filter)
item_fts 

['prod_detail_type_code:FNDF',
 'prod_detail_type_code:FNDD',
 'prod_ccy:AUD',
 'prod_ccy:EUR',
 'prod_ccy:USD',
 'prod_ccy:TWD',
 'prod_ccy:SGD',
 'prod_ccy:JPY',
 'prod_ccy:CNY',
 'prod_ccy:SEK',
 'prod_ccy:HKD',
 'prod_ccy:GBP',
 'prod_ccy:ZAR',
 'prod_ccy:CHF',
 'prod_ccy:NZD',
 'prod_ccy:CAD',
 'prod_risk_code:RR3',
 'prod_risk_code:RR4',
 'prod_risk_code:RR2',
 'prod_risk_code:RR5',
 'prod_risk_code:RR1',
 'can_rcmd_ind:1',
 'can_rcmd_ind:0']

# Fit dataset with LightFM datasets function

In [15]:
# Register every user id, item id and user/item feature name with a LightFM
# Dataset so that it can assign its internal index mappings.
dataset1 = Dataset()
dataset1.fit(
    users=w103_df['cust_no'].unique(),        # every customer seen in W103
    items=w103_df['wm_prod_code'].unique(),   # every product seen in W103
    user_features=user_fts,
    item_features=item_fts,
)

# 產生 user/item tuple

In [16]:
# Feature tuples pair an id with its features. LightFM accepts either
# (id, [feature names]) or (id, {feature name: feature weight}); the samples
# below use the list form:
#   ('AAC4', ['prod_ccy:AUD'])
#   ('AF46', ['prod_ccy:EUR'])
#   ('AO14', ['prod_ccy:USD'])
#   ('GG29', ['prod_ccy:EUR'])
#   ('GG48', ['prod_ccy:USD'])
# normalize=False asks the builders not to rescale each row's feature weights.
user_tuples = build_feature_tuples(cust_df_filter)
user_features = dataset1.build_user_features(user_tuples, normalize=False)
item_tuples = build_feature_tuples(w106_df_filter)
item_features = dataset1.build_item_features(item_tuples, normalize=False)

# Create user-item interaction & weight matrices

In [17]:
# Build the interaction matrix and the matching weight matrix from
# (customer, product, transaction amount) triples.
# Columns are selected by name rather than by position in w103_df.values, so the
# cell keeps working if load_w103 ever returns its columns in a different order,
# and the whole frame is not materialised as one object array first.
# NOTE(review): txn_amt is used unscaled as the interaction weight; the displayed
# sample holds values around 1e5-1e7 — confirm this magnitude is intended.
_interaction_cols = ['cust_no', 'wm_prod_code', 'txn_amt']
(interactions, weights) = dataset1.build_interactions(
    w103_df[_interaction_cols].itertuples(index=False, name=None)
)


In [18]:
## Fetch the Dataset's lookup tables, unpacked in the order the names suggest:
## user ids, user features, item ids, item features.
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()

# 決定是否使用 meta data

In [None]:
# Switches for using user / item meta data; both start disabled.
user_meta_ft = item_meta_ft = False

In [None]:
# Turn a falsy switch into None; a truthy value is left untouched.
# NOTE(review): neither name is read by any later cell visible in this notebook —
# model.fit is called with user_features / item_features unconditionally.
user_meta_ft = user_meta_ft or None
item_meta_ft = item_meta_ft or None

# 模型訓練

In [21]:
# Train a LightFM model with the WARP ranking loss on the interactions plus the
# user/item feature matrices.
# random_state pins the model's random number generator so that a fresh
# Restart & Run All reproduces the same fit (with the default single-threaded
# fit); without it every run yields a different evaluation score.
random_seed = 42
model = LightFM(no_components=dimension, loss='warp', random_state=random_seed)
model.fit(
    interactions,                  # sparse matrix: whether user u and item i interacted
    user_features=user_features,   # sparse user-feature matrix built in an earlier cell
    item_features=item_features,   # sparse item-feature matrix built in an earlier cell
    sample_weight=weights,         # sparse matrix: weight given to each user-item interaction
    epochs=epoch,
)

<lightfm.lightfm.LightFM at 0x7fad2d2caf90>

# 模型預測

In [22]:
# Announce the stage, then produce recommendations for every distinct customer
# that occurs in the W103 transactions.
print("Predicting...")
_cust_ids = w103_df['cust_no'].unique()
user_list = _cust_ids.tolist()
pred = recommendation_all(
    model, interactions, user_list, user_id_map, item_id_map
)

  0%|          | 0/72020 [00:00<?, ?it/s]

Predicting...


100%|██████████| 72020/72020 [02:20<00:00, 512.69it/s]


# Evaluation

In [24]:
# Score the recommendations with the project's Evaluation helper and report the result.
print("Evaluating Results...")
# NOTE(review): 5 is hard-coded here while the parameter cell defines
# eval_duration = '1m', which is never used — confirm which one is intended and its unit.
duration = 5
evaluation = Evaluation(today, pred, duration)
score = evaluation.results()
# The printed label calls the returned score "Mean Precision".
print(f'Today: {today} Mean Precision: {score}\n')

Evaluating Results...
read key file
login as edu-ding


100%|██████████| 72020/72020 [00:00<00:00, 674101.79it/s]

Today: 2019-02-28 Mean Precision: 0.1267981116356346




