In [90]:
import numpy as np
import pandas as pd
import datetime

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

## 依照參數回傳每人的資料 
輸入格式(資料名(combined), 年, 季(1 -> 1~3月 ...))

In [2]:
def get_season(combined, y, s):
    if s == 4:
        lower = str(y)+'-10-01'
        upper = str(y+1)+'-01-01'
    elif s == 3:
        lower = str(y)+'-07-01'
        upper = str(y)+'-10-01'
    elif s == 1:
        lower = str(y)+'-01-01'
        upper = str(y)+'-04-01'
    elif s == 2:
        lower = str(y)+'-04-01'
        upper = str(y)+'-07-01'
    else:
        print('s must be 1, 2, 3 or 4')
        return 0
    print(lower, upper)
    
    ids = combined['UnifiedUserId'].unique()
    combined = combined[(combined['OrderDateTime'] < upper) & (combined['OrderDateTime'] > lower)]
  
    lst = []
    for i in ids:
        ele = {'UnifiedUserId': i,
               'MemberCardLevel': 0,
               'Gender': 'Ungiven',
               'Age': 0,
               'order': 0, #本季主單數
               'orderSlave': 0, #本季子單數
               'low': 0, #本季低價位商品購買數
               'normal': 0, #本季中價位商品購買數
               'high': 0, #本季高價位商品購買數
               'sales': 0,# 本季總消費
               'SalePageId':""
                }
        lst.append(ele)
        
    re = pd.DataFrame(lst).set_index('UnifiedUserId').copy()
    
    for index, r in combined.iterrows():
        idx = r['UnifiedUserId']
        re.loc.__setitem__((idx, ('Gender')), r['Gender'])
        re.loc.__setitem__((idx, ('MemberCardLevel')), r['MemberCardLevel'])
        re.loc.__setitem__((idx, ('Age')), int((datetime.datetime.today() - datetime.datetime.strptime(r['Birthday'], '%Y-%m-%d')).days/365))
        re.loc.__setitem__((idx, ('orderSlave')), r['TsCount'] + re.loc[idx]['orderSlave'])
        re.loc.__setitem__((idx, ('order')), 1 + re.loc[idx]['order'])
        re.loc.__setitem__((idx, ('sales')), r['TotalSalesAmount'] + re.loc[idx]['sales'])
        re.loc.__setitem__((idx, ('high')), r['high'] + re.loc[idx]['high'])
        re.loc.__setitem__((idx, ('normal')), r['normal'] + re.loc[idx]['normal'])
        re.loc.__setitem__((idx, ('low')), r['low'] + re.loc[idx]['low'])
        re.loc.__setitem__((idx, ('SalePageId')), r['SalePageId'] + "/" + re.loc[idx]['SalePageId'])
        
    re['avgSales'] = re['sales']/re['order']
                    
    return re

## 讀取資料

In [3]:
member = pd.read_csv('91APP_MemberData.csv')
order = pd.read_csv('91APP_OrderData.csv')
product = pd.read_excel("user_tag.xlsx").dropna()

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
member = member[['UnifiedUserId', 'Gender', 'Birthday', 'MemberCardLevel']]
member.head(3)

Unnamed: 0,UnifiedUserId,Gender,Birthday,MemberCardLevel
0,wDnLYnQDE5Nt/TBttarrEw==,Female,1987-11-12,10
1,PfTmZ2HBNiyYJrv0kLZpnw==,Female,1990-09-02,10
2,MajPkebcNSUBGwNtJOjVMA==,Female,1985-09-08,10


In [5]:
order = order[['TradesGroupCode', 'OrderDateTime', 'TsCount', 'Qty', 'TotalSalesAmount']]
order.head(3)

Unnamed: 0,TradesGroupCode,OrderDateTime,TsCount,Qty,TotalSalesAmount
0,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,2016-04-29 09:56:00,1,-1,-100.0
1,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,2016-04-29 09:56:00,2,2,1284.0
2,VTI/4oAuHAlVolhrBpXzm/k7eAzdN4fsM2NjCE6YLEI=,2016-04-29 11:04:00,1,1,1184.0


In [6]:
product.head(3)

Unnamed: 0.1,Unnamed: 0,TradesGroupCode,SalePageId
0,2.0,Esf9q59OO5QVU6Rapl8m7t8Lyu1MshueQOEyRRXEwU8=,小白鞋
1,5.0,wTDcVew4UUsw/a9ag6EtoIKd4dp+bPyvV3n7jJZag0w=,菱格紋包
2,6.0,3uq44BE2pVcqbv+OgCeNtBQycM8O7Ncy6cqzWo/Wbew=,鞋用配件


In [7]:
df = pd.read_csv('data.csv')[['UnifiedUserId', 'TradesGroupCode', 'low', 'normal', 'high']]

In [8]:
df.head(3)

Unnamed: 0,UnifiedUserId,TradesGroupCode,low,normal,high
0,xjOctGt+CvE/+soyAS+LVA==,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,1,1,0
1,Agoqyv2QhggU0nmPX6tVJA==,VTI/4oAuHAlVolhrBpXzm/k7eAzdN4fsM2NjCE6YLEI=,0,1,0
2,xvZqF1Tlc8cGPJQwyRT24A==,BmdhnTN8wcHjLbMp8O3awDOib/WiGtv9yazSs3H5Jnk=,1,0,0


## 在每個主單中加入 1)高價位單品數 2)中價位單品數 3)低價位的單品數

所有子單單價的前25%為高價位，中間50%為中價位，後25%為低價位 (不含0圓商品)

In [9]:
combine = pd.merge(order, df, left_on = 'TradesGroupCode', right_on = 'TradesGroupCode', how = 'left')

In [10]:
combine.head(3)

Unnamed: 0,TradesGroupCode,OrderDateTime,TsCount,Qty,TotalSalesAmount,UnifiedUserId,low,normal,high
0,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,2016-04-29 09:56:00,1,-1,-100.0,xjOctGt+CvE/+soyAS+LVA==,1.0,1.0,0.0
1,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,2016-04-29 09:56:00,2,2,1284.0,xjOctGt+CvE/+soyAS+LVA==,1.0,1.0,0.0
2,VTI/4oAuHAlVolhrBpXzm/k7eAzdN4fsM2NjCE6YLEI=,2016-04-29 11:04:00,1,1,1184.0,Agoqyv2QhggU0nmPX6tVJA==,0.0,1.0,0.0


In [11]:
combined = pd.merge(combine, member, left_on = 'UnifiedUserId', right_on = 'UnifiedUserId', how = 'left')

In [12]:
combined = combined[combined['Qty'] >= 0]

In [13]:
combined.head()

Unnamed: 0,TradesGroupCode,OrderDateTime,TsCount,Qty,TotalSalesAmount,UnifiedUserId,low,normal,high,Gender,Birthday,MemberCardLevel
1,5uPmVe+VOTmLhDIRYqZ0X9WEZkAr2lq1GaDvxM2C/fw=,2016-04-29 09:56:00,2,2,1284.0,xjOctGt+CvE/+soyAS+LVA==,1.0,1.0,0.0,,1900-01-01,10.0
2,VTI/4oAuHAlVolhrBpXzm/k7eAzdN4fsM2NjCE6YLEI=,2016-04-29 11:04:00,1,1,1184.0,Agoqyv2QhggU0nmPX6tVJA==,0.0,1.0,0.0,,0001-01-01,10.0
3,BmdhnTN8wcHjLbMp8O3awDOib/WiGtv9yazSs3H5Jnk=,2016-04-29 11:05:00,1,1,1184.0,xvZqF1Tlc8cGPJQwyRT24A==,1.0,0.0,0.0,,1900-01-01,10.0
4,AlDaYlS/Oda/FRWZqGNt1kV4N6UhQ0tNbA2y7lmS5pI=,2016-05-02 10:29:00,1,1,1080.0,R+nvCLvUXeVCC4OYsSjSzA==,0.0,1.0,0.0,Female,1984-06-05,10.0
5,LZqYjv3x9IzUf5jNUtFz/3vijWR+KcTn3RzfTQF2yXs=,2016-05-03 10:29:00,2,2,1774.0,ftOkl2eeDxYeBcSZX3UiTA==,1.0,1.0,0.0,Female,0076-09-25,10.0


In [14]:
l = []
for i in combined['OrderDateTime']:
    l.append(i.split()[0])
combined['OrderDateTime'] = l

In [15]:
ProductTag = list(product["SalePageId"].unique())
print(ProductTag)

['小白鞋', '菱格紋包', '鞋用配件', '短靴', '拖鞋', '贈品', '跟鞋', '婚鞋', '膝靴', 'NG鞋', '身體保養', '鞋墊', '老爹鞋', '涼拖鞋', '穆勒鞋', '牛津鞋', '日用品', '厚底鞋', '尖頭鞋', '帆布鞋', '涼鞋', '長靴', '襪靴', '便鞋', '平底鞋', '小包', '皮夾x卡夾x零錢包', '紳士鞋', '服裝衣物', '懶人鞋', '休閒鞋', '包鞋', '飾品配件', '黛妃包', '包用配件', '凱莉包', '高跟鞋', '草編鞋', '踝靴', '雪靴', '水桶包', '餅乾鞋', '軍靴', '提袋', '尖頭靴', '馬丁靴', '肩背包', '娃娃鞋', '腰包', '低跟鞋', '多功能包', '方包', '雨靴', '方頭鞋', '後背包', '休閒包', '莫卡辛鞋', '劍橋包', '鞋拔', '相機包', 'NG包', '編織包', '球鞋', '腋下包', '樂福鞋', '柏金包', '童鞋', '托特包', '流蘇包', '摺疊鞋', '藤編包', '馬鞍包', '機車靴', '機車包', '工程靴', '慢跑鞋', '帆船鞋', '鍊條包', '拓特包', '短鞋', '跟靴', '蛋糕鞋', '豆豆鞋', '艾瑪包', '情侶鞋', '運動鞋', '瑪麗珍鞋']


In [16]:
combinedWithProduct = product.merge( combined, on = 'TradesGroupCode', how = 'left')

In [17]:
combinedWithProduct.describe()

Unnamed: 0.1,Unnamed: 0,TsCount,Qty,TotalSalesAmount,low,normal,high,MemberCardLevel
count,731083.0,731083.0,731083.0,731083.0,731083.0,731083.0,731083.0,731083.0
mean,1062197.0,2.920901,3.066633,2419.015292,1.587338,1.068906,0.35808,14.084912
std,278483.1,2.221187,2.610242,1578.888564,1.86034,0.975659,0.691588,7.352791
min,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
25%,829534.5,1.0,1.0,1480.0,0.0,0.0,0.0,10.0
50%,1065257.0,2.0,2.0,1943.0,1.0,1.0,0.0,10.0
75%,1294772.0,4.0,4.0,3000.0,2.0,1.0,1.0,20.0
max,1542842.0,33.0,61.0,50519.0,26.0,17.0,12.0,40.0


In [161]:
def AgeCoding(df):
    df.loc[df.Age < 21,'Age']=1
    df.loc[(df.Age > 20) & (df.Age < 31),'Age'] = 2
    df.loc[(df.Age > 30) & (df.Age < 41),'Age'] = 3
    df.loc[(df.Age > 40) & (df.Age < 51),'Age'] = 4
    df.loc[(df.Age > 50) & (df.Age < 61),'Age'] = 5
    df.loc[df.Age > 60,'Age'] = 6
    return df

def GetTag(df, TagList): ##幫每個Tag做Coding
    Dic = {}
    for i in TagList:    
        CodingList = []
        for j in df["SalePageId"]:
            words = j.split("/")
            count = 0
            for k in words:
                if k == i:
                    CodingList.append(1)
                    count += 1
                    break
            if count == 0:
                CodingList.append(0)
        Dic[i] = CodingList
    return Dic

def ModelTraining(df, tags): ##對每個Tag做Training
    score = 0
    training_scores = []
    acc_scores = []
    for i in tags.keys():
        print("這是 " + i + " 的分類器")
        df_c = df.copy()
        df_c[i] = tags[i]
        
        ListFilyer = ["ReviseMemberCardLevel", "Age", "order", "orderSlave", "low", "normal", "high", "SalesLog"]
        ListFilyer.append(i)
        
        train_data, test_data = train_test_split(df_c[ListFilyer], random_state=777, train_size=0.9)
        ListFilyer.pop()
        X_train = train_data[ListFilyer]
        Y_train = train_data[i]
        X_test = test_data[ListFilyer]
        Y_test = test_data[i]
        
        RF_model = RandomForestClassifier(n_estimators = 100)
        RF_model = RF_model.fit(X_train, Y_train)
        train_score = RF_model.score(X_train, Y_train)
        print("Training score :", train_score)

        Y_predict = RF_model.predict(X_test)
        print('準確率 :', accuracy_score(Y_test, Y_predict))

        conf_mat = confusion_matrix(Y_test, Y_predict)
        print(conf_mat)
        if train_score == 1:
            score += 1
        else:
            training_scores = training_scores + [train_score]
            acc_scores = acc_scores + [accuracy_score(Y_test, Y_predict)]
    return score, np.mean(training_scores), np.mean(acc_scores)

def Backtesting(df, tags, forwardtest_data, test_tags): ##對每個Tag做Training
    score = 0
    acc_scores = []
    for i in tags.keys():
        print("這是 " + i + " 的分類器")
        df_c = df.copy()
        forwardtest_data_c = forwardtest_data.copy() # added for forwardtest
        df_c[i] = tags[i]
        forwardtest_data_c[i] = test_tags[i] # added for forwardtest
        
        ListFilyer = ["ReviseMemberCardLevel", "Age", "order", "orderSlave", "low", "normal", "high", "SalesLog"]
        ListFilyer.append(i)
        
        train_data, test_data = train_test_split(df_c[ListFilyer], random_state=777, train_size=0.9)
        forwardtest_data_c = forwardtest_data_c[ListFilyer] # added for forwardtest
        ListFilyer.pop()
        X_train = train_data[ListFilyer]
        Y_train = train_data[i]
        X_test = test_data[ListFilyer]
        Y_test = test_data[i]
        X_forward = forwardtest_data_c[ListFilyer] # added for forwardtest
        Y_forward = forwardtest_data_c[i] # added for forwardtest
        
        RF_model = RandomForestClassifier(n_estimators = 100)
        RF_model = RF_model.fit(X_train, Y_train)
#         train_score = RF_model.score(X_train, Y_train)
#         print("Training score :", train_score)

#         Y_predict = RF_model.predict(X_test)
#         print('準確率 :', accuracy_score(Y_test, Y_predict))
        Y_predict = RF_model.predict(X_forward)  # added for forwardtest
        print('後兩年驗證準確率 :', accuracy_score(Y_forward, Y_predict))  # added for forwardtest
        
        conf_mat = confusion_matrix(Y_forward, Y_predict)
        print(conf_mat)

        acc_scores = acc_scores + [accuracy_score(Y_forward, Y_predict)]
    return  np.mean(acc_scores)

def Predicting(df, tags, predict_data,num): ##對每個Tag做Training
    score = 0
    item = []
    print('第' + str(num+1) +'位會員',predict_data.index.to_list()[num])
    for i in tags.keys():
        df_c = df.copy()
        df_c[i] = tags[i]

        ListFilyer = ["ReviseMemberCardLevel", "Age", "order", "orderSlave", "low", "normal", "high", "SalesLog"]
        ListFilyer.append(i)
        
        train_data, test_data = train_test_split(df_c[ListFilyer], random_state=777, train_size=0.9)
        ListFilyer.pop()
        X_train = train_data[ListFilyer]
        Y_train = train_data[i]

        RF_model = RandomForestClassifier(n_estimators = 100)
        RF_model = RF_model.fit(X_train, Y_train)
        train_score = RF_model.score(X_train, Y_train)

        X_predict = predict_data.copy()
        X_predict = X_predict[ListFilyer].iloc[num].to_numpy().reshape(1,-1)
        Y_predict = RF_model.predict(X_predict)
        if Y_predict == 1 and train_score != 1:
            if train_score > score:
                item = i
                score = train_score
    print('推薦商品 :', item)  
    print("Training score :", score)


### Season 1 (1-3月)(2014-2020)

In [19]:
result = get_season(combinedWithProduct, 2014, 1)
for i in range(2015, 2021,1):
    a = get_season(combinedWithProduct, i, 1)
    result = pd.concat([result, a])
ResultSeason1 = result.dropna()

2014-01-01 2014-04-01
2015-01-01 2015-04-01
2016-01-01 2016-04-01
2017-01-01 2017-04-01
2018-01-01 2018-04-01
2019-01-01 2019-04-01
2020-01-01 2020-04-01


In [20]:
ResultSeason1 = AgeCoding(ResultSeason1)
ResultSeason1["SalesLog"] = np.log(ResultSeason1["sales"])
ResultSeason1["ReviseMemberCardLevel"] = ResultSeason1["MemberCardLevel"] / 10

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ResultSeason1["SalesLog"] = np.log(ResultSeason1["sales"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ResultSeason1["ReviseMemberCardLevel"] = ResultSeason1["MemberCardLevel"] / 10


In [158]:
TagDictionary = GetTag(ResultSeason1, ProductTag)

In [146]:
NumberOfScore1, avg_train_score, avg_acc_score = ModelTraining(ResultSeason1, TagDictionary)

這是 小白鞋 的分類器
Training score : 0.9033115157799235
準確率 : 0.8145772594752186
[[2565  198]
 [ 438  229]]
這是 菱格紋包 的分類器
Training score : 1.0
準確率 : 1.0
[[3430]]
這是 鞋用配件 的分類器
Training score : 0.9875575140950036
準確率 : 0.8991253644314868
[[2828  153]
 [ 193  256]]
這是 短靴 的分類器
Training score : 0.9468926187544553
準確率 : 0.8597667638483965
[[2797  161]
 [ 320  152]]
這是 拖鞋 的分類器
Training score : 0.9990603330957164
準確率 : 0.9941690962099126
[[3400    3]
 [  17   10]]
這是 贈品 的分類器
Training score : 0.9608904153975764
準確率 : 0.878134110787172
[[2410  226]
 [ 192  602]]
這是 跟鞋 的分類器
Training score : 0.9742077635927678
準確率 : 0.9469387755102041
[[3237   38]
 [ 144   11]]
這是 婚鞋 的分類器
Training score : 0.9919318255459789
準確率 : 0.9836734693877551
[[3372   10]
 [  46    2]]
這是 膝靴 的分類器
Training score : 0.9758278789449809
準確率 : 0.9472303206997085
[[3219   46]
 [ 135   30]]
這是 NG鞋 的分類器
Training score : 0.9991899423238935
準確率 : 0.9941690962099126
[[3408    5]
 [  15    2]]
這是 身體保養 的分類器
Training score : 0.9699954636770138
準確率 

In [147]:
print("Average Training Score: ", avg_train_score)
print("Average Accuracy Score: ", avg_acc_score)

print("Training Score為1的Tag數:", NumberOfScore1)
print("樣本數不足的Tag")
for i in TagDictionary.keys():
    if sum(TagDictionary[i]) <= 3:
        print(i)

Average Training Score:  0.9902046025677029
Average Accuracy Score:  0.9735179786200194
Training Score為1的Tag數: 15
樣本數不足的Tag
菱格紋包
帆布鞋
黛妃包
餅乾鞋
尖頭靴
馬丁靴
方頭鞋
劍橋包
腋下包
藤編包
鍊條包
拓特包
跟靴
蛋糕鞋
豆豆鞋
艾瑪包
瑪麗珍鞋


## 後兩年驗證Season 1 (1-3月)(2021-2022)

In [162]:
test = get_season(combinedWithProduct, 2021, 1)
for i in range(2022, 2023,1):
    a = get_season(combinedWithProduct, i, 1)
    test = pd.concat([test, a])
TestSeason1 = test.dropna()

TestSeason1 = AgeCoding(TestSeason1)
TestSeason1["SalesLog"] = np.log(TestSeason1["sales"])
TestSeason1["ReviseMemberCardLevel"] = TestSeason1["MemberCardLevel"] / 10

TestTagDictionary = GetTag(TestSeason1, ProductTag)

2021-01-01 2021-04-01
2022-01-01 2022-04-01


In [163]:
avg_acc = Backtesting(ResultSeason1, TagDictionary, TestSeason1, TestTagDictionary)
print("Average Accuracy Score: ", avg_acc)

這是 小白鞋 的分類器
後兩年驗證準確率 : 0.8058973901670531
[[13076  1239]
 [ 2026   480]]
這是 菱格紋包 的分類器
後兩年驗證準確率 : 0.9933416562630045
[[16709     0]
 [  112     0]]
這是 鞋用配件 的分類器
後兩年驗證準確率 : 0.8764044943820225
[[14239  1471]
 [  608   503]]
這是 短靴 的分類器
後兩年驗證準確率 : 0.7490042209143333
[[11848  1370]
 [ 2852   751]]
這是 拖鞋 的分類器
後兩年驗證準確率 : 0.9966113786338505
[[16764    12]
 [   45     0]]
這是 贈品 的分類器
後兩年驗證準確率 : 0.8635039533915938
[[9970  762]
 [1534 4555]]
這是 跟鞋 的分類器
後兩年驗證準確率 : 0.9423934367754593
[[15836   250]
 [  719    16]]
這是 婚鞋 的分類器
後兩年驗證準確率 : 0.9756851554604363
[[16411   130]
 [  279     1]]
這是 膝靴 的分類器
後兩年驗證準確率 : 0.9407883003388622
[[15741   558]
 [  438    84]]
這是 NG鞋 的分類器
後兩年驗證準確率 : 0.987634504488437
[[16601    44]
 [  164    12]]
這是 身體保養 的分類器
後兩年驗證準確率 : 0.9282444563343439
[[15357   743]
 [  464   257]]
這是 鞋墊 的分類器
後兩年驗證準確率 : 0.881873848165983
[[14288   879]
 [ 1108   546]]
這是 老爹鞋 的分類器
後兩年驗證準確率 : 0.9710480946436003
[[16330    39]
 [  448     4]]
這是 涼拖鞋 的分類器
後兩年驗證準確率 : 0.9500624219725343
[[15973    90]
 [  

### 2021 Season 1 預測

In [164]:
pre = get_season(combinedWithProduct, 2021, 1)
PredictSeason1 = pre.dropna()

PredictSeason1 = AgeCoding(PredictSeason1)
PredictSeason1["SalesLog"] = np.log(PredictSeason1["sales"])
PredictSeason1["ReviseMemberCardLevel"] = PredictSeason1["MemberCardLevel"] / 10


2021-01-01 2021-04-01


In [165]:
PredictSeason1.head(3)

Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
183befUhGBlsXXVQsFUqmQ==,30,Female,3,4,16,4,8,4,21384,短靴/帆布鞋/帆布鞋/贈品/,5346.0,9.970398,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,9,29,15,14,0,23090,襪靴/襪靴/贈品/贈品/尖頭鞋/方包/服裝衣物/尖頭鞋/小白鞋/,2565.555556,10.047155,3.0
AlyUS7LorP1qdJwahdia0w==,20,Female,3,1,1,0,1,0,1406,涼鞋/,1406.0,7.248504,2.0


In [166]:
Predicting(ResultSeason1, TagDictionary, PredictSeason1,0)

第1位會員 183befUhGBlsXXVQsFUqmQ==
推薦商品 : 膝靴
Training score : 0.9758278789449809


In [167]:
Predicting(ResultSeason1, TagDictionary, PredictSeason1,1)


第2位會員 5aRn4GBf1opsUUHmYIKYUQ==
推薦商品 : 贈品
Training score : 0.9610200246257533


In [168]:
Predicting(ResultSeason1, TagDictionary, PredictSeason1,2)

第3位會員 AlyUS7LorP1qdJwahdia0w==
推薦商品 : []
Training score : 0


### Season 2 (4-6月)(2014-2019)

In [29]:
result = get_season(combinedWithProduct, 2014, 2)
for i in range(2015, 2020,1):
    a = get_season(combinedWithProduct, i, 2)
    result = pd.concat([result, a])
ResultSeason2 = result.dropna()

2014-04-01 2014-07-01
2015-04-01 2015-07-01
2016-04-01 2016-07-01
2017-04-01 2017-07-01
2018-04-01 2018-07-01
2019-04-01 2019-07-01


In [94]:
ResultSeason2 = AgeCoding(ResultSeason2)
ResultSeason2["SalesLog"] = np.log(ResultSeason2["sales"])
ResultSeason2["ReviseMemberCardLevel"] = ResultSeason2["MemberCardLevel"] / 10


In [169]:
TagDictionary = GetTag(ResultSeason2, ProductTag)

In [150]:
NumberOfScore2 , avg_train_score, avg_acc_score= ModelTraining(ResultSeason2, TagDictionary)

這是 小白鞋 的分類器
Training score : 0.8714864268815911
準確率 : 0.7972582972582972
[[1913  152]
 [ 410  297]]
這是 菱格紋包 的分類器
Training score : 1.0
準確率 : 1.0
[[2772]]
這是 鞋用配件 的分類器
Training score : 0.9882112354144111
準確率 : 0.9325396825396826
[[2456   70]
 [ 117  129]]
這是 短靴 的分類器
Training score : 0.9897349532860179
準確率 : 0.9837662337662337
[[2724   10]
 [  35    3]]
這是 拖鞋 的分類器
Training score : 0.999759412967641
準確率 : 0.9989177489177489
[[2769    2]
 [   1    0]]
這是 贈品 的分類器
Training score : 0.975500220538113
準確率 : 0.9166666666666666
[[2076  134]
 [  97  465]]
這是 跟鞋 的分類器
Training score : 0.964914391114319
準確率 : 0.9386724386724387
[[2597   32]
 [ 138    5]]
這是 婚鞋 的分類器
Training score : 0.9902963230281888
準確率 : 0.9844877344877345
[[2726    9]
 [  34    3]]
這是 膝靴 的分類器
Training score : 0.9993985324191026
準確率 : 0.9989177489177489
[[2769    0]
 [   3    0]]
這是 NG鞋 的分類器
Training score : 1.0
準確率 : 1.0
[[2772]]
這是 身體保養 的分類器
Training score : 0.991940334415975
準確率 : 0.9628427128427128
[[2643   28]
 [  75   26]]
這是 

In [151]:
print("Average Training Score: ", avg_train_score)
print("Average Accuracy Score: ", avg_acc_score)

print("Training Score為1的Tag數:", NumberOfScore2)
print("樣本數不足的Tag")
for i in TagDictionary.keys():
    if sum(TagDictionary[i]) <= 3:
        print(i)

Average Training Score:  0.9868103980163211
Average Accuracy Score:  0.9739154214960666
Training Score為1的Tag數: 25
樣本數不足的Tag
菱格紋包
NG鞋
老爹鞋
帆布鞋
黛妃包
包用配件
凱莉包
餅乾鞋
尖頭靴
馬丁靴
腰包
方頭鞋
休閒包
劍橋包
相機包
NG包
腋下包
機車靴
拓特包
短鞋
跟靴
蛋糕鞋
豆豆鞋
艾瑪包
情侶鞋
瑪麗珍鞋


## 後兩年驗證Season 2 (4-6月)(2020-2021)

In [170]:
test = get_season(combinedWithProduct, 2020, 2)
for i in range(2021, 2022, 1):
    a = get_season(combinedWithProduct, i, 2)
    test = pd.concat([test, a])
TestSeason2 = test.dropna()

TestSeason2 = AgeCoding(TestSeason2)
TestSeason2["SalesLog"] = np.log(TestSeason2["sales"])
TestSeason2["ReviseMemberCardLevel"] = TestSeason2["MemberCardLevel"] / 10

TestTagDictionary = GetTag(TestSeason2, ProductTag)

avg_acc = Backtesting(ResultSeason2, TagDictionary, TestSeason2, TestTagDictionary)
print("Average Accuracy Score: ", avg_acc)



2020-04-01 2020-07-01
2021-04-01 2021-07-01
這是 小白鞋 的分類器
後兩年驗證準確率 : 0.7348167539267015
[[10354  2322]
 [ 1730   874]]
這是 菱格紋包 的分類器
後兩年驗證準確率 : 0.9981020942408377
[[15251     0]
 [   29     0]]
這是 鞋用配件 的分類器
後兩年驗證準確率 : 0.8732329842931937
[[12854  1154]
 [  783   489]]
這是 短靴 的分類器
後兩年驗證準確率 : 0.9662303664921466
[[14755   194]
 [  322     9]]
這是 拖鞋 的分類器
後兩年驗證準確率 : 0.993324607329843
[[15178     5]
 [   97     0]]
這是 贈品 的分類器
後兩年驗證準確率 : 0.8518979057591624
[[8816  780]
 [1483 4201]]
這是 跟鞋 的分類器
後兩年驗證準確率 : 0.9390052356020943
[[14321   274]
 [  658    27]]
這是 婚鞋 的分類器
後兩年驗證準確率 : 0.9762434554973822
[[14911   200]
 [  163     6]]
這是 膝靴 的分類器
後兩年驗證準確率 : 0.9986910994764397
[[15260     8]
 [   12     0]]
這是 NG鞋 的分類器
後兩年驗證準確率 : 0.9892015706806283
[[15115     0]
 [  165     0]]
這是 身體保養 的分類器
後兩年驗證準確率 : 0.9060209424083769
[[13532   879]
 [  557   312]]
這是 鞋墊 的分類器
後兩年驗證準確率 : 0.8710078534031414
[[12722  1311]
 [  660   587]]
這是 老爹鞋 的分類器
後兩年驗證準確率 : 0.9948298429319372
[[15201     0]
 [   79     0]]
這是 涼拖鞋 的分類器
後兩年驗

### Season 2 預測

In [171]:
pre = get_season(combinedWithProduct, 2021, 1)
PredictSeason2 = pre.dropna()

PredictSeason2 = AgeCoding(PredictSeason2)
PredictSeason2["SalesLog"] = np.log(PredictSeason2["sales"])
PredictSeason2["ReviseMemberCardLevel"] = PredictSeason2["MemberCardLevel"] / 10


2021-01-01 2021-04-01


In [172]:
PredictSeason2.head(3)

Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
183befUhGBlsXXVQsFUqmQ==,30,Female,3,4,16,4,8,4,21384,短靴/帆布鞋/帆布鞋/贈品/,5346.0,9.970398,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,9,29,15,14,0,23090,襪靴/襪靴/贈品/贈品/尖頭鞋/方包/服裝衣物/尖頭鞋/小白鞋/,2565.555556,10.047155,3.0
AlyUS7LorP1qdJwahdia0w==,20,Female,3,1,1,0,1,0,1406,涼鞋/,1406.0,7.248504,2.0


In [173]:
Predicting(ResultSeason2, TagDictionary, PredictSeason2,0)
Predicting(ResultSeason2, TagDictionary, PredictSeason2,1)
Predicting(ResultSeason2, TagDictionary, PredictSeason2,2)

第1位會員 183befUhGBlsXXVQsFUqmQ==
推薦商品 : 高跟鞋
Training score : 0.9914992581899835
第2位會員 5aRn4GBf1opsUUHmYIKYUQ==
推薦商品 : 贈品
Training score : 0.9755403183768395
第3位會員 AlyUS7LorP1qdJwahdia0w==
推薦商品 : 涼鞋
Training score : 0.8225269657965436


### Season 3 (7-9月)(2014-2019)

In [104]:
result = get_season(combinedWithProduct, 2014, 3)
for i in range(2015, 2020,1):
    a = get_season(combinedWithProduct, i, 3)
    result = pd.concat([result, a])
ResultSeason3 = result.dropna()

2014-07-01 2014-10-01
2015-07-01 2015-10-01
2016-07-01 2016-10-01
2017-07-01 2017-10-01
2018-07-01 2018-10-01
2019-07-01 2019-10-01


In [105]:
ResultSeason3 = AgeCoding(ResultSeason3)
ResultSeason3["SalesLog"] = np.log(ResultSeason3["sales"])
ResultSeason3["ReviseMemberCardLevel"] = ResultSeason3["MemberCardLevel"] / 10

In [174]:
TagDictionary = GetTag(ResultSeason3, ProductTag)

In [153]:
NumberOfScore1 , avg_train_score, avg_acc_score= ModelTraining(ResultSeason3, TagDictionary)

這是 小白鞋 的分類器
Training score : 0.852050231804474
準確率 : 0.7464560550830296
[[1468  242]
 [ 384  375]]
這是 菱格紋包 的分類器
Training score : 1.0
準確率 : 1.0
[[2469]]
這是 鞋用配件 的分類器
Training score : 0.9889273979385156
準確率 : 0.9230457675172135
[[2190   65]
 [ 125   89]]
這是 短靴 的分類器
Training score : 0.9867218796417158
準確率 : 0.9700283515593358
[[2394   13]
 [  61    1]]
這是 拖鞋 的分類器
Training score : 0.9997749471125714
準確率 : 0.9995949777237748
[[2468    0]
 [   1    0]]
這是 贈品 的分類器
Training score : 0.9843813296124589
準確率 : 0.9295261239368166
[[1366  120]
 [  54  929]]
這是 跟鞋 的分類器
Training score : 0.9674573524778323
準確率 : 0.9335763466990684
[[2299   26]
 [ 138    6]]
這是 婚鞋 的分類器
Training score : 0.9910879056578296
準確率 : 0.9756986634264885
[[2407   10]
 [  50    2]]
這是 膝靴 的分類器
Training score : 0.9987397038304001
準確率 : 0.9979748886188741
[[2464    0]
 [   5    0]]
這是 NG鞋 的分類器
Training score : 0.9994148624926857
準確率 : 0.9902794653705954
[[2443    9]
 [  15    2]]
這是 身體保養 的分類器
Training score : 0.9951838682090292
準確率 

In [154]:
print("Average Training Score: ", avg_train_score)
print("Average Accuracy Score: ", avg_acc_score)

print("Training Score為1的Tag數:", NumberOfScore1)
print("樣本數不足的Tag")
for i in TagDictionary.keys():
    if sum(TagDictionary[i]) <= 3:
        print(i)

Average Training Score:  0.9901181323065463
Average Accuracy Score:  0.975569792702235
Training Score為1的Tag數: 21
樣本數不足的Tag
菱格紋包
老爹鞋
帆布鞋
黛妃包
凱莉包
餅乾鞋
尖頭靴
馬丁靴
方頭鞋
劍橋包
相機包
NG包
腋下包
拓特包
短鞋
跟靴
蛋糕鞋
豆豆鞋
艾瑪包
情侶鞋
瑪麗珍鞋


## 後兩年驗證Season 3 (7-9月)(2020-2021)

In [175]:
test = get_season(combinedWithProduct, 2020, 3)
for i in range(2021, 2022, 1):
    a = get_season(combinedWithProduct, i, 3)
    test = pd.concat([test, a])
TestSeason3 = test.dropna()

TestSeason3 = AgeCoding(TestSeason3)
TestSeason3["SalesLog"] = np.log(TestSeason3["sales"])
TestSeason3["ReviseMemberCardLevel"] = TestSeason3["MemberCardLevel"] / 10

TestTagDictionary = GetTag(TestSeason3, ProductTag)

avg_acc = Backtesting(ResultSeason3, TagDictionary, TestSeason3, TestTagDictionary)
print("Average Accuracy Score: ", avg_acc)


2020-07-01 2020-10-01
2021-07-01 2021-10-01
這是 小白鞋 的分類器
後兩年驗證準確率 : 0.7229750827452694
[[10652  2900]
 [ 1536   925]]
這是 菱格紋包 的分類器
後兩年驗證準確率 : 0.996065696621495
[[15950     0]
 [   63     0]]
這是 鞋用配件 的分類器
後兩年驗證準確率 : 0.9037656903765691
[[14185   751]
 [  790   287]]
這是 短靴 的分類器
後兩年驗證準確率 : 0.9516642727783676
[[15230   184]
 [  590     9]]
這是 拖鞋 的分類器
後兩年驗證準確率 : 0.9966901892212577
[[15960     5]
 [   48     0]]
這是 贈品 的分類器
後兩年驗證準確率 : 0.8511834134765504
[[9451 1725]
 [ 658 4179]]
這是 跟鞋 的分類器
後兩年驗證準確率 : 0.9377380878036595
[[14988   230]
 [  767    28]]
這是 婚鞋 的分類器
後兩年驗證準確率 : 0.9811403234871667
[[15703   104]
 [  198     8]]
這是 膝靴 的分類器
後兩年驗證準確率 : 0.9955036532817086
[[15941     5]
 [   67     0]]
這是 NG鞋 的分類器
後兩年驗證準確率 : 0.9636545306938112
[[15363    77]
 [  505    68]]
這是 身體保養 的分類器
後兩年驗證準確率 : 0.9426091300818086
[[14957   226]
 [  693   137]]
這是 鞋墊 的分類器
後兩年驗證準確率 : 0.9084493848747892
[[14235   605]
 [  861   312]]
這是 老爹鞋 的分類器
後兩年驗證準確率 : 0.9910697558233935
[[15870     0]
 [  143     0]]
這是 涼拖鞋 的分類器
後兩年驗

### Season 3 預測

In [176]:
pre = get_season(combinedWithProduct, 2021, 1)
PredictSeason3 = pre.dropna()

PredictSeason3 = AgeCoding(PredictSeason3)
PredictSeason3["SalesLog"] = np.log(PredictSeason3["sales"])
PredictSeason3["ReviseMemberCardLevel"] = PredictSeason3["MemberCardLevel"] / 10


2021-01-01 2021-04-01


In [177]:
PredictSeason3.head(3)

Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
183befUhGBlsXXVQsFUqmQ==,30,Female,3,4,16,4,8,4,21384,短靴/帆布鞋/帆布鞋/贈品/,5346.0,9.970398,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,9,29,15,14,0,23090,襪靴/襪靴/贈品/贈品/尖頭鞋/方包/服裝衣物/尖頭鞋/小白鞋/,2565.555556,10.047155,3.0
AlyUS7LorP1qdJwahdia0w==,20,Female,3,1,1,0,1,0,1406,涼鞋/,1406.0,7.248504,2.0


In [178]:
Predicting(ResultSeason3, TagDictionary, PredictSeason3,0)
Predicting(ResultSeason3, TagDictionary, PredictSeason3,1)
Predicting(ResultSeason3, TagDictionary, PredictSeason3,2)

第1位會員 183befUhGBlsXXVQsFUqmQ==
推薦商品 : 膝靴
Training score : 0.9987397038304001
第2位會員 5aRn4GBf1opsUUHmYIKYUQ==
推薦商品 : 贈品
Training score : 0.9843813296124589
第3位會員 AlyUS7LorP1qdJwahdia0w==
推薦商品 : []
Training score : 0


### Season 4 (10-12月)(2014-2019)

In [113]:
result = get_season(combinedWithProduct, 2013, 4)
for i in range(2014, 2020,1):
    a = get_season(combinedWithProduct, i, 4)
    result = pd.concat([result, a])
ResultSeason4 = result.dropna()

2013-10-01 2014-01-01
2014-10-01 2015-01-01
2015-10-01 2016-01-01
2016-10-01 2017-01-01
2017-10-01 2018-01-01
2018-10-01 2019-01-01
2019-10-01 2020-01-01


In [114]:
ResultSeason4 = AgeCoding(ResultSeason4)
ResultSeason4["SalesLog"] = np.log(ResultSeason4["sales"])
ResultSeason4["ReviseMemberCardLevel"] = ResultSeason4["MemberCardLevel"] / 10

In [179]:
TagDictionary = GetTag(ResultSeason4, ProductTag)

In [156]:
NumberOfScore1, avg_train_score, avg_acc_score = ModelTraining(ResultSeason4, TagDictionary)

這是 小白鞋 的分類器
Training score : 0.8983190874138095
準確率 : 0.7941092364884187
[[2577  249]
 [ 471  200]]
這是 菱格紋包 的分類器
Training score : 1.0
準確率 : 1.0
[[3497]]
這是 鞋用配件 的分類器
Training score : 0.9861142003749483
準確率 : 0.9044895624821275
[[2986  153]
 [ 181  177]]
這是 短靴 的分類器
Training score : 0.9174795843792698
準確率 : 0.7929653989133543
[[2468  308]
 [ 416  305]]
這是 拖鞋 的分類器
Training score : 0.9996186965778018
準確率 : 0.996854446668573
[[3486    1]
 [  10    0]]
這是 贈品 的分類器
Training score : 0.9438848463664962
準確率 : 0.8484415213039749
[[1936  354]
 [ 176 1031]]
這是 跟鞋 的分類器
Training score : 0.9699088049315243
準確率 : 0.9302259079210752
[[3247   45]
 [ 199    6]]
這是 婚鞋 的分類器
Training score : 0.9901496615932128
準確率 : 0.9814126394052045
[[3432   13]
 [  52    0]]
這是 膝靴 的分類器
Training score : 0.975882558545963
準確率 : 0.9348012582213325
[[3095  110]
 [ 118  174]]
這是 NG鞋 的分類器
Training score : 0.9997140224333514
準確率 : 0.9945667715184444
[[3477    3]
 [  16    1]]
這是 身體保養 的分類器
Training score : 0.9625051634838423
準確率 :

In [157]:
print("Average Training Score: ", avg_train_score)
print("Average Accuracy Score: ", avg_acc_score)

print("Training Score為1的Tag數:", NumberOfScore1)
print("樣本數不足的Tag")
for i in TagDictionary.keys():
    if sum(TagDictionary[i]) <= 3:
        print(i)

Average Training Score:  0.9896662233257832
Average Accuracy Score:  0.971649168675191
Training Score為1的Tag數: 17
樣本數不足的Tag
菱格紋包
帆布鞋
黛妃包
凱莉包
餅乾鞋
尖頭靴
馬丁靴
方頭鞋
劍橋包
NG包
腋下包
柏金包
藤編包
鍊條包
豆豆鞋
艾瑪包
情侶鞋
瑪麗珍鞋


## 後兩年驗證Season 4 (10-12月)(2020-2021)

In [180]:
test = get_season(combinedWithProduct, 2020, 4)
for i in range(2021, 2022, 1):
    a = get_season(combinedWithProduct, i, 4)
    test = pd.concat([test, a])
TestSeason4 = test.dropna()

TestSeason4 = AgeCoding(TestSeason4)
TestSeason4["SalesLog"] = np.log(TestSeason4["sales"])
TestSeason4["ReviseMemberCardLevel"] = TestSeason4["MemberCardLevel"] / 10

TestTagDictionary = GetTag(TestSeason4, ProductTag)

avg_acc = Backtesting(ResultSeason4, TagDictionary, TestSeason4, TestTagDictionary)
print("Average Accuracy Score: ", avg_acc)


2020-10-01 2021-01-01
2021-10-01 2022-01-01
這是 小白鞋 的分類器
後兩年驗證準確率 : 0.8199318821264623
[[15943  1549]
 [ 2099   668]]
這是 菱格紋包 的分類器
後兩年驗證準確率 : 0.9924478009773434
[[20106     0]
 [  153     0]]
這是 鞋用配件 的分類器
後兩年驗證準確率 : 0.8796090626388272
[[17181  1673]
 [  766   639]]
這是 短靴 的分類器
後兩年驗證準確率 : 0.6757490498050249
[[11279  2392]
 [ 4177  2411]]
這是 拖鞋 的分類器
後兩年驗證準確率 : 0.8212152623525347
[[16636     8]
 [ 3614     1]]
這是 贈品 的分類器
後兩年驗證準確率 : 0.8207710153512019
[[10100  2576]
 [ 1055  6528]]
這是 跟鞋 的分類器
後兩年驗證準確率 : 0.943531270052816
[[19088   349]
 [  795    27]]
這是 婚鞋 的分類器
後兩年驗證準確率 : 0.9763068265955871
[[19772   229]
 [  251     7]]
這是 膝靴 的分類器
後兩年驗證準確率 : 0.8862234068808924
[[17470  1382]
 [  923   484]]
這是 NG鞋 的分類器
後兩年驗證準確率 : 0.9914605854188262
[[20082    33]
 [  140     4]]
這是 身體保養 的分類器
後兩年驗證準確率 : 0.9137173601855966
[[18238   998]
 [  750   273]]
這是 鞋墊 的分類器
後兩年驗證準確率 : 0.888148477220001
[[17272  1122]
 [ 1144   721]]
這是 老爹鞋 的分類器
後兩年驗證準確率 : 0.9734932622538132
[[19713    45]
 [  492     9]]
這是 涼拖鞋 的分類器
後

### Season 4 預測

In [181]:
pre = get_season(combinedWithProduct, 2021, 1)
PredictSeason4 = pre.dropna()

PredictSeason4 = AgeCoding(PredictSeason4)
PredictSeason4["SalesLog"] = np.log(PredictSeason4["sales"])
PredictSeason4["ReviseMemberCardLevel"] = PredictSeason4["MemberCardLevel"] / 10


2021-01-01 2021-04-01


In [182]:
PredictSeason4.head(3)

Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
183befUhGBlsXXVQsFUqmQ==,30,Female,3,4,16,4,8,4,21384,短靴/帆布鞋/帆布鞋/贈品/,5346.0,9.970398,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,9,29,15,14,0,23090,襪靴/襪靴/贈品/贈品/尖頭鞋/方包/服裝衣物/尖頭鞋/小白鞋/,2565.555556,10.047155,3.0
AlyUS7LorP1qdJwahdia0w==,20,Female,3,1,1,0,1,0,1406,涼鞋/,1406.0,7.248504,2.0


In [183]:
Predicting(ResultSeason4, TagDictionary, PredictSeason4,0)
Predicting(ResultSeason4, TagDictionary, PredictSeason4,1)
Predicting(ResultSeason4, TagDictionary, PredictSeason4,2)

第1位會員 183befUhGBlsXXVQsFUqmQ==
推薦商品 : 膝靴
Training score : 0.975882558545963
第2位會員 5aRn4GBf1opsUUHmYIKYUQ==
推薦商品 : 贈品
Training score : 0.943853071081313
第3位會員 AlyUS7LorP1qdJwahdia0w==
推薦商品 : []
Training score : 0


### 結帳率 2021

In [200]:
members = pd.read_csv('91APP_MemberData.csv')
season1 = pd.read_csv('season_1.csv')
season2 = pd.read_csv('season_2.csv')
season3 = pd.read_csv('season_3.csv')
season4 = pd.read_csv('season_4.csv')


In [227]:
member_c = list(set(members['MemberId'].dropna()))
UnifiedUserIds = ['183befUhGBlsXXVQsFUqmQ==','5aRn4GBf1opsUUHmYIKYUQ==','AlyUS7LorP1qdJwahdia0w==']
print(len(member_c))

425287


### Season 1

In [221]:
behaviors = season1
avgRatio = 0

temp = behaviors[behaviors["Behavior"] == 'purchase'].dropna()
print("Numbers of Buyer:", len(set(temp['MemberId'])))

purchase = len(behaviors[behaviors["Behavior"] == 'purchase'])
add = len(behaviors[behaviors["Behavior"] == 'add'])
avgRatio = round(purchase / add, 2)
print("Average Ratio:", avgRatio)

for i in UnifiedUserIds:
    memberId = list(set(members[members["UnifiedUserId"] == i]['MemberId'].to_list()))[0]
    temp = behaviors[behaviors["MemberId"] == memberId].dropna()
    purchase = len(temp[temp["Behavior"] == 'purchase'])
    add = len(temp[temp["Behavior"] == 'add'])
    if add != 0:
        ratio = round(purchase / add, 2)
    else:
        ratio = 0
    print('MemberId: ', memberId)
    print("Season: ", 1)
    print('Purchase: ', purchase)
    print('AddToCart: ', add)
    if ratio > avgRatio:
        print('Above')
    elif ratio < avgRatio:
        print('Below')
    else:
        print('Average')
    print('=======================================================')


Numbers of Buyer: 13592
Average Ratio: 0.29
MemberId:  5Bd73SAeGSyFaSgcyBGuTO7D09mWqKXefycVfosJGgc=
Season:  1
Purchase:  3
AddToCart:  25
Below
MemberId:  ZLno3QlClWoO9qsKcpKBK7tfjCEolP80D8ddS/zGDa4=
Season:  1
Purchase:  7
AddToCart:  10
Above
MemberId:  WyLJCYsILh5wgrUtPG2x5yK44TnoxTIu3GTQpKXFkFU=
Season:  1
Purchase:  1
AddToCart:  1
Above


### Season 2

In [224]:
behaviors = season2
avgRatio = 0

temp = behaviors[behaviors["Behavior"] == 'purchase'].dropna()
print("Numbers of Buyer:", len(set(temp['MemberId'])))

purchase = len(behaviors[behaviors["Behavior"] == 'purchase'])
add = len(behaviors[behaviors["Behavior"] == 'add'])
avgRatio = round(purchase / add, 2)
print("Average Ratio:", avgRatio)

for i in UnifiedUserIds:
    memberId = list(set(members[members["UnifiedUserId"] == i]['MemberId'].to_list()))[0]
    temp = behaviors[behaviors["MemberId"] == memberId].dropna()
    purchase = len(temp[temp["Behavior"] == 'purchase'])
    add = len(temp[temp["Behavior"] == 'add'])
    if add != 0:
        ratio = round(purchase / add, 2)
    else:
        ratio = 0
    print('MemberId: ', memberId)
    print("Season: ", 2)
    print('Purchase: ', purchase)
    print('AddToCart: ', add)
    if ratio > avgRatio:
        print('Above')
    elif ratio < avgRatio:
        print('Below')
    else:
        print('Average')
    print('=======================================================')


Numbers of Buyer: 9548
Average Ratio: 0.27
MemberId:  5Bd73SAeGSyFaSgcyBGuTO7D09mWqKXefycVfosJGgc=
Season:  2
Purchase:  12
AddToCart:  24
Above
MemberId:  ZLno3QlClWoO9qsKcpKBK7tfjCEolP80D8ddS/zGDa4=
Season:  2
Purchase:  0
AddToCart:  0
Below
MemberId:  WyLJCYsILh5wgrUtPG2x5yK44TnoxTIu3GTQpKXFkFU=
Season:  2
Purchase:  1
AddToCart:  0
Below


### Season 3

In [225]:
behaviors = season3
avgRatio = 0

temp = behaviors[behaviors["Behavior"] == 'purchase'].dropna()
print("Numbers of Buyer:", len(set(temp['MemberId'])))

purchase = len(behaviors[behaviors["Behavior"] == 'purchase'])
add = len(behaviors[behaviors["Behavior"] == 'add'])
avgRatio = round(purchase / add, 2)
print("Average Ratio:", avgRatio)

for i in UnifiedUserIds:
    memberId = list(set(members[members["UnifiedUserId"] == i]['MemberId'].to_list()))[0]
    temp = behaviors[behaviors["MemberId"] == memberId].dropna()
    purchase = len(temp[temp["Behavior"] == 'purchase'])
    add = len(temp[temp["Behavior"] == 'add'])
    if add != 0:
        ratio = round(purchase / add, 2)
    else:
        ratio = 0
    print('MemberId: ', memberId)
    print("Season: ", 3)
    print('Purchase: ', purchase)
    print('AddToCart: ', add)
    if ratio > avgRatio:
        print('Above')
    elif ratio < avgRatio:
        print('Below')
    else:
        print('Average')
    print('=======================================================')


Numbers of Buyer: 10173
Average Ratio: 0.25
MemberId:  5Bd73SAeGSyFaSgcyBGuTO7D09mWqKXefycVfosJGgc=
Season:  3
Purchase:  1
AddToCart:  10
Below
MemberId:  ZLno3QlClWoO9qsKcpKBK7tfjCEolP80D8ddS/zGDa4=
Season:  3
Purchase:  3
AddToCart:  3
Above
MemberId:  WyLJCYsILh5wgrUtPG2x5yK44TnoxTIu3GTQpKXFkFU=
Season:  3
Purchase:  2
AddToCart:  4
Above


### Season 4

In [226]:
behaviors = season4
avgRatio = 0

temp = behaviors[behaviors["Behavior"] == 'purchase'].dropna()
print("Numbers of Buyer:", len(set(temp['MemberId'])))

purchase = len(behaviors[behaviors["Behavior"] == 'purchase'])
add = len(behaviors[behaviors["Behavior"] == 'add'])
avgRatio = round(purchase / add, 2)
print("Average Ratio:", avgRatio)

for i in UnifiedUserIds:
    memberId = list(set(members[members["UnifiedUserId"] == i]['MemberId'].to_list()))[0]
    temp = behaviors[behaviors["MemberId"] == memberId].dropna()
    purchase = len(temp[temp["Behavior"] == 'purchase'])
    add = len(temp[temp["Behavior"] == 'add'])
    if add != 0:
        ratio = round(purchase / add, 2)
    else:
        ratio = 0
    print('MemberId: ', memberId)
    print("Season: ", 4)
    print('Purchase: ', purchase)
    print('AddToCart: ', add)
    if ratio > avgRatio:
        print('Above')
    elif ratio < avgRatio:
        print('Below')
    else:
        print('Average')
    print('=======================================================')


Numbers of Buyer: 13960
Average Ratio: 0.23
MemberId:  5Bd73SAeGSyFaSgcyBGuTO7D09mWqKXefycVfosJGgc=
Season:  4
Purchase:  9
AddToCart:  48
Below
MemberId:  ZLno3QlClWoO9qsKcpKBK7tfjCEolP80D8ddS/zGDa4=
Season:  4
Purchase:  2
AddToCart:  5
Above
MemberId:  WyLJCYsILh5wgrUtPG2x5yK44TnoxTIu3GTQpKXFkFU=
Season:  4
Purchase:  2
AddToCart:  11
Below


### 人物特徵

In [243]:
ids = ['183befUhGBlsXXVQsFUqmQ==','5aRn4GBf1opsUUHmYIKYUQ==','AlyUS7LorP1qdJwahdia0w==']
display(ResultSeason1[(ResultSeason1.index == ids[0])])
display(ResultSeason1[(ResultSeason1.index == ids[1])])
display(ResultSeason1[(ResultSeason1.index == ids[2])])

Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
183befUhGBlsXXVQsFUqmQ==,30,Female,3,29,157,115,34,8,99233,穆勒鞋/贈品/鞋用配件/娃娃鞋/踝靴/短靴/鞋墊/贈品/雨靴/鞋墊/日用品/尖頭鞋/鞋墊/贈...,3421.827586,11.505226,3.0


Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,1,1,1,0,0,999,涼鞋/,999.0,6.906755,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,4,16,4,12,0,15532,贈品/涼鞋/涼鞋/涼鞋/,3883.0,9.650658,3.0
5aRn4GBf1opsUUHmYIKYUQ==,30,Female,4,3,9,6,3,0,6078,贈品/鞋用配件/凱莉包/,2026.0,8.712431,3.0


Unnamed: 0_level_0,MemberCardLevel,Gender,Age,order,orderSlave,low,normal,high,sales,SalePageId,avgSales,SalesLog,ReviseMemberCardLevel
UnifiedUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AlyUS7LorP1qdJwahdia0w==,20,Female,3,2,4,2,2,0,3160,贈品/小白鞋/,1580.0,8.058327,2.0
