# Trade 관련 feature engineering

In [1]:
import pickle
from collections import Counter
from tqdm import tqdm
from tqdm import tqdm_notebook
import networkx as nx
%matplotlib inline

# Train

In [2]:
# label = pd.read_csv("../data/train_label.csv")
label = pd.read_csv("~/documents/chaser_data/train_label.csv")
print("shape of train_label: ", label.shape)

shape of train_label:  (100000, 2)


In [3]:
%%time
# trade = pd.read_csv("../data/train_trade.csv")
trade = pd.read_csv("~/documents/chaser_data/train_trade.csv")
print("shape of trade: ", trade.shape)

shape of trade:  (10414351, 7)
CPU times: user 17.6 s, sys: 1.58 s, total: 19.2 s
Wall time: 19.4 s


In [4]:
trade.tail()

Unnamed: 0,trade_week,trade_day,trade_time,source_acc_id,target_acc_id,item_type,item_amount
10414346,8,7,23:59:56,2d4744d0aa06e506109f6cb5149bfa0e0dc64fedc0407a...,f6253e20488f460ec8168989b882979196673400fd422f...,money,-0.054743
10414347,8,7,23:59:56,b6756d6f26e07bf4cd930e0150783f25c5782461c1bcc1...,360a9b5ff44a044303212d1fc79c2f1c8bdddb10cfb79e...,money,-0.053459
10414348,8,7,23:59:56,8e591c7126550293e3be2deebecad9ff113be9a2046077...,474cecfc1e001be7eeb3d0e96971fc9e7488f2393f546e...,money,-0.012912
10414349,8,7,23:59:57,6dd7d1586fe87ccb0943422cf88dfb5064ba5bc910ab08...,e7527dcfa2761eceebfc42bb5f376736f087c1c45a4a20...,money,-0.055993
10414350,8,7,23:59:59,33f25cf2cccfb7bab1145104843b28d1ea41f5c45feba7...,9702cfc992a08c58fa58ee042e6a02ca83be99e7db2f98...,money,-0.053204


In [5]:
trade.describe(include="all").transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
trade_week,10414400.0,,,,4.89148,2.2914,1.0,3.0,5.0,7.0,8.0
trade_day,10414400.0,,,,4.24649,1.8932,1.0,3.0,4.0,6.0,7.0
trade_time,10414351.0,86324.0,21:21:37,269.0,,,,,,,
source_acc_id,10414351.0,128812.0,2e3a6a8802eb52fd469d170d80cd6875de032360c01144...,8891.0,,,,,,,
target_acc_id,10414351.0,71661.0,e7527dcfa2761eceebfc42bb5f376736f087c1c45a4a20...,879142.0,,,,,,,
item_type,10414351.0,6.0,money,5607383.0,,,,,,,
item_amount,10414400.0,,,,-0.0012433,1.01198,-0.0563459,-0.0563457,-0.0558274,-0.0544189,173.679


## 1. 총 sell cnt (판매 횟수) & buy_cnt 변수 생성

- 유저별 주는 거래가 있었던 횟수(sell_cnt), 받는 거래가 있었던 횟수의(buy_cnt)의 합을 구함

In [6]:
# sell_count 
trade1 = pd.DataFrame(trade.groupby('source_acc_id').size().reset_index(name='sell_cnt'))
trade1.rename(columns={'source_acc_id' : 'acc_id'},inplace = True)
trade_1 = pd.merge(label, trade1 , how = 'left')
trade_1 = trade_1.fillna(0)

In [7]:
# buy_count 
trade2 = pd.DataFrame(trade.groupby('target_acc_id').size().reset_index(name='buy_cnt'))
trade2.rename(columns={'target_acc_id' : 'acc_id'}, inplace = True)
trade_1 = pd.merge(trade_1, trade2 , how = 'left')
trade_1["buy_cnt"] = trade_1["buy_cnt"].fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0


## 2. week별/day별 sell cnt (판매 횟수) & buy_cnt 변수 생성

### 1) week별 판매/구매 횟수

- 각 주별(w1~w8)로 주는 거래 횟수와 받는 거래의 횟수 변수 생성

In [8]:
sell_cnt_by_week = trade.groupby(['source_acc_id', 'trade_week']).size().reset_index(name="counts")
sell_cnt_by_week = sell_cnt_by_week.pivot('source_acc_id', 'trade_week', 'counts').fillna(0)
sell_cnt_by_week.reset_index(inplace = True)
sell_cnt_by_week = sell_cnt_by_week.rename(columns={"source_acc_id": "acc_id", 1: "sell_cnt_w1", 2: "sell_cnt_w2", 3: "sell_cnt_w3", 4: "sell_cnt_w4",
                                                 5: "sell_cnt_w5", 6: "sell_cnt_w6", 7: "sell_cnt_w7", 8: "sell_cnt_w8"})

trade_1 = pd.merge(trade_1, sell_cnt_by_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,12.0,6.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,8.0,12.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,28.0,22.0


In [9]:
buy_cnt_by_week = trade.groupby(['target_acc_id', 'trade_week']).size().reset_index(name="counts")
buy_cnt_by_week = buy_cnt_by_week.pivot('target_acc_id', 'trade_week', 'counts').fillna(0)
buy_cnt_by_week.reset_index(inplace = True)
buy_cnt_by_week = buy_cnt_by_week.rename(columns={"target_acc_id": "acc_id", 1: "buy_cnt_w1", 2: "buy_cnt_w2", 3: "buy_cnt_w3", 4: "buy_cnt_w4",
                                                 5: "buy_cnt_w5", 6: "buy_cnt_w6", 7: "buy_cnt_w7", 8: "buy_cnt_w8"})

trade_1 = pd.merge(trade_1, buy_cnt_by_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8,buy_cnt_w1,buy_cnt_w2,buy_cnt_w3,buy_cnt_w4,buy_cnt_w5,buy_cnt_w6,buy_cnt_w7,buy_cnt_w8
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,12.0,6.0,4.0,4.0,2.0,8.0,6.0,14.0,8.0,16.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,8.0,12.0,2.0,6.0,11.0,9.0,8.0,12.0,8.0,22.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,28.0,22.0,8.0,57.0,20.0,0.0,31.0,8.0,37.0,8.0


### 2) day별 판매/구매 횟수

- 각 요일별(d1~d7)로 주는 거래 횟수와 받는 거래의 횟수 변수 생성

In [10]:
sell_cnt_by_day = trade.groupby(['source_acc_id', 'trade_day']).size().reset_index(name="counts")
sell_cnt_by_day = sell_cnt_by_day.pivot('source_acc_id', 'trade_day', 'counts').fillna(0)
sell_cnt_by_day.reset_index(inplace = True)
sell_cnt_by_day = sell_cnt_by_day.rename(columns={"source_acc_id": "acc_id", 1: "sell_cnt_d1", 2: "sell_cnt_d2", 3: "sell_cnt_d3", 4: "sell_cnt_d4",
                                                 5: "sell_cnt_d5", 6: "sell_cnt_d6", 7: "sell_cnt_d7"})

trade_1 = pd.merge(trade_1, sell_cnt_by_day, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,buy_cnt_w6,buy_cnt_w7,buy_cnt_w8,sell_cnt_d1,sell_cnt_d2,sell_cnt_d3,sell_cnt_d4,sell_cnt_d5,sell_cnt_d6,sell_cnt_d7
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,14.0,8.0,16.0,8.0,4.0,25.0,6.0,8.0,12.0,6.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,12.0,8.0,22.0,14.0,19.0,23.0,2.0,40.0,21.0,20.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,8.0,37.0,8.0,25.0,36.0,30.0,18.0,38.0,0.0,44.0


In [11]:
buy_cnt_by_day = trade.groupby(['target_acc_id', 'trade_day']).size().reset_index(name="counts")
buy_cnt_by_day = buy_cnt_by_day.pivot('target_acc_id', 'trade_day', 'counts').fillna(0)
buy_cnt_by_day.reset_index(inplace = True)
buy_cnt_by_day = buy_cnt_by_day.rename(columns={"target_acc_id": "acc_id", 1: "buy_cnt_d1", 2: "buy_cnt_d2", 3: "buy_cnt_d3", 4: "buy_cnt_d4",
                                                 5: "buy_cnt_d5", 6: "buy_cnt_d6", 7: "buy_cnt_d7"})

trade_1 = pd.merge(trade_1, buy_cnt_by_day, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_cnt_d5,sell_cnt_d6,sell_cnt_d7,buy_cnt_d1,buy_cnt_d2,buy_cnt_d3,buy_cnt_d4,buy_cnt_d5,buy_cnt_d6,buy_cnt_d7
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,8.0,12.0,6.0,2.0,4.0,20.0,12.0,10.0,12.0,2.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,40.0,21.0,20.0,22.0,5.0,10.0,0.0,16.0,16.0,9.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,38.0,0.0,44.0,29.0,22.0,40.0,40.0,14.0,6.0,18.0


## 3. item type별 거래 횟수 변수 생성

- 각 유저의 여섯 가지 item type별 주는 거래 / 받는 거래를 한 횟수를 합한 변수 생성

### 1) item type별 판매 횟수

In [12]:
item_sell_cnt = trade.groupby(['source_acc_id', 'item_type']).size().reset_index(name="counts")
item_sell_cnt = item_sell_cnt.pivot('source_acc_id', 'item_type', 'counts').fillna(0)
item_sell_cnt.reset_index(inplace = True)
item_sell_cnt = item_sell_cnt.rename(columns={"source_acc_id": "acc_id", "accessory": "sell_cnt_accessory","costume": "sell_cnt_costume", "gem": "sell_cnt_gem",\
                                             "grocery": "sell_cnt_grocery", "money": "sell_cnt_money", "weapon": "sell_cnt_weapon"}).reset_index()
trade_1 = pd.merge(trade_1, item_sell_cnt, how = 'left').fillna(0)
trade_1.drop("index", axis=1, inplace = True)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,buy_cnt_d4,buy_cnt_d5,buy_cnt_d6,buy_cnt_d7,sell_cnt_accessory,sell_cnt_costume,sell_cnt_gem,sell_cnt_grocery,sell_cnt_money,sell_cnt_weapon
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,12.0,10.0,12.0,2.0,0.0,0.0,0.0,18.0,51.0,0.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,0.0,16.0,16.0,9.0,2.0,5.0,0.0,29.0,101.0,2.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,40.0,14.0,6.0,18.0,0.0,0.0,0.0,4.0,187.0,0.0


### 2) item type별 구매 횟수

In [13]:
item_buy_cnt = trade.groupby(['target_acc_id', 'item_type']).size().reset_index(name="counts")
item_buy_cnt = item_buy_cnt.pivot('target_acc_id', 'item_type', 'counts').fillna(0)
item_buy_cnt.reset_index(inplace = True)
item_buy_cnt = item_buy_cnt.rename(columns={"target_acc_id": "acc_id", "accessory": "buy_cnt_accessory","costume": "buy_cnt_costume", "gem": "buy_cnt_gem",\
                                             "grocery": "buy_cnt_grocery", "money": "buy_cnt_money", "weapon": "buy_cnt_weapon"}).reset_index()
trade_1 = pd.merge(trade_1, item_buy_cnt, how = 'left').fillna(0)
trade_1.drop("index", axis=1, inplace = True)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_cnt_gem,sell_cnt_grocery,sell_cnt_money,sell_cnt_weapon,buy_cnt_accessory,buy_cnt_costume,buy_cnt_gem,buy_cnt_grocery,buy_cnt_money,buy_cnt_weapon
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,0.0,18.0,51.0,0.0,0.0,0.0,0.0,22.0,40.0,0.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,0.0,29.0,101.0,2.0,0.0,0.0,2.0,4.0,70.0,2.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,0.0,4.0,187.0,0.0,0.0,0.0,0.0,4.0,165.0,0.0


## 4. item type별 거래량 (구매/판매) 변수 생성

- 각 유저의 여섯 가지 item type별 거래량을 합한 변수 생성

### imputation method
- 유저가 특정 item type의 거래를 하지 않을 경우 NaN값이 생성됨
- 원 data가 표준화된 값으로, 표준화 전 0의 표준화된 값 추정하기
    - 표준화된 값 중 최소값 a 거래량=1개인 경우, 그 다음 작은 값 b를 거래량=2인 경우라고 가정
    - 최소값 a에서 (b-a)의 차이를 빼면 거래량=0을 나타내는 값이라고 가정하고 imputation함

In [14]:
item_amount = trade.groupby(["item_amount"]).size().reset_index()
item_amount.loc[1,"item_amount"]
item_amount.loc[1,"item_amount"]

-0.0563459020164732

-0.0563459020164732

In [15]:
zero_value = item_amount.loc[0,"item_amount"] - (item_amount.loc[1,"item_amount"] - item_amount.loc[0,"item_amount"])
zero_value

-0.0563459193900082

### 1) item별 판매량 합

In [16]:
item_sell_amount = trade[["source_acc_id", "item_type","item_amount"]]
item_sell_amount = item_sell_amount.groupby(['source_acc_id','item_type']).agg("sum").reset_index().rename(columns = {"item_amount":"item_amount_sum"})
item_sell_amount = item_sell_amount.pivot("source_acc_id", "item_type", "item_amount_sum")
item_sell_amount.reset_index(inplace = True)
item_sell_amount = item_sell_amount.rename(columns={"source_acc_id": "acc_id", "accessory": "sell_amount_accessory","costume": "sell_amount_costume",
                                                    "gem": "sell_amount_gem", "grocery": "sell_amount_grocery", "money": "sell_amount_money",
                                                    "weapon": "sell_amount_weapon"}).reset_index()
item_sell_amount.drop("index", axis=1, inplace = True)
trade_1 = pd.merge(trade_1, item_sell_amount, how = 'left').fillna(zero_value)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,buy_cnt_gem,buy_cnt_grocery,buy_cnt_money,buy_cnt_weapon,sell_amount_accessory,sell_amount_costume,sell_amount_gem,sell_amount_grocery,sell_amount_money,sell_amount_weapon
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,0.0,22.0,40.0,0.0,-0.056346,-0.056346,-0.056346,-1.014215,1.226773,-0.056346
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,2.0,4.0,70.0,2.0,-0.112692,-0.28173,-0.056346,-1.634024,0.361481,-0.112692
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,0.0,4.0,165.0,0.0,-0.056346,-0.056346,-0.056346,-0.225384,5.038774,-0.056346


### 2) item별 구매량

In [17]:
item_buy_amount = trade[["target_acc_id", "item_type","item_amount"]]
item_buy_amount = item_buy_amount.groupby(['target_acc_id','item_type']).agg("sum").reset_index().rename(columns = {"item_amount":"item_amount_sum"})
item_buy_amount = item_buy_amount.pivot("target_acc_id", "item_type", "item_amount_sum")
item_buy_amount.reset_index(inplace = True)
item_buy_amount = item_buy_amount.rename(columns={"target_acc_id": "acc_id", "accessory": "buy_amount_accessory","costume": "buy_amount_costume",
                                                    "gem": "buy_amount_gem", "grocery": "buy_amount_grocery", "money": "buy_amount_money",
                                                    "weapon": "buy_amount_weapon"}).reset_index()
item_buy_amount.drop("index", axis=1, inplace = True)

trade_1 = pd.merge(trade_1, item_buy_amount, how = 'left').fillna(zero_value)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_amount_gem,sell_amount_grocery,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,-0.056346,-1.014215,1.226773,-0.056346,-0.056346,-0.056346,-0.056346,-1.239601,2.138367,-0.056346
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,-0.056346,-1.634024,0.361481,-0.112692,-0.056346,-0.056346,-0.112692,-0.225384,3.191125,-0.112692
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,-0.056346,-0.225384,5.038774,-0.056346,-0.056346,-0.056346,-0.056346,-0.225384,-5.746013,-0.056346


## 5. 판매/구매가 있었던 첫 주/마지막 주 변수 생성

- 1-8주 중에 유저가 처음으로 판매/구매를 한 주가 언제인지, 마지막으로 판매/구매를 한 주가 언제인지 나타내는 변수 생성

In [18]:
first_sell_week = trade[["source_acc_id","trade_week"]].groupby('source_acc_id').agg(min).reset_index().rename(columns={"source_acc_id":"acc_id", "trade_week": "sell_1st_week"})

trade_1 = pd.merge(trade_1, first_sell_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_amount_grocery,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,-1.014215,1.226773,-0.056346,-0.056346,-0.056346,-0.056346,-1.239601,2.138367,-0.056346,1.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,-1.634024,0.361481,-0.112692,-0.056346,-0.056346,-0.112692,-0.225384,3.191125,-0.112692,1.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,-0.225384,5.038774,-0.056346,-0.056346,-0.056346,-0.056346,-0.225384,-5.746013,-0.056346,1.0


In [19]:
last_sell_week = trade[["source_acc_id","trade_week"]].groupby('source_acc_id').agg(max).reset_index().rename(columns={"source_acc_id":"acc_id", "trade_week": "sell_last_week"})

trade_1 = pd.merge(trade_1, last_sell_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,1.226773,-0.056346,-0.056346,-0.056346,-0.056346,-1.239601,2.138367,-0.056346,1.0,8.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,0.361481,-0.112692,-0.056346,-0.056346,-0.112692,-0.225384,3.191125,-0.112692,1.0,8.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,5.038774,-0.056346,-0.056346,-0.056346,-0.056346,-0.225384,-5.746013,-0.056346,1.0,8.0


In [20]:
first_buy_week = trade[["target_acc_id","trade_week"]].groupby('target_acc_id').agg(min).reset_index().rename(columns={"target_acc_id":"acc_id", "trade_week": "buy_1st_week"})

trade_1 = pd.merge(trade_1, first_buy_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-1.239601,2.138367,-0.056346,1.0,8.0,1.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,-0.112692,-0.056346,-0.056346,-0.112692,-0.225384,3.191125,-0.112692,1.0,8.0,1.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.225384,-5.746013,-0.056346,1.0,8.0,1.0


In [21]:
last_buy_week = trade[["target_acc_id","trade_week"]].groupby('target_acc_id').agg(max).reset_index().rename(columns={"target_acc_id":"acc_id", "trade_week": "buy_last_week"})

trade_1 = pd.merge(trade_1, last_buy_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,label,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,...,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week,buy_last_week
99995,da6d33b03968d8e35821f6eb88ad22e12e37aa8867084e...,retained,69.0,62.0,8.0,4.0,8.0,10.0,9.0,12.0,...,-0.056346,-0.056346,-0.056346,-1.239601,2.138367,-0.056346,1.0,8.0,1.0,8.0
99996,676c944f4b6ae63818b3cad824a61233690f16a2275d5d...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0
99997,695e1f28e234fc4cc53085e332fa7a76d7895ca4cc745b...,retained,139.0,78.0,8.0,8.0,17.0,31.0,35.0,20.0,...,-0.056346,-0.056346,-0.112692,-0.225384,3.191125,-0.112692,1.0,8.0,1.0,8.0
99998,0c87fabaad5542e533f958a1d6fd739993b94e95e00989...,retained,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0
99999,47ff575cb94019df5695c5d81ec285b0d801607b2a8697...,retained,191.0,169.0,24.0,60.0,6.0,0.0,27.0,24.0,...,-0.056346,-0.056346,-0.056346,-0.225384,-5.746013,-0.056346,1.0,8.0,1.0,8.0


## 6. indegree/outdegree centrality 변수 생성

- 두 유저 사이의 거래 관계를 네트워크로 구성하여 trade network 상에서 유저의 중심성을 계산하여 변수 생성
    - networkx 주는 거래와 받는 거래의 방향성을 고려한 DiGraph()로 네트워크 구성
    - indegree와 outdegree centrality를 계산함

### 6.1 trade network 구성하기

In [68]:
a = trade["source_acc_id"].tolist()
b = trade["target_acc_id"].tolist()
# w = df_party_id["item_amount"].tolist()

# a, b, w의 list 받기
relations = list(zip(a, b))
print("relations의 길이:", len(relations))

relations의 길이: 10414351


In [69]:
len(list(set(relations)))

508928

In [70]:
%%time
relations_cnt = dict(Counter(relations))

CPU times: user 3.83 s, sys: 132 ms, total: 3.96 s
Wall time: 3.96 s


In [71]:
type(relations_cnt), len(relations_cnt)

dict

508928

In [72]:
G = nx.DiGraph()

In [73]:
def get_network(ls):
    ids = list(ls.keys())
    cnts = list(ls.values())
    for i in range(len(ls)):
        G.add_edge(ids[i][0], ids[i][1], weight = cnts[i])

In [74]:
get_network(relations_cnt)

In [75]:
# nx.draw(G)

### 6.2 network 크기 확인
- 만들어진 network의 node수는 source_acc_id와 target_acc_id의 합집합과 같다

In [76]:
len(G.nodes())

135975

In [77]:
source_ids = trade["source_acc_id"].tolist()
source_ids = list(set(source_ids))
len(source_ids)

128812

In [78]:
target_ids = trade["target_acc_id"].tolist()
target_ids = list(set(target_ids))
len(target_ids)

71661

In [79]:
len(list(set(source_ids) | set(target_ids)))

135975

### 6.3 indegree, outdegree centrality 구하기

In [82]:
indegree_centrality = nx.in_degree_centrality(G)
type(indegree_centrality)

dict

In [83]:
outdegree_centrality = nx.out_degree_centrality(G)
type(outdegree_centrality)

dict

In [84]:
in_cent_df = pd.DataFrame(columns=["acc_id","indegree_cent"])
in_cent_df

Unnamed: 0,acc_id,indegree_cent


In [85]:
in_cent_df["acc_id"] = indegree_centrality.keys()

In [86]:
in_cent_df["indegree_cent"] = indegree_centrality.values()

In [88]:
out_cent_df = pd.DataFrame(columns=["acc_id","outdegree_cent"])
out_cent_df

Unnamed: 0,acc_id,outdegree_cent


In [89]:
out_cent_df["acc_id"] = outdegree_centrality.keys()

In [90]:
out_cent_df["outdegree_cent"] = outdegree_centrality.values()

In [92]:
degree_cent = pd.merge(in_cent_df, out_cent_df, on="acc_id")

In [23]:
trade_1 = pd.merge(trade_1, degree_cent, how = 'left').fillna(0)

## 7. 결과 dataframe

In [26]:
print(trade_1.shape)
trade_1.describe()

(100000, 64)


Unnamed: 0,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8,...,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week,buy_last_week,indegree_cent,outdegree_cent
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,60.31136,66.34435,5.4105,6.12349,6.38624,7.81033,7.55417,7.57361,9.11589,10.33713,...,-0.056548,-1.770449,1.503164,-0.056469,1.18847,2.84933,1.08452,1.84611,2.4e-05,2.1e-05
std,203.623,4945.677474,23.671034,23.847662,23.547098,30.748189,30.252494,27.505742,33.739486,38.73816,...,0.005007,139.509052,145.237642,0.007853,2.192532,3.70793,2.327207,3.206009,0.000513,0.000169
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.788843,-24762.484315,-24157.831901,-2.253832,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0,0.0,0.0
75%,22.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,...,-0.056346,-0.056346,-0.056346,-0.056346,1.0,8.0,1.0,4.0,7e-06,1.5e-05
max,8891.0,879142.0,528.0,680.0,632.0,2382.0,2364.0,1518.0,2022.0,3812.0,...,-0.056346,-0.056268,6537.758361,-0.056346,8.0,8.0,8.0,8.0,0.109006,0.020151


In [27]:
trade_1.to_csv("../data/train_trade.csv", index = False)

In [28]:
train_trade = pd.read_csv("../data/train_trade.csv")
train_trade.shape

(100000, 64)

In [23]:
pickle.dump(trade_1,open('../data/train_trade.pkl','wb'))

In [48]:
train_trade = pickle.load(open('../data/train_trade.pkl','rb'))

In [None]:
train_trade.tail()

---

# Test

In [29]:
# activity = pd.read_csv('../data/test_activity.csv')
activity = pd.read_csv('~/documents/chaser_data/test_activity.csv')
label = pd.DataFrame(list(activity['acc_id'].unique()))
label.columns = ['acc_id']

In [30]:
%%time
# trade = pd.read_csv("../data/test_trade.csv")
trade = pd.read_csv("~/documents/chaser_data/test_trade.csv")
print("shape of trade: ", trade.shape)

shape of trade:  (3873536, 7)
CPU times: user 6.75 s, sys: 717 ms, total: 7.46 s
Wall time: 7.71 s


In [31]:
trade.tail()

Unnamed: 0,trade_week,trade_day,trade_time,source_acc_id,target_acc_id,item_type,item_amount
3873531,8,7,23:59:41,302cd23b1b19a4f66bcb5bbf30b8ef2866c28389234793...,e28227a599bb3af537fbfa3c72cdaed47e58a5f44b1eb1...,money,-0.055475
3873532,8,7,23:59:48,20c3d7de620ed0e827fedb6e4a01c402490c4ee8544557...,f750f35d85a22ddfefa04555272afc2fb30b30000ccb46...,money,-0.054851
3873533,8,7,23:59:56,6cec8e08868bf98099ba4c1042c68c3bf1359cdff26994...,4f330d038e21881150d7541b69c314cee27d8dc5ae8c1c...,money,-0.055697
3873534,8,7,23:59:56,a33bcd58160e7f65331de4fc4733113de3173aad24023a...,5c5e84f844f1bd753ae5012d8f2cddd1d1b11f0866958b...,money,0.046419
3873535,8,7,23:59:59,3bb2207ea010267220ec18f402dcc34148b4c72e614f1b...,6eb15c3113fdbc1a677e6660682cf0fcbc78696d2ba6ca...,money,-0.054743


In [32]:
trade.describe(include="all").transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
trade_week,3873540.0,,,,4.85674,2.29353,1.0,3.0,5.0,7.0,8.0
trade_day,3873540.0,,,,4.26195,1.90731,1.0,3.0,4.0,6.0,7.0
trade_time,3873536.0,85938.0,23:07:38,130.0,,,,,,,
source_acc_id,3873536.0,57038.0,76d73b6ae5c1ebc1a59524c42d49ae1cbd0ad5fd0c48ac...,11229.0,,,,,,,
target_acc_id,3873536.0,43031.0,0e97849a87fc553efa12efc2c7bacb690575c1f610527f...,322922.0,,,,,,,
item_type,3873536.0,6.0,money,2128294.0,,,,,,,
item_amount,3873540.0,,,,0.00683354,0.982165,-0.0563459,-0.0563457,-0.055702,-0.0538511,138.932


## 1. 총 sell cnt (판매 횟수) & buy_cnt 변수 생성

- 유저별 주는 거래가 있었던 횟수(sell_cnt), 받는 거래가 있었던 횟수의(buy_cnt)의 합을 구함

In [33]:
# sell_count 
trade1 = pd.DataFrame(trade.groupby('source_acc_id').size().reset_index(name='sell_cnt'))
trade1.rename(columns={'source_acc_id' : 'acc_id'},inplace = True)
trade_1 = pd.merge(label, trade1 , how = 'left')
trade_1 = trade_1.fillna(0)

In [34]:
# buy_count 
trade2 = pd.DataFrame(trade.groupby('target_acc_id').size().reset_index(name='buy_cnt'))
trade2.rename(columns={'target_acc_id' : 'acc_id'}, inplace = True)
trade_1 = pd.merge(trade_1, trade2 , how = 'left')
trade_1["buy_cnt"] = trade_1["buy_cnt"].fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0


## 2. week별/day별 sell cnt (판매 횟수) & buy_cnt 변수 생성

### 1) week별 판매/구매 횟수

- 각 주별(w1~w8)로 주는 거래 횟수와 받는 거래의 횟수 변수 생성

In [35]:
sell_cnt_by_week = trade.groupby(['source_acc_id', 'trade_week']).size().reset_index(name="counts")
sell_cnt_by_week = sell_cnt_by_week.pivot('source_acc_id', 'trade_week', 'counts').fillna(0)
sell_cnt_by_week.reset_index(inplace = True)
sell_cnt_by_week = sell_cnt_by_week.rename(columns={"source_acc_id": "acc_id", 1: "sell_cnt_w1", 2: "sell_cnt_w2", 3: "sell_cnt_w3", 4: "sell_cnt_w4",
                                                 5: "sell_cnt_w5", 6: "sell_cnt_w6", 7: "sell_cnt_w7", 8: "sell_cnt_w8"})

trade_1 = pd.merge(trade_1, sell_cnt_by_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,16.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,58.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,0.0


In [36]:
buy_cnt_by_week = trade.groupby(['target_acc_id', 'trade_week']).size().reset_index(name="counts")
buy_cnt_by_week = buy_cnt_by_week.pivot('target_acc_id', 'trade_week', 'counts').fillna(0)
buy_cnt_by_week.reset_index(inplace = True)
buy_cnt_by_week = buy_cnt_by_week.rename(columns={"target_acc_id": "acc_id", 1: "buy_cnt_w1", 2: "buy_cnt_w2", 3: "buy_cnt_w3", 4: "buy_cnt_w4",
                                                 5: "buy_cnt_w5", 6: "buy_cnt_w6", 7: "buy_cnt_w7", 8: "buy_cnt_w8"})

trade_1 = pd.merge(trade_1, buy_cnt_by_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8,buy_cnt_w1,buy_cnt_w2,buy_cnt_w3,buy_cnt_w4,buy_cnt_w5,buy_cnt_w6,buy_cnt_w7,buy_cnt_w8
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,58.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0


### 2) day별 판매/구매 횟수

- 각 요일별(d1~d7)로 주는 거래 횟수와 받는 거래의 횟수 변수 생성

In [37]:
sell_cnt_by_day = trade.groupby(['source_acc_id', 'trade_day']).size().reset_index(name="counts")
sell_cnt_by_day = sell_cnt_by_day.pivot('source_acc_id', 'trade_day', 'counts').fillna(0)
sell_cnt_by_day.reset_index(inplace = True)
sell_cnt_by_day = sell_cnt_by_day.rename(columns={"source_acc_id": "acc_id", 1: "sell_cnt_d1", 2: "sell_cnt_d2", 3: "sell_cnt_d3", 4: "sell_cnt_d4",
                                                 5: "sell_cnt_d5", 6: "sell_cnt_d6", 7: "sell_cnt_d7"})

trade_1 = pd.merge(trade_1, sell_cnt_by_day, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,buy_cnt_w6,buy_cnt_w7,buy_cnt_w8,sell_cnt_d1,sell_cnt_d2,sell_cnt_d3,sell_cnt_d4,sell_cnt_d5,sell_cnt_d6,sell_cnt_d7
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,0.0,0.0,0.0,18.0,16.0,16.0,16.0,20.0,24.0,8.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,0.0,0.0,0.0,28.0,54.0,66.0,64.0,64.0,54.0,28.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,4.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,0.0,0.0,0.0,6.0,6.0,0.0,0.0,6.0,0.0,0.0


In [38]:
buy_cnt_by_day = trade.groupby(['target_acc_id', 'trade_day']).size().reset_index(name="counts")
buy_cnt_by_day = buy_cnt_by_day.pivot('target_acc_id', 'trade_day', 'counts').fillna(0)
buy_cnt_by_day.reset_index(inplace = True)
buy_cnt_by_day = buy_cnt_by_day.rename(columns={"target_acc_id": "acc_id", 1: "buy_cnt_d1", 2: "buy_cnt_d2", 3: "buy_cnt_d3", 4: "buy_cnt_d4",
                                                 5: "buy_cnt_d5", 6: "buy_cnt_d6", 7: "buy_cnt_d7"})

trade_1 = pd.merge(trade_1, buy_cnt_by_day, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_cnt_d5,sell_cnt_d6,sell_cnt_d7,buy_cnt_d1,buy_cnt_d2,buy_cnt_d3,buy_cnt_d4,buy_cnt_d5,buy_cnt_d6,buy_cnt_d7
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,20.0,24.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,64.0,54.0,28.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,6.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0


## 3. item type별 거래 횟수 변수 생성

- 각 유저의 여섯 가지 item type별 주는 거래 / 받는 거래를 한 횟수를 합한 변수 생성

### 1) item type별 판매 횟수

In [39]:
item_sell_cnt = trade.groupby(['source_acc_id', 'item_type']).size().reset_index(name="counts")
item_sell_cnt = item_sell_cnt.pivot('source_acc_id', 'item_type', 'counts').fillna(0)
item_sell_cnt.reset_index(inplace = True)
item_sell_cnt = item_sell_cnt.rename(columns={"source_acc_id": "acc_id", "accessory": "sell_cnt_accessory","costume": "sell_cnt_costume", "gem": "sell_cnt_gem",\
                                             "grocery": "sell_cnt_grocery", "money": "sell_cnt_money", "weapon": "sell_cnt_weapon"}).reset_index()
trade_1 = pd.merge(trade_1, item_sell_cnt, how = 'left').fillna(0)
trade_1.drop("index", axis=1, inplace = True)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,buy_cnt_d4,buy_cnt_d5,buy_cnt_d6,buy_cnt_d7,sell_cnt_accessory,sell_cnt_costume,sell_cnt_gem,sell_cnt_grocery,sell_cnt_money,sell_cnt_weapon
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,2.0,0.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,58.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,138.0,220.0,0.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0


### 2) item type별 구매 횟수

In [40]:
item_buy_cnt = trade.groupby(['target_acc_id', 'item_type']).size().reset_index(name="counts")
item_buy_cnt = item_buy_cnt.pivot('target_acc_id', 'item_type', 'counts').fillna(0)
item_buy_cnt.reset_index(inplace = True)
item_buy_cnt = item_buy_cnt.rename(columns={"target_acc_id": "acc_id", "accessory": "buy_cnt_accessory","costume": "buy_cnt_costume", "gem": "buy_cnt_gem",\
                                             "grocery": "buy_cnt_grocery", "money": "buy_cnt_money", "weapon": "buy_cnt_weapon"}).reset_index()
trade_1 = pd.merge(trade_1, item_buy_cnt, how = 'left').fillna(0)
trade_1.drop("index", axis=1, inplace = True)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_cnt_gem,sell_cnt_grocery,sell_cnt_money,sell_cnt_weapon,buy_cnt_accessory,buy_cnt_costume,buy_cnt_gem,buy_cnt_grocery,buy_cnt_money,buy_cnt_weapon
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,0.0,60.0,58.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,0.0,138.0,220.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,5.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0


## 4. item type별 거래량 (구매/판매) 변수 생성

- 각 유저의 여섯 가지 item type별 거래량을 합한 변수 생성

### imputation method
- 유저가 특정 item type의 거래를 하지 않을 경우 NaN값이 생성됨
- 원 data가 표준화된 값으로, 표준화 전 0의 표준화된 값 추정하기
    - 표준화된 값 중 최소값 a 거래량=1개인 경우, 그 다음 작은 값 b를 거래량=2인 경우라고 가정
    - 최소값 a에서 (b-a)의 차이를 빼면 거래량=0을 나타내는 값이라고 가정하고 imputation함

In [41]:
item_amount = trade.groupby(["item_amount"]).size().reset_index()
item_amount.loc[1,"item_amount"]
item_amount.loc[1,"item_amount"]

-0.0563459020164732

-0.0563459020164732

In [42]:
zero_value = item_amount.loc[0,"item_amount"] - (item_amount.loc[1,"item_amount"] - item_amount.loc[0,"item_amount"])
zero_value

-0.0563459193900082

### 1) item별 판매량 합

In [43]:
item_sell_amount = trade[["source_acc_id", "item_type","item_amount"]]
item_sell_amount = item_sell_amount.groupby(['source_acc_id','item_type']).agg("sum").reset_index().rename(columns = {"item_amount":"item_amount_sum"})
item_sell_amount = item_sell_amount.pivot("source_acc_id", "item_type", "item_amount_sum")
item_sell_amount.reset_index(inplace = True)
item_sell_amount = item_sell_amount.rename(columns={"source_acc_id": "acc_id", "accessory": "sell_amount_accessory","costume": "sell_amount_costume",
                                                    "gem": "sell_amount_gem", "grocery": "sell_amount_grocery", "money": "sell_amount_money",
                                                    "weapon": "sell_amount_weapon"}).reset_index()
item_sell_amount.drop("index", axis=1, inplace = True)
trade_1 = pd.merge(trade_1, item_sell_amount, how = 'left').fillna(zero_value)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,buy_cnt_gem,buy_cnt_grocery,buy_cnt_money,buy_cnt_weapon,sell_amount_accessory,sell_amount_costume,sell_amount_gem,sell_amount_grocery,sell_amount_money,sell_amount_weapon
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,-0.056346,-0.056346,-0.056346,-0.112687,-0.095355,-0.056346
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,0.0,0.0,0.0,0.0,-0.056346,-0.056346,-0.056346,-3.380744,-3.231587,-0.056346
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,0.0,2.0,2.0,0.0,-0.056346,-0.056346,-0.056346,-7.775691,-12.134071,-0.056346
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,4.0,0.0,-0.056346,-0.056346,-0.056346,-0.056346,2.671771,-0.056346
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,0.0,0.0,2.0,0.0,-0.056346,-0.056346,-0.056346,-0.056346,2.221594,-0.056346


### 2) item별 구매량

In [44]:
item_buy_amount = trade[["target_acc_id", "item_type","item_amount"]]
item_buy_amount = item_buy_amount.groupby(['target_acc_id','item_type']).agg("sum").reset_index().rename(columns = {"item_amount":"item_amount_sum"})
item_buy_amount = item_buy_amount.pivot("target_acc_id", "item_type", "item_amount_sum")
item_buy_amount.reset_index(inplace = True)
item_buy_amount = item_buy_amount.rename(columns={"target_acc_id": "acc_id", "accessory": "buy_amount_accessory","costume": "buy_amount_costume",
                                                    "gem": "buy_amount_gem", "grocery": "buy_amount_grocery", "money": "buy_amount_money",
                                                    "weapon": "buy_amount_weapon"}).reset_index()
item_buy_amount.drop("index", axis=1, inplace = True)

trade_1 = pd.merge(trade_1, item_buy_amount, how = 'left').fillna(zero_value)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_amount_gem,sell_amount_grocery,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.112687,-0.095355,-0.056346,-0.056346,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-0.056346,-3.380744,-3.231587,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-0.056346,-7.775691,-12.134071,-0.056346,-0.056346,-0.056346,-0.056346,-0.11269,-0.112171,-0.056346
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,-0.056346,-0.056346,2.671771,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,0.990764,-0.056346
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,-0.056346,-0.056346,2.221594,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,27.684964,-0.056346


## 5. 판매/구매가 있었던 첫 주/마지막 주 변수 생성

- 1-8주 중에 유저가 처음으로 판매/구매를 한 주가 언제인지, 마지막으로 판매/구매를 한 주가 언제인지 나타내는 변수 생성

In [45]:
first_sell_week = trade[["source_acc_id","trade_week"]].groupby('source_acc_id').agg(min).reset_index().rename(columns={"source_acc_id":"acc_id", "trade_week": "sell_1st_week"})

trade_1 = pd.merge(trade_1, first_sell_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_amount_grocery,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112687,-0.095355,-0.056346,-0.056346,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,8.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-3.380744,-3.231587,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,4.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-7.775691,-12.134071,-0.056346,-0.056346,-0.056346,-0.056346,-0.11269,-0.112171,-0.056346,4.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,-0.056346,2.671771,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,0.990764,-0.056346,1.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,-0.056346,2.221594,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,27.684964,-0.056346,2.0


In [46]:
last_sell_week = trade[["source_acc_id","trade_week"]].groupby('source_acc_id').agg(max).reset_index().rename(columns={"source_acc_id":"acc_id", "trade_week": "sell_last_week"})

trade_1 = pd.merge(trade_1, last_sell_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_amount_money,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.095355,-0.056346,-0.056346,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,8.0,8.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-3.231587,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,4.0,8.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-12.134071,-0.056346,-0.056346,-0.056346,-0.056346,-0.11269,-0.112171,-0.056346,4.0,8.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,2.671771,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,0.990764,-0.056346,1.0,7.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,2.221594,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,27.684964,-0.056346,2.0,7.0


In [47]:
first_buy_week = trade[["target_acc_id","trade_week"]].groupby('target_acc_id').agg(min).reset_index().rename(columns={"target_acc_id":"acc_id", "trade_week": "buy_1st_week"})

trade_1 = pd.merge(trade_1, first_buy_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,sell_amount_weapon,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,8.0,8.0,8.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,4.0,8.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.11269,-0.112171,-0.056346,4.0,8.0,5.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,0.990764,-0.056346,1.0,7.0,1.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,27.684964,-0.056346,2.0,7.0,2.0


In [48]:
last_buy_week = trade[["target_acc_id","trade_week"]].groupby('target_acc_id').agg(max).reset_index().rename(columns={"target_acc_id":"acc_id", "trade_week": "buy_last_week"})

trade_1 = pd.merge(trade_1, last_buy_week, how = 'left').fillna(0)
trade_1.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,buy_amount_accessory,buy_amount_costume,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week,buy_last_week
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.112692,-0.056346,-0.056346,8.0,8.0,8.0,8.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,-0.056346,4.0,8.0,0.0,0.0
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-0.056346,-0.056346,-0.056346,-0.11269,-0.112171,-0.056346,4.0,8.0,5.0,5.0
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,-0.056346,-0.112692,-0.056346,-0.056346,0.990764,-0.056346,1.0,7.0,1.0,8.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,-0.056346,-0.056346,-0.056346,-0.056346,27.684964,-0.056346,2.0,7.0,2.0,2.0


## 6. indegree/outdegree centrality 변수 생성

- 두 유저 사이의 거래 관계를 네트워크로 구성하여 trade network 상에서 유저의 중심성을 계산하여 변수 생성
    - networkx 주는 거래와 받는 거래의 방향성을 고려한 DiGraph()로 네트워크 구성
    - indegree와 outdegree centrality를 계산함

### 6.1 trade network 구성하기

In [8]:
a = trade["source_acc_id"].tolist()
b = trade["target_acc_id"].tolist()
# w = df_party_id["item_amount"].tolist()

# a, b, w의 list 받기
relations = list(zip(a, b))
print("relations의 길이:", len(relations))

relations의 길이: 3873536


In [9]:
len(list(set(relations)))

210979

In [10]:
%%time
relations_cnt = dict(Counter(relations))

CPU times: user 1.24 s, sys: 23 ms, total: 1.27 s
Wall time: 1.26 s


In [11]:
type(relations_cnt)
len(relations_cnt)

dict

210979

In [12]:
G = nx.DiGraph()

In [13]:
def get_network(ls):
    ids = list(ls.keys())
    cnts = list(ls.values())
    for i in range(len(ls)):
        G.add_edge(ids[i][0], ids[i][1], weight = cnts[i])

In [14]:
get_network(relations_cnt)

In [17]:
# nx.draw(G)

### 6.2 network 크기 확인
- 만들어진 network의 node수는 source_acc_id와 target_acc_id의 합집합과 같다

In [15]:
len(G.nodes())

62360

In [16]:
source_ids = trade["source_acc_id"].tolist()
source_ids = list(set(source_ids))
len(source_ids)

57038

In [17]:
target_ids = trade["target_acc_id"].tolist()
target_ids = list(set(target_ids))
len(target_ids)

43031

In [18]:
len(list(set(source_ids) | set(target_ids)))

62360

### 6.3 indegree, outdegree centrality 구하기

In [82]:
indegree_centrality = nx.in_degree_centrality(G)
type(indegree_centrality)

dict

In [83]:
outdegree_centrality = nx.out_degree_centrality(G)
type(outdegree_centrality)

dict

In [84]:
in_cent_df = pd.DataFrame(columns=["acc_id","indegree_cent"])
in_cent_df

Unnamed: 0,acc_id,indegree_cent


In [85]:
in_cent_df["acc_id"] = indegree_centrality.keys()

In [86]:
in_cent_df["indegree_cent"] = indegree_centrality.values()

In [88]:
out_cent_df = pd.DataFrame(columns=["acc_id","outdegree_cent"])
out_cent_df

Unnamed: 0,acc_id,outdegree_cent


In [89]:
out_cent_df["acc_id"] = outdegree_centrality.keys()

In [90]:
out_cent_df["outdegree_cent"] = outdegree_centrality.values()

In [92]:
degree_cent = pd.merge(in_cent_df, out_cent_df, on="acc_id")

In [23]:
trade_1 = pd.merge(trade_1, degree_cent, how = 'left').fillna(0)

## 7. 결과 dataframe

In [51]:
print(trade_1.shape)
trade_1.describe()

(40000, 63)


Unnamed: 0,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,sell_cnt_w8,...,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week,buy_last_week,indegree_cent,outdegree_cent
count,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,...,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0
mean,60.81855,42.424675,5.441475,6.20405,6.36015,7.717125,7.542475,7.71365,9.34755,10.492075,...,-0.056523,-1.088413,1.931054,-0.056437,1.205275,2.882825,1.0943,1.8629,7e-06,7e-06
std,210.080216,2598.366364,24.460543,25.374509,24.20723,30.138617,29.982102,28.337581,35.317115,40.012127,...,0.003604,73.046053,94.605369,0.002858,2.20639,3.717915,2.33434,3.21813,4e-05,3.7e-05
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.338075,-9081.164605,-8805.945196,-0.225384,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.056346,-0.056346,-0.056346,0.0,0.0,0.0,0.0,0.0,0.0
75%,22.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,...,-0.056346,-0.056346,-0.056346,-0.056346,1.0,8.0,1.0,4.0,0.0,0.0
max,11229.0,322922.0,1458.0,1620.0,1460.0,1254.0,1424.0,1311.0,2032.0,3738.0,...,-0.056346,-0.056287,11146.427401,-0.056346,8.0,8.0,8.0,8.0,0.002964,0.002964


In [52]:
trade_1.to_csv("../data/test_trade.csv", index = False)

In [53]:
pickle.dump(trade_1,open('../data/test_trade.pkl','wb'))

In [54]:
test_trade = pickle.load(open('../data/test_trade.pkl','rb'))

In [55]:
test_trade.tail()

Unnamed: 0,acc_id,sell_cnt,buy_cnt,sell_cnt_w1,sell_cnt_w2,sell_cnt_w3,sell_cnt_w4,sell_cnt_w5,sell_cnt_w6,sell_cnt_w7,...,buy_amount_gem,buy_amount_grocery,buy_amount_money,buy_amount_weapon,sell_1st_week,sell_last_week,buy_1st_week,buy_last_week,indegree_cent,outdegree_cent
39995,d048b24f5205a50916339d24fc9f7b1c69352d2ee10f1c...,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.056346,-0.112692,-0.056346,-0.056346,8.0,8.0,8.0,8.0,0.0,0.0
39996,ad5b3ef19e64aceb9ffea55310607ba62146218e616a83...,118.0,0.0,0.0,0.0,0.0,16.0,28.0,32.0,26.0,...,-0.056346,-0.056346,-0.056346,-0.056346,4.0,8.0,0.0,0.0,0.0,1.5e-05
39997,972114a3bb57c377d162fb66b9198a2eec40de9576af51...,358.0,4.0,0.0,0.0,0.0,4.0,0.0,138.0,158.0,...,-0.056346,-0.11269,-0.112171,-0.056346,4.0,8.0,5.0,5.0,0.0,1.5e-05
39998,3d10d680df3d5fa2e370997c18274bad7355e3d0ff9fb0...,5.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,...,-0.056346,-0.056346,0.990764,-0.056346,1.0,7.0,1.0,8.0,7e-06,0.0
39999,813ad8039e2692aecc1b7875f3a9b1e951477937ec30bd...,18.0,2.0,0.0,2.0,4.0,0.0,0.0,6.0,6.0,...,-0.056346,-0.056346,27.684964,-0.056346,2.0,7.0,2.0,2.0,0.0,0.0
