In [48]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn

### Personal + Behavior

In [174]:
df = pd.read_excel('personal_and_behavioral.xlsx')
ans = np.array((df['REPURCHASE']))

In [175]:
X_train, X_test, y_train, y_test = train_test_split(df, ans, test_size=0.2, random_state=10)

In [176]:
train_majority = X_train[X_train.REPURCHASE==0] #68262
train_minority = X_train[X_train.REPURCHASE==1] #8064

In [177]:
def bootstrap(train_majority, train_minority, seed, n_samples=8064):
    train_majority_downsample = resample(train_majority, replace=False,n_samples=n_samples,random_state=seed)
    train_downsample = pd.concat([train_majority_downsample, train_minority])
    ans = np.array(train_downsample['REPURCHASE'])
    train_downsample = train_downsample.drop(['REPURCHASE'], axis=1)
    return np.array(train_downsample), ans

class build_dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = torch.LongTensor(y)
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)
    
class model(nn.Module):
    def __init__(self, dimension):
        super().__init__()
        
        self.fc = nn.Sequential(
            nn.Linear(dimension, 20),
            nn.BatchNorm1d(20),
            nn.ReLU(),
            
            nn.Linear(20,10),
            nn.BatchNorm1d(10),
            nn.ReLU(),
            
            nn.Linear(10,2),
            nn.Softmax()
        )
        
    def forward(self, x):
        output = self.fc(x)
        return output

In [178]:
#建立dataloader

valid_dataset = build_dataset(np.array(X_test.drop(['REPURCHASE'], axis=1)), y_test)
valid_dataloader = DataLoader(
    dataset = valid_dataset,
    batch_size = 100,
    shuffle = False,
    num_workers = 2
)

seed = [1,2,3,4,5,6,7,8,9,10]
dataloader_list = []
for i in seed:
    data_train, label_train = bootstrap(train_majority, train_minority, i, n_samples=16128)
    repurchase_dataset = build_dataset(data_train, label_train)
    repurchase_dataloader = DataLoader(
    dataset = repurchase_dataset,
    batch_size = 100,
    shuffle = True,
    num_workers = 2
    )
    dataloader_list.append(repurchase_dataloader)
    print('finish')


finish
finish
finish
finish
finish
finish
finish
finish
finish
finish


In [179]:
total_train_loss = []
total_valid_loss = []
number_of_bootstrap = 0

# model setting
dimension = data_train.shape[1]
classifier = model(dimension).cuda()

# parameters
lr = 0.01
num_epochs = 30
CrossEntropy = nn.CrossEntropyLoss()
optim = torch.optim.Adam(classifier.parameters(), lr = lr)

for repurchase_dataloader in dataloader_list:
    number_of_bootstrap += 1
    for epoch in range(num_epochs):

        train_acc = 0
        train_loss = 0
        valid_acc = 0
        valid_loss = 0
        train_counter = 0
        valid_counter = 0

        classifier.train()
        for i, data in enumerate(repurchase_dataloader):

            optim.zero_grad()
            pred = classifier(data[0].float().cuda())
            loss = CrossEntropy(pred, data[1].cuda())
            loss.backward()
            optim.step()

            total_train_loss.append(loss.item())
            train_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += loss.item()


        #evaluation on valid set
        classifier.eval()
        with torch.no_grad():
            for i, data in enumerate(valid_dataloader):
                pred = classifier(data[0].float().cuda())
                loss = CrossEntropy(pred, data[1].cuda())

                total_valid_loss.append(loss.item())
                valid_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                valid_loss += loss.item()

        print("Bootstrap: %02d | Epoch: %02d | train_acc: %6.6f train_loss: %6.6f | valid_acc: %6.6f valid_loss: %6.6f" % \
              (number_of_bootstrap, epoch+1, train_acc/repurchase_dataset.__len__(), train_loss/repurchase_dataset.__len__(), \
               valid_acc/valid_dataset.__len__(), valid_loss/valid_dataset.__len__()))

Bootstrap: 01 | Epoch: 01 | train_acc: 0.718791 train_loss: 0.005784 | valid_acc: 0.830835 valid_loss: 0.004721
Bootstrap: 01 | Epoch: 02 | train_acc: 0.720941 train_loss: 0.005728 | valid_acc: 0.826853 valid_loss: 0.004862
Bootstrap: 01 | Epoch: 03 | train_acc: 0.725198 train_loss: 0.005708 | valid_acc: 0.838906 valid_loss: 0.004810
Bootstrap: 01 | Epoch: 04 | train_acc: 0.722718 train_loss: 0.005706 | valid_acc: 0.841578 valid_loss: 0.004704
Bootstrap: 01 | Epoch: 05 | train_acc: 0.724661 train_loss: 0.005700 | valid_acc: 0.841159 valid_loss: 0.004693
Bootstrap: 01 | Epoch: 06 | train_acc: 0.724909 train_loss: 0.005690 | valid_acc: 0.867781 valid_loss: 0.004555
Bootstrap: 01 | Epoch: 07 | train_acc: 0.726025 train_loss: 0.005690 | valid_acc: 0.850016 valid_loss: 0.004743
Bootstrap: 01 | Epoch: 08 | train_acc: 0.726397 train_loss: 0.005685 | valid_acc: 0.834242 valid_loss: 0.004782
Bootstrap: 01 | Epoch: 09 | train_acc: 0.726480 train_loss: 0.005674 | valid_acc: 0.830102 valid_loss: 0

Bootstrap: 03 | Epoch: 15 | train_acc: 0.741691 train_loss: 0.005552 | valid_acc: 0.843937 valid_loss: 0.004798
Bootstrap: 03 | Epoch: 16 | train_acc: 0.743428 train_loss: 0.005548 | valid_acc: 0.846557 valid_loss: 0.004744
Bootstrap: 03 | Epoch: 17 | train_acc: 0.742560 train_loss: 0.005550 | valid_acc: 0.836495 valid_loss: 0.004814
Bootstrap: 03 | Epoch: 18 | train_acc: 0.743180 train_loss: 0.005543 | valid_acc: 0.853055 valid_loss: 0.004726
Bootstrap: 03 | Epoch: 19 | train_acc: 0.744833 train_loss: 0.005543 | valid_acc: 0.848653 valid_loss: 0.004726
Bootstrap: 03 | Epoch: 20 | train_acc: 0.742973 train_loss: 0.005547 | valid_acc: 0.825438 valid_loss: 0.004883
Bootstrap: 03 | Epoch: 21 | train_acc: 0.742518 train_loss: 0.005555 | valid_acc: 0.844408 valid_loss: 0.004759
Bootstrap: 03 | Epoch: 22 | train_acc: 0.744337 train_loss: 0.005535 | valid_acc: 0.809559 valid_loss: 0.005237
Bootstrap: 03 | Epoch: 23 | train_acc: 0.744750 train_loss: 0.005529 | valid_acc: 0.844513 valid_loss: 0

Bootstrap: 05 | Epoch: 29 | train_acc: 0.743262 train_loss: 0.005519 | valid_acc: 0.856095 valid_loss: 0.004709
Bootstrap: 05 | Epoch: 30 | train_acc: 0.746486 train_loss: 0.005516 | valid_acc: 0.847029 valid_loss: 0.004795
Bootstrap: 06 | Epoch: 01 | train_acc: 0.740038 train_loss: 0.005566 | valid_acc: 0.851116 valid_loss: 0.004779
Bootstrap: 06 | Epoch: 02 | train_acc: 0.741981 train_loss: 0.005558 | valid_acc: 0.848391 valid_loss: 0.004754
Bootstrap: 06 | Epoch: 03 | train_acc: 0.742477 train_loss: 0.005546 | valid_acc: 0.861073 valid_loss: 0.004650
Bootstrap: 06 | Epoch: 04 | train_acc: 0.739955 train_loss: 0.005547 | valid_acc: 0.851378 valid_loss: 0.004720
Bootstrap: 06 | Epoch: 05 | train_acc: 0.743428 train_loss: 0.005535 | valid_acc: 0.851326 valid_loss: 0.004748
Bootstrap: 06 | Epoch: 06 | train_acc: 0.743014 train_loss: 0.005533 | valid_acc: 0.850278 valid_loss: 0.004761
Bootstrap: 06 | Epoch: 07 | train_acc: 0.743924 train_loss: 0.005531 | valid_acc: 0.855413 valid_loss: 0

Bootstrap: 08 | Epoch: 13 | train_acc: 0.745040 train_loss: 0.005519 | valid_acc: 0.848182 valid_loss: 0.004785
Bootstrap: 08 | Epoch: 14 | train_acc: 0.743221 train_loss: 0.005516 | valid_acc: 0.849754 valid_loss: 0.004765
Bootstrap: 08 | Epoch: 15 | train_acc: 0.745536 train_loss: 0.005510 | valid_acc: 0.841526 valid_loss: 0.004848
Bootstrap: 08 | Epoch: 16 | train_acc: 0.741981 train_loss: 0.005519 | valid_acc: 0.795462 valid_loss: 0.005278
Bootstrap: 08 | Epoch: 17 | train_acc: 0.742973 train_loss: 0.005511 | valid_acc: 0.850592 valid_loss: 0.004758
Bootstrap: 08 | Epoch: 18 | train_acc: 0.744461 train_loss: 0.005503 | valid_acc: 0.853999 valid_loss: 0.004761
Bootstrap: 08 | Epoch: 19 | train_acc: 0.744585 train_loss: 0.005516 | valid_acc: 0.854889 valid_loss: 0.004735
Bootstrap: 08 | Epoch: 20 | train_acc: 0.745494 train_loss: 0.005506 | valid_acc: 0.847029 valid_loss: 0.004774
Bootstrap: 08 | Epoch: 21 | train_acc: 0.746528 train_loss: 0.005501 | valid_acc: 0.827115 valid_loss: 0

Bootstrap: 10 | Epoch: 27 | train_acc: 0.749587 train_loss: 0.005478 | valid_acc: 0.844618 valid_loss: 0.004835
Bootstrap: 10 | Epoch: 28 | train_acc: 0.750248 train_loss: 0.005481 | valid_acc: 0.845404 valid_loss: 0.004770
Bootstrap: 10 | Epoch: 29 | train_acc: 0.749545 train_loss: 0.005474 | valid_acc: 0.841683 valid_loss: 0.004852
Bootstrap: 10 | Epoch: 30 | train_acc: 0.749917 train_loss: 0.005486 | valid_acc: 0.855990 valid_loss: 0.004734


In [180]:
X_test_tensor = torch.tensor(np.array(X_test.drop(['REPURCHASE'], axis=1))).float().cuda()

model_predict = classifier(X_test_tensor)
model_predict = np.argmax(model_predict.cpu().data.numpy(), axis=1)
#print("True repurchase number: %d | Predict repurchase number: %d" %(sum(y_test), sum(model_predict)))

total_acc = 0
partial_acc = 0
for i in range(model_predict.shape[0]):
    if model_predict[i] == y_test[i]:
        total_acc +=1
        if model_predict[i] == 1:
            partial_acc += 1
print("再購命中: %2.2f | 整體命中: %2.2f" %(partial_acc / sum(y_test), total_acc / y_test.shape[0]))

再購命中: 0.33 | 整體命中: 0.86


In [181]:
sum(model_predict)

2053

### Only Personal

In [182]:
df = pd.read_excel('personal.xlsx')
ans = np.array((df['REPURCHASE']))

In [183]:
X_train, X_test, y_train, y_test = train_test_split(df, ans, test_size=0.2, random_state=10)

In [184]:
train_majority = X_train[X_train.REPURCHASE==0] 
train_minority = X_train[X_train.REPURCHASE==1] 

In [185]:
#建立dataloader

valid_dataset = build_dataset(np.array(X_test.drop(['REPURCHASE'], axis=1)), y_test)
valid_dataloader = DataLoader(
    dataset = valid_dataset,
    batch_size = 100,
    shuffle = False,
    num_workers = 2
)

seed = [1,2,3,4,5,6,7,8,9,10]
dataloader_list = []
for i in seed:
    data_train, label_train = bootstrap(train_majority, train_minority, i, n_samples=16128)
    repurchase_dataset = build_dataset(data_train, label_train)
    repurchase_dataloader = DataLoader(
    dataset = repurchase_dataset,
    batch_size = 100,
    shuffle = True,
    num_workers = 2
    )
    dataloader_list.append(repurchase_dataloader)
    print('finish')


finish
finish
finish
finish
finish
finish
finish
finish
finish
finish


In [186]:
total_train_loss = []
total_valid_loss = []
number_of_bootstrap = 0

# model setting
dimension = data_train.shape[1]
classifier = model(dimension).cuda()

# parameters
lr = 0.01
num_epochs = 30
CrossEntropy = nn.CrossEntropyLoss()
optim = torch.optim.Adam(classifier.parameters(), lr = lr)

for repurchase_dataloader in dataloader_list:
    number_of_bootstrap += 1
    for epoch in range(num_epochs):

        train_acc = 0
        train_loss = 0
        valid_acc = 0
        valid_loss = 0
        train_counter = 0
        valid_counter = 0

        classifier.train()
        for i, data in enumerate(repurchase_dataloader):

            optim.zero_grad()
            pred = classifier(data[0].float().cuda())
            loss = CrossEntropy(pred, data[1].cuda())
            loss.backward()
            optim.step()

            total_train_loss.append(loss.item())
            train_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += loss.item()


        #evaluation on valid set
        classifier.eval()
        with torch.no_grad():
            for i, data in enumerate(valid_dataloader):
                pred = classifier(data[0].float().cuda())
                loss = CrossEntropy(pred, data[1].cuda())

                total_valid_loss.append(loss.item())
                valid_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                valid_loss += loss.item()

        print("Bootstrap: %02d | Epoch: %02d | train_acc: %6.6f train_loss: %6.6f | valid_acc: %6.6f valid_loss: %6.6f" % \
              (number_of_bootstrap, epoch+1, train_acc/repurchase_dataset.__len__(), train_loss/repurchase_dataset.__len__(), \
               valid_acc/valid_dataset.__len__(), valid_loss/valid_dataset.__len__()))

Bootstrap: 01 | Epoch: 01 | train_acc: 0.664765 train_loss: 0.006281 | valid_acc: 0.893460 valid_loss: 0.004840
Bootstrap: 01 | Epoch: 02 | train_acc: 0.666625 train_loss: 0.006255 | valid_acc: 0.893460 valid_loss: 0.004791
Bootstrap: 01 | Epoch: 03 | train_acc: 0.666667 train_loss: 0.006254 | valid_acc: 0.893460 valid_loss: 0.004766
Bootstrap: 01 | Epoch: 04 | train_acc: 0.666667 train_loss: 0.006252 | valid_acc: 0.893460 valid_loss: 0.004863
Bootstrap: 01 | Epoch: 05 | train_acc: 0.666667 train_loss: 0.006251 | valid_acc: 0.893460 valid_loss: 0.004802
Bootstrap: 01 | Epoch: 06 | train_acc: 0.666667 train_loss: 0.006248 | valid_acc: 0.893460 valid_loss: 0.004694
Bootstrap: 01 | Epoch: 07 | train_acc: 0.666667 train_loss: 0.006247 | valid_acc: 0.893460 valid_loss: 0.004751
Bootstrap: 01 | Epoch: 08 | train_acc: 0.666667 train_loss: 0.006245 | valid_acc: 0.893460 valid_loss: 0.004862
Bootstrap: 01 | Epoch: 09 | train_acc: 0.666667 train_loss: 0.006244 | valid_acc: 0.893460 valid_loss: 0

Bootstrap: 03 | Epoch: 15 | train_acc: 0.666667 train_loss: 0.006235 | valid_acc: 0.893460 valid_loss: 0.004900
Bootstrap: 03 | Epoch: 16 | train_acc: 0.666667 train_loss: 0.006236 | valid_acc: 0.893460 valid_loss: 0.004766
Bootstrap: 03 | Epoch: 17 | train_acc: 0.667163 train_loss: 0.006235 | valid_acc: 0.874594 valid_loss: 0.004910
Bootstrap: 03 | Epoch: 18 | train_acc: 0.667783 train_loss: 0.006231 | valid_acc: 0.893460 valid_loss: 0.004791
Bootstrap: 03 | Epoch: 19 | train_acc: 0.666708 train_loss: 0.006240 | valid_acc: 0.893460 valid_loss: 0.004765
Bootstrap: 03 | Epoch: 20 | train_acc: 0.666667 train_loss: 0.006234 | valid_acc: 0.893460 valid_loss: 0.004865
Bootstrap: 03 | Epoch: 21 | train_acc: 0.666791 train_loss: 0.006238 | valid_acc: 0.893460 valid_loss: 0.004817
Bootstrap: 03 | Epoch: 22 | train_acc: 0.666708 train_loss: 0.006236 | valid_acc: 0.893460 valid_loss: 0.004727
Bootstrap: 03 | Epoch: 23 | train_acc: 0.666667 train_loss: 0.006235 | valid_acc: 0.893460 valid_loss: 0

Bootstrap: 05 | Epoch: 29 | train_acc: 0.666088 train_loss: 0.006234 | valid_acc: 0.893460 valid_loss: 0.004758
Bootstrap: 05 | Epoch: 30 | train_acc: 0.666419 train_loss: 0.006230 | valid_acc: 0.893460 valid_loss: 0.004796
Bootstrap: 06 | Epoch: 01 | train_acc: 0.666873 train_loss: 0.006226 | valid_acc: 0.893460 valid_loss: 0.004791
Bootstrap: 06 | Epoch: 02 | train_acc: 0.666129 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004701
Bootstrap: 06 | Epoch: 03 | train_acc: 0.666336 train_loss: 0.006226 | valid_acc: 0.893460 valid_loss: 0.004744
Bootstrap: 06 | Epoch: 04 | train_acc: 0.666956 train_loss: 0.006229 | valid_acc: 0.893460 valid_loss: 0.004868
Bootstrap: 06 | Epoch: 05 | train_acc: 0.666253 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004802
Bootstrap: 06 | Epoch: 06 | train_acc: 0.666997 train_loss: 0.006224 | valid_acc: 0.893460 valid_loss: 0.004892
Bootstrap: 06 | Epoch: 07 | train_acc: 0.666708 train_loss: 0.006226 | valid_acc: 0.893460 valid_loss: 0

Bootstrap: 08 | Epoch: 13 | train_acc: 0.666667 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004806
Bootstrap: 08 | Epoch: 14 | train_acc: 0.667245 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004792
Bootstrap: 08 | Epoch: 15 | train_acc: 0.666915 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004858
Bootstrap: 08 | Epoch: 16 | train_acc: 0.666667 train_loss: 0.006228 | valid_acc: 0.893460 valid_loss: 0.004749
Bootstrap: 08 | Epoch: 17 | train_acc: 0.666295 train_loss: 0.006228 | valid_acc: 0.893460 valid_loss: 0.004704
Bootstrap: 08 | Epoch: 18 | train_acc: 0.666832 train_loss: 0.006228 | valid_acc: 0.880883 valid_loss: 0.004907
Bootstrap: 08 | Epoch: 19 | train_acc: 0.666253 train_loss: 0.006228 | valid_acc: 0.893460 valid_loss: 0.004847
Bootstrap: 08 | Epoch: 20 | train_acc: 0.666253 train_loss: 0.006227 | valid_acc: 0.893460 valid_loss: 0.004737
Bootstrap: 08 | Epoch: 21 | train_acc: 0.666253 train_loss: 0.006225 | valid_acc: 0.881249 valid_loss: 0

Bootstrap: 10 | Epoch: 27 | train_acc: 0.666749 train_loss: 0.006228 | valid_acc: 0.893460 valid_loss: 0.004764
Bootstrap: 10 | Epoch: 28 | train_acc: 0.666253 train_loss: 0.006229 | valid_acc: 0.883450 valid_loss: 0.004897
Bootstrap: 10 | Epoch: 29 | train_acc: 0.666584 train_loss: 0.006234 | valid_acc: 0.893460 valid_loss: 0.004798
Bootstrap: 10 | Epoch: 30 | train_acc: 0.666625 train_loss: 0.006230 | valid_acc: 0.893460 valid_loss: 0.004739


In [187]:
X_test_tensor = torch.tensor(np.array(X_test.drop(['REPURCHASE'], axis=1))).float().cuda()

model_predict = classifier(X_test_tensor)
model_predict = np.argmax(model_predict.cpu().data.numpy(), axis=1)
#print("True repurchase number: %d | Predict repurchase number: %d" %(sum(y_test), sum(model_predict)))

total_acc = 0
partial_acc = 0
for i in range(model_predict.shape[0]):
    if model_predict[i] == y_test[i]:
        total_acc +=1
        if model_predict[i] == 1:
            partial_acc += 1
print("再購命中: %2.2f | 整體命中: %2.2f" %(partial_acc / sum(y_test), total_acc / y_test.shape[0]))

再購命中: 0.00 | 整體命中: 0.89


In [188]:
sum(model_predict)

0

### only behavior

In [189]:
df = pd.read_excel('behavioral.xlsx')
ans = np.array((df['REPURCHASE']))

In [190]:
X_train, X_test, y_train, y_test = train_test_split(df, ans, test_size=0.2, random_state=10)

In [191]:
train_majority = X_train[X_train.REPURCHASE==0] 
train_minority = X_train[X_train.REPURCHASE==1] 

In [192]:
#建立dataloader

valid_dataset = build_dataset(np.array(X_test.drop(['REPURCHASE'], axis=1)), y_test)
valid_dataloader = DataLoader(
    dataset = valid_dataset,
    batch_size = 100,
    shuffle = False,
    num_workers = 2
)

seed = [1,2,3,4,5,6,7,8,9,10]
dataloader_list = []
for i in seed:
    data_train, label_train = bootstrap(train_majority, train_minority, i, n_samples=16128)
    repurchase_dataset = build_dataset(data_train, label_train)
    repurchase_dataloader = DataLoader(
    dataset = repurchase_dataset,
    batch_size = 100,
    shuffle = True,
    num_workers = 2
    )
    dataloader_list.append(repurchase_dataloader)
    print('finish')

finish
finish
finish
finish
finish
finish
finish
finish
finish
finish


In [193]:
total_train_loss = []
total_valid_loss = []
number_of_bootstrap = 0

# model setting
dimension = data_train.shape[1]
classifier = model(dimension).cuda()

# parameters
lr = 0.01
num_epochs = 30
CrossEntropy = nn.CrossEntropyLoss()
optim = torch.optim.Adam(classifier.parameters(), lr = lr)

for repurchase_dataloader in dataloader_list:
    number_of_bootstrap += 1
    for epoch in range(num_epochs):

        train_acc = 0
        train_loss = 0
        valid_acc = 0
        valid_loss = 0
        train_counter = 0
        valid_counter = 0

        classifier.train()
        for i, data in enumerate(repurchase_dataloader):

            optim.zero_grad()
            pred = classifier(data[0].float().cuda())
            loss = CrossEntropy(pred, data[1].cuda())
            loss.backward()
            optim.step()

            total_train_loss.append(loss.item())
            train_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += loss.item()


        #evaluation on valid set
        classifier.eval()
        with torch.no_grad():
            for i, data in enumerate(valid_dataloader):
                pred = classifier(data[0].float().cuda())
                loss = CrossEntropy(pred, data[1].cuda())

                total_valid_loss.append(loss.item())
                valid_acc += np.sum(np.argmax(pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                valid_loss += loss.item()

        print("Bootstrap: %02d | Epoch: %02d | train_acc: %6.6f train_loss: %6.6f | valid_acc: %6.6f valid_loss: %6.6f" % \
              (number_of_bootstrap, epoch+1, train_acc/repurchase_dataset.__len__(), train_loss/repurchase_dataset.__len__(), \
               valid_acc/valid_dataset.__len__(), valid_loss/valid_dataset.__len__()))

Bootstrap: 01 | Epoch: 01 | train_acc: 0.713748 train_loss: 0.005835 | valid_acc: 0.857143 valid_loss: 0.004794
Bootstrap: 01 | Epoch: 02 | train_acc: 0.716725 train_loss: 0.005774 | valid_acc: 0.842627 valid_loss: 0.004835
Bootstrap: 01 | Epoch: 03 | train_acc: 0.715815 train_loss: 0.005772 | valid_acc: 0.867886 valid_loss: 0.004712
Bootstrap: 01 | Epoch: 04 | train_acc: 0.716394 train_loss: 0.005767 | valid_acc: 0.842365 valid_loss: 0.004788
Bootstrap: 01 | Epoch: 05 | train_acc: 0.717717 train_loss: 0.005763 | valid_acc: 0.866681 valid_loss: 0.004639
Bootstrap: 01 | Epoch: 06 | train_acc: 0.717593 train_loss: 0.005760 | valid_acc: 0.863851 valid_loss: 0.004815
Bootstrap: 01 | Epoch: 07 | train_acc: 0.720362 train_loss: 0.005756 | valid_acc: 0.851326 valid_loss: 0.004929
Bootstrap: 01 | Epoch: 08 | train_acc: 0.718171 train_loss: 0.005758 | valid_acc: 0.809664 valid_loss: 0.004914
Bootstrap: 01 | Epoch: 09 | train_acc: 0.719163 train_loss: 0.005750 | valid_acc: 0.840111 valid_loss: 0

Bootstrap: 03 | Epoch: 15 | train_acc: 0.723669 train_loss: 0.005708 | valid_acc: 0.861440 valid_loss: 0.004731
Bootstrap: 03 | Epoch: 16 | train_acc: 0.723793 train_loss: 0.005710 | valid_acc: 0.840583 valid_loss: 0.004819
Bootstrap: 03 | Epoch: 17 | train_acc: 0.722801 train_loss: 0.005711 | valid_acc: 0.852164 valid_loss: 0.004693
Bootstrap: 03 | Epoch: 18 | train_acc: 0.725198 train_loss: 0.005706 | valid_acc: 0.850330 valid_loss: 0.004737
Bootstrap: 03 | Epoch: 19 | train_acc: 0.725364 train_loss: 0.005701 | valid_acc: 0.841893 valid_loss: 0.004787
Bootstrap: 03 | Epoch: 20 | train_acc: 0.723380 train_loss: 0.005700 | valid_acc: 0.862750 valid_loss: 0.004754
Bootstrap: 03 | Epoch: 21 | train_acc: 0.724826 train_loss: 0.005709 | valid_acc: 0.850383 valid_loss: 0.004731
Bootstrap: 03 | Epoch: 22 | train_acc: 0.725322 train_loss: 0.005700 | valid_acc: 0.853999 valid_loss: 0.004773
Bootstrap: 03 | Epoch: 23 | train_acc: 0.725984 train_loss: 0.005698 | valid_acc: 0.843884 valid_loss: 0

Bootstrap: 05 | Epoch: 29 | train_acc: 0.727803 train_loss: 0.005683 | valid_acc: 0.853894 valid_loss: 0.004774
Bootstrap: 05 | Epoch: 30 | train_acc: 0.726852 train_loss: 0.005689 | valid_acc: 0.862593 valid_loss: 0.004689
Bootstrap: 06 | Epoch: 01 | train_acc: 0.727224 train_loss: 0.005688 | valid_acc: 0.850802 valid_loss: 0.004755
Bootstrap: 06 | Epoch: 02 | train_acc: 0.728051 train_loss: 0.005677 | valid_acc: 0.852217 valid_loss: 0.004774
Bootstrap: 06 | Epoch: 03 | train_acc: 0.728133 train_loss: 0.005683 | valid_acc: 0.854103 valid_loss: 0.004752
Bootstrap: 06 | Epoch: 04 | train_acc: 0.727596 train_loss: 0.005675 | valid_acc: 0.855466 valid_loss: 0.004745
Bootstrap: 06 | Epoch: 05 | train_acc: 0.728464 train_loss: 0.005670 | valid_acc: 0.852269 valid_loss: 0.004795
Bootstrap: 06 | Epoch: 06 | train_acc: 0.727348 train_loss: 0.005673 | valid_acc: 0.856252 valid_loss: 0.004745
Bootstrap: 06 | Epoch: 07 | train_acc: 0.729208 train_loss: 0.005669 | valid_acc: 0.851798 valid_loss: 0

Bootstrap: 08 | Epoch: 13 | train_acc: 0.726356 train_loss: 0.005676 | valid_acc: 0.860549 valid_loss: 0.004679
Bootstrap: 08 | Epoch: 14 | train_acc: 0.727224 train_loss: 0.005673 | valid_acc: 0.868410 valid_loss: 0.004691
Bootstrap: 08 | Epoch: 15 | train_acc: 0.726604 train_loss: 0.005684 | valid_acc: 0.840321 valid_loss: 0.004849
Bootstrap: 08 | Epoch: 16 | train_acc: 0.727513 train_loss: 0.005674 | valid_acc: 0.847867 valid_loss: 0.004835
Bootstrap: 08 | Epoch: 17 | train_acc: 0.728464 train_loss: 0.005678 | valid_acc: 0.848915 valid_loss: 0.004802
Bootstrap: 08 | Epoch: 18 | train_acc: 0.727844 train_loss: 0.005672 | valid_acc: 0.853317 valid_loss: 0.004753
Bootstrap: 08 | Epoch: 19 | train_acc: 0.728795 train_loss: 0.005676 | valid_acc: 0.836390 valid_loss: 0.004935
Bootstrap: 08 | Epoch: 20 | train_acc: 0.727348 train_loss: 0.005678 | valid_acc: 0.857772 valid_loss: 0.004763
Bootstrap: 08 | Epoch: 21 | train_acc: 0.726480 train_loss: 0.005676 | valid_acc: 0.839011 valid_loss: 0

Bootstrap: 10 | Epoch: 27 | train_acc: 0.731275 train_loss: 0.005662 | valid_acc: 0.849544 valid_loss: 0.004762
Bootstrap: 10 | Epoch: 28 | train_acc: 0.730283 train_loss: 0.005662 | valid_acc: 0.839535 valid_loss: 0.004823
Bootstrap: 10 | Epoch: 29 | train_acc: 0.730903 train_loss: 0.005661 | valid_acc: 0.848024 valid_loss: 0.004785
Bootstrap: 10 | Epoch: 30 | train_acc: 0.731440 train_loss: 0.005657 | valid_acc: 0.848915 valid_loss: 0.004755


In [194]:
X_test_tensor = torch.tensor(np.array(X_test.drop(['REPURCHASE'], axis=1))).float().cuda()

model_predict = classifier(X_test_tensor)
model_predict = np.argmax(model_predict.cpu().data.numpy(), axis=1)
#print("True repurchase number: %d | Predict repurchase number: %d" %(sum(y_test), sum(model_predict)))

total_acc = 0
partial_acc = 0
for i in range(model_predict.shape[0]):
    if model_predict[i] == y_test[i]:
        total_acc +=1
        if model_predict[i] == 1:
            partial_acc += 1
print("再購命中: %2.2f | 整體命中: %2.2f" %(partial_acc / sum(y_test), total_acc / y_test.shape[0]))

再購命中: 0.34 | 整體命中: 0.85


In [195]:
sum(model_predict)

2214