In [1]:
import numpy as np
import pandas as pd
import datetime
from tqdm import tqdm
import time
from datetime import datetime, timedelta
import shutil
import warnings
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import statsmodels.tsa.api as tsa
import torch
from pandarallel import pandarallel
import seaborn as sns
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.preprocessing import StandardScaler



In [2]:
# Load the pre-computed feature arrays and the label vector from tmp/.
# The unpacking order matches the tuple of paths below: order features,
# page features, metric features, labels.
X_order, X_page, X_metric, y = map(
    np.load,
    (
        'tmp/sorted_order.npy',
        'tmp/sorted_page_day60.npy',
        'tmp/sorted_metric.npy',
        'tmp/sorted_label.npy',
    ),
)

In [None]:
# Stratified k-fold evaluation of one baseline classifier on one feature set.
#
# Reads the arrays loaded earlier in the notebook (X_order, X_page, X_metric, y),
# trains a fresh model on every fold, prints the per-fold classification report
# and collects precision / accuracy / F1 / AUC in precision_list, acc_list,
# f1_list and auc_list (one entry per fold).
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    roc_auc_score,
)
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler

# ----------------------------- configuration -----------------------------
k = 10            # number of stratified folds
baseline = 'RF'   # 'RF' | 'XGB' | 'SVM' | 'Logistic Regression' | 'LDA' | 'MLP' | 'LSTM'
data = 'all'      # 'order' | 'page' | 'metric' | 'both' | 'all'
scale = False     # min-max scale the order features (fitted on the training fold only)
seed = 42         # fixes the fold assignment and the stochastic baselines


def _make_model(name, random_state=None):
    """Return a fresh, unfitted estimator for the baseline called ``name``.

    Imports stay local to each branch so that optional dependencies
    (xgboost, the project-local LSTM) are only required when selected.

    Raises:
        ValueError: if ``name`` is not a known baseline.
    """
    if name == 'RF':
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier(n_estimators=16, random_state=random_state)
    if name == 'XGB':
        import xgboost
        return xgboost.XGBClassifier(
            n_estimators=6, max_depth=18, learning_rate=0.2,
            objective='binary:logistic', subsample=0.5,
            scale_pos_weight=2, base_score=0.5, random_state=random_state,
        )
    if name == 'SVM':
        from sklearn.svm import SVC
        return SVC(C=1.0, kernel='rbf')
    if name == 'Logistic Regression':
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression(penalty='l2')
    if name == 'LDA':
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
        return LinearDiscriminantAnalysis()
    if name == 'MLP':
        from sklearn.neural_network import MLPClassifier
        return MLPClassifier(hidden_layer_sizes=[32, 32], max_iter=30,
                             batch_size=64, random_state=random_state)
    if name == 'LSTM':
        from models.LSTM import LSTMExp
        return LSTMExp()
    raise ValueError("unknown baseline: {!r}".format(name))


def _flatten(x):
    """Collapse every axis after the first so each sample becomes one row."""
    return x.reshape(x.shape[0], -1)


def _select_features(kind, order, page, metric):
    """Build the 2-D design matrix for the feature set called ``kind``.

    ``order`` is used as given; ``page`` and ``metric`` are flattened per sample.

    Raises:
        ValueError: if ``kind`` is not a known feature set.
    """
    if kind == 'order':
        return order
    if kind == 'page':
        return _flatten(page)
    if kind == 'metric':
        return _flatten(metric)
    if kind == 'both':
        return np.hstack((order, _flatten(page)))
    if kind == 'all':
        return np.hstack((order, _flatten(page), _flatten(metric)))
    raise ValueError("unknown data selection: {!r}".format(kind))


# Reset the collectors so re-running the cell does not append to old results.
f1_list = []
auc_list = []
precision_list = []
acc_list = []

sfolder = StratifiedKFold(n_splits=k, shuffle=True, random_state=seed)
for fold_count, (train_index, test_index) in enumerate(
        sfolder.split(np.arange(y.shape[0]), y), start=1):
    print("--------------------- fold {} ----------------------".format(fold_count))

    # Fancy indexing copies the selected rows, replacing the per-sample loops.
    X_train_order, X_test_order = X_order[train_index], X_order[test_index]
    X_train_page, X_test_page = X_page[train_index], X_page[test_index]
    X_train_metric, X_test_metric = X_metric[train_index], X_metric[test_index]
    y_train, y_test = y[train_index], y[test_index]

    if scale:
        # Fit on the training fold only so no test statistics leak into training.
        min_max_scaler = MinMaxScaler()
        X_train_order = min_max_scaler.fit_transform(X_train_order)
        X_test_order = min_max_scaler.transform(X_test_order)

    # Tensor copies of the fold. They are not consumed in this cell; kept in
    # case later cells read them from the notebook namespace.
    X_train_order_t = torch.tensor(X_train_order, dtype=torch.float32)
    X_train_page_t = torch.tensor(X_train_page, dtype=torch.float32)
    X_train_metric_t = torch.tensor(X_train_metric, dtype=torch.float32)

    X_test_order_t = torch.tensor(X_test_order, dtype=torch.float32)
    X_test_page_t = torch.tensor(X_test_page, dtype=torch.float32)
    X_test_metric_t = torch.tensor(X_test_metric, dtype=torch.float32)

    y_train_t = torch.tensor(y_train, dtype=torch.long)
    y_test_t = torch.tensor(y_test, dtype=torch.long)

    # A new estimator per fold so nothing learned on one fold carries over.
    model = _make_model(baseline, random_state=seed)
    X_train_feat = _select_features(data, X_train_order, X_train_page, X_train_metric)
    X_test_feat = _select_features(data, X_test_order, X_test_page, X_test_metric)
    model.fit(X_train_feat, y_train)
    pre_test = model.predict(X_test_feat)

    print(classification_report(y_test, pre_test, digits=3))

    # Computed directly instead of slicing the report text at fixed character
    # offsets, which silently breaks when label names or column widths change.
    # Assumes binary labels with positive class 1 (f1_score's default does too).
    precision = precision_score(y_test, pre_test)
    acc = accuracy_score(y_test, pre_test)
    f1 = f1_score(y_test, pre_test)
    # NOTE(review): AUC is computed from hard class predictions, not scores or
    # probabilities, so it only reflects a single operating point.
    auc = roc_auc_score(y_test, pre_test)

    f1_list.append(f1)
    auc_list.append(auc)
    precision_list.append(precision)
    acc_list.append(acc)

In [None]:
# Per-fold F1 scores collected by the cross-validation loop (one entry per fold).
f1_list

In [None]:
# Per-fold ROC-AUC values collected by the cross-validation loop (one entry per fold).
auc_list

In [None]:
# Per-fold precision values collected by the cross-validation loop (one entry per fold).
precision_list

In [None]:
# Per-fold accuracy values collected by the cross-validation loop (one entry per fold).
acc_list

In [None]:
# Mean precision across all cross-validation folds.
np.mean(precision_list)

In [None]:
# Mean F1 score across all cross-validation folds.
np.mean(f1_list)

In [None]:
# Mean accuracy across all cross-validation folds.
np.mean(acc_list)

In [None]:
# Mean ROC-AUC across all cross-validation folds.
np.mean(auc_list)