In [10]:
import warnings
warnings.filterwarnings("ignore")
from itertools import combinations

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time 
%matplotlib inline

from sklearn.feature_selection import SelectFromModel

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

In [11]:
"""
    Step 0 : Get Data
    
"""

def GetData(data_path):
    print("Get Data Start")
    operation_train = pd.read_csv(data_path + "operation_train_new.csv")
    transaction_train = pd.read_csv(data_path + "transaction_train_new.csv")
    operation_test = pd.read_csv(data_path + "operation_round1_new.csv")
    transaction_test = pd.read_csv(data_path + "transaction_round1_new.csv")
    tag_train = pd.read_csv(data_path + "tag_train_new.csv")
    tag_test = pd.read_csv("../Data/submission_example.csv")
    print("Get Data Done")
    return operation_train, transaction_train, tag_train, operation_test, transaction_test, tag_test

In [12]:
"""
    Step 1 : Data Exploration
    
    Categorical Columns : countplot, barplot
    Numerical Columns : regplot
    
"""

def DataExploration(data, tag = None):
    print("Data Exploration Start")
    plt.figure()
    plt.title("UID_count")
    uids = np.sort(data["UID"].unique())
    count = data.groupby("UID")["time"].agg(["count"])["count"]
    sns.regplot(x = uids, y = count)
    plt.show()
#     for column in data.columns:
#         if column != "UID" and data[column].nunique() < 50:
#             plt.figure()
#             sns.countplot(x = column, data = data)
#             plt.show()

    if tag is not None:
        uid_tag_dict = dict(zip(tag["UID"], tag["Tag"]))
        data["tag"] = data["UID"].map(uid_tag_dict)
        for column in data.columns:
            if data[column].unique().shape[0] < 50 and column != "tag":
                plt.figure()
                sns.barplot(x = column, y = "tag", data = data.sample(100000))
                plt.show()
        
    print("Data Exploration Done")

In [13]:
""" 
    Step 2 : Data Preprocessing 

    Drop Duplicates
    Drop Outliers: After Data Exploration 
    
"""

def DataPreprocessing(data):
    print("Data Preprocessing Start")
    data = data[data["UID"] != 17520]
    data = data.drop_duplicates()
    print("Data Preprocessing Done")
    return data

In [14]:
"""
    Step 3 : Feature Engineering
    
    Feature Creation
    Feature Selection
    
"""

def FeatureCreation(data, tag):
    print("Feature Creation Start")
    data["hour"] = data["time"].apply(lambda x : int(x[:2]))
    features = pd.DataFrame(tag["UID"])
    
    # Feature Creation 1: column_nunique
    for column in data.columns:
        if column != "UID":
            print("Create " + column + "_nunique...")
            column_nunique = data.groupby("UID")[column].agg(["nunique"]).reset_index().rename(columns = {"nunique": column + "_nunique"})
            features = features.merge(column_nunique, on = "UID", how = "left")

    # Feature Creation 2: column_nunique_UID and column_count_UID
    data_copy = data.copy()
    for column in data.columns: 
        if column != "UID":
            print("Create " + column + "_nunique_UID and " + column + "_count_UID...")
#             column_nunique_count = data_copy.groupby(column)["UID"].agg(["nunique", "count"]).reset_index().rename(columns = {"nunique": column + "_nunique_UID", "count": column + "_count_UID"})
            column_nunique_count = data.groupby(column)["UID"].agg(["nunique", "count"]).reset_index().rename(columns = {"nunique": column + "_nunique_UID", "count": column + "_count_UID"})
            data_copy = data_copy.merge(column_nunique_count, on = column, how = "left")

    column_nunique = [col + "_nunique_UID" for col in data.columns if col != "UID"]
    column_count = [col + "_count_UID" for col in data.columns if col != "UID"]
    columns = column_nunique + column_count
    
    for column in columns:
        print("Create " + column + "...")
        column_nunique_count_UID = data_copy.groupby("UID")[column].agg(["max", "min", "mean"]).reset_index().rename(columns = {"max": column + "_max", "min": column + "_min", "mean": column + "_mean"})
        features = features.merge(column_nunique_count_UID, on = "UID", how = "left")
    
    # Feature Creation 3: day_frequency, hour_frequency
    day_frequency = data.groupby(["UID", "day"])["time"].agg(["count"]).reset_index().groupby("UID")["count"].agg(["max", "min", "mean"]).rename(columns = {"max": "day_frequency_max", "min": "day_frequency_min", "mean": "day_frequency_mean"})
    features = features.merge(day_frequency, on = "UID", how = "left")
    hour_frequency = data.groupby(["UID", "day", "hour"])["time"].agg(["count"]).reset_index().groupby("UID")["count"].agg(["max", "min", "mean"]).rename(columns = {"max": "hour_frequency_max", "min": "hour_frequency_min", "mean": "hour_frequency_mean"})
    features = features.merge(hour_frequency, on = "UID", how = "left")
    
    # Feature Creation 4: column1_column2_nunique_UID, column1_column2_count_UID
    for column1 in data.columns:
        for column2 in data.columns:
            if column1 != "UID" and column2 != "UID" and column1 != column2:
                print("Create " + column1 + "_" + column2 + "_nunique and " + column1 + "_" + column2 + "_count...")
                column1_column2_nunique_count = data.groupby(column1)[column2].agg(["nunique", "count"]).reset_index().rename(columns = {"nunique": column1 + "_" + column2 + "_nunique", "count": column1 + "_" + column2 + "_count"})
                data_copy = data_copy.merge(column1_column2_nunique_count)
    data_columns = [column for column in data.columns if column != "UID"]
    column1_column2_nunique = [column1 + "_" + column2 + "_nunique" for column1, column2 in list(combinations(data_columns, 2))]
    column1_column2_count = [column1 + "_" + column2 + "_count" for column1, column2 in list(combinations(data_columns, 2))]
    columns = column1_column2_nunique + column1_column2_count
    for column in columns:
        print("Create " + column1 + "_" + column2 + "_max/min/mean...")
        column1_column2_nunique_count_UID = data_copy.groupby("UID")[column].agg(["max", "min", "mean"]).reset_index().rename(columns = {"max": column + "_max", "min": column + "_min", "mean": column + "_mean"})
        features = features.merge(column1_column2_nunique_count_UID, on = "UID", how = "left")
        
    print("Feature Creation Done")
    
    return features

def FeatureSelection(x_train, y_train, x_test):
    print("Feature Selection Start")
    sfm = SelectFromModel(GradientBoostingClassifier())
    sfm.fit(x_train, y_train)
    support = sfm.get_support()
    indices = list(range(len(support)))
    selected_indices = [index for index in indices if support[index]]
    selected_features = x_train.columns.values[selected_indices]
    x_train = x_train.loc[:, selected_features]
    x_temp = pd.DataFrame(columns = selected_features)
    for feature in selected_features:
        if feature in x_test.columns:
            x_temp[feature] = x_test[feature]
    x_test = x_temp
    print("Feature Selection Done")
    return x_train, x_test

def FeatureEngineering(operation_train, transaction_train, tag_train, operation_test, transaction_test, tag_test):
    print("Feature Engineering Start")
    operation_train_features = FeatureCreation(operation_train, tag_train)
    transaction_train_features = FeatureCreation(transaction_train, tag_train)
    operation_test_features = FeatureCreation(operation_test, tag_test)
    transaction_test_features = FeatureCreation(transaction_test, tag_test)
    x_train = operation_train_features.merge(transaction_train_features, on = "UID", how = "left")
    y_train = tag_train["Tag"]
    x_test = operation_test_features.merge(transaction_test_features, on = "UID", how = "left")
    
    # Feature Creation 3: success_mean, os_has_105, os_has_107, channel_has_118, channel_has_119
    success_mean = operation_train.groupby("UID")["success"].agg(["mean"]).reset_index().rename(columns = {"mean": "success_mean"})
    x_train = x_train.merge(success_mean, on = "UID", how = "left")
    
    os_has_105 = operation_train.groupby("UID")["os"].agg(lambda x : 105 in x.values).reset_index().rename(columns = {"os": "os_has_105"})
    os_has_105["os_has_105"] = os_has_105["os_has_105"].apply(int)
    x_train = x_train.merge(os_has_105, on = "UID", how = "left")
    
    os_has_107 = operation_train.groupby("UID")["os"].agg(lambda x : 107 in x.values).reset_index().rename(columns = {"os": "os_has_107"})
    os_has_107["os_has_107"] = os_has_107["os_has_107"].apply(int)
    x_trian = x_train.merge(os_has_107, on = "UID", how = "left")
    
    channel_has_118 = transaction_train.groupby("UID")["channel"].agg(lambda x : 118 in x.values).reset_index().rename(columns = {"channel": "channel_has_118"})
    channel_has_118["channel_has_118"] = channel_has_118["channel_has_118"].apply(int)
    x_train = x_train.merge(channel_has_118, on = "UID", how = "left")
    
    channel_has_119 = transaction_train.groupby("UID")["channel"].agg(lambda x : 119 in x.values).reset_index().rename(columns = {"channel": "channel_has_119"})
    channel_has_119["channel_has_119"] = channel_has_119["channel_has_119"].apply(int)
    x_train = x_train.merge(channel_has_119, on = "UID", how = "left")
    
    success_mean = operation_test.groupby("UID")["success"].agg(["mean"]).reset_index().rename(columns = {"mean": "success_mean"})
    x_test = x_test.merge(success_mean, on = "UID", how = "left")
    
    os_has_105 = operation_test.groupby("UID")["os"].agg(lambda x : 105 in x.values).reset_index().rename(columns = {"os": "os_has_105"})
    os_has_105["os_has_105"] = os_has_105["os_has_105"].apply(int)
    x_test = x_test.merge(os_has_105, on = "UID", how = "left")
    
    os_has_107 = operation_test.groupby("UID")["os"].agg(lambda x : 107 in x.values).reset_index().rename(columns = {"os": "os_has_107"})
    os_has_107["os_has_107"] = os_has_107["os_has_107"].apply(int)
    x_trian = x_test.merge(os_has_107, on = "UID", how = "left")
    
    channel_has_118 = transaction_test.groupby("UID")["channel"].agg(lambda x : 118 in x.values).reset_index().rename(columns = {"channel": "channel_has_118"})
    channel_has_118["channel_has_118"] = channel_has_118["channel_has_118"].apply(int)
    x_test = x_test.merge(channel_has_118, on = "UID", how = "left")
    
    channel_has_119 = transaction_test.groupby("UID")["channel"].agg(lambda x : 119 in x.values).reset_index().rename(columns = {"channel": "channel_has_119"})
    channel_has_119["channel_has_119"] = channel_has_119["channel_has_119"].apply(int)
    x_test = x_test.merge(channel_has_119, on = "UID", how = "left")
    
    # Feature Selection
    x_train = x_train.fillna(-1)
    x_test = x_test.fillna(-1)
    x_train, x_test = FeatureSelection(x_train, y_train, x_test)
    print("Feature Engineering Done")
    return x_train, x_test

In [15]:
"""
    Step 4 : Model Optimization
    
    Models : lr, gbdt, xgb
    
"""

def ModelOptimization(model, params, x_train, y_train):
    print("Model Optimizatioin Start")
    x_train = x_train.fillna(-1)
    best_params = []
#     cv = GridSearchCV(estimator = model, param_grid = params, scoring = "roc_auc", cv = 3, n_jobs = -1)
#     cv.fit(x_train, y_train)
    for param in params:
        print("Optimize param", param, "...")
        cv = GridSearchCV(estimator = model, param_grid = param, scoring = "roc_auc", cv = 3, n_jobs = -1)
        cv.fit(x_train, y_train)
        best_params.append(cv.best_params_)
    print("Model Optimizatioin Done")
    return best_params

In [16]:
"""
    Step 5 : Model Evaluation
    
"""

def tpr_weight_function(y_true, y_predict):
    d = pd.DataFrame()
    d['prob'] = list(y_predict)
    d['y'] = list(y_true)
    d = d.sort_values(['prob'], ascending=[0])
    y = d.y
    PosAll = pd.Series(y).value_counts()[1]
    NegAll = pd.Series(y).value_counts()[0]
    pCumsum = d['y'].cumsum()
    nCumsum = np.arange(len(y)) - pCumsum + 1
    pCumsumPer = pCumsum / PosAll
    nCumsumPer = nCumsum / NegAll
    TR1 = pCumsumPer[abs(nCumsumPer-0.001).idxmin()]
    TR2 = pCumsumPer[abs(nCumsumPer-0.005).idxmin()]
    TR3 = pCumsumPer[abs(nCumsumPer-0.01).idxmin()]
    return 0.4 * TR1 + 0.3 * TR2 + 0.3 * TR3

def ModelEvaluation(model, x_train, y_train):
    print("Model Evaluation Start")
    
    # roc_auc
    print("Compute roc_auc_score...")
    roc_auc = np.mean(cross_val_score(estimator = model, 
                                         X = x_train, 
                                         y = y_train, 
                                         scoring = "roc_auc", 
                                         cv = 3, 
                                         n_jobs = -1, 
                                         verbose = 10))

    # tpr_weight
    print("Compute tpr_weight_score...")
    kf = KFold(n_splits = 3)
    model_scores = []
    for train_index, test_index in kf.split(x_train):
        print("Split data...")
        x_tr, x_te = x_train.values[train_index], x_train.values[test_index]
        y_tr, y_te = y_train.values[train_index], y_train.values[test_index]
        model.fit(x_tr, y_tr)
        y_pred = model.predict(x_te)
        score = tpr_weight_function(y_te, y_pred)
        model_scores.append(score)
    tpr_weight = np.mean(model_scores)
    print("Model Evaluation Done")
    return roc_auc, tpr_weight

def Record(x_train, model, roc_auc_score, tpr_weight_score):
    print("Record Start")
    with open("../Records/record.txt", "a") as f:
        f.write("features:\t")
        f.write("[" + ", ".join(x_train.columns.values) + "]")
        f.write("\n\n")
        f.write("model:\t")
        f.write(str(model))
        f.write("\n\n")
        f.write("roc_auc_score:\t")
        f.write(str(roc_auc_score))
        f.write("\n\n")
        f.write("tpr_weight_score:\t")
        f.write(str(tpr_weight_score))
        f.write("\n")
        f.write("#" * 100)
        f.write("\n")
    print("Record Done")

In [17]:
"""
    Step 6 : Fit and Predict
    
"""

def FitPredict(model, x_train, y_train, x_test):
    print("Fit Predict Start")
    model.fit(x_train, y_train)
    y_pred = model.predict_proba(x_test)[:, 1]
    print("Fit Predict Done")
    return y_pred

In [18]:
"""
    Step 7 : Ensembling

"""

def Ensembling(y_pred_list):
    print("Ensembling Start")
    ensembling_y_pred = np.array(y_pred_list).mean(axis = 0)
    print("Ensembling Done")
    return ensembling_y_pred

In [19]:
"""
    Step 8 : Submit
    
"""
def Submit(y_pred, tag_test):
    print("Submit Start")
    submission = pd.DataFrame({"UID": tag_test["UID"], "Tag": y_pred})
    print("Submit Done")
    return submission

In [20]:
# Get Data
t_get_data_start = time.time()
operation_train, transaction_train, tag_train, operation_test, transaction_test, tag_test = GetData("../Data/")
t_get_data_end = time.time()

# Data Exploration
t_data_exploration_start = time.time()
# DataExploration(operation_train, tag_train)
# DataExploration(transaction_train, tag_train)
# DataExploration(operation_test)
# DataExploration(transaction_test)
t_data_exploration_end = time.time()

# Data Preprocessing
t_data_preprocessing_start = time.time()
operation_train = DataPreprocessing(operation_train)
transaction_train = DataPreprocessing(transaction_train)
operation_test = DataPreprocessing(operation_test)
transaction_test = DataPreprocessing(transaction_test)
t_data_preprocessing_end = time.time()

# Feature Engineering
t_feature_engineering_start = time.time()
x_train, x_test = FeatureEngineering(operation_train, transaction_train, tag_train, operation_test, transaction_test, tag_test)
t_feature_engineering_end = time.time()

# Model Optimization
t_model_optimization_start = time.time()
# lr_params = [{"C": [0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10]}, 
#              {"class_weight": [None, "balanced"]}, 
#              {"max_iter": [100, 300, 500, 1000]}, 
#              {"penalty": ["l1", "l2"]},
#              {"solver": ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}]
# lr_best_params = ModelOptimization(LogisticRegression(), lr_params, x_train, tag_train["Tag"])

# gbdt_params = [{"n_estimators": [100, 300, 500, 1000]}, 
#                {"learning_rate": [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]}, 
#                {"max_features": [None, "log2", "sqrt"]}, 
#                {"max_depth": [3, 5, 7, 9]}, 
#                {"min_samples_split": [2, 4, 6, 8]}, 
#                {"min_samples_leaf": [1, 3, 5, 7]}]
# gbdt_best_params = ModelOptimization(GradientBoostingClassifier(), gbdt_params, x_train, tag_train["Tag"])

# xgb_params = [{"learning_rate": [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1]}, 
#               {"n_estimators": [100, 300, 500, 1000]}, 
#               {"max_depth": range(3,10,2)}, 
#               {"min_child_weight": range(1,6,2)}, 
#               {"gamma": [i/10.0 for i in range(0,5)]}, 
#               {"subsample": [i/10.0 for i in range(6,10)]},
#               {"colsample_bytree": [i/10.0 for i in range(6,10)]}, 
#               {"reg_alpha": [1e-5, 1e-2, 0.1, 1, 100]}]
# xgb_best_params = ModelOptimization(XGBClassifier(), xgb_params, x_train, tag_train["Tag"])
t_model_optimization_end = time.time()

# Model Evaluation
t_model_evaluation_start = time.time()
# lr = LogisticRegression(C = 10, class_weight = "balanced", max_iter = 1000, penalty = "l2", solver = "newton-cg")
# lr_roc_auc_score, lr_tpr_weight_score = ModelEvaluation(lr, x_train, tag_train["Tag"])
# Record(x_train, lr, lr_roc_auc_score, lr_tpr_weight_score)

# gbdt = GradientBoostingClassifier(n_estimators = 1000, learning_rate = 0.3, max_features = None, 
#                                   max_depth = 3, min_samples_split = 4, min_samples_leaf = 7)
# gbdt_roc_auc_score, gbdt_tpr_weight_score = ModelEvaluation(gbdt, x_train, tag_train["Tag"])
# Record(x_train, gbdt, gbdt_roc_auc_score, gbdt_tpr_weight_score)

# xgb = XGBClassifier(n_estimators = 300, learning_rate = 0.3, max_depth = 5, min_child_weight = 1, 
#                     gamma = 0.1, subsample = 0.8, colsample_bytree = 0.8, reg_alpha = 1e-05)
# xgb_roc_auc_score, xgb_tpr_weight_score = ModelEvaluation(xgb, x_train, tag_train["Tag"])
# Record(x_train, xgb, xgb_roc_auc_score, xgb_tpr_weight_score)
t_model_evaluation_end = time.time()

# Fit and Predict
t_fit_predict_start = time.time()
lr_y_pred = FitPredict(lr, x_train, tag_train["Tag"], x_test)
gbdt_y_pred = FitPredict(gbdt, x_train, tag_train["Tag"], x_test)
xgb_y_pred = FitPredict(xgb, x_train, tag_train["Tag"], x_test)
t_fit_predict_end = time.time()
print("t_fit_predict: ", t_fit_predict_end - t_fit_predict_start)

# Ensembling
t_ensembling_start = time.time()

y_train = tag_train["Tag"]
kf = KFold(n_splits = 3)
ensembling_roc_auc_scores = []
ensembling_tpr_weight_scores = []
for train_index, test_index in kf.split(x_train):
    x_tr, x_te = x_train.values[train_index], x_train.values[test_index]
    y_tr, y_te = y_train.values[train_index], y_train.values[test_index]
    
    lr_y_pred_train = FitPredict(lr, x_tr, y_tr, x_te)
    gbdt_y_pred_train = FitPredict(gbdt, x_tr, y_tr, x_te)
    xgb_y_pred_train = FitPredict(xgb, x_tr, y_tr, x_te)
    y_pred_list_train = [lr_y_pred_train, gbdt_y_pred_train, xgb_y_pred_train]
    ensembling_y_pred_train = Ensembling(y_pred_list_train)
    ensembling_roc_auc_score = roc_auc_score(y_te, ensembling_y_pred_train)
    ensembling_tpr_weight_score = tpr_weight_function(y_te, ensembling_y_pred_train)
    ensembling_roc_auc_scores.append(ensembling_roc_auc_score)
    ensembling_tpr_weight_scores.append(ensembling_tpr_weight_score)

ensembling_roc_auc_score = np.mean(ensembling_roc_auc_scores)
ensembling_tpr_weight_score = np.mean(ensembling_tpr_weight_scores)

with open("../Records/record.txt", "a") as f:
    f.write("ensembling\n\n")
    f.write("roc_auc_score:\t")
    f.write(str(ensembling_roc_auc_score))
    f.write("\n\n")
    f.write("tpr_weight_score:\t")
    f.write(str(ensembling_tpr_weight_score))
    f.write("\n")
    f.write("#" * 100)
    f.write("\n")

y_pred_list = [lr_y_pred, gbdt_y_pred, xgb_y_pred]
ensembling_y_pred = Ensembling(y_pred_list)
t_ensembling_end = time.time()

# Submit
t_submit_start = time.time()
lr_submission = Submit(lr_y_pred, tag_test)
gbdt_submission = Submit(gbdt_y_pred, tag_test)
xgb_submission = Submit(xgb_y_pred, tag_test)
ensembling_submission = Submit(ensembling_y_pred, tag_test)
lr_submission.to_csv("../Submission/lr_submission.csv", index = False)
gbdt_submission.to_csv("../Submission/gbdt_submission.csv", index = False)
xgb_submission.to_csv("../Submission/xgb_submission.csv", index = False)
ensembling_submission.to_csv("../Submission/ensembling_submission.csv", index = False)
t_submit_end = time.time()

print("t_get_data: ", t_get_data_end - t_get_data_start)
print("t_data_preprocessing: ", t_data_preprocessing_end - t_data_preprocessing_start)
print("t_data_exploration: ", t_data_exploration_end - t_data_exploration_start)
print("t_feature_engineering: ", t_feature_engineering_end - t_feature_engineering_start)
print("t_model_optimization: ", t_model_optimization_end - t_model_optimization_start)
print("t_model_evaluation: ", t_model_evaluation_end - t_model_evaluation_start)
print("t_ensembling: ", t_ensembling_end - t_ensembling_start)
print("t_submit: ", t_submit_end - t_submit_start)
print("total_time: ", t_submit_end - t_get_data_start)

Get Data Start
Get Data Done
Data Preprocessing Start
Data Preprocessing Done
Data Preprocessing Start
Data Preprocessing Done
Data Preprocessing Start
Data Preprocessing Done
Data Preprocessing Start
Data Preprocessing Done
Feature Engineering Start
Feature Creation Start
Create day_nunique...
Create mode_nunique...
Create success_nunique...
Create time_nunique...
Create os_nunique...
Create version_nunique...
Create device1_nunique...
Create device2_nunique...
Create device_code1_nunique...
Create device_code2_nunique...
Create device_code3_nunique...
Create mac1_nunique...
Create mac2_nunique...
Create ip1_nunique...
Create ip2_nunique...
Create wifi_nunique...
Create geo_code_nunique...
Create ip1_sub_nunique...
Create ip2_sub_nunique...
Create hour_nunique...
Create day_nunique_UID and day_count_UID...
Create mode_nunique_UID and mode_count_UID...
Create success_nunique_UID and success_count_UID...
Create time_nunique_UID and time_count_UID...
Create os_nunique_UID and os_count_UI

Create version_device2_nunique and version_device2_count...
Create version_device_code1_nunique and version_device_code1_count...
Create version_device_code2_nunique and version_device_code2_count...
Create version_device_code3_nunique and version_device_code3_count...
Create version_mac1_nunique and version_mac1_count...
Create version_mac2_nunique and version_mac2_count...
Create version_ip1_nunique and version_ip1_count...
Create version_ip2_nunique and version_ip2_count...
Create version_wifi_nunique and version_wifi_count...
Create version_geo_code_nunique and version_geo_code_count...
Create version_ip1_sub_nunique and version_ip1_sub_count...
Create version_ip2_sub_nunique and version_ip2_sub_count...
Create version_hour_nunique and version_hour_count...
Create device1_day_nunique and device1_day_count...
Create device1_mode_nunique and device1_mode_count...
Create device1_success_nunique and device1_success_count...
Create device1_time_nunique and device1_time_count...
Create d

Create mac2_device_code1_nunique and mac2_device_code1_count...
Create mac2_device_code2_nunique and mac2_device_code2_count...
Create mac2_device_code3_nunique and mac2_device_code3_count...
Create mac2_mac1_nunique and mac2_mac1_count...
Create mac2_ip1_nunique and mac2_ip1_count...
Create mac2_ip2_nunique and mac2_ip2_count...
Create mac2_wifi_nunique and mac2_wifi_count...
Create mac2_geo_code_nunique and mac2_geo_code_count...
Create mac2_ip1_sub_nunique and mac2_ip1_sub_count...
Create mac2_ip2_sub_nunique and mac2_ip2_sub_count...
Create mac2_hour_nunique and mac2_hour_count...
Create ip1_day_nunique and ip1_day_count...
Create ip1_mode_nunique and ip1_mode_count...
Create ip1_success_nunique and ip1_success_count...
Create ip1_time_nunique and ip1_time_count...
Create ip1_os_nunique and ip1_os_count...
Create ip1_version_nunique and ip1_version_count...
Create ip1_device1_nunique and ip1_device1_count...
Create ip1_device2_nunique and ip1_device2_count...
Create ip1_device_code

Create device1_nunique...
Create device2_nunique...
Create mac1_nunique...
Create ip1_nunique...
Create bal_nunique...
Create amt_src2_nunique...
Create acc_id2_nunique...
Create acc_id3_nunique...
Create geo_code_nunique...
Create trans_type2_nunique...
Create market_code_nunique...
Create market_type_nunique...
Create ip1_sub_nunique...
Create hour_nunique...
Create channel_nunique_UID and channel_count_UID...
Create day_nunique_UID and day_count_UID...
Create time_nunique_UID and time_count_UID...
Create trans_amt_nunique_UID and trans_amt_count_UID...
Create amt_src1_nunique_UID and amt_src1_count_UID...
Create merchant_nunique_UID and merchant_count_UID...
Create code1_nunique_UID and code1_count_UID...
Create code2_nunique_UID and code2_count_UID...
Create trans_type1_nunique_UID and trans_type1_count_UID...
Create acc_id1_nunique_UID and acc_id1_count_UID...
Create device_code1_nunique_UID and device_code1_count_UID...
Create device_code2_nunique_UID and device_code2_count_UID..

Create trans_amt_trans_type1_nunique and trans_amt_trans_type1_count...
Create trans_amt_acc_id1_nunique and trans_amt_acc_id1_count...
Create trans_amt_device_code1_nunique and trans_amt_device_code1_count...
Create trans_amt_device_code2_nunique and trans_amt_device_code2_count...
Create trans_amt_device_code3_nunique and trans_amt_device_code3_count...
Create trans_amt_device1_nunique and trans_amt_device1_count...
Create trans_amt_device2_nunique and trans_amt_device2_count...
Create trans_amt_mac1_nunique and trans_amt_mac1_count...
Create trans_amt_ip1_nunique and trans_amt_ip1_count...
Create trans_amt_bal_nunique and trans_amt_bal_count...
Create trans_amt_amt_src2_nunique and trans_amt_amt_src2_count...
Create trans_amt_acc_id2_nunique and trans_amt_acc_id2_count...
Create trans_amt_acc_id3_nunique and trans_amt_acc_id3_count...
Create trans_amt_geo_code_nunique and trans_amt_geo_code_count...
Create trans_amt_trans_type2_nunique and trans_amt_trans_type2_count...
Create trans

Create trans_type1_device_code3_nunique and trans_type1_device_code3_count...
Create trans_type1_device1_nunique and trans_type1_device1_count...
Create trans_type1_device2_nunique and trans_type1_device2_count...
Create trans_type1_mac1_nunique and trans_type1_mac1_count...
Create trans_type1_ip1_nunique and trans_type1_ip1_count...
Create trans_type1_bal_nunique and trans_type1_bal_count...
Create trans_type1_amt_src2_nunique and trans_type1_amt_src2_count...
Create trans_type1_acc_id2_nunique and trans_type1_acc_id2_count...
Create trans_type1_acc_id3_nunique and trans_type1_acc_id3_count...
Create trans_type1_geo_code_nunique and trans_type1_geo_code_count...
Create trans_type1_trans_type2_nunique and trans_type1_trans_type2_count...
Create trans_type1_market_code_nunique and trans_type1_market_code_count...
Create trans_type1_market_type_nunique and trans_type1_market_type_count...
Create trans_type1_ip1_sub_nunique and trans_type1_ip1_sub_count...
Create trans_type1_hour_nunique 

Create device1_trans_amt_nunique and device1_trans_amt_count...
Create device1_amt_src1_nunique and device1_amt_src1_count...
Create device1_merchant_nunique and device1_merchant_count...
Create device1_code1_nunique and device1_code1_count...
Create device1_code2_nunique and device1_code2_count...
Create device1_trans_type1_nunique and device1_trans_type1_count...
Create device1_acc_id1_nunique and device1_acc_id1_count...
Create device1_device_code1_nunique and device1_device_code1_count...
Create device1_device_code2_nunique and device1_device_code2_count...
Create device1_device_code3_nunique and device1_device_code3_count...
Create device1_device2_nunique and device1_device2_count...
Create device1_mac1_nunique and device1_mac1_count...
Create device1_ip1_nunique and device1_ip1_count...
Create device1_bal_nunique and device1_bal_count...
Create device1_amt_src2_nunique and device1_amt_src2_count...
Create device1_acc_id2_nunique and device1_acc_id2_count...
Create device1_acc_id3

Create amt_src2_bal_nunique and amt_src2_bal_count...
Create amt_src2_acc_id2_nunique and amt_src2_acc_id2_count...
Create amt_src2_acc_id3_nunique and amt_src2_acc_id3_count...
Create amt_src2_geo_code_nunique and amt_src2_geo_code_count...
Create amt_src2_trans_type2_nunique and amt_src2_trans_type2_count...
Create amt_src2_market_code_nunique and amt_src2_market_code_count...
Create amt_src2_market_type_nunique and amt_src2_market_type_count...
Create amt_src2_ip1_sub_nunique and amt_src2_ip1_sub_count...
Create amt_src2_hour_nunique and amt_src2_hour_count...
Create acc_id2_channel_nunique and acc_id2_channel_count...
Create acc_id2_day_nunique and acc_id2_day_count...
Create acc_id2_time_nunique and acc_id2_time_count...
Create acc_id2_trans_amt_nunique and acc_id2_trans_amt_count...
Create acc_id2_amt_src1_nunique and acc_id2_amt_src1_count...
Create acc_id2_merchant_nunique and acc_id2_merchant_count...
Create acc_id2_code1_nunique and acc_id2_code1_count...
Create acc_id2_code2

Create market_code_mac1_nunique and market_code_mac1_count...
Create market_code_ip1_nunique and market_code_ip1_count...
Create market_code_bal_nunique and market_code_bal_count...
Create market_code_amt_src2_nunique and market_code_amt_src2_count...
Create market_code_acc_id2_nunique and market_code_acc_id2_count...
Create market_code_acc_id3_nunique and market_code_acc_id3_count...
Create market_code_geo_code_nunique and market_code_geo_code_count...
Create market_code_trans_type2_nunique and market_code_trans_type2_count...
Create market_code_market_type_nunique and market_code_market_type_count...
Create market_code_ip1_sub_nunique and market_code_ip1_sub_count...
Create market_code_hour_nunique and market_code_hour_count...
Create market_type_channel_nunique and market_type_channel_count...
Create market_type_day_nunique and market_type_day_count...
Create market_type_time_nunique and market_type_time_count...
Create market_type_trans_amt_nunique and market_type_trans_amt_count..

Create day_mode_nunique and day_mode_count...
Create day_success_nunique and day_success_count...
Create day_time_nunique and day_time_count...
Create day_os_nunique and day_os_count...
Create day_version_nunique and day_version_count...
Create day_device1_nunique and day_device1_count...
Create day_device2_nunique and day_device2_count...
Create day_device_code1_nunique and day_device_code1_count...
Create day_device_code2_nunique and day_device_code2_count...
Create day_device_code3_nunique and day_device_code3_count...
Create day_mac1_nunique and day_mac1_count...
Create day_mac2_nunique and day_mac2_count...
Create day_ip1_nunique and day_ip1_count...
Create day_ip2_nunique and day_ip2_count...
Create day_wifi_nunique and day_wifi_count...
Create day_geo_code_nunique and day_geo_code_count...
Create day_ip1_sub_nunique and day_ip1_sub_count...
Create day_ip2_sub_nunique and day_ip2_sub_count...
Create day_hour_nunique and day_hour_count...
Create mode_day_nunique and mode_day_count

Create device2_hour_nunique and device2_hour_count...
Create device_code1_day_nunique and device_code1_day_count...
Create device_code1_mode_nunique and device_code1_mode_count...
Create device_code1_success_nunique and device_code1_success_count...
Create device_code1_time_nunique and device_code1_time_count...
Create device_code1_os_nunique and device_code1_os_count...
Create device_code1_version_nunique and device_code1_version_count...
Create device_code1_device1_nunique and device_code1_device1_count...
Create device_code1_device2_nunique and device_code1_device2_count...
Create device_code1_device_code2_nunique and device_code1_device_code2_count...
Create device_code1_device_code3_nunique and device_code1_device_code3_count...
Create device_code1_mac1_nunique and device_code1_mac1_count...
Create device_code1_mac2_nunique and device_code1_mac2_count...
Create device_code1_ip1_nunique and device_code1_ip1_count...
Create device_code1_ip2_nunique and device_code1_ip2_count...
Crea

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt



In [None]:
# try drop Feature Selection