In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from lightgbm import LGBMClassifier
from tqdm import tqdm
import warnings
import gc

# Read Data

In [16]:
AS_UPDATES_ROOT_DIR = './data/asn_updates'

as_updates = {}
as_updates_dirs = sorted(glob(AS_UPDATES_ROOT_DIR+'/*'))
for dir_path in tqdm(as_updates_dirs):
    asn = dir_path.split('/')[-1]
    as_updates_files = sorted(glob(dir_path+'/*'))
    as_df_list = [pd.read_csv(file_path) for file_path in as_updates_files]
    as_df =  pd.concat(as_df_list).reset_index(drop=True)
    del as_df_list
    gc.collect()
    as_df = as_df.sort_values('time')
    as_updates[asn] = as_df    

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:15<00:00,  1.32it/s]


# Data Preparation Functions

In [17]:
def calculate_features(df):
    df['minute'] = df.time // 60
    df = df.groupby('minute')[['prefix']]\
                       .count()\
                       .rename(columns={'prefix': 'updates'})
    minutes = pd.Series(df.index, index=df.index)
    df['periods_before_update'] = minutes - minutes.shift(1)
    df = df.dropna()
    return df

In [18]:
def interpolate_index(df):
    index_interpolated = np.arange(int(df.index.min()), int(df.index.max()))
    df = df.reindex(index_interpolated).fillna({'updates': 0})
    return df

In [19]:
def make_sequences(df, feature_cols, target_col, lag):    
    X = []
    y = []
    for i in range(df.shape[0]-lag):
        X.append(df.iloc[i:i+lag][feature_cols].to_numpy().reshape(-1))
        y.append(df.iloc[i+lag][target_col])
        
    return np.array(X), np.array(y)

In [20]:
def insert_croston_zero_rows(X, y):
    zero_rows = {}
    
    for i in range(X.shape[0]):
        if i == 0:
            continue
        
        row_ = X[i].copy()
        n_zero_rows = int(row_[-1] - 1)

        if n_zero_rows > 0:
            row_[-2] = 0
            row_[-1] = 1
            zero_rows[i] = [row_]
        
        for _ in range(n_zero_rows-1):
            row_ = row_.copy()
            row_[-1] += 1
            zero_rows[i].append(row_)
    
    for i in sorted(zero_rows.keys())[-1::-1]:
        X_zeros = np.array(zero_rows[i])
        y_zeros = np.zeros(X_zeros.shape[0])
        
        X_before = X[:i]
        y_before = y[:i]
        
        X_after = X[i:]
        y_after = y[i:]
    
        X = np.concatenate([X_before, X_zeros, X_after])
        y = np.concatenate([y_before, y_zeros, y_after])

    return X, y

# Single AS

In [21]:
test_size = 24 * 60
test_size

1440

## AR

In [22]:
df = calculate_features(as_updates['25139'])
df_ar = interpolate_index(df)
X, y = make_sequences(df_ar, ['updates'], 'updates', 20)

In [23]:
X_train, y_train = X[:-test_size], (y[:-test_size] > 0).astype(int)
X_test, y_test = X[-test_size:], (y[-test_size:] > 0).astype(int)

In [24]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((18609, 20), (18609,), (1440, 20), (1440,))

In [25]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [39]:
X_train_reg = np.concat([np.ones((X_train.shape[0],1)), X_train], axis=1)
X_test_reg = np.concat([np.ones((X_test.shape[0],1)), X_test], axis=1)

In [40]:
model = LogisticRegression()
model.fit(X_train_reg, y_train)
y_pred = model.predict(X_test_reg)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9680555555555556
Recall: 0.7704918032786885
Precision: 0.9724137931034482
F1: 0.8597560975609756


In [41]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9784722222222222
Recall: 0.8961748633879781
Precision: 0.9318181818181818
F1: 0.9136490250696379


In [42]:
model = LGBMClassifier(objective='binary')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))



[LightGBM] [Info] Number of positive: 2687, number of negative: 15922
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004581 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1080
[LightGBM] [Info] Number of data points in the train set: 18609, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.144392 -> initscore=-1.779276
[LightGBM] [Info] Start training from score -1.779276
Accuracy: 0.98125
Recall: 0.8852459016393442
Precision: 0.9642857142857143
F1: 0.9230769230769231




## Croston-like AR

In [47]:
df = calculate_features(as_updates['25139'])
X, y = make_sequences(df, ['updates', 'periods_before_update'], 'updates', 20)
X, y = insert_croston_zero_rows(X, y)

In [48]:
X_train, y_train = X[:-test_size], (y[:-test_size] > 0).astype(int)
X_test, y_test = X[-test_size:], (y[-test_size:] > 0).astype(int)

In [49]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((18447, 40), (18447,), (1440, 40), (1440,))

In [50]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [51]:
X_train_reg = np.concat([np.ones((X_train.shape[0],1)), X_train], axis=1)
X_test_reg = np.concat([np.ones((X_test.shape[0],1)), X_test], axis=1)

In [54]:
model = LogisticRegression()
model.fit(X_train_reg, y_train)
y_pred = model.predict(X_test_reg)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9916666666666667
Recall: 0.9347826086956522
Precision: 1.0
F1: 0.9662921348314607


In [53]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0


In [55]:
model = LGBMClassifier(objective='binary')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))



[LightGBM] [Info] Number of positive: 2670, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008949 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1675
[LightGBM] [Info] Number of data points in the train set: 18447, number of used features: 40
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.144739 -> initscore=-1.776475
[LightGBM] [Info] Start training from score -1.776475
Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0




# Multiple AS

## AR

### Data Preparation

In [56]:
X_trains = []
y_trains = []
X_tests = []
y_tests = []
X_zero_shots = []
y_zero_shots = []

for i, (asn, df) in enumerate(as_updates.items()):
    print(i, '| Processing AS:', asn)
    df = calculate_features(df)
    df = interpolate_index(df)
    X, y = make_sequences(df, ['updates'], 'updates', 20)

    if i % 10 == 0:
        X_zero_shots.append(X)
        y_zero_shots.append((y > 0).astype(int))
    
    X_train, y_train = X[:-test_size], (y[:-test_size] > 0).astype(int)
    X_test, y_test = X[-test_size:], (y[-test_size:] > 0).astype(int)
    X_trains.append(X_train)
    y_trains.append(y_train)
    X_tests.append(X_test)
    y_tests.append(y_test)

X_train = np.concatenate(X_trains)
y_train = np.concatenate(y_trains)
X_test = np.concatenate(X_tests)
y_test = np.concatenate(y_tests)
X_zero_shot = np.concatenate(X_zero_shots)
y_zero_shot = np.concatenate(y_zero_shots)

del X_trains
del y_trains
del X_tests
del y_tests
del X_zero_shots
del y_zero_shots
gc.collect()

0 | Processing AS: 11913
1 | Processing AS: 131292
2 | Processing AS: 133840
3 | Processing AS: 134645
4 | Processing AS: 135101
5 | Processing AS: 136844
6 | Processing AS: 136991
7 | Processing AS: 138146
8 | Processing AS: 138630
9 | Processing AS: 138645
10 | Processing AS: 139002
11 | Processing AS: 139054
12 | Processing AS: 139245
13 | Processing AS: 141139
14 | Processing AS: 142354
15 | Processing AS: 147182
16 | Processing AS: 149001
17 | Processing AS: 149282
18 | Processing AS: 151853
19 | Processing AS: 152438
20 | Processing AS: 18036
21 | Processing AS: 18109
22 | Processing AS: 19263
23 | Processing AS: 197915
24 | Processing AS: 198239
25 | Processing AS: 200179
26 | Processing AS: 200400
27 | Processing AS: 200536
28 | Processing AS: 200914
29 | Processing AS: 201547
30 | Processing AS: 2018
31 | Processing AS: 202140
32 | Processing AS: 202188
33 | Processing AS: 202627
34 | Processing AS: 204446
35 | Processing AS: 20783
36 | Processing AS: 208115
37 | Processing AS

432

In [58]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_zero_shot.shape, y_zero_shot.shape

((1814183, 20), (1814183,), (144000, 20), (144000,), (198765, 20), (198765,))

In [59]:
X_train_test, y_train_test = np.concatenate([X_train, X_test]), np.concatenate([y_train, y_test])

In [60]:
scaler1 = StandardScaler()
X_train = scaler1.fit_transform(X_train)
X_test = scaler1.transform(X_test)

In [61]:
scaler2 = StandardScaler()
X_train_test = scaler2.fit_transform(X_train_test)
X_zero_shot = scaler2.transform(X_zero_shot)

In [62]:
X_train_reg = np.concat([np.ones((X_train.shape[0],1)), X_train], axis=1)
X_test_reg = np.concat([np.ones((X_test.shape[0],1)), X_test], axis=1)

X_train_test_reg = np.concat([np.ones((X_train_test.shape[0],1)), X_train_test], axis=1)
X_zero_shot_reg = np.concat([np.ones((X_zero_shot.shape[0],1)), X_zero_shot], axis=1)

### Logistic Regression

In [63]:
model = LogisticRegression()
model.fit(X_train_reg, y_train)
y_pred = model.predict(X_test_reg)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9500347222222222
Recall: 0.2384785005512679
Precision: 0.8824969400244798
F1: 0.3754882388681538


In [65]:
feature_names = ['bias']
for i in range(1, 21):
    feature_names.extend([f'updates_{i}'])
    
pd.DataFrame(model.coef_, columns=feature_names).T

Unnamed: 0,0
bias,-1.215784
updates_1,0.275087
updates_2,0.039667
updates_3,0.046616
updates_4,0.066289
updates_5,0.129304
updates_6,0.161521
updates_7,0.039958
updates_8,0.102751
updates_9,0.094201


In [66]:
model = LogisticRegression()
model.fit(X_train_test_reg, y_train_test)
y_pred = model.predict(X_zero_shot_reg)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))

Accuracy: 0.9368450179860639
Recall: 0.134440199823685
Precision: 0.7035755478662054
F1: 0.2257447727132548


### Decision Tree

In [67]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9573402777777777
Recall: 0.5315325248070563
Precision: 0.7179448994787788
F1: 0.6108330693696548


In [68]:
model = DecisionTreeClassifier()
model.fit(X_train_test, y_train_test)
y_pred = model.predict(X_zero_shot)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))

Accuracy: 0.972253666389958
Recall: 0.6259917719659124
Precision: 0.9525992174399106
F1: 0.7555082679434322


### Gradient Boosted Decision Tree

In [69]:
model = LGBMClassifier(objective='binary')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))



[LightGBM] [Info] Number of positive: 134681, number of negative: 1679502
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.174308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 1814183, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.074238 -> initscore=-2.523344
[LightGBM] [Info] Start training from score -2.523344




Accuracy: 0.9643958333333333
Recall: 0.5119073869900772
Precision: 0.8689874602283362
F1: 0.6442794699229862


In [70]:
model = LGBMClassifier(objective='binary')
model.fit(X_train_test, y_train_test)
y_pred = model.predict(X_zero_shot)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))



[LightGBM] [Info] Number of positive: 143751, number of negative: 1814432
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.249012 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 1958183, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.073410 -> initscore=-2.535445
[LightGBM] [Info] Start training from score -2.535445




Accuracy: 0.9562297185118104
Recall: 0.42535997649133117
Precision: 0.8683263347330534
F1: 0.5710059171597633


## Croston-like AR

### Data Preparation

In [71]:
X_trains = []
y_trains = []
X_tests = []
y_tests = []
X_zero_shots = []
y_zero_shots = []

for i, (asn, df) in enumerate(as_updates.items()):
    print(i, '| Processing AS:', asn)
    df = calculate_features(df)
    X, y = make_sequences(df, ['updates', 'periods_before_update'], 'updates', 20)
    X, y = insert_croston_zero_rows(X, y)

    if i % 10 == 0:
        X_zero_shots.append(X)
        y_zero_shots.append((y > 0).astype(int))
    
    X_train, y_train = X[:-test_size], (y[:-test_size] > 0).astype(int)
    X_test, y_test = X[-test_size:], (y[-test_size:] > 0).astype(int)
    X_trains.append(X_train)
    y_trains.append(y_train)
    X_tests.append(X_test)
    y_tests.append(y_test)

X_train = np.concatenate(X_trains)
y_train = np.concatenate(y_trains)
X_test = np.concatenate(X_tests)
y_test = np.concatenate(y_tests)
X_zero_shot = np.concatenate(X_zero_shots)
y_zero_shot = np.concatenate(y_zero_shots)

del X_trains
del y_trains
del X_tests
del y_tests
del X_zero_shots
del y_zero_shots
gc.collect()

0 | Processing AS: 11913
1 | Processing AS: 131292
2 | Processing AS: 133840
3 | Processing AS: 134645
4 | Processing AS: 135101
5 | Processing AS: 136844
6 | Processing AS: 136991
7 | Processing AS: 138146
8 | Processing AS: 138630
9 | Processing AS: 138645
10 | Processing AS: 139002
11 | Processing AS: 139054
12 | Processing AS: 139245
13 | Processing AS: 141139
14 | Processing AS: 142354
15 | Processing AS: 147182
16 | Processing AS: 149001
17 | Processing AS: 149282
18 | Processing AS: 151853
19 | Processing AS: 152438
20 | Processing AS: 18036
21 | Processing AS: 18109
22 | Processing AS: 19263
23 | Processing AS: 197915
24 | Processing AS: 198239
25 | Processing AS: 200179
26 | Processing AS: 200400
27 | Processing AS: 200536
28 | Processing AS: 200914
29 | Processing AS: 201547
30 | Processing AS: 2018
31 | Processing AS: 202140
32 | Processing AS: 202188
33 | Processing AS: 202627
34 | Processing AS: 204446
35 | Processing AS: 20783
36 | Processing AS: 208115
37 | Processing AS

373

In [77]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_zero_shot.shape, y_zero_shot.shape

((1708279, 40), (1708279,), (144000, 40), (144000,), (188800, 40), (188800,))

In [78]:
X_train_test, y_train_test = np.concatenate([X_train, X_test]), np.concatenate([y_train, y_test])

In [79]:
scaler1 = StandardScaler()
X_train = scaler1.fit_transform(X_train)
X_test = scaler1.transform(X_test)

In [80]:
scaler2 = StandardScaler()
X_train_test = scaler2.fit_transform(X_train_test)
X_zero_shot = scaler2.transform(X_zero_shot)

In [81]:
X_train_reg = np.concat([np.ones((X_train.shape[0],1)), X_train], axis=1)
X_test_reg = np.concat([np.ones((X_test.shape[0],1)), X_test], axis=1)

X_train_test_reg = np.concat([np.ones((X_train_test.shape[0],1)), X_train_test], axis=1)
X_zero_shot_reg = np.concat([np.ones((X_zero_shot.shape[0],1)), X_zero_shot], axis=1)

### Logistic Regression

In [82]:
model = LogisticRegression()
model.fit(X_train_reg, y_train)
y_pred = model.predict(X_test_reg)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 0.9986041666666666
Recall: 0.9781355379092788
Precision: 1.0
F1: 0.9889469342864998


In [83]:
feature_names = ['bias']
for i in range(1, 21):
    feature_names.extend([f'updates_{i}', f'periods_before_{i}'])
    
pd.DataFrame(model.coef_, columns=feature_names).T

Unnamed: 0,0
bias,5.893082
updates_1,-0.012762
periods_before_1,-0.055316
updates_2,0.008543
periods_before_2,-0.037418
updates_3,-0.0061
periods_before_3,-0.044471
updates_4,-0.011151
periods_before_4,-0.054799
updates_5,-0.010837


In [84]:
model = LogisticRegression()
model.fit(X_train_test_reg, y_train_test)
y_pred = model.predict(X_zero_shot_reg)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))

Accuracy: 0.9995074152542373
Recall: 0.993081386698408
Precision: 1.0
F1: 0.9965286850061588


### Decision Tree

In [85]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))

Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0


In [86]:
model = DecisionTreeClassifier()
model.fit(X_train_test, y_train_test)
y_pred = model.predict(X_zero_shot)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))

Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0


### GBDT

In [87]:
model = LGBMClassifier(objective='binary', n_estimators=5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1:', f1_score(y_test, y_pred))



[LightGBM] [Info] Number of positive: 132903, number of negative: 1575376
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.279978 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10082
[LightGBM] [Info] Number of data points in the train set: 1708279, number of used features: 40
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.077799 -> initscore=-2.472630
[LightGBM] [Info] Start training from score -2.472630
Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0




In [88]:
model = LGBMClassifier(objective='binary', n_estimators=5)
model.fit(X_train_test, y_train_test)
y_pred = model.predict(X_zero_shot)

print('Accuracy:', accuracy_score(y_zero_shot, y_pred))
print('Recall:', recall_score(y_zero_shot, y_pred))
print('Precision:', precision_score(y_zero_shot, y_pred))
print('F1:', f1_score(y_zero_shot, y_pred))



[LightGBM] [Info] Number of positive: 142096, number of negative: 1710183
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.057038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10087
[LightGBM] [Info] Number of data points in the train set: 1852279, number of used features: 40
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.076714 -> initscore=-2.487853
[LightGBM] [Info] Start training from score -2.487853
Accuracy: 1.0
Recall: 1.0
Precision: 1.0
F1: 1.0


