# Merging the best results by predicted probability

In [1]:
import numpy  as np
import pandas as pd

In [2]:
# Read the train / test feature tables and the sample-submission template.
df_trn, df_tst, df_sbm = map(pd.read_csv, (
    '../data/train-agg-cut.csv',
    '../data/test-agg-cut.csv',
    '../data/sample_submission.csv',
))

In [3]:
# Target column first, then every remaining column as the feature matrix.
y_trn = df_trn['FraudResult']
X_trn = df_trn.drop(columns=['FraudResult'])

# The test frame is used as the feature matrix as-is (same object, no copy).
X_tst = df_tst

In [4]:
import os
from collections import Counter
from sklearn.ensemble import BaggingClassifier

In [5]:
def prediction(X_trn, y_trn, X_tst, name):
    """Fit a bagging ensemble, predict the test set and save the result if it is new.

    The predicted labels are stored in the module-level ``df_sbm`` frame
    (column ``'FraudResult'``), i.e. ``df_sbm`` is modified as a side effect.
    The set of row positions predicted as fraud is then compared with every
    CSV file in ``../submitted``; a new submission file is written only when
    no existing file flags exactly the same rows.

    Parameters
    ----------
    X_trn : training feature matrix passed to ``BaggingClassifier.fit``.
    y_trn : training target passed to ``BaggingClassifier.fit``.
    X_tst : test feature matrix passed to ``predict``.
    name : str
        Tag inserted into the output file name
        (``'../submitted/AlBo0814_' + name + 'Bagging.csv'``).

    Returns
    -------
    None
    """
    model = BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
    predict = model.fit(X_trn, y_trn).predict(X_tst)
    print('Results:', Counter(predict))
    df_sbm['FraudResult'] = predict

    # Row positions flagged as fraud in the current prediction.
    current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

    # Look through every earlier submission for an identical set of flagged rows.
    is_exist = False
    for f in sorted(os.listdir('../submitted')):
        # Skip sub-directories and stray non-CSV files; read_csv would fail on them.
        if not f.lower().endswith('.csv'):
            continue
        f_csv = pd.read_csv('../submitted/' + f)
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            print('It is the same as in: ' + f)
            is_exist = True
    if not is_exist:
        print('New result! Submit it!')
        df_sbm.to_csv('../submitted/AlBo0814_' + name + 'Bagging.csv', encoding='utf-8', index=False)

9 best features, отобранных с помощью RFE и ETC

In [6]:
# The nine feature columns used for the 'RFE_ETC_9' model.
columns9 = [
    'Value',
    'AmountPositive',
    'product_category_transactions__AmountNegative_global_count',
    'account_provider_transactions__AmountPositive_global_avg',
    'provider_transactions__Value_global_avg',
    'account_product_transactions__AmountPositive_global_sum',
    'product_category_transactions__AmountPositive_global_count',
    'account_pricing_strategy_transactions__Value_global_avg',
    'product_category_transactions__AmountNegative_week_count',
]

# Fit on these columns only; prediction() prints the class counts and writes a
# submission file only if no file in ../submitted flags the same rows.
prediction(X_trn[columns9], y_trn, X_tst[columns9], 'RFE_ETC_9')

Results: Counter({0: 44938, 1: 81})
It is the same as in: AlBo0808_RFE_ETC_9.csv


In [7]:
# Class probabilities from the same bagging configuration, fitted on the nine columns.
predict_proba_RFE_ETC_9 = (
    BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
    .fit(X_trn[columns9], y_trn)
    .predict_proba(X_tst[columns9])
)

In [27]:
# Work on a copy: a plain assignment would only alias df_sbm, so the extra
# probability column would leak into df_sbm and into any file written from it.
df_sbm_RFE_ETC_9 = df_sbm.copy()
# Column 1 of predict_proba is the probability of class 1 (fraud).
df_sbm_RFE_ETC_9['RFE_ETC_9'] = predict_proba_RFE_ETC_9[:, 1]

In [29]:
# Show rows 320..329 (positional slice) as a spot check.
df_sbm_RFE_ETC_9.iloc[320:330]

Unnamed: 0,TransactionId,FraudResult,RFE_ETC_9
320,TransactionId_100428,0,0.0
321,TransactionId_5369,0,0.0
322,TransactionId_90439,0,0.0
323,TransactionId_99894,0,0.0
324,TransactionId_59114,1,1.0
325,TransactionId_36185,1,1.0
326,TransactionId_70295,0,0.0
327,TransactionId_98831,0,0.0
328,TransactionId_18001,1,1.0
329,TransactionId_140530,0,0.0


## 5 'super features' from top20 $\chi^2$

In [31]:
# Five active feature columns; the commented-out entries are the other
# candidates from the numbered top-20 list (#01..#20) that are not used here.
columns5 = [
    'AmountPositive',                                                           #01
#     'Value',                                                                    #02
    'account_product_transactions__AmountPositive_global_sum',                  #03
    'account_provider_transactions__AmountPositive_global_avg',                 #04
#     'account_product_category_transactions__AmountPositive_global_sum',         #05
#     'account_provider_transactions__Value_global_avg',                          #06
#     'account_provider_transactions__AmountPositive_global_sum',                 #07
#     'account_channel_transactions__AmountPositive_global_sum',                  #08
#     'account_product_transactions__AmountPositive_global_avg',                  #09
#     'account_transactions__AmountPositive_global_sum',                          #10
#     'account_pricing_strategy_transactions__AmountPositive_global_sum',         #11
#     'account_product_category_transactions__AmountPositive_global_avg',         #12
    'account_product_transactions__AmountPositive_week_sum',                    #13
#     'account_provider_transactions__AmountPositive_week_avg',                   #14
#     'account_pricing_strategy_transactions__AmountPositive_global_avg',         #15
#     'account_product_transactions__Value_global_avg',                           #16
#     'account_product_category_transactions__Value_global_avg',                  #17
    'account_channel_transactions__AmountPositive_global_avg',                  #18
#     'account_transactions__AmountPositive_global_avg',                          #19
#     'account_provider_transactions__AmountPositive_week_sum'                    #20
]

# Fit on the five active columns; a submission file is written only if the
# flagged rows differ from every file already in ../submitted.
prediction(X_trn[columns5], y_trn, X_tst[columns5], 'top20chi2_5')

Results: Counter({0: 44946, 1: 73})
It is the same as in: AlBo0807_Manually_Feature_Elimination_ver2.csv


### Проверим, совпадают ли прогнозы `top20chi2_5` и `top18chi2` (результаты совпадают с точностью до одной транзакции: `TransactionId_88604`)

In [36]:
def compare_submitted_files(fname_A, fname_B, submitted_dir='../submitted'):
    """Print how the fraud predictions of two submission files differ.

    Both files are read as CSV and must contain the columns
    ``'TransactionId'`` and ``'FraudResult'``. The transaction ids with
    ``FraudResult == 1`` form set A and set B; the function prints the size
    of each set, their symmetric difference and both one-sided differences
    (each sorted).

    Parameters
    ----------
    fname_A, fname_B : str
        File names inside ``submitted_dir``.
    submitted_dir : str, optional
        Directory holding the submission files. Defaults to ``'../submitted'``.

    Returns
    -------
    None
    """
    fraud_ids = []
    for fname in (fname_A, fname_B):
        df = pd.read_csv(os.path.join(submitted_dir, fname))
        fraud_ids.append(set(df.loc[df['FraudResult'] == 1, 'TransactionId'].tolist()))
    set_A, set_B = fraud_ids

    print('Number of frauds in set A =', len(set_A))
    print('Number of frauds in set B =', len(set_B))

    print('Symmetric Difference of sets A and B (set_A ^ set_B) = ', np.sort(list(set_A ^ set_B)))

    print('Difference of sets A and B (set_A - set_B) =', np.sort(list(set_A - set_B)))
    print('Difference of sets A and B (set_B - set_A) =', np.sort(list(set_B - set_A)))

In [58]:
# Set A: manual feature elimination (ver2); set B: top-18 chi2 bagging submission.
compare_submitted_files(
    fname_A='AlBo0807_Manually_Feature_Elimination_ver2.csv',
    fname_B='AlBo0726_top18chi2_BaggingClassifier.csv',
)

Number of frauds in set A = 73
Number of frauds in set B = 72
Symmetric Difference of sets A and B (set_A ^ set_B) =  ['TransactionId_88604']
Difference of sets A and B (set_A - set_B) = ['TransactionId_88604']
Difference of sets A and B (set_B - set_A) = []


### Чем отличаются предикты `top20chi2_5`  и `top9RFE_ETC`

In [59]:
# Set A: manual feature elimination (ver2); set B: the RFE_ETC_9 submission.
compare_submitted_files(
    fname_A='AlBo0807_Manually_Feature_Elimination_ver2.csv',
    fname_B='AlBo0808_RFE_ETC_9.csv',
)

Number of frauds in set A = 73
Number of frauds in set B = 81
Symmetric Difference of sets A and B (set_A ^ set_B) =  ['TransactionId_11832' 'TransactionId_22203' 'TransactionId_24572'
 'TransactionId_45907' 'TransactionId_47953' 'TransactionId_54314'
 'TransactionId_62319' 'TransactionId_6746' 'TransactionId_88480'
 'TransactionId_88604']
Difference of sets A and B (set_A - set_B) = ['TransactionId_88604']
Difference of sets A and B (set_B - set_A) = ['TransactionId_11832' 'TransactionId_22203' 'TransactionId_24572'
 'TransactionId_45907' 'TransactionId_47953' 'TransactionId_54314'
 'TransactionId_62319' 'TransactionId_6746' 'TransactionId_88480']


In [38]:
# Class probabilities from the same bagging configuration, fitted on the five columns.
predict_proba_top20chi2_5 = (
    BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
    .fit(X_trn[columns5], y_trn)
    .predict_proba(X_tst[columns5])
)

In [40]:
# Fresh submission template with the class-1 probability of each model attached.
df_sbm = pd.read_csv('../data/sample_submission.csv').assign(
    top9RFE_ETC=predict_proba_RFE_ETC_9[:, 1],
    top20chi2_5=predict_proba_top20chi2_5[:, 1],
)

# Rows 320..329 (positional slice) as a spot check.
df_sbm.iloc[320:330]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
320,TransactionId_100428,,0.0,0.0
321,TransactionId_5369,,0.0,0.000133
322,TransactionId_90439,,0.0,0.0
323,TransactionId_99894,,0.0,0.0
324,TransactionId_59114,,1.0,1.0
325,TransactionId_36185,,1.0,1.0
326,TransactionId_70295,,0.0,0.0
327,TransactionId_98831,,0.0,0.0
328,TransactionId_18001,,1.0,1.0
329,TransactionId_140530,,0.0,0.0


In [46]:
# First 50 rows where at least one model gives a fraud probability above 0.5.
df_sbm.loc[df_sbm['top9RFE_ETC'].gt(0.5) | df_sbm['top20chi2_5'].gt(0.5)].head(50)

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
324,TransactionId_59114,,1.0,1.0
325,TransactionId_36185,,1.0,1.0
328,TransactionId_18001,,1.0,1.0
330,TransactionId_38835,,1.0,1.0
460,TransactionId_116948,,1.0,1.0
462,TransactionId_129063,,1.0,1.0
466,TransactionId_56143,,1.0,1.0
1373,TransactionId_20058,,1.0,0.998917
2323,TransactionId_64322,,1.0,1.0
2728,TransactionId_50612,,0.995,1.0


In [48]:
# Rows where both models give a fraud probability above 0.9.
df_sbm.loc[df_sbm['top9RFE_ETC'].gt(0.9) & df_sbm['top20chi2_5'].gt(0.9)]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
324,TransactionId_59114,,1.000,1.000000
325,TransactionId_36185,,1.000,1.000000
328,TransactionId_18001,,1.000,1.000000
330,TransactionId_38835,,1.000,1.000000
460,TransactionId_116948,,1.000,1.000000
462,TransactionId_129063,,1.000,1.000000
466,TransactionId_56143,,1.000,1.000000
1373,TransactionId_20058,,1.000,0.998917
2323,TransactionId_64322,,1.000,1.000000
2728,TransactionId_50612,,0.995,1.000000


### Merger via `AND`

In [51]:
# df_submit holds both models' fraud probabilities; df_sbm is the clean
# template whose 'FraudResult' column is filled in and written to disk.
df_submit = pd.read_csv('../data/sample_submission.csv')
df_sbm    = pd.read_csv('../data/sample_submission.csv')

df_submit['top9RFE_ETC'] = predict_proba_RFE_ETC_9  [:, 1]
df_submit['top20chi2_5'] = predict_proba_top20chi2_5[:, 1]

# AND merge: a row is flagged only when BOTH probabilities exceed the threshold.
for threshold in [50, 60, 65, 70, 75, 80, 85, 90, 95]:
    predict = np.where((df_submit['top9RFE_ETC'] > threshold/100) & (df_submit['top20chi2_5'] > threshold/100), 1, 0)
    # '%.2f' keeps the label correct for any percentage (5 -> 0.05, not 0.5).
    print('Threshold = %.2f Number of predicted frauds = %d' % (threshold/100, predict.sum()))
    df_sbm['FraudResult'] = predict

    # Row positions flagged as fraud by the current candidate.
    current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

    # The directory is re-listed on every iteration on purpose: files written
    # for earlier thresholds must take part in the duplicate check.
    is_exist = False
    files = sorted(os.listdir('../submitted'))
    for f in files:
        # Skip sub-directories and stray non-CSV files; read_csv would fail on them.
        if not f.lower().endswith('.csv'):
            continue
        f_csv = pd.read_csv('../submitted/' + f)
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            print('It is the same as in: ' + f)
            is_exist = True
    if not is_exist:
        print('New result! Submit it!')
        df_sbm.to_csv('../submitted/AlBo0814_Merge_thrsh'+str(threshold)+'.csv', encoding='utf-8', index=False)

Threshold = 0.50 Number of predicted frauds = 72
It is the same as in: AlBo0726_top18chi2_BaggingClassifier.csv
It is the same as in: rf-custom-features-v18.csv
Threshold = 0.60 Number of predicted frauds = 71
New result! Submit it!
Threshold = 0.65 Number of predicted frauds = 70
New result! Submit it!
Threshold = 0.70 Number of predicted frauds = 69
New result! Submit it!
Threshold = 0.75 Number of predicted frauds = 68
New result! Submit it!
Threshold = 0.80 Number of predicted frauds = 67
New result! Submit it!
Threshold = 0.85 Number of predicted frauds = 67
It is the same as in: AlBo0814_Merge_thrsh80.csv
Threshold = 0.90 Number of predicted frauds = 67
It is the same as in: AlBo0814_Merge_thrsh80.csv
Threshold = 0.95 Number of predicted frauds = 60
New result! Submit it!


**Merger `AND` Results:**

`Threshold  Frauds  F1-score`

`0.50       72      0.827586206896552`

`0.60       71      0.807017543859649`

`0.65       70      0.807017543859649`

`0.70       69      0.785714285714286`

`0.75       68      0.763636363636364`

`0.80       67      0.763636363636364`

`0.85       67      0.763636363636364`

`0.90       67      0.763636363636364`

`0.95       60      0.692307692307692`

### Merger via ` OR`

In [53]:
# df_submit holds both models' fraud probabilities; df_sbm is the clean
# template whose 'FraudResult' column is filled in and written to disk.
df_submit = pd.read_csv('../data/sample_submission.csv')
df_sbm    = pd.read_csv('../data/sample_submission.csv')

df_submit['top9RFE_ETC'] = predict_proba_RFE_ETC_9  [:, 1]
df_submit['top20chi2_5'] = predict_proba_top20chi2_5[:, 1]

# OR merge: a row is flagged when AT LEAST ONE probability exceeds the threshold.
for threshold in [50, 55, 60, 65, 70, 75, 80, 85, 90, 95]:
    predict = np.where((df_submit['top9RFE_ETC'] > threshold/100) | (df_submit['top20chi2_5'] > threshold/100), 1, 0)
    # '%.2f' keeps the label correct for any percentage (5 -> 0.05, not 0.5).
    print('Threshold = %.2f Number of predicted frauds = %d' % (threshold/100, predict.sum()))
    df_sbm['FraudResult'] = predict

    # Row positions flagged as fraud by the current candidate.
    current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

    # The directory is re-listed on every iteration on purpose: files written
    # for earlier thresholds must take part in the duplicate check.
    is_exist = False
    files = sorted(os.listdir('../submitted'))
    for f in files:
        # Skip sub-directories and stray non-CSV files; read_csv would fail on them.
        if not f.lower().endswith('.csv'):
            continue
        f_csv = pd.read_csv('../submitted/' + f)
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            print('It is the same as in: ' + f)
            is_exist = True
    if not is_exist:
        print('New result! Submit it!')
        df_sbm.to_csv('../submitted/AlBo0815_Merge_thrsh'+str(threshold)+'.csv', encoding='utf-8', index=False)

Threshold = 0.50 Number of predicted frauds = 82
New result! Submit it!
Threshold = 0.55 Number of predicted frauds = 82
It is the same as in: AlBo0815_Merge_thrsh50.csv
Threshold = 0.60 Number of predicted frauds = 74
It is the same as in: AlBo0726_top8chi2_BaggingClassifier.csv
Threshold = 0.65 Number of predicted frauds = 73
New result! Submit it!
Threshold = 0.70 Number of predicted frauds = 72
It is the same as in: AlBo0726_top18chi2_BaggingClassifier.csv
It is the same as in: rf-custom-features-v18.csv
Threshold = 0.75 Number of predicted frauds = 72
It is the same as in: AlBo0726_top18chi2_BaggingClassifier.csv
It is the same as in: rf-custom-features-v18.csv
Threshold = 0.80 Number of predicted frauds = 71
New result! Submit it!
Threshold = 0.85 Number of predicted frauds = 70
New result! Submit it!
Threshold = 0.90 Number of predicted frauds = 69
New result! Submit it!
Threshold = 0.95 Number of predicted frauds = 67
New result! Submit it!


#### **Merger `OR` Results:**

`Threshold  Frauds  F1-score`

`0.50       82      0.793650793650794`

`0.55       82      0.793650793650794` (as 0.50)

`0.60       74      0.714285714285714`

`0.65       73      0.813559322033898`

`0.70       72      0.827586206896552` (as top18chi2)

`0.75       72      0.827586206896552` (as top18chi2)

`0.80       71      0.807017543859649`

`0.85       70      0.785714285714286`

`0.90       69      0.763636363636364`

`0.95       67      0.` (F1-score was not recorded in full)

### Difference between `top20chi2_5` and `top9RFE_ETC`
 
`top9RFE_ETC` - `top20chi2_5`:

- `TransactionId_11832`
- `TransactionId_22203`
- `TransactionId_24572`
- `TransactionId_45907`
- `TransactionId_47953`
- `TransactionId_54314`
- `TransactionId_62319`
- `TransactionId_6746`
- `TransactionId_88480`

### Каковы вероятности для тех предиктов, которые дали прирост F1-score? 

- `TransactionId_62319`
- `TransactionId_6746`
- `TransactionId_88480`

In [55]:
# Probabilities of both models for the three listed transactions.
df_submit[df_submit['TransactionId'].isin([
    'TransactionId_62319',
    'TransactionId_6746',
    'TransactionId_88480',
])]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5271,TransactionId_88480,,0.57,0.224
5367,TransactionId_62319,,0.569,0.101
11023,TransactionId_6746,,0.586,0.072


Каковы вероятности для тех предиктов, которые НЕ дали прирост F1-score
- `TransactionId_11832`,
- `TransactionId_22203`,
- `TransactionId_24572`,
- `TransactionId_45907`,
- `TransactionId_47953`,
- `TransactionId_54314`

In [57]:
# Probabilities of both models for the six listed transactions.
df_submit[df_submit['TransactionId'].isin([
    'TransactionId_11832',
    'TransactionId_22203',
    'TransactionId_24572',
    'TransactionId_45907',
    'TransactionId_47953',
    'TransactionId_54314',
])]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5273,TransactionId_54314,,0.594,0.164
5276,TransactionId_24572,,0.577,0.109
19763,TransactionId_11832,,0.581,0.270506
19897,TransactionId_22203,,0.581,0.270506
22076,TransactionId_45907,,0.688,0.411
30847,TransactionId_47953,,0.614,0.000133


In [60]:
# Keep the nine transactions on which the two models' predictions disagree.
df_8 = df_submit[df_submit['TransactionId'].isin([
    'TransactionId_11832',
    'TransactionId_22203',
    'TransactionId_24572',
    'TransactionId_45907',
    'TransactionId_47953',
    'TransactionId_54314',
    'TransactionId_62319',
    'TransactionId_6746',
    'TransactionId_88480',
])]

In [63]:
# Display the selected rows with both probability columns.
df_8

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5271,TransactionId_88480,,0.57,0.224
5273,TransactionId_54314,,0.594,0.164
5276,TransactionId_24572,,0.577,0.109
5367,TransactionId_62319,,0.569,0.101
11023,TransactionId_6746,,0.586,0.072
19763,TransactionId_11832,,0.581,0.270506
19897,TransactionId_22203,,0.581,0.270506
22076,TransactionId_45907,,0.688,0.411
30847,TransactionId_47953,,0.614,0.000133


### Пусть сумма вероятностей превышает 1.0

In [73]:
# Rows of df_8 whose two probabilities add up to more than 1.0.
df_8.loc[df_8['top9RFE_ETC'].add(df_8['top20chi2_5']).gt(1.0)]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
22076,TransactionId_45907,,0.688,0.411


In [81]:
def is_predict_exist(df, submitted_dir='../submitted'):
    """Check whether an identical prediction has already been saved.

    The row positions where ``df['FraudResult'] == 1`` are compared with the
    same set computed for every CSV file in ``submitted_dir``. The name of
    each matching file is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate submission; must contain a ``'FraudResult'`` column.
    submitted_dir : str, optional
        Directory with earlier submissions. Defaults to ``'../submitted'``.

    Returns
    -------
    bool
        True if at least one existing file flags exactly the same rows.
    """
    current_subm_set = set(df[df['FraudResult'] == 1].index.tolist())

    is_exist = False
    for f in sorted(os.listdir(submitted_dir)):
        # Skip sub-directories and stray non-CSV files; read_csv would fail on them.
        if not f.lower().endswith('.csv'):
            continue
        f_csv = pd.read_csv(os.path.join(submitted_dir, f))
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            print('It is the same as in: ' + f)
            is_exist = True
    return is_exist

In [82]:
# Flag a row as fraud when the two probabilities add up to more than 1.0.
predict = np.where(df_submit['top9RFE_ETC'].add(df_submit['top20chi2_5']).gt(1.0), 1, 0)
df_sbm['FraudResult'] = predict
# Class counts of the merged prediction.
Counter(df_sbm['FraudResult'])

Counter({0: 44945, 1: 74})

In [84]:
# Write the submission only when no file in ../submitted flags the same rows.
if not is_predict_exist(df_sbm):
    print('New result!')
    df_sbm.to_csv('../submitted/AlBo0815_Merge_SUM_1_00.csv', encoding='utf-8', index=False)

New result!


**Results:**

`74  0.813559322033898`

`TransactionId_45907` - FP

In [90]:
# F1 difference: 0.8276 (threshold-0.50 AND merge) minus 0.8136 (sum > 1.0 merge).
0.827586206896552 - 0.813559322033898

0.014026884862654021

### Пусть сумма вероятностей превышает 0.85

In [87]:
# Rows of df_8 whose two probabilities add up to more than 0.85.
df_8.loc[df_8['top9RFE_ETC'].add(df_8['top20chi2_5']).gt(0.85)]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
19763,TransactionId_11832,,0.581,0.270506
19897,TransactionId_22203,,0.581,0.270506
22076,TransactionId_45907,,0.688,0.411


In [89]:
# Flag a row as fraud when the two probabilities add up to more than 0.85.
predict = np.where(df_submit['top9RFE_ETC'].add(df_submit['top20chi2_5']).gt(0.85), 1, 0)
df_sbm['FraudResult'] = predict
print(Counter(df_sbm['FraudResult']))

# Save only when no earlier submission flags exactly the same rows.
if not is_predict_exist(df_sbm):
    print('New result!')
    df_sbm.to_csv('../submitted/AlBo0815_Merge_SUM_0_85.csv', encoding='utf-8', index=False)

New result!


In [92]:
# Class counts of the prediction currently stored in df_sbm.
print(Counter(df_sbm['FraudResult']))

Counter({0: 44943, 1: 76})


**Results:**

`76  0.786885245901639`

`TransactionId_11832`, `TransactionId_22203`, `TransactionId_45907` - FP

In [93]:
# Half of the F1 difference between the sum > 1.0 merge (0.8136) and the sum > 0.85 merge (0.7869).
(0.813559322033898 - 0.786885245901639) / 2

0.013337038066129514

### Пусть сумма вероятностей превышает 0.75

In [104]:
# Rows of df_8 whose two probabilities add up to more than 0.75.
df_8.loc[df_8['top9RFE_ETC'].add(df_8['top20chi2_5']).gt(0.75)]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5271,TransactionId_88480,,0.57,0.224
5273,TransactionId_54314,,0.594,0.164
19763,TransactionId_11832,,0.581,0.270506
19897,TransactionId_22203,,0.581,0.270506
22076,TransactionId_45907,,0.688,0.411


In [105]:
# Flag a row as fraud when the two probabilities add up to more than 0.75.
predict = np.where(df_submit['top9RFE_ETC'].add(df_submit['top20chi2_5']).gt(0.75), 1, 0)
df_sbm['FraudResult'] = predict
print(Counter(df_sbm['FraudResult']))

# Save only when no earlier submission flags exactly the same rows.
if not is_predict_exist(df_sbm):
    print('New result!')
    df_sbm.to_csv('../submitted/AlBo0815_Merge_SUM_0_75.csv', encoding='utf-8', index=False)

Counter({0: 44938, 1: 81})
New result!


In [106]:
# Set A: sum > 0.75 submission; set B: sum > 0.85 submission.
compare_submitted_files(
    fname_A='AlBo0815_Merge_SUM_0_75.csv',
    fname_B='AlBo0815_Merge_SUM_0_85.csv',
)

Number of frauds in set A = 81
Number of frauds in set B = 76
Symmetric Difference of sets A and B (set_A ^ set_B) =  ['TransactionId_37370' 'TransactionId_54314' 'TransactionId_62470'
 'TransactionId_88480' 'TransactionId_99792']
Difference of sets A and B (set_A - set_B) = ['TransactionId_37370' 'TransactionId_54314' 'TransactionId_62470'
 'TransactionId_88480' 'TransactionId_99792']
Difference of sets A and B (set_B - set_A) = []


In [107]:
# Probabilities of both models for the five listed transactions.
df_submit[df_submit['TransactionId'].isin([
    'TransactionId_37370',
    'TransactionId_54314',
    'TransactionId_62470',
    'TransactionId_88480',
    'TransactionId_99792',
])]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5271,TransactionId_88480,,0.57,0.224
5273,TransactionId_54314,,0.594,0.164
20146,TransactionId_99792,,0.494,0.270506
21954,TransactionId_62470,,0.409,0.403
21963,TransactionId_37370,,0.409,0.403


In [108]:
# Flag a row when the probabilities add up to more than 0.75 AND at least one
# of the two models alone gives a probability above 0.5.
predict = np.where((df_submit['top9RFE_ETC'] + df_submit['top20chi2_5'] > 0.75) & 
                   ((df_submit['top9RFE_ETC'] > 0.5) | (df_submit['top20chi2_5'] > 0.5)), 1, 0)
df_sbm['FraudResult'] = predict
print(Counter(df_sbm['FraudResult']))

if not is_predict_exist(df_sbm):
    print('New result!')
    # Use a distinct file name: 'AlBo0815_Merge_SUM_0_75.csv' already holds the
    # plain sum > 0.75 submission and must not be overwritten by this variant.
    df_sbm.to_csv('../submitted/AlBo0815_Merge_SUM_0_75_ANY_0_50.csv', encoding='utf-8', index=False)

Counter({0: 44941, 1: 78})
New result!


**Results:**

`78  0.786885245901639`

`TransactionId_88480`, `TransactionId_54314` is in PrivateLeaderboard

### Пусть сумма вероятностей превышает 0.65

In [110]:
# Rows of df_8 whose two probabilities add up to more than 0.65.
df_8.loc[df_8['top9RFE_ETC'].add(df_8['top20chi2_5']).gt(0.65)]

Unnamed: 0,TransactionId,FraudResult,top9RFE_ETC,top20chi2_5
5271,TransactionId_88480,,0.57,0.224
5273,TransactionId_54314,,0.594,0.164
5276,TransactionId_24572,,0.577,0.109
5367,TransactionId_62319,,0.569,0.101
11023,TransactionId_6746,,0.586,0.072
19763,TransactionId_11832,,0.581,0.270506
19897,TransactionId_22203,,0.581,0.270506
22076,TransactionId_45907,,0.688,0.411


In [111]:
# Flag a row when the probabilities add up to more than 0.65 AND at least one
# of the two models alone gives a probability above 0.5.
predict = np.where(
    df_submit['top9RFE_ETC'].add(df_submit['top20chi2_5']).gt(0.65)
    & (df_submit['top9RFE_ETC'].gt(0.5) | df_submit['top20chi2_5'].gt(0.5)),
    1,
    0,
)
df_sbm['FraudResult'] = predict
print(Counter(df_sbm['FraudResult']))

# Save only when no earlier submission flags exactly the same rows.
if not is_predict_exist(df_sbm):
    print('New result!')
    df_sbm.to_csv('../submitted/AlBo0815_Merge_SUM_0_65.csv', encoding='utf-8', index=False)

Counter({0: 44938, 1: 81})
New result!


**Results:**

`81 0.806451612903226`

Видимо, `TransactionId_62319` или `TransactionId_6746` — TP, а `TransactionId_24572` попадает в `PrivateLeaderBoard`.

In [20]:
# Thresholding the class-1 probability at 0.50 and counting the positives.
predict = (predict_proba_RFE_ETC_9[:, 1] > 0.50).astype(int)
predict.sum()

81