# Recursive Feature Elimination (RFE), cont.
1. Extra Trees Classifier
2. Random Forest Classifier
3. Ada Boost Classifier

## Extra Trees Classifier

In [1]:
import numpy  as np
import pandas as pd

In [2]:
# Load the aggregated train/test feature tables and the submission template.
df_trn, df_tst, df_sbm = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/train-agg-cut.csv',
        '../data/test-agg-cut.csv',
        '../data/sample_submission.csv',
    )
)

In [3]:
# Target column first, then every remaining column as features.
y_trn = df_trn['FraudResult']
X_trn = df_trn.drop(columns=['FraudResult'])

# The test frame is used as-is (same object, not a copy).
X_tst = df_tst

In [4]:
import os
from collections import Counter
from sklearn.ensemble import BaggingClassifier

In [8]:
def prediction(X_trn, y_trn, X_tst, name):
    """Fit a bagging ensemble, predict the test set and save the result if it is new.

    The predictions are written into the global ``df_sbm['FraudResult']``
    (side effect). The set of row positions predicted as fraud is then
    compared with every CSV file in ``../submitted``; the submission is saved
    as ``../submitted/AlBo0808_RFE_<name>.csv`` only when no existing file
    flags exactly the same rows.

    Parameters
    ----------
    X_trn, y_trn : training features and labels passed to ``fit``.
    X_tst : test features passed to ``predict``.
    name : suffix used in the output file name.

    Returns
    -------
    None. Results are reported via ``print`` and, possibly, a new CSV file.
    """
    predict = (
        BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
        .fit(X_trn, y_trn)
        .predict(X_tst)
    )
    print('Results:', Counter(predict))
    df_sbm['FraudResult'] = predict

    # A submission is identified by the row positions flagged as fraud.
    # NOTE(review): assumes every saved file has the same row order as df_sbm.
    current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

    # os.listdir returns every entry of the folder (hidden files, checkpoint
    # directories, ...); only CSV files can be parsed as submissions.
    is_exist = False
    files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
    for f in files:
        f_csv = pd.read_csv('../submitted/' + f)
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            # Keep scanning so that every identical file is reported.
            print('It is the same as in: ' + f)
            is_exist = True
    if not is_exist:
        print('New result! Submit it!')
        df_sbm.to_csv('../submitted/AlBo0808_RFE_' + name + '.csv', encoding='utf-8', index=False)

In [9]:
# Five-feature subset used for the first run of this section; the following
# cells extend this list by one feature at a time.
# NOTE(review): presumably the top-5 features from RFE with Extra Trees — confirm.
columns5 = [
    'Value',
    'AmountPositive',
    'product_category_transactions__AmountNegative_global_count',
    'account_provider_transactions__AmountPositive_global_avg',
    'provider_transactions__Value_global_avg',
]

In [10]:
# Train and predict on the 5-feature subset only.
prediction(
    X_trn=X_trn[columns5],
    y_trn=y_trn,
    X_tst=X_tst[columns5],
    name='ETC_5',
)

Results: Counter({0: 44951, 1: 68})
It is the same as in: AlBo0808_RFE_ETC_5.csv


**Results:** `0.689655172413793`

In [11]:
# Previous subset (columns5) plus one additional feature.
# Requires the columns5 cell of this section to have been run.
columns6 = columns5 + [
    'account_product_transactions__AmountPositive_global_sum',
]

prediction(X_trn[columns6], y_trn, X_tst[columns6], 'ETC_6')

Results: Counter({0: 44946, 1: 73})
It is the same as in: AlBo0726_top292chi2_BaggingClassifier.csv


In [12]:
# Previous subset (columns6) plus one additional feature.
# Requires the columns6 cell of this section to have been run.
columns7 = columns6 + [
    'product_category_transactions__AmountPositive_global_count',
]

prediction(X_trn[columns7], y_trn, X_tst[columns7], 'ETC_7')

Results: Counter({0: 44946, 1: 73})
It is the same as in: AlBo0726_top292chi2_BaggingClassifier.csv


In [13]:
# Previous subset (columns7) plus one additional feature.
# Requires the columns7 cell of this section to have been run.
columns8 = columns7 + [
    'account_pricing_strategy_transactions__Value_global_avg',
]

prediction(X_trn[columns8], y_trn, X_tst[columns8], 'ETC_8')

Results: Counter({0: 44946, 1: 73})
It is the same as in: AlBo0726_top28chi2_BaggingClassifier.csv


**`AlBo0726_top28chi2_BaggingClassifier.csv:`** `0.793103448275862`

In [15]:
# Repeat of the 8-feature run: columns7 plus one additional feature.
# Requires the columns7 cell of this section to have been run.
columns8 = columns7 + [
    'account_pricing_strategy_transactions__Value_global_avg',
]

prediction(X_trn[columns8], y_trn, X_tst[columns8], 'ETC_8')

Results: Counter({0: 44946, 1: 73})
It is the same as in: AlBo0726_top28chi2_BaggingClassifier.csv


**`AlBo0726_top28chi2_BaggingClassifier.csv:`** `0.793103448275862`

In [16]:
# Previous subset (columns8) plus one additional feature.
# Requires the columns8 cell of this section to have been run.
columns9 = columns8 + [
    'product_category_transactions__AmountNegative_week_count',
]

prediction(X_trn[columns9], y_trn, X_tst[columns9], 'ETC_9')

Results: Counter({0: 44938, 1: 81})
New result! Submit it!


**Results:** `0.793650793650794`

### Cheating

In [46]:
# Same bagging configuration as in prediction(), on the 9-feature subset,
# but keeping per-class probabilities instead of hard labels.
predict_proba = (
    BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
    .fit(X_trn[columns9], y_trn)
    .predict_proba(X_tst[columns9])
)

In [50]:
# Label a row 1 only when column 1 of predict_proba exceeds 0.55
# (presumably the fraud-class probability — confirm via classes_).
predict = (predict_proba[:, 1] > 0.55).astype(int)
predict.sum()

80

In [55]:
df_sbm['FraudResult'] = predict
# Check whether exactly the same result has been saved before: a submission
# is identified by the row positions flagged as fraud.
current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

# Scan the CSV files in the `submitted` folder. os.listdir also returns
# non-submission entries (hidden files, checkpoint directories), which
# cannot be parsed as submissions, so they are filtered out.
is_exist = False
files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
for f in files:
    f_csv = pd.read_csv('../submitted/' + f)
    if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
        print('It is the same as in: ' + f)
        is_exist = True
if not is_exist:
    print('New result! Submit it!')
    df_sbm.to_csv('../submitted/AlBo0808_RFE_ETC_9_80.csv', encoding='utf-8', index=False)

New result! Submit it!


In [71]:
# Compare the default-threshold submission with the 0.55-threshold one
# by the transaction IDs each of them flags as fraud.
df_ETC_9 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_9.csv')
df_ETC_9_80 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_9_80.csv')

set_ETC_9 = set(df_ETC_9.loc[df_ETC_9['FraudResult'] == 1, 'TransactionId'])
set_ETC_9_80 = set(df_ETC_9_80.loc[df_ETC_9_80['FraudResult'] == 1, 'TransactionId'])

# Sizes, IDs unique to each side, then the symmetric difference.
print(len(set_ETC_9), len(set_ETC_9_80), sep='\n')
print(np.sort(list(set_ETC_9.difference(set_ETC_9_80))))
print(np.sort(list(set_ETC_9_80.difference(set_ETC_9))))
print(np.sort(list(set_ETC_9_80.symmetric_difference(set_ETC_9))))

81
80
['TransactionId_11374']
[]
['TransactionId_11374']


In [72]:
# Compare the top18chi2 submission with the 9-feature RFE submission
# by the transaction IDs each of them flags as fraud.
df_top18 = pd.read_csv('../submitted/AlBo0726_top18chi2_BaggingClassifier.csv')
df_ETC_9 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_9.csv')

set_top18 = set(df_top18.loc[df_top18['FraudResult'] == 1, 'TransactionId'])
set_ETC_9 = set(df_ETC_9.loc[df_ETC_9['FraudResult'] == 1, 'TransactionId'])

# Sizes, IDs unique to each side, then the symmetric difference.
print(len(set_ETC_9), len(set_top18), sep='\n')
print(np.sort(list(set_ETC_9.difference(set_top18))))
print(np.sort(list(set_top18.difference(set_ETC_9))))
print(np.sort(list(set_top18.symmetric_difference(set_ETC_9))))

81
72
['TransactionId_11832' 'TransactionId_22203' 'TransactionId_24572'
 'TransactionId_45907' 'TransactionId_47953' 'TransactionId_54314'
 'TransactionId_62319' 'TransactionId_6746' 'TransactionId_88480']
[]
['TransactionId_11832' 'TransactionId_22203' 'TransactionId_24572'
 'TransactionId_45907' 'TransactionId_47953' 'TransactionId_54314'
 'TransactionId_62319' 'TransactionId_6746' 'TransactionId_88480']


In [74]:
# TransactionId_11374 is the only ID flagged by ETC_9 but not by ETC_9_80;
# check whether the top18chi2 submission flags it as well.
'TransactionId_11374' in set_top18

True

берем первую тройку транзакций, отличающихся от лучшего результата (`top18chi2`)

In [None]:
# Start from the top18chi2 predictions, then additionally flag the first
# three transactions that ETC_9 marks as fraud and top18chi2 does not.
df_sbm['FraudResult'] = df_top18['FraudResult']

triple_ids = ['TransactionId_11832', 'TransactionId_22203', 'TransactionId_24572']

# Values before the override.
for transaction_id in triple_ids:
    print(df_sbm['FraudResult'][df_sbm['TransactionId'] == transaction_id])

# Boolean-mask assignment goes through .loc; .at is a scalar accessor that
# expects a single row label, not a mask.
df_sbm.loc[df_sbm['TransactionId'].isin(triple_ids), 'FraudResult'] = 1

# Values after the override.
for transaction_id in triple_ids:
    print(df_sbm['FraudResult'][df_sbm['TransactionId'] == transaction_id])

In [92]:
# Check whether exactly the same result has been saved before: a submission
# is identified by the row positions flagged as fraud.
current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

# Scan the CSV files in the `submitted` folder. os.listdir also returns
# non-submission entries (hidden files, checkpoint directories), which
# cannot be parsed as submissions, so they are filtered out.
is_exist = False
files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
for f in files:
    f_csv = pd.read_csv('../submitted/' + f)
    if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
        print('It is the same as in: ' + f)
        is_exist = True
if not is_exist:
    print('New result! Submit it!')
    df_sbm.to_csv('../submitted/AlBo0808_top18chi2plus3.csv', encoding='utf-8', index=False)

New result! Submit it!


**Result:** `0.8`

берем вторую тройку транзакций, отличающихся от лучшего результата (`top18chi2`)

In [94]:
# Reset to the top18chi2 predictions, then additionally flag the second
# three transactions that ETC_9 marks as fraud and top18chi2 does not.
df_sbm['FraudResult'] = df_top18['FraudResult']

# Boolean-mask assignment goes through .loc; .at is a scalar accessor that
# expects a single row label, not a mask.
df_sbm.loc[
    df_sbm['TransactionId'].isin(
        ['TransactionId_45907', 'TransactionId_47953', 'TransactionId_54314']
    ),
    'FraudResult',
] = 1

In [95]:
# Check whether exactly the same result has been saved before: a submission
# is identified by the row positions flagged as fraud.
current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

# Scan the CSV files in the `submitted` folder. os.listdir also returns
# non-submission entries (hidden files, checkpoint directories), which
# cannot be parsed as submissions, so they are filtered out.
is_exist = False
files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
for f in files:
    f_csv = pd.read_csv('../submitted/' + f)
    if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
        print('It is the same as in: ' + f)
        is_exist = True
if not is_exist:
    print('New result! Submit it!')
    df_sbm.to_csv('../submitted/AlBo0808_top18chi2plus3_2.csv', encoding='utf-8', index=False)

New result! Submit it!


берем третью тройку транзакций, отличающихся от лучшего результата (`top18chi2`)

In [97]:
# Reset to the top18chi2 predictions, then additionally flag the last
# three transactions that ETC_9 marks as fraud and top18chi2 does not.
df_sbm['FraudResult'] = df_top18['FraudResult']

# Boolean-mask assignment goes through .loc; .at is a scalar accessor that
# expects a single row label, not a mask.
df_sbm.loc[
    df_sbm['TransactionId'].isin(
        ['TransactionId_62319', 'TransactionId_6746', 'TransactionId_88480']
    ),
    'FraudResult',
] = 1

In [98]:
# Check whether exactly the same result has been saved before: a submission
# is identified by the row positions flagged as fraud.
current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

# Scan the CSV files in the `submitted` folder. os.listdir also returns
# non-submission entries (hidden files, checkpoint directories), which
# cannot be parsed as submissions, so they are filtered out.
is_exist = False
files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
for f in files:
    f_csv = pd.read_csv('../submitted/' + f)
    if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
        print('It is the same as in: ' + f)
        is_exist = True
if not is_exist:
    print('New result! Submit it!')
    df_sbm.to_csv('../submitted/AlBo0808_top18chi2plus3_3.csv', encoding='utf-8', index=False)

New result! Submit it!


**Result:** `0.847457627118644`

### Continue

In [100]:
# Previous subset (columns9) plus one additional feature.
# Requires the columns9 cell of this section to have been run.
columns10 = columns9 + [
    'provider_transactions__AmountPositive_week_count',
]

prediction(X_trn[columns10], y_trn, X_tst[columns10], 'ETC_10')

Results: Counter({0: 44937, 1: 82})
New result! Submit it!


In [101]:
# Compare the 9- and 10-feature submissions by the transaction IDs
# each of them flags as fraud.
df_ETC_9 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_9.csv')
df_ETC_10 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_10.csv')

set_ETC_9 = set(df_ETC_9.loc[df_ETC_9['FraudResult'] == 1, 'TransactionId'])
set_ETC_10 = set(df_ETC_10.loc[df_ETC_10['FraudResult'] == 1, 'TransactionId'])

# Sizes, IDs unique to each side, then the symmetric difference.
print(len(set_ETC_9), len(set_ETC_10), sep='\n')
print(np.sort(list(set_ETC_10.difference(set_ETC_9))))
print(np.sort(list(set_ETC_9.difference(set_ETC_10))))
print(np.sort(list(set_ETC_9.symmetric_difference(set_ETC_10))))

81
82
['TransactionId_99792']
[]
['TransactionId_99792']


In [102]:
# Previous subset (columns10) plus one additional feature.
# Requires the columns10 cell of this section to have been run.
columns11 = columns10 + [
    'account_provider_transactions__Value_global_avg',
]

prediction(X_trn[columns11], y_trn, X_tst[columns11], 'ETC_11')

Results: Counter({0: 44938, 1: 81})
It is the same as in: AlBo0808_RFE_ETC_9.csv


In [103]:
# Previous subset (columns11) plus one additional feature.
# Requires the columns11 cell of this section to have been run.
columns12 = columns11 + [
    'pricing_strategy_transactions__Value_global_avg',
]

prediction(X_trn[columns12], y_trn, X_tst[columns12], 'ETC_12')

Results: Counter({0: 44932, 1: 87})
New result! Submit it!


In [104]:
# Compare the 9- and 12-feature submissions by the transaction IDs
# each of them flags as fraud.
df_ETC_9 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_9.csv')
df_ETC_12 = pd.read_csv('../submitted/AlBo0808_RFE_ETC_12.csv')

set_ETC_9 = set(df_ETC_9.loc[df_ETC_9['FraudResult'] == 1, 'TransactionId'])
set_ETC_12 = set(df_ETC_12.loc[df_ETC_12['FraudResult'] == 1, 'TransactionId'])

# Sizes, IDs unique to each side, then the symmetric difference.
print(len(set_ETC_9), len(set_ETC_12), sep='\n')
print(np.sort(list(set_ETC_12.difference(set_ETC_9))))
print(np.sort(list(set_ETC_9.difference(set_ETC_12))))
print(np.sort(list(set_ETC_9.symmetric_difference(set_ETC_12))))

81
87
['TransactionId_112983' 'TransactionId_132243' 'TransactionId_29526'
 'TransactionId_3655' 'TransactionId_6285' 'TransactionId_99792']
[]
['TransactionId_112983' 'TransactionId_132243' 'TransactionId_29526'
 'TransactionId_3655' 'TransactionId_6285' 'TransactionId_99792']


## Random Forest Classifier

In [105]:
import numpy  as np
import pandas as pd

In [106]:
# Reload the train/test feature tables and a fresh submission template
# for the Random Forest section.
df_trn, df_tst, df_sbm = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/train-agg-cut.csv',
        '../data/test-agg-cut.csv',
        '../data/sample_submission.csv',
    )
)

In [107]:
# Target column first, then every remaining column as features.
y_trn = df_trn['FraudResult']
X_trn = df_trn.drop(columns=['FraudResult'])

# The test frame is used as-is (same object, not a copy).
X_tst = df_tst

In [108]:
import os
from collections import Counter
from sklearn.ensemble import BaggingClassifier

In [109]:
def prediction(X_trn, y_trn, X_tst, name):
    """Fit a bagging ensemble, predict the test set and save the result if it is new.

    Redefines ``prediction`` for this section with the ``AlBo0809_RFE_`` file
    prefix (the earlier definition in this notebook uses ``AlBo0808_RFE_``).

    The predictions are written into the global ``df_sbm['FraudResult']``
    (side effect). The set of row positions predicted as fraud is then
    compared with every CSV file in ``../submitted``; the submission is saved
    as ``../submitted/AlBo0809_RFE_<name>.csv`` only when no existing file
    flags exactly the same rows.

    Parameters
    ----------
    X_trn, y_trn : training features and labels passed to ``fit``.
    X_tst : test features passed to ``predict``.
    name : suffix used in the output file name.

    Returns
    -------
    None. Results are reported via ``print`` and, possibly, a new CSV file.
    """
    predict = (
        BaggingClassifier(n_estimators=1000, n_jobs=-1, random_state=24)
        .fit(X_trn, y_trn)
        .predict(X_tst)
    )
    print('Results:', Counter(predict))
    df_sbm['FraudResult'] = predict

    # A submission is identified by the row positions flagged as fraud.
    # NOTE(review): assumes every saved file has the same row order as df_sbm.
    current_subm_set = set(df_sbm[df_sbm['FraudResult'] == 1].index.tolist())

    # os.listdir returns every entry of the folder (hidden files, checkpoint
    # directories, ...); only CSV files can be parsed as submissions.
    is_exist = False
    files = sorted(f for f in os.listdir('../submitted') if f.lower().endswith('.csv'))
    for f in files:
        f_csv = pd.read_csv('../submitted/' + f)
        if set(f_csv[f_csv['FraudResult'] == 1].index.tolist()) == current_subm_set:
            # Keep scanning so that every identical file is reported.
            print('It is the same as in: ' + f)
            is_exist = True
    if not is_exist:
        print('New result! Submit it!')
        df_sbm.to_csv('../submitted/AlBo0809_RFE_' + name + '.csv', encoding='utf-8', index=False)

In [110]:
# Five-feature starting subset for the Random Forest section; the following
# cells extend this list by one feature at a time.
columns5 = [
    'AmountPositive',
    'Value',
    'product_category_transactions__AmountPositive_global_count',
    'account_provider_transactions__Value_global_avg',
    'account_product_transactions__AmountPositive_global_avg',
]

prediction(
    X_trn=X_trn[columns5],
    y_trn=y_trn,
    X_tst=X_tst[columns5],
    name='RFC_5',
)

Results: Counter({0: 44958, 1: 61})
It is the same as in: AlBo0726_top2chi2_BaggingClassifier.csv


In [111]:
# Previous subset (columns5) plus one additional feature.
# Requires the columns5 cell of this section to have been run.
columns6 = columns5 + [
    'product_category_transactions__AmountPositive_global_avg',
]

prediction(X_trn[columns6], y_trn, X_tst[columns6], 'RFC_6')

Results: Counter({0: 44941, 1: 78})
New result! Submit it!


**Result:** `0.73015873015873`

In [113]:
# Previous subset (columns6) plus one additional feature.
# Requires the columns6 cell of this section to have been run.
columns7 = columns6 + [
    'provider_transactions__AmountPositive_global_avg',
]

prediction(X_trn[columns7], y_trn, X_tst[columns7], 'RFC_7')

Results: Counter({0: 44951, 1: 68})
New result! Submit it!


**Result:** `0.714285714285714`

In [114]:
# Previous subset (columns7) plus one additional feature.
# Requires the columns7 cell of this section to have been run.
columns8 = columns7 + [
    'account_provider_transactions__AmountPositive_global_avg',
]

prediction(X_trn[columns8], y_trn, X_tst[columns8], 'RFC_8')

Results: Counter({0: 44948, 1: 71})
New result! Submit it!


**Result:** `0.736842105263158`

In [115]:
# Previous subset (columns8) plus one additional feature.
# Requires the columns8 cell of this section to have been run.
columns9 = columns8 + [
    'pricing_strategy_transactions__AmountPositive_week_sum',
]

prediction(X_trn[columns9], y_trn, X_tst[columns9], 'RFC_9')

Results: Counter({0: 44927, 1: 92})
New result! Submit it!


**Result:** `0.704225352112676`

In [117]:
# Previous subset (columns9) plus one additional feature.
# Requires the columns9 cell of this section to have been run.
columns10 = columns9 + [
    'account_product_transactions__Value_global_avg',
]

prediction(X_trn[columns10], y_trn, X_tst[columns10], 'RFC_10')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [118]:
# Previous subset (columns10) plus one additional feature.
# Requires the columns10 cell of this section to have been run.
columns11 = columns10 + [
    'channel_transactions__AmountNegative_week_count',
]

prediction(X_trn[columns11], y_trn, X_tst[columns11], 'RFC_11')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [119]:
# Previous subset (columns11) plus one additional feature.
# Requires the columns11 cell of this section to have been run.
columns12 = columns11 + [
    'product_transactions__Value_global_sum',
]

prediction(X_trn[columns12], y_trn, X_tst[columns12], 'RFC_12')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [120]:
# Previous subset (columns12) plus one additional feature.
# Requires the columns12 cell of this section to have been run.
columns13 = columns12 + [
    'provider_transactions__Value_global_avg',
]

prediction(X_trn[columns13], y_trn, X_tst[columns13], 'RFC_13')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [121]:
# Previous subset (columns13) plus one additional feature.
# Requires the columns13 cell of this section to have been run.
columns14 = columns13 + [
    'account_product_category_transactions__Value_global_avg',
]

prediction(X_trn[columns14], y_trn, X_tst[columns14], 'RFC_14')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [122]:
# Previous subset (columns14) plus one additional feature.
# Requires the columns14 cell of this section to have been run.
columns15 = columns14 + [
    'product_transactions__AmountPositive_global_sum',
]

prediction(X_trn[columns15], y_trn, X_tst[columns15], 'RFC_15')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [123]:
# Previous subset (columns15) plus one additional feature.
# Requires the columns15 cell of this section to have been run.
columns16 = columns15 + [
    'pricing_strategy_transactions__AmountPositive_global_avg',
]

prediction(X_trn[columns16], y_trn, X_tst[columns16], 'RFC_16')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [124]:
# Previous subset (columns16) plus one additional feature.
# Requires the columns16 cell of this section to have been run.
columns17 = columns16 + [
    'pricing_strategy_transactions__AmountPositive_week_count',
]

prediction(X_trn[columns17], y_trn, X_tst[columns17], 'RFC_17')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [126]:
# Previous subset (columns17) plus one additional feature.
# Requires the columns17 cell of this section to have been run.
columns18 = columns17 + [
    'pricing_strategy_transactions__Value_global_avg',
]

prediction(X_trn[columns18], y_trn, X_tst[columns18], 'RFC_18')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [127]:
# Previous subset (columns18) plus one additional feature.
# Requires the columns18 cell of this section to have been run.
columns19 = columns18 + [
    'product_category_transactions__AmountPositive_global_sum',
]

prediction(X_trn[columns19], y_trn, X_tst[columns19], 'RFC_19')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [128]:
# Previous subset (columns19) plus one additional feature.
# Requires the columns19 cell of this section to have been run.
columns20 = columns19 + [
    'account_channel_transactions__Value_global_avg',
]

prediction(X_trn[columns20], y_trn, X_tst[columns20], 'RFC_20')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [129]:
# Previous subset (columns20) plus one additional feature.
# Requires the columns20 cell of this section to have been run.
columns21 = columns20 + [
    'pricing_strategy_transactions__Value_week_sum',
]

prediction(X_trn[columns21], y_trn, X_tst[columns21], 'RFC_21')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [130]:
# Previous subset (columns21) plus one additional feature.
# Requires the columns21 cell of this section to have been run.
columns22 = columns21 + [
    'provider_transactions__Value_week_avg',
]

prediction(X_trn[columns22], y_trn, X_tst[columns22], 'RFC_22')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [131]:
# Previous subset (columns22) plus one additional feature.
# Requires the columns22 cell of this section to have been run.
columns23 = columns22 + [
    'product_transactions__Value_global_avg',
]

prediction(X_trn[columns23], y_trn, X_tst[columns23], 'RFC_23')

Results: Counter({0: 44927, 1: 92})
It is the same as in: AlBo0809_RFE_RFC_9.csv


In [132]:
# Previous subset (columns23) plus one additional feature.
# Requires the columns23 cell of this section to have been run.
columns24 = columns23 + [
    'product_category_transactions__Value_global_avg',
]

prediction(X_trn[columns24], y_trn, X_tst[columns24], 'RFC_24')

Results: Counter({0: 44922, 1: 97})
New result! Submit it!


In [14]:
# Check whether two saved submissions flag exactly the same row positions as fraud.
df_AB = pd.read_csv('../submitted/AlBo0726_top18chi2_BaggingClassifier.csv')
df_OM = pd.read_csv('../submitted/rf-custom-features-v18.csv')

set_AB = set(df_AB.index[(df_AB['FraudResult'] == 1).values].tolist())
set_OM = set(df_OM.index[(df_OM['FraudResult'] == 1).values].tolist())

print('Equal' if set_AB == set_OM else 'Not equal')

Equal
