In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [29]:
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 150)

# 1. LOAD DATA

In [30]:
bureau_df = pd.read_csv(r'../data/raw/bureau.csv')
bureau_bl = pd.read_csv(r'../data/raw/bureau_balance.csv')
application_train = pd.read_csv(r'../data/raw/application_train.csv', usecols = ['SK_ID_CURR', 'TARGET'])
application_test = pd.read_csv(r'../data/raw/application_test.csv', usecols = ['SK_ID_CURR'])

In [31]:
bureau_df.shape

(1716428, 17)

In [32]:
bureau_df.drop(columns='AMT_CREDIT_MAX_OVERDUE', inplace=True)
bureau_df.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,0,2700000.0,,,0.0,Consumer credit,-21,


In [33]:
bureau_bl.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


# 2. OUTLINERS & MISSING VALUES

In [34]:
outlier_cols = ["DAYS_CREDIT_ENDDATE", "DAYS_CREDIT_UPDATE", "DAYS_ENDDATE_FACT"]

for col in outlier_cols:
    bureau_df.loc[bureau_df[col] < -20000, col] = np.nan

## 2.1 BUREAU_BL

In [None]:
#  Encode STATUS theo ordinal mapping
status_map = {
    'C': 0,   # closed loan 
    '0': 0,   # paid on time
    'X': 1,   # unknown â†’ neutral/light risk
    '1': 2,
    '2': 3,
    '3': 4,
    '4': 5,
    '5': 6    
}
bureau_bl["STATUS_NUM"] = bureau_bl["STATUS"].map(status_map)

# 2.2 Aggregate bureau_balance theo SK_ID_BUREAU
bb_agg = bureau_bl.groupby("SK_ID_BUREAU").agg({
    "MONTHS_BALANCE": ["min", "max", "size"],
    "STATUS_NUM": ["mean", "max"]
})

bb_agg.columns = [f"BB_{c[0]}_{c[1].upper()}" for c in bb_agg.columns]
bb_agg = bb_agg.reset_index()


In [None]:
bb_temp = bureau_df[["SK_ID_BUREAU", "SK_ID_CURR"]].merge(bb_agg, on="SK_ID_BUREAU", how="left")


bb_agg_dict = {
    "BB_MONTHS_BALANCE_MIN": ["min", "max"],
    "BB_MONTHS_BALANCE_MAX": ["min", "max"],
    "BB_MONTHS_BALANCE_SIZE": ["sum", "max"],
    "BB_STATUS_NUM_MEAN": ["mean", "max"],
    "BB_STATUS_NUM_MAX": ["max"],
}

bb_final = bb_temp.groupby("SK_ID_CURR").agg(bb_agg_dict)

# flatten column names
bb_final.columns = ["_".join([col[0], col[1].upper()]) for col in bb_final.columns]
bb_final = bb_final.reset_index()

bb_final.head()

Unnamed: 0,SK_ID_CURR,BB_MONTHS_BALANCE_MIN_MIN,BB_MONTHS_BALANCE_MIN_MAX,BB_MONTHS_BALANCE_MAX_MIN,BB_MONTHS_BALANCE_MAX_MAX,BB_MONTHS_BALANCE_SIZE_SUM,BB_MONTHS_BALANCE_SIZE_MAX,BB_STATUS_NUM_MEAN_MEAN,BB_STATUS_NUM_MEAN_MAX,BB_STATUS_NUM_MAX_MAX
0,100001,-51.0,-1.0,0.0,0.0,172.0,52.0,0.229627,0.5,2.0
1,100002,-47.0,-3.0,-32.0,0.0,110.0,22.0,0.673295,1.0,2.0
2,100003,,,,,0.0,,,,
3,100004,,,,,0.0,,,,
4,100005,-12.0,-2.0,0.0,0.0,21.0,13.0,0.136752,0.333333,1.0


In [37]:
bureau_full = bureau_df.copy()

# 3. FEATURE ENGINEERING

In [None]:
# Total of bureau loans for creating ratio
total_loans = bureau_df.groupby("SK_ID_CURR").size().rename("TOTAL_BUREAU_LOANS")

# Ordinal encode CREDIT_ACTIVE theo severity
credit_active_map = {
    "Closed": 0,
    "Active": 2,
    "Sold": 3,
    "Bad debt": 4  
}
bureau_full["CREDIT_ACTIVE_ENC"] = bureau_full["CREDIT_ACTIVE"].map(credit_active_map)

# Credit_active ratio
active_cnt = bureau_full[bureau_full["CREDIT_ACTIVE"]=="Active"].groupby("SK_ID_CURR").size().rename("CNT_ACTIVE")
sold_cnt   = bureau_full[bureau_full["CREDIT_ACTIVE"]=="Sold"].groupby("SK_ID_CURR").size().rename("CNT_SOLD")


high_risk = ((active_cnt>0) & (sold_cnt>0)).astype(int).rename("HIGH_CREDIT_RISK_FLAG")

# Currency flag
bureau_full["CURRENCY_1_FLAG"] = (bureau_full["CREDIT_CURRENCY"]=="currency 1").astype(int)

# Loan types
major_types = ["Consumer credit","Credit card","Car loan","Mortgage","Microloan"]

bureau_full["CREDIT_TYPE_GRP"] = bureau_full["CREDIT_TYPE"].apply(
    lambda x: x if x in major_types else "Other"
)


credit_type_map = {
    "Mortgage": 1,
    "Car loan": 0,
    "Consumer credit": 2,
    "Credit card": 3,
    "Microloan": 4,
    "Other": 2
}
bureau_full["CREDIT_TYPE_ENC"] = bureau_full["CREDIT_TYPE_GRP"].map(credit_type_map)

In [39]:
# Count overdue months
dpd1 = bureau_bl[bureau_bl["STATUS_NUM"] >= 1].groupby("SK_ID_BUREAU").size().rename("DPD1_COUNT")
dpd2 = bureau_bl[bureau_bl["STATUS_NUM"] >= 2].groupby("SK_ID_BUREAU").size().rename("DPD2_COUNT")
dpd3 = bureau_bl[bureau_bl["STATUS_NUM"] >= 3].groupby("SK_ID_BUREAU").size().rename("DPD3_COUNT")


last_status = (
    bureau_bl.sort_values(["SK_ID_BUREAU","MONTHS_BALANCE"])
             .groupby("SK_ID_BUREAU")["STATUS_NUM"]
             .last()
             .rename("LAST_STATUS")
)


overdue_mask = bureau_bl[bureau_bl["STATUS_NUM"]>=1]

days_last_overdue = (
    overdue_mask.groupby("SK_ID_BUREAU")["MONTHS_BALANCE"]
                .min()
                .rename("MONTHS_SINCE_LAST_OVERDUE")
)

status_features = pd.concat([dpd1, dpd2, dpd3, last_status, days_last_overdue], axis=1)
bureau_full = bureau_full.merge(status_features, on="SK_ID_BUREAU", how="left")

In [None]:
def time_to_end_bucket(x):
    if pd.isna(x): return "unknown"
    if x <= 0: return "closed"
    if x <= 30: return "<=30d"
    if x <= 180: return "<=6m"
    if x <= 365: return "<=12m"
    return ">12m"

def safe_div(a, b):
    return a / b.mask(b == 0)

bureau_full["BUREAU_TIME_TO_END"] = bureau_full["DAYS_CREDIT_ENDDATE"].apply(time_to_end_bucket)
bureau_full = pd.get_dummies(bureau_full, columns=["BUREAU_TIME_TO_END"], prefix="TIMEEND")

In [None]:
# DPD trend feature
def add_dpd_features_for_bureau(df):
    """
    Create DPD trend and recent-window features for one SK_BUREAU_ID.
    """
    # sort by time: older first, newest last (MONTHS_BALANCE is negative)
    df = df.sort_values("MONTHS_BALANCE")
    months = df["MONTHS_BALANCE"].values
    status = df["STATUS_NUM"].fillna(0).values

    # DPD trend
    if len(months) >= 2:
        slope = np.polyfit(months, status, 1)[0]
    else:
        slope = 0.0

    # Last 1 / 3 / 6 months mean DPD
    if len(status) >= 1:
        last1 = status[-1]
    else:
        last1 = 0.0

    if len(status) >= 1:
        last3 = status[-3:].mean()
        last6 = status[-6:].mean()
    else:
        last3 = 0.0
        last6 = 0.0

    # Transition features
    late = status > 0
    if len(late) >= 2:
        trans_0_to_late = ((late[:-1] == False) & (late[1:] == True)).sum()
        trans_late_to_0 = ((late[:-1] == True) & (late[1:] == False)).sum()
    else:
        trans_0_to_late = 0
        trans_late_to_0 = 0

    # longest streak of consecutive late months
    max_streak = 0
    current = 0
    for v in late:
        if v:
            current += 1
            if current > max_streak:
                max_streak = current
        else:
            current = 0

    return pd.Series({
        "BB_DPD_TREND": slope,
        "BB_DPD_LAST1M": last1,
        "BB_DPD_LAST3M": last3,
        "BB_DPD_LAST6M": last6,
        "BB_DPD_TRANS_0_TO_LATE": trans_0_to_late,
        "BB_DPD_TRANS_LATE_TO_0": trans_late_to_0,
        "BB_DPD_MAX_STREAK_LATE": max_streak
    })


bb_extra = bureau_bl.groupby("SK_BUREAU_ID").apply(add_dpd_features_for_bureau).reset_index()


bb_extra = bb_extra.merge(
    bureau_df[["SK_ID_BUREAU", "SK_ID_CURR"]],
    on="SK_ID_BUREAU",
    how="left"
)

bb_extra_agg = bb_extra.groupby("SK_ID_CURR").agg({
    "BB_DPD_TREND": ["mean", "max", "min"],
    "BB_DPD_LAST1M": ["mean", "max"],
    "BB_DPD_LAST3M": ["mean", "max"],
    "BB_DPD_LAST6M": ["mean", "max"],
    "BB_DPD_TRANS_0_TO_LATE": ["sum", "mean"],
    "BB_DPD_TRANS_LATE_TO_0": ["sum", "mean"],
    "BB_DPD_MAX_STREAK_LATE": ["max", "mean"]
}).reset_index()

# flatten columns
bb_extra_agg.columns = ["SK_ID_CURR"] + [
    "BB_EXTRA_" + "_".join(col).upper()
    for col in bb_extra_agg.columns[1:]
]


# Loan age features
bureau_full["LOAN_AGE_DAYS"] = -bureau_full["DAYS_CREDIT"]
bureau_full["LOAN_AGE_YEARS"] = bureau_full["LOAN_AGE_DAYS"] / 365.0

# Remaining duration and total duration (approx)
bureau_full["LOAN_REMAINING_DAYS"] = bureau_full["DAYS_CREDIT_ENDDATE"]
bureau_full["LOAN_DURATION_DAYS"] = bureau_full["DAYS_CREDIT_ENDDATE"] - bureau_full["DAYS_CREDIT"]
bureau_full["LOAN_DURATION_YEARS"] = bureau_full["LOAN_DURATION_DAYS"] / 365.0

bureau_full["LOAN_AGE_RATIO"] = safe_div(
    bureau_full["LOAN_AGE_DAYS"],
    bureau_full["LOAN_DURATION_DAYS"]
)

# Aggregate loan age features to client level
loan_age_agg = bureau_full.groupby("SK_ID_CURR")[[
    "LOAN_AGE_DAYS",
    "LOAN_AGE_YEARS",
    "LOAN_REMAINING_DAYS",
    "LOAN_DURATION_DAYS",
    "LOAN_DURATION_YEARS",
    "LOAN_AGE_RATIO"
]].agg(["mean", "max", "min"]).reset_index()

loan_age_agg.columns = ["SK_ID_CURR"] + [
    "BURO_LOAN_" + "_".join(col).upper()
    for col in loan_age_agg.columns[1:]
]



# Active loans features
active_df = bureau_full[bureau_full["CREDIT_ACTIVE"] == "Active"].copy()

active_agg = active_df.groupby("SK_ID_CURR").agg({
    "SK_ID_BUREAU": "count",       # number of active loans
    "DEBT_TO_CREDIT": "mean",
    "OVERDUE_RATIO": "mean",
    "DPD1_COUNT": "sum",
    "DPD2_COUNT": "sum",
    "DPD3_COUNT": "sum"
}).reset_index()

active_agg = active_agg.rename(columns={
    "SK_ID_BUREAU": "ACTIVE_LOANS",
    "DEBT_TO_CREDIT": "ACTIVE_DEBT_TO_CREDIT_MEAN",
    "OVERDUE_RATIO": "ACTIVE_OVERDUE_RATIO_MEAN",
    "DPD1_COUNT": "ACTIVE_DPD1_SUM",
    "DPD2_COUNT": "ACTIVE_DPD2_SUM",
    "DPD3_COUNT": "ACTIVE_DPD3_SUM"
})

# active loans / total bureau loans
active_agg = active_agg.merge(
    total_loans.reset_index(),
    on="SK_ID_CURR",
    how="left"
)
active_agg["ACTIVE_LOAN_RATIO"] = safe_div(
    active_agg["ACTIVE_LOANS"],
    active_agg["TOTAL_BUREAU_LOANS"]
)

In [None]:
# Ratio features
bureau_full["PCT_CREDIT_LIMIT_USED"] = safe_div(bureau_full["AMT_CREDIT_SUM"], bureau_full["AMT_CREDIT_SUM_LIMIT"])
bureau_full["PCT_CREDIT_DEBT"] = safe_div(bureau_full["AMT_CREDIT_SUM_DEBT"], bureau_full["AMT_CREDIT_SUM_LIMIT"])
bureau_full["OVERDUE_RATIO"] = safe_div(bureau_full["AMT_CREDIT_SUM_OVERDUE"], bureau_full["AMT_CREDIT_SUM"])
bureau_full["DEBT_TO_CREDIT"] = safe_div(bureau_full["AMT_CREDIT_SUM_DEBT"], bureau_full["AMT_CREDIT_SUM"])
bureau_full["OVERDUE_DEBT"] = safe_div(bureau_full["AMT_CREDIT_SUM_OVERDUE"], bureau_full["AMT_CREDIT_SUM_DEBT"])
bureau_full["ANNUITY_TO_SUM"] = safe_div(bureau_full["AMT_ANNUITY"], bureau_full["AMT_CREDIT_SUM"])
bureau_full["ANNUITY_TO_DEBT"] = safe_div(bureau_full["AMT_ANNUITY"], bureau_full["AMT_CREDIT_SUM_DEBT"])


numeric_cols = bureau_full.select_dtypes(include=['number']).columns
bureau_numeric = bureau_full[numeric_cols].copy()
bureau_numeric["SK_ID_CURR"] = bureau_full["SK_ID_CURR"]

agg_functions = ["mean", "max", "min", "sum", "median"]

bureau_agg = bureau_numeric.groupby("SK_ID_CURR").agg(agg_functions)
bureau_agg.columns = ["BURO_" + "_".join(col).upper() for col in bureau_agg.columns]
bureau_agg = bureau_agg.reset_index()

In [42]:
bureau_final = bureau_agg.merge(bb_final, on="SK_ID_CURR", how="left")

In [43]:
bureau_final.shape

(305811, 145)

In [44]:
def check_nan(col):
    num_nan = col.isnull().sum()
    num_per = num_nan/len(col) * 100
    col_miss = pd.concat([num_nan, num_per], axis=1, keys=['number_of_NaN', 'percentage_of_NaN']).sort_values(by='percentage_of_NaN', ascending = False).round(1)
    col_miss = col_miss[col_miss['number_of_NaN'] != 0]
    print('Num fields: ', col.shape[1])
    print('Num missing fields: ', col_miss.shape[0])
    return col_miss

In [45]:
bureau_final.columns.tolist()

['SK_ID_CURR',
 'BURO_SK_ID_BUREAU_MEAN',
 'BURO_SK_ID_BUREAU_MAX',
 'BURO_SK_ID_BUREAU_MIN',
 'BURO_SK_ID_BUREAU_SUM',
 'BURO_SK_ID_BUREAU_MEDIAN',
 'BURO_DAYS_CREDIT_MEAN',
 'BURO_DAYS_CREDIT_MAX',
 'BURO_DAYS_CREDIT_MIN',
 'BURO_DAYS_CREDIT_SUM',
 'BURO_DAYS_CREDIT_MEDIAN',
 'BURO_CREDIT_DAY_OVERDUE_MEAN',
 'BURO_CREDIT_DAY_OVERDUE_MAX',
 'BURO_CREDIT_DAY_OVERDUE_MIN',
 'BURO_CREDIT_DAY_OVERDUE_SUM',
 'BURO_CREDIT_DAY_OVERDUE_MEDIAN',
 'BURO_DAYS_CREDIT_ENDDATE_MEAN',
 'BURO_DAYS_CREDIT_ENDDATE_MAX',
 'BURO_DAYS_CREDIT_ENDDATE_MIN',
 'BURO_DAYS_CREDIT_ENDDATE_SUM',
 'BURO_DAYS_CREDIT_ENDDATE_MEDIAN',
 'BURO_DAYS_ENDDATE_FACT_MEAN',
 'BURO_DAYS_ENDDATE_FACT_MAX',
 'BURO_DAYS_ENDDATE_FACT_MIN',
 'BURO_DAYS_ENDDATE_FACT_SUM',
 'BURO_DAYS_ENDDATE_FACT_MEDIAN',
 'BURO_CNT_CREDIT_PROLONG_MEAN',
 'BURO_CNT_CREDIT_PROLONG_MAX',
 'BURO_CNT_CREDIT_PROLONG_MIN',
 'BURO_CNT_CREDIT_PROLONG_SUM',
 'BURO_CNT_CREDIT_PROLONG_MEDIAN',
 'BURO_AMT_CREDIT_SUM_MEAN',
 'BURO_AMT_CREDIT_SUM_MAX',
 'BURO_AM

In [46]:
check_nan(bureau_final)

Num fields:  145
Num missing fields:  84


Unnamed: 0,number_of_NaN,percentage_of_NaN
BURO_DPD3_COUNT_MEDIAN,296485,97.0
BURO_DPD3_COUNT_MAX,296485,97.0
BURO_DPD3_COUNT_MIN,296485,97.0
BURO_DPD3_COUNT_MEAN,296485,97.0
BURO_DPD2_COUNT_MEAN,257289,84.1
BURO_DPD2_COUNT_MAX,257289,84.1
BURO_DPD2_COUNT_MIN,257289,84.1
BURO_DPD2_COUNT_MEDIAN,257289,84.1
BURO_PCT_CREDIT_DEBT_MEDIAN,249378,81.5
BURO_PCT_CREDIT_DEBT_MEAN,249378,81.5


In [None]:
# Fill ratio logic
ratio_cols = [col for col in bureau_final.columns 
              if 'DPD' in col or 'RATIO' in col or 'DEBT' in col or 'PCT' in col]

bureau_final[ratio_cols] = bureau_final[ratio_cols].fillna(0)

# Fill numeric with median
num_cols = bureau_final.select_dtypes(include='number').columns
num_cols = [c for c in num_cols if c not in ratio_cols]

bureau_final[num_cols] = bureau_final[num_cols].fillna(bureau_final[num_cols].median())

# Fill one-hot / category flags
cat_cols = bureau_final.select_dtypes(include=['uint8']).columns
bureau_final[cat_cols] = bureau_final[cat_cols].fillna(0)

In [48]:
check_nan(bureau_final)

Num fields:  145
Num missing fields:  0


Unnamed: 0,number_of_NaN,percentage_of_NaN


In [49]:
bureau_application_train_feature = application_train.merge(bureau_final, on="SK_ID_CURR", how="left")
bureau_application_test_feature  = application_test.merge(bureau_final, on="SK_ID_CURR", how="left")

In [50]:
print(f'Shape of bureau_train_fe: {bureau_application_train_feature.shape}')
print(f'Shape of bureau_test_fe: {bureau_application_test_feature.shape}')

Shape of bureau_train_fe: (307511, 146)
Shape of bureau_test_fe: (48744, 145)


In [51]:
bureau_application_train_feature.head()

Unnamed: 0,SK_ID_CURR,TARGET,BURO_SK_ID_BUREAU_MEAN,BURO_SK_ID_BUREAU_MAX,BURO_SK_ID_BUREAU_MIN,BURO_SK_ID_BUREAU_SUM,BURO_SK_ID_BUREAU_MEDIAN,BURO_DAYS_CREDIT_MEAN,BURO_DAYS_CREDIT_MAX,BURO_DAYS_CREDIT_MIN,BURO_DAYS_CREDIT_SUM,BURO_DAYS_CREDIT_MEDIAN,BURO_CREDIT_DAY_OVERDUE_MEAN,BURO_CREDIT_DAY_OVERDUE_MAX,BURO_CREDIT_DAY_OVERDUE_MIN,BURO_CREDIT_DAY_OVERDUE_SUM,BURO_CREDIT_DAY_OVERDUE_MEDIAN,BURO_DAYS_CREDIT_ENDDATE_MEAN,BURO_DAYS_CREDIT_ENDDATE_MAX,BURO_DAYS_CREDIT_ENDDATE_MIN,BURO_DAYS_CREDIT_ENDDATE_SUM,BURO_DAYS_CREDIT_ENDDATE_MEDIAN,BURO_DAYS_ENDDATE_FACT_MEAN,BURO_DAYS_ENDDATE_FACT_MAX,BURO_DAYS_ENDDATE_FACT_MIN,BURO_DAYS_ENDDATE_FACT_SUM,BURO_DAYS_ENDDATE_FACT_MEDIAN,BURO_CNT_CREDIT_PROLONG_MEAN,BURO_CNT_CREDIT_PROLONG_MAX,BURO_CNT_CREDIT_PROLONG_MIN,BURO_CNT_CREDIT_PROLONG_SUM,BURO_CNT_CREDIT_PROLONG_MEDIAN,BURO_AMT_CREDIT_SUM_MEAN,BURO_AMT_CREDIT_SUM_MAX,BURO_AMT_CREDIT_SUM_MIN,BURO_AMT_CREDIT_SUM_SUM,BURO_AMT_CREDIT_SUM_MEDIAN,BURO_AMT_CREDIT_SUM_DEBT_MEAN,BURO_AMT_CREDIT_SUM_DEBT_MAX,BURO_AMT_CREDIT_SUM_DEBT_MIN,BURO_AMT_CREDIT_SUM_DEBT_SUM,BURO_AMT_CREDIT_SUM_DEBT_MEDIAN,BURO_AMT_CREDIT_SUM_LIMIT_MEAN,BURO_AMT_CREDIT_SUM_LIMIT_MAX,BURO_AMT_CREDIT_SUM_LIMIT_MIN,BURO_AMT_CREDIT_SUM_LIMIT_SUM,BURO_AMT_CREDIT_SUM_LIMIT_MEDIAN,BURO_AMT_CREDIT_SUM_OVERDUE_MEAN,BURO_AMT_CREDIT_SUM_OVERDUE_MAX,BURO_AMT_CREDIT_SUM_OVERDUE_MIN,BURO_AMT_CREDIT_SUM_OVERDUE_SUM,BURO_AMT_CREDIT_SUM_OVERDUE_MEDIAN,BURO_DAYS_CREDIT_UPDATE_MEAN,BURO_DAYS_CREDIT_UPDATE_MAX,BURO_DAYS_CREDIT_UPDATE_MIN,BURO_DAYS_CREDIT_UPDATE_SUM,BURO_DAYS_CREDIT_UPDATE_MEDIAN,BURO_AMT_ANNUITY_MEAN,BURO_AMT_ANNUITY_MAX,BURO_AMT_ANNUITY_MIN,BURO_AMT_ANNUITY_SUM,BURO_AMT_ANNUITY_MEDIAN,BURO_CREDIT_ACTIVE_ENC_MEAN,BURO_CREDIT_ACTIVE_ENC_MAX,BURO_CREDIT_ACTIVE_ENC_MIN,BURO_CREDIT_ACTIVE_ENC_SUM,BURO_CREDIT_ACTIVE_ENC_MEDIAN,BURO_CURRENCY_1_FLAG_MEAN,BURO_CURRENCY_1_FLAG_MAX,BURO_CURRENCY_1_FLAG_MIN,BURO_CURRENCY_1_FLAG_SUM,BURO_CURRENCY_1_FLAG_MEDIAN,BURO_CREDIT_TYPE_ENC_MEAN,BURO_CREDIT_TYPE_ENC_MAX,BURO_CREDIT_TYPE_ENC_MIN,BURO_CREDIT_TYPE_ENC_SUM,BURO_CREDIT_TYPE_ENC_MEDIAN,BURO_DPD1_COUNT_MEAN,BURO_DPD1_COUNT_MAX,BURO_DPD1_COUNT_MIN,BURO_DPD1_COUNT_SUM,BURO_DPD1_COUNT_MEDIAN,BURO_DPD2_COUNT_MEAN,BURO_DPD2_COUNT_MAX,BURO_DPD2_COUNT_MIN,BURO_DPD2_COUNT_SUM,BURO_DPD2_COUNT_MEDIAN,BURO_DPD3_COUNT_MEAN,BURO_DPD3_COUNT_MAX,BURO_DPD3_COUNT_MIN,BURO_DPD3_COUNT_SUM,BURO_DPD3_COUNT_MEDIAN,BURO_LAST_STATUS_MEAN,BURO_LAST_STATUS_MAX,BURO_LAST_STATUS_MIN,BURO_LAST_STATUS_SUM,BURO_LAST_STATUS_MEDIAN,BURO_MONTHS_SINCE_LAST_OVERDUE_MEAN,BURO_MONTHS_SINCE_LAST_OVERDUE_MAX,BURO_MONTHS_SINCE_LAST_OVERDUE_MIN,BURO_MONTHS_SINCE_LAST_OVERDUE_SUM,BURO_MONTHS_SINCE_LAST_OVERDUE_MEDIAN,BURO_PCT_CREDIT_LIMIT_USED_MEAN,BURO_PCT_CREDIT_LIMIT_USED_MAX,BURO_PCT_CREDIT_LIMIT_USED_MIN,BURO_PCT_CREDIT_LIMIT_USED_SUM,BURO_PCT_CREDIT_LIMIT_USED_MEDIAN,BURO_PCT_CREDIT_DEBT_MEAN,BURO_PCT_CREDIT_DEBT_MAX,BURO_PCT_CREDIT_DEBT_MIN,BURO_PCT_CREDIT_DEBT_SUM,BURO_PCT_CREDIT_DEBT_MEDIAN,BURO_OVERDUE_RATIO_MEAN,BURO_OVERDUE_RATIO_MAX,BURO_OVERDUE_RATIO_MIN,BURO_OVERDUE_RATIO_SUM,BURO_OVERDUE_RATIO_MEDIAN,BURO_DEBT_TO_CREDIT_MEAN,BURO_DEBT_TO_CREDIT_MAX,BURO_DEBT_TO_CREDIT_MIN,BURO_DEBT_TO_CREDIT_SUM,BURO_DEBT_TO_CREDIT_MEDIAN,BURO_OVERDUE_DEBT_MEAN,BURO_OVERDUE_DEBT_MAX,BURO_OVERDUE_DEBT_MIN,BURO_OVERDUE_DEBT_SUM,BURO_OVERDUE_DEBT_MEDIAN,BURO_ANNUITY_TO_SUM_MEAN,BURO_ANNUITY_TO_SUM_MAX,BURO_ANNUITY_TO_SUM_MIN,BURO_ANNUITY_TO_SUM_SUM,BURO_ANNUITY_TO_SUM_MEDIAN,BURO_ANNUITY_TO_DEBT_MEAN,BURO_ANNUITY_TO_DEBT_MAX,BURO_ANNUITY_TO_DEBT_MIN,BURO_ANNUITY_TO_DEBT_SUM,BURO_ANNUITY_TO_DEBT_MEDIAN,BB_MONTHS_BALANCE_MIN_MIN,BB_MONTHS_BALANCE_MIN_MAX,BB_MONTHS_BALANCE_MAX_MIN,BB_MONTHS_BALANCE_MAX_MAX,BB_MONTHS_BALANCE_SIZE_SUM,BB_MONTHS_BALANCE_SIZE_MAX,BB_STATUS_NUM_MEAN_MEAN,BB_STATUS_NUM_MEAN_MAX,BB_STATUS_NUM_MAX_MAX
0,100002,1,6153272.125,6158909.0,6113835.0,49226177.0,6158905.5,-874.0,-103.0,-1437.0,-6992.0,-1042.5,0.0,0.0,0.0,0.0,0.0,-349.0,780.0,-1072.0,-2094.0,-424.5,-697.5,-36.0,-1185.0,-4185.0,-939.0,0.0,0.0,0.0,0.0,0.0,108131.945625,450000.0,0.0,865055.565,54130.5,49156.2,245781.0,0.0,245781.0,0.0,7997.14125,31988.565,0.0,31988.565,0.0,0.0,0.0,0.0,0.0,0.0,-499.875,-7.0,-1185.0,-3999.0,-402.5,0.0,0.0,0.0,0.0,0.0,0.5,2.0,0.0,4.0,0.0,1.0,1.0,1.0,8.0,1.0,2.5,3.0,2.0,20.0,2.5,6.0,9.0,2.0,42.0,9.0,4.5,6.0,1.0,27.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-30.0,-3.0,-47.0,-210.0,-34.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136545,0.54618,0.0,0.54618,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-47.0,-3.0,-32.0,0.0,110.0,22.0,0.673295,1.0,2.0
1,100003,0,5885878.5,5885880.0,5885877.0,23543514.0,5885878.5,-1400.75,-606.0,-2586.0,-5603.0,-1205.5,0.0,0.0,0.0,0.0,0.0,-544.5,1216.0,-2434.0,-2178.0,-480.0,-1097.333333,-540.0,-2131.0,-3292.0,-621.0,0.0,0.0,0.0,0.0,0.0,254350.125,810000.0,22248.0,1017400.5,92576.25,0.0,0.0,0.0,0.0,0.0,202500.0,810000.0,0.0,810000.0,0.0,0.0,0.0,0.0,0.0,0.0,-816.0,-43.0,-2131.0,-3264.0,-545.0,6516.0,13500.0,0.0,0.0,2970.0,0.5,2.0,0.0,2.0,0.0,1.0,1.0,1.0,4.0,1.0,2.5,3.0,2.0,10.0,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,1.0,0.0,0.0,0.0,-32.375,-11.0,-52.0,0.0,-30.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029207,0.069444,0.0,0.0,0.012607,0.0,0.0,0.0,0.0,0.0,-58.0,-10.0,0.0,0.0,0.0,47.0,0.179326,0.5,1.0
2,100004,0,6829133.5,6829134.0,6829133.0,13658267.0,6829133.5,-867.0,-408.0,-1326.0,-1734.0,-867.0,0.0,0.0,0.0,0.0,0.0,-488.5,-382.0,-595.0,-977.0,-488.5,-532.5,-382.0,-683.0,-1065.0,-532.5,0.0,0.0,0.0,0.0,0.0,94518.9,94537.8,94500.0,189037.8,94518.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-532.0,-382.0,-682.0,-1064.0,-532.0,6516.0,13500.0,0.0,0.0,2970.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,1.0,0.0,0.0,0.0,-32.375,-11.0,-52.0,0.0,-30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029207,0.069444,0.0,0.0,0.012607,0.0,0.0,0.0,0.0,0.0,-58.0,-10.0,0.0,0.0,0.0,47.0,0.179326,0.5,1.0
3,100006,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,100007,0,5987200.0,5987200.0,5987200.0,5987200.0,5987200.0,-1149.0,-1149.0,-1149.0,-1149.0,-1149.0,0.0,0.0,0.0,0.0,0.0,-783.0,-783.0,-783.0,-783.0,-783.0,-783.0,-783.0,-783.0,-783.0,-783.0,0.0,0.0,0.0,0.0,0.0,146250.0,146250.0,146250.0,146250.0,146250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-783.0,-783.0,-783.0,-783.0,-783.0,6516.0,13500.0,0.0,0.0,2970.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,1.0,0.0,0.0,0.0,-32.375,-11.0,-52.0,0.0,-30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029207,0.069444,0.0,0.0,0.012607,0.0,0.0,0.0,0.0,0.0,-58.0,-10.0,0.0,0.0,0.0,47.0,0.179326,0.5,1.0


In [52]:
bureau_application_train_feature.columns.to_list()

['SK_ID_CURR',
 'TARGET',
 'BURO_SK_ID_BUREAU_MEAN',
 'BURO_SK_ID_BUREAU_MAX',
 'BURO_SK_ID_BUREAU_MIN',
 'BURO_SK_ID_BUREAU_SUM',
 'BURO_SK_ID_BUREAU_MEDIAN',
 'BURO_DAYS_CREDIT_MEAN',
 'BURO_DAYS_CREDIT_MAX',
 'BURO_DAYS_CREDIT_MIN',
 'BURO_DAYS_CREDIT_SUM',
 'BURO_DAYS_CREDIT_MEDIAN',
 'BURO_CREDIT_DAY_OVERDUE_MEAN',
 'BURO_CREDIT_DAY_OVERDUE_MAX',
 'BURO_CREDIT_DAY_OVERDUE_MIN',
 'BURO_CREDIT_DAY_OVERDUE_SUM',
 'BURO_CREDIT_DAY_OVERDUE_MEDIAN',
 'BURO_DAYS_CREDIT_ENDDATE_MEAN',
 'BURO_DAYS_CREDIT_ENDDATE_MAX',
 'BURO_DAYS_CREDIT_ENDDATE_MIN',
 'BURO_DAYS_CREDIT_ENDDATE_SUM',
 'BURO_DAYS_CREDIT_ENDDATE_MEDIAN',
 'BURO_DAYS_ENDDATE_FACT_MEAN',
 'BURO_DAYS_ENDDATE_FACT_MAX',
 'BURO_DAYS_ENDDATE_FACT_MIN',
 'BURO_DAYS_ENDDATE_FACT_SUM',
 'BURO_DAYS_ENDDATE_FACT_MEDIAN',
 'BURO_CNT_CREDIT_PROLONG_MEAN',
 'BURO_CNT_CREDIT_PROLONG_MAX',
 'BURO_CNT_CREDIT_PROLONG_MIN',
 'BURO_CNT_CREDIT_PROLONG_SUM',
 'BURO_CNT_CREDIT_PROLONG_MEDIAN',
 'BURO_AMT_CREDIT_SUM_MEAN',
 'BURO_AMT_CREDIT_SUM_MAX'

In [53]:
bureau_application_test_feature.head()

Unnamed: 0,SK_ID_CURR,BURO_SK_ID_BUREAU_MEAN,BURO_SK_ID_BUREAU_MAX,BURO_SK_ID_BUREAU_MIN,BURO_SK_ID_BUREAU_SUM,BURO_SK_ID_BUREAU_MEDIAN,BURO_DAYS_CREDIT_MEAN,BURO_DAYS_CREDIT_MAX,BURO_DAYS_CREDIT_MIN,BURO_DAYS_CREDIT_SUM,BURO_DAYS_CREDIT_MEDIAN,BURO_CREDIT_DAY_OVERDUE_MEAN,BURO_CREDIT_DAY_OVERDUE_MAX,BURO_CREDIT_DAY_OVERDUE_MIN,BURO_CREDIT_DAY_OVERDUE_SUM,BURO_CREDIT_DAY_OVERDUE_MEDIAN,BURO_DAYS_CREDIT_ENDDATE_MEAN,BURO_DAYS_CREDIT_ENDDATE_MAX,BURO_DAYS_CREDIT_ENDDATE_MIN,BURO_DAYS_CREDIT_ENDDATE_SUM,BURO_DAYS_CREDIT_ENDDATE_MEDIAN,BURO_DAYS_ENDDATE_FACT_MEAN,BURO_DAYS_ENDDATE_FACT_MAX,BURO_DAYS_ENDDATE_FACT_MIN,BURO_DAYS_ENDDATE_FACT_SUM,BURO_DAYS_ENDDATE_FACT_MEDIAN,BURO_CNT_CREDIT_PROLONG_MEAN,BURO_CNT_CREDIT_PROLONG_MAX,BURO_CNT_CREDIT_PROLONG_MIN,BURO_CNT_CREDIT_PROLONG_SUM,BURO_CNT_CREDIT_PROLONG_MEDIAN,BURO_AMT_CREDIT_SUM_MEAN,BURO_AMT_CREDIT_SUM_MAX,BURO_AMT_CREDIT_SUM_MIN,BURO_AMT_CREDIT_SUM_SUM,BURO_AMT_CREDIT_SUM_MEDIAN,BURO_AMT_CREDIT_SUM_DEBT_MEAN,BURO_AMT_CREDIT_SUM_DEBT_MAX,BURO_AMT_CREDIT_SUM_DEBT_MIN,BURO_AMT_CREDIT_SUM_DEBT_SUM,BURO_AMT_CREDIT_SUM_DEBT_MEDIAN,BURO_AMT_CREDIT_SUM_LIMIT_MEAN,BURO_AMT_CREDIT_SUM_LIMIT_MAX,BURO_AMT_CREDIT_SUM_LIMIT_MIN,BURO_AMT_CREDIT_SUM_LIMIT_SUM,BURO_AMT_CREDIT_SUM_LIMIT_MEDIAN,BURO_AMT_CREDIT_SUM_OVERDUE_MEAN,BURO_AMT_CREDIT_SUM_OVERDUE_MAX,BURO_AMT_CREDIT_SUM_OVERDUE_MIN,BURO_AMT_CREDIT_SUM_OVERDUE_SUM,BURO_AMT_CREDIT_SUM_OVERDUE_MEDIAN,BURO_DAYS_CREDIT_UPDATE_MEAN,BURO_DAYS_CREDIT_UPDATE_MAX,BURO_DAYS_CREDIT_UPDATE_MIN,BURO_DAYS_CREDIT_UPDATE_SUM,BURO_DAYS_CREDIT_UPDATE_MEDIAN,BURO_AMT_ANNUITY_MEAN,BURO_AMT_ANNUITY_MAX,BURO_AMT_ANNUITY_MIN,BURO_AMT_ANNUITY_SUM,BURO_AMT_ANNUITY_MEDIAN,BURO_CREDIT_ACTIVE_ENC_MEAN,BURO_CREDIT_ACTIVE_ENC_MAX,BURO_CREDIT_ACTIVE_ENC_MIN,BURO_CREDIT_ACTIVE_ENC_SUM,BURO_CREDIT_ACTIVE_ENC_MEDIAN,BURO_CURRENCY_1_FLAG_MEAN,BURO_CURRENCY_1_FLAG_MAX,BURO_CURRENCY_1_FLAG_MIN,BURO_CURRENCY_1_FLAG_SUM,BURO_CURRENCY_1_FLAG_MEDIAN,BURO_CREDIT_TYPE_ENC_MEAN,BURO_CREDIT_TYPE_ENC_MAX,BURO_CREDIT_TYPE_ENC_MIN,BURO_CREDIT_TYPE_ENC_SUM,BURO_CREDIT_TYPE_ENC_MEDIAN,BURO_DPD1_COUNT_MEAN,BURO_DPD1_COUNT_MAX,BURO_DPD1_COUNT_MIN,BURO_DPD1_COUNT_SUM,BURO_DPD1_COUNT_MEDIAN,BURO_DPD2_COUNT_MEAN,BURO_DPD2_COUNT_MAX,BURO_DPD2_COUNT_MIN,BURO_DPD2_COUNT_SUM,BURO_DPD2_COUNT_MEDIAN,BURO_DPD3_COUNT_MEAN,BURO_DPD3_COUNT_MAX,BURO_DPD3_COUNT_MIN,BURO_DPD3_COUNT_SUM,BURO_DPD3_COUNT_MEDIAN,BURO_LAST_STATUS_MEAN,BURO_LAST_STATUS_MAX,BURO_LAST_STATUS_MIN,BURO_LAST_STATUS_SUM,BURO_LAST_STATUS_MEDIAN,BURO_MONTHS_SINCE_LAST_OVERDUE_MEAN,BURO_MONTHS_SINCE_LAST_OVERDUE_MAX,BURO_MONTHS_SINCE_LAST_OVERDUE_MIN,BURO_MONTHS_SINCE_LAST_OVERDUE_SUM,BURO_MONTHS_SINCE_LAST_OVERDUE_MEDIAN,BURO_PCT_CREDIT_LIMIT_USED_MEAN,BURO_PCT_CREDIT_LIMIT_USED_MAX,BURO_PCT_CREDIT_LIMIT_USED_MIN,BURO_PCT_CREDIT_LIMIT_USED_SUM,BURO_PCT_CREDIT_LIMIT_USED_MEDIAN,BURO_PCT_CREDIT_DEBT_MEAN,BURO_PCT_CREDIT_DEBT_MAX,BURO_PCT_CREDIT_DEBT_MIN,BURO_PCT_CREDIT_DEBT_SUM,BURO_PCT_CREDIT_DEBT_MEDIAN,BURO_OVERDUE_RATIO_MEAN,BURO_OVERDUE_RATIO_MAX,BURO_OVERDUE_RATIO_MIN,BURO_OVERDUE_RATIO_SUM,BURO_OVERDUE_RATIO_MEDIAN,BURO_DEBT_TO_CREDIT_MEAN,BURO_DEBT_TO_CREDIT_MAX,BURO_DEBT_TO_CREDIT_MIN,BURO_DEBT_TO_CREDIT_SUM,BURO_DEBT_TO_CREDIT_MEDIAN,BURO_OVERDUE_DEBT_MEAN,BURO_OVERDUE_DEBT_MAX,BURO_OVERDUE_DEBT_MIN,BURO_OVERDUE_DEBT_SUM,BURO_OVERDUE_DEBT_MEDIAN,BURO_ANNUITY_TO_SUM_MEAN,BURO_ANNUITY_TO_SUM_MAX,BURO_ANNUITY_TO_SUM_MIN,BURO_ANNUITY_TO_SUM_SUM,BURO_ANNUITY_TO_SUM_MEDIAN,BURO_ANNUITY_TO_DEBT_MEAN,BURO_ANNUITY_TO_DEBT_MAX,BURO_ANNUITY_TO_DEBT_MIN,BURO_ANNUITY_TO_DEBT_SUM,BURO_ANNUITY_TO_DEBT_MEDIAN,BB_MONTHS_BALANCE_MIN_MIN,BB_MONTHS_BALANCE_MIN_MAX,BB_MONTHS_BALANCE_MAX_MIN,BB_MONTHS_BALANCE_MAX_MAX,BB_MONTHS_BALANCE_SIZE_SUM,BB_MONTHS_BALANCE_SIZE_MAX,BB_STATUS_NUM_MEAN_MEAN,BB_STATUS_NUM_MEAN_MAX,BB_STATUS_NUM_MAX_MAX
0,100001,5896633.0,5896636.0,5896630.0,41276431.0,5896633.0,-735.0,-49.0,-1572.0,-5145.0,-857.0,0.0,0.0,0.0,0.0,0.0,82.428571,1778.0,-1329.0,577.0,-179.0,-825.5,-544.0,-1328.0,-3302.0,-715.0,0.0,0.0,0.0,0.0,0.0,207623.571429,378000.0,85500.0,1453365.0,168345.0,85240.928571,373239.0,0.0,596686.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-93.142857,-6.0,-155.0,-652.0,-155.0,3545.357143,10822.5,0.0,24817.5,0.0,0.857143,2.0,0.0,6.0,0.0,1.0,1.0,1.0,7.0,1.0,2.0,2.0,2.0,14.0,2.0,6.2,9.0,1.0,31.0,7.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.285714,2.0,0.0,2.0,0.0,-25.0,-1.0,-51.0,-125.0,-28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.282518,0.987405,0.0,1.977625,0.0,0.0,0.0,0.0,0.0,0.0,0.013996,0.055627,0.0,0.09797,0.0,0.051609,0.084915,0.028996,0.154828,0.040918,-51.0,-1.0,0.0,0.0,172.0,52.0,0.229627,0.5,2.0
1,100005,6735201.0,6735202.0,6735200.0,20205603.0,6735201.0,-190.666667,-62.0,-373.0,-572.0,-137.0,0.0,0.0,0.0,0.0,0.0,439.333333,1324.0,-128.0,1318.0,122.0,-123.0,-123.0,-123.0,-123.0,-123.0,0.0,0.0,0.0,0.0,0.0,219042.0,568800.0,29826.0,657126.0,58500.0,189469.5,543087.0,0.0,568408.5,25321.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-54.333333,-11.0,-121.0,-163.0,-31.0,1420.5,4261.5,0.0,4261.5,0.0,1.333333,2.0,0.0,4.0,2.0,1.0,1.0,1.0,3.0,1.0,2.333333,3.0,2.0,7.0,2.0,1.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,1.0,0.0,1.0,0.0,-6.0,0.0,-12.0,-12.0,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.601256,0.954794,0.0,1.803768,0.848974,0.0,0.0,0.0,0.0,0.0,0.047626,0.142879,0.0,0.142879,0.0,0.084148,0.168296,0.0,0.168296,0.084148,-12.0,-2.0,0.0,0.0,21.0,13.0,0.136752,0.333333,1.0
2,100013,5922080.75,5922083.0,5922077.0,23688323.0,5922081.5,-1737.5,-1210.0,-2070.0,-6950.0,-1835.0,0.0,0.0,0.0,0.0,0.0,-1068.0,-567.0,-1707.0,-4272.0,-999.0,-1054.75,-549.0,-1334.0,-4219.0,-1168.0,0.0,0.0,0.0,0.0,0.0,518070.015,1262250.0,26490.06,2072280.06,391770.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-775.5,-4.0,-1334.0,-3102.0,-882.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,4.0,1.0,1.0,2.0,0.0,4.0,1.0,12.0,40.0,2.0,48.0,3.0,2.333333,3.0,2.0,7.0,2.0,0.0,0.0,0.0,0.0,0.0,0.25,1.0,0.0,1.0,0.0,-52.25,-39.0,-66.0,-209.0,-52.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-68.0,-39.0,0.0,0.0,230.0,69.0,0.309947,1.0,2.0
3,100028,6356884.5,6356890.0,6356879.0,76282614.0,6356884.5,-1401.75,-269.0,-2105.0,-16821.0,-1612.0,0.0,0.0,0.0,0.0,0.0,2387.7,30885.0,-1862.0,23877.0,-896.5,-1238.285714,-687.0,-1862.0,-8668.0,-1375.0,0.0,0.0,0.0,0.0,0.0,126739.59,393750.0,0.0,1520875.08,129614.04,18630.45,151015.5,0.0,186304.5,0.0,14484.394286,101390.76,0.0,101390.76,0.0,0.0,0.0,0.0,0.0,0.0,-651.5,-20.0,-1564.0,-7818.0,-683.5,3012.010714,12897.09,0.0,21084.075,0.0,0.833333,2.0,0.0,10.0,0.0,1.0,1.0,1.0,12.0,1.0,2.416667,3.0,2.0,29.0,2.0,16.625,60.0,1.0,133.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.416667,1.0,0.0,5.0,0.0,-38.5,0.0,-69.0,-308.0,-39.0,1.27202,1.27202,1.27202,1.27202,1.27202,0.271977,0.271977,0.271977,0.271977,0.271977,0.0,0.0,0.0,0.0,0.0,0.122267,0.838975,0.0,1.100401,0.0,0.0,0.0,0.0,0.0,0.0,0.018099,0.1,0.0,0.126694,0.0,0.339155,0.467693,0.210618,0.678311,0.339155,-69.0,-8.0,0.0,0.0,560.0,70.0,0.260434,1.0,1.0
4,100038,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [54]:
bureau_application_train_feature.to_csv(r'../data/features/bureau_fe_train.csv', index=False)
bureau_application_test_feature.to_csv(r'../data/features/bureau_fe_test.csv', index=False)