# Features from the bureau dataset

Этот блокнот создает признаки из набора данных bureau. Набор данных bureau содержит информацию о прошлых кредитах (активных или закрытых), предоставленных другими финансовыми учреждениями, о которых было сообщено в Бюро кредитных историй.

In [None]:
import numpy as np
import pandas as pd

# -----------------------------------------------------
from google.colab import drive

# -----------------------------------------------------
import zipfile
import time
import sys
import os
import gc

In [None]:
# Remove the limit on the number of columns shown when displaying a DataFrame
pd.options.display.max_columns = None

# Show at most 400 rows when displaying a DataFrame
pd.options.display.max_rows = 400

In [None]:
# Mount Google Drive into the current runtime at /content/drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Helper that unpacks a zip archive into a target directory

def extract_zip_file(zip_path, extract_path):
    """
    Extract every member of a zip archive into a directory.

    :param zip_path: Path to the zip archive to read
    :param extract_path: Directory that receives the extracted contents
    """
    archive = zipfile.ZipFile(zip_path, mode="r")
    # The archive is closed when the with-block exits, even on error
    with archive:
        archive.extractall(path=extract_path)

In [None]:
# Unpack the home-credit-default-risk archive from Google Drive into /content/

extract_path = "/content/"
zip_file = "/content/drive/My Drive/home-credit-default-risk.zip"

extract_zip_file(zip_path=zip_file, extract_path=extract_path)

In [None]:
# Unpack the "1 - Application features" archive from Google Drive into /content/

extract_path = "/content/"
zip_file = "/content/drive/My Drive/1 - Application features.zip"

extract_zip_file(zip_path=zip_file, extract_path=extract_path)

In [None]:
# Load the input tables from the extracted archives

# Raw bureau records from the Home Credit archive
bureau = pd.read_csv("/content/home-credit-default-risk/bureau.csv")

# Application table and column descriptions from the "1 - Application features" archive
application = pd.read_csv("/content/1 - Application features/application.csv")
homecredit_columns_description = pd.read_csv(
    "/content/1 - Application features/homecredit_columns_description.csv",
    encoding="ISO-8859-1",
)

In [None]:
# Sanity check: show the (rows, columns) shape of application

application.shape

In [None]:
# Print the name and description of every bureau.csv column, then preview bureau

# Keep only the description rows that document the bureau.csv table
is_bureau_entry = homecredit_columns_description["Table"].eq("bureau.csv")
description_application = homecredit_columns_description.loc[is_bureau_entry]
for index, row in description_application.iterrows():
    print(row["Row"], row["Description"])

# First ten bureau records (shown as the cell output)
bureau.head(10)

SK_ID_CURR ID of loan in our sample - one loan in our sample can have 0,1,2 or more related previous credits in credit bureau 
SK_BUREAU_ID Recoded ID of previous Credit Bureau credit related to our loan (unique coding for each loan application)
CREDIT_ACTIVE Status of the Credit Bureau (CB) reported credits
CREDIT_CURRENCY Recoded currency of the Credit Bureau credit
DAYS_CREDIT How many days before current application did client apply for Credit Bureau credit
CREDIT_DAY_OVERDUE Number of days past due on CB credit at the time of application for related loan in our sample
DAYS_CREDIT_ENDDATE Remaining duration of CB credit (in days) at the time of application in Home Credit
DAYS_ENDDATE_FACT Days since CB credit ended at the time of application in Home Credit (only for closed credit)
AMT_CREDIT_MAX_OVERDUE Maximal amount overdue on the Credit Bureau credit so far (at application date of loan in our sample)
CNT_CREDIT_PROLONG How many times was the Credit Bureau credit prolonged
AMT_CR

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,
5,215354,5714467,Active,currency 1,-273,0,27460.0,,0.0,0,180000.0,71017.38,108982.62,0.0,Credit card,-31,
6,215354,5714468,Active,currency 1,-43,0,79.0,,0.0,0,42103.8,42103.8,0.0,0.0,Consumer credit,-22,
7,162297,5714469,Closed,currency 1,-1896,0,-1684.0,-1710.0,14985.0,0,76878.45,0.0,0.0,0.0,Consumer credit,-1710,
8,162297,5714470,Closed,currency 1,-1146,0,-811.0,-840.0,0.0,0,103007.7,0.0,0.0,0.0,Consumer credit,-840,
9,162297,5714471,Active,currency 1,-1146,0,-484.0,,0.0,0,4500.0,0.0,0.0,0.0,Credit card,-690,


In [None]:
# Build new per-application features from the bureau table

# One accumulator list per feature. Every loop iteration below appends exactly
# one value to each list, so all lists stay aligned with the rows of application.
nb_past_bureau = []
fr_closed_bureau = []
nb_active_bureau = []
fr_active_bureau = []
fr_sold_bureau = []
fr_bd_bureau = []
days_most_recent_bureau = []
days_oldest_bureau = []
average_time_between_bureau = []
nb_credit_overdue_bureau = []
total_days_credit_overdue = []
fr_days_credit_overdue = []
nb_max_overdue_bureau = []
nb_max_overdue_active_bureau = []
sum_max_overdue_bureau = []
fr_max_overdue_bureau = []
nb_sum_overdue_bureau = []
sum_sum_overdue_bureau = []
fr_sum_overdue_bureau = []
nb_prolong_bureau = []
sum_closed_fx_repaid_bureau = []
weighted_avg_days_closed_fx_bureau = []
weighted_avg_days_active_fx_bureau = []
sum_active_fx_bureau = []
fr_active_fx_remaining_bureau = []
sum_active_fx_remaining_bureau = []
nb_fx_bureau = []
sum_fx_bureau = []
fr_sum_remaining_bureau = []
nb_cc_bureau = []
nb_cc_active_bureau = []
sum_debt_cc_bureau = []
fr_debt_cc_bureau = []
sum_sum_cc_bureau = []
total_debt_bureau_application = []
ratio_income_debt_bureau_application = []

# Row positions of each client's bureau records, computed once up front so the
# loop does not have to rescan the whole bureau table for every application.
bureau_positions = bureau.groupby("SK_ID_CURR").indices
no_bureau_positions = np.array([], dtype=np.intp)

counter = 0
lenght = len(application)
for index, row in application.iterrows():
    counter += 1
    sys.stdout.write(f"\rProgress:{round(counter / lenght * 100, 1)}%")
    # All bureau credits reported for the current applicant (possibly none)
    df = bureau.iloc[bureau_positions.get(row["SK_ID_CURR"], no_bureau_positions)]
    # Number of past bureau credits
    nb_past_bureau.append(len(df))
    if len(df) > 0:
        # Fraction of closed bureau credits
        fr_closed_bureau.append(len(df[df["CREDIT_ACTIVE"] == "Closed"]) / len(df))
        # Number of active bureau credits
        nb_active_bureau.append(len(df[df["CREDIT_ACTIVE"] == "Active"]))
        # Fraction of active bureau credits
        fr_active_bureau.append(len(df[df["CREDIT_ACTIVE"] == "Active"]) / len(df))
        # Fraction of sold bureau credits
        fr_sold_bureau.append(len(df[df["CREDIT_ACTIVE"] == "Sold"]) / len(df))
        # Fraction of bad-debt bureau credits
        fr_bd_bureau.append(len(df[df["CREDIT_ACTIVE"] == "Bad debt"]) / len(df))
        # Days since the most recent bureau credit application
        days_most_recent_bureau.append(df["DAYS_CREDIT"].max())
        # Days since the oldest recorded bureau credit application
        days_oldest_bureau.append(df["DAYS_CREDIT"].min())
        # Average time between bureau credit applications. With a single credit
        # there is no gap to average, so store NaN directly instead of taking
        # the mean of an empty array (which yields NaN plus a RuntimeWarning).
        if len(df) > 1:
            average_time_between_bureau.append(np.mean(np.diff(df["DAYS_CREDIT"].sort_values())))
        else:
            average_time_between_bureau.append(np.nan)

        # --- Credits with overdue days > 0 ---
        tmp = df[df["CREDIT_DAY_OVERDUE"] > 0]
        nb_credit_overdue_bureau.append(len(tmp))
        if len(tmp) > 0:
            overdue_days = tmp["CREDIT_DAY_OVERDUE"].sum()
            # DAYS_CREDIT counts days before the application (negative values),
            # so its negated sum is the total age of the overdue credits.
            elapsed_days = -tmp["DAYS_CREDIT"].sum()
            # Total number of overdue days across bureau credits
            total_days_credit_overdue.append(overdue_days)
            # Fraction of the credits' age spent overdue; NaN when the age is
            # zero, instead of dividing by zero.
            if elapsed_days > 0:
                fr_days_credit_overdue.append(overdue_days / elapsed_days)
            else:
                fr_days_credit_overdue.append(np.nan)
        else:
            total_days_credit_overdue.append(0)
            fr_days_credit_overdue.append(0)

        # --- Credits with a maximal overdue amount > 0 ---
        tmp = df[df["AMT_CREDIT_MAX_OVERDUE"] > 0]
        # Number of credits with a max overdue amount
        nb_max_overdue_bureau.append(len(tmp))
        # Number of active credits with a max overdue amount
        nb_max_overdue_active_bureau.append(len(tmp[tmp["CREDIT_ACTIVE"] == "Active"]))
        if len(tmp) > 0:
            # Sum of max overdue amounts
            sum_max_overdue_bureau.append(tmp["AMT_CREDIT_MAX_OVERDUE"].sum())
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                # Max overdue amount as a fraction of the credit amount
                fr_max_overdue_bureau.append(tmp["AMT_CREDIT_MAX_OVERDUE"].sum() / tmp["AMT_CREDIT_SUM"].sum())
            else:
                fr_max_overdue_bureau.append(np.nan)
        else:
            sum_max_overdue_bureau.append(0)
            fr_max_overdue_bureau.append(0)

        # --- Credits with a current overdue amount > 0 ---
        tmp = df[df["AMT_CREDIT_SUM_OVERDUE"] > 0]
        # Number of credits that are currently overdue
        nb_sum_overdue_bureau.append(len(tmp))
        if len(tmp) > 0:
            # Sum of the current overdue amounts
            sum_sum_overdue_bureau.append(tmp["AMT_CREDIT_SUM_OVERDUE"].sum())
            # Current overdue amount as a fraction of the credit amount
            # (same "> 0" guard as the other ratio features)
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                fr_sum_overdue_bureau.append(tmp["AMT_CREDIT_SUM_OVERDUE"].sum() / tmp["AMT_CREDIT_SUM"].sum())
            else:
                fr_sum_overdue_bureau.append(np.nan)
        else:
            sum_sum_overdue_bureau.append(0)
            fr_sum_overdue_bureau.append(0)

        # Total number of times the credits were prolonged
        nb_prolong_bureau.append(df[df["CNT_CREDIT_PROLONG"] > 0]["CNT_CREDIT_PROLONG"].sum())

        # --- Closed fixed-term credits (every type except credit cards) ---
        tmp = df[(df["CREDIT_ACTIVE"] == "Closed") & (df["CREDIT_TYPE"] != "Credit card")]
        if len(tmp) > 0:
            # Total amount of repaid fixed-term credit
            sum_closed_fx_repaid_bureau.append(tmp["AMT_CREDIT_SUM"].sum())
            # Amount-weighted average length of closed fixed-term credits.
            # The duration (end date - start date) must be parenthesised before
            # it is multiplied by the weight; without the parentheses only
            # DAYS_CREDIT was weighted.
            # NOTE(review): rows with a missing end date are skipped in the
            # numerator but still count in the denominator - confirm intended.
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                weighted_avg_days_closed_fx_bureau.append(
                    ((tmp["DAYS_CREDIT_ENDDATE"] - tmp["DAYS_CREDIT"]) * tmp["AMT_CREDIT_SUM"]).sum()
                    / tmp["AMT_CREDIT_SUM"].sum()
                )
            else:
                weighted_avg_days_closed_fx_bureau.append(np.nan)
        else:
            sum_closed_fx_repaid_bureau.append(0)
            weighted_avg_days_closed_fx_bureau.append(np.nan)

        # --- Active fixed-term credits that end in the future ---
        tmp = df[(df["CREDIT_ACTIVE"] == "Active") & (df["CREDIT_TYPE"] != "Credit card") & (df["DAYS_CREDIT_ENDDATE"] > 0)]
        if len(tmp) > 0:
            # Amount-weighted average length of active fixed-term credits
            # (same parenthesisation fix as for the closed credits)
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                weighted_avg_days_active_fx_bureau.append(
                    ((tmp["DAYS_CREDIT_ENDDATE"] - tmp["DAYS_CREDIT"]) * tmp["AMT_CREDIT_SUM"]).sum()
                    / tmp["AMT_CREDIT_SUM"].sum()
                )
            else:
                weighted_avg_days_active_fx_bureau.append(np.nan)
            # Total amount of active credit (interest is not taken into account)
            sum_active_fx_bureau.append(tmp["AMT_CREDIT_SUM"].sum())
            # Amount-weighted fraction of the credit term still remaining
            # (interest is not taken into account)
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                fr_active_fx_remaining_bureau.append(
                    (tmp["DAYS_CREDIT_ENDDATE"] / (tmp["DAYS_CREDIT_ENDDATE"] - tmp["DAYS_CREDIT"]) * tmp["AMT_CREDIT_SUM"]).sum()
                    / tmp["AMT_CREDIT_SUM"].sum()
                )
            else:
                fr_active_fx_remaining_bureau.append(np.nan)
            # Remaining credit amount (interest is not taken into account)
            sum_active_fx_remaining_bureau.append(fr_active_fx_remaining_bureau[-1] * sum_active_fx_bureau[-1])
        else:
            weighted_avg_days_active_fx_bureau.append(np.nan)
            sum_active_fx_bureau.append(0)
            fr_active_fx_remaining_bureau.append(0)
            sum_active_fx_remaining_bureau.append(0)

        # --- All fixed-term credits ---
        tmp = df[df["CREDIT_TYPE"] != "Credit card"]
        # Number of fixed-term bureau credits
        nb_fx_bureau.append(len(tmp))
        if len(tmp) > 0:
            # Total amount of fixed-term bureau credits
            sum_fx_bureau.append(tmp["AMT_CREDIT_SUM"].sum())
            # Fraction of the fixed-term bureau credits still to be paid
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                fr_sum_remaining_bureau.append(sum_active_fx_remaining_bureau[-1] / tmp["AMT_CREDIT_SUM"].sum())
            else:
                fr_sum_remaining_bureau.append(np.nan)
        else:
            sum_fx_bureau.append(0)
            fr_sum_remaining_bureau.append(0)

        # --- All credit-card (revolving) credits ---
        tmp = df[df["CREDIT_TYPE"] == "Credit card"]
        # Number of credit-card credits
        nb_cc_bureau.append(len(tmp))
        # Number of active credit-card credits. Filter the rows first: taking
        # len() of the boolean mask counted every credit card, active or not.
        nb_cc_active_bureau.append(len(tmp[tmp["CREDIT_ACTIVE"] == "Active"]))
        if len(tmp) > 0:
            # Total debt on credit cards
            sum_debt_cc_bureau.append(tmp["AMT_CREDIT_SUM_DEBT"].sum())
            # Debt as a fraction of the credit available on credit cards
            if tmp["AMT_CREDIT_SUM"].sum() > 0:
                fr_debt_cc_bureau.append(tmp["AMT_CREDIT_SUM_DEBT"].sum() / tmp["AMT_CREDIT_SUM"].sum())
            else:
                fr_debt_cc_bureau.append(np.nan)
            # Total amount available on credit cards
            sum_sum_cc_bureau.append(tmp["AMT_CREDIT_SUM"].sum())
        else:
            sum_debt_cc_bureau.append(0)
            fr_debt_cc_bureau.append(0)
            sum_sum_cc_bureau.append(0)

        # Total current debt (including the new application)
        total_debt_bureau_application.append(row["AMT_CREDIT"] + sum_debt_cc_bureau[-1] + sum_active_fx_remaining_bureau[-1])
        # Ratio of income to total debt
        ratio_income_debt_bureau_application.append(row["AMT_INCOME_TOTAL"] / total_debt_bureau_application[-1])

    else:
        # No bureau history for this applicant: counts and sums are 0,
        # fractions and date-based features are undefined (NaN).
        fr_closed_bureau.append(np.nan)
        nb_active_bureau.append(0)
        fr_active_bureau.append(np.nan)
        fr_sold_bureau.append(np.nan)
        fr_bd_bureau.append(np.nan)
        days_most_recent_bureau.append(np.nan)
        days_oldest_bureau.append(np.nan)
        average_time_between_bureau.append(np.nan)
        nb_credit_overdue_bureau.append(0)
        total_days_credit_overdue.append(0)
        fr_days_credit_overdue.append(0)
        nb_max_overdue_bureau.append(0)
        nb_max_overdue_active_bureau.append(0)
        sum_max_overdue_bureau.append(0)
        fr_max_overdue_bureau.append(0)
        nb_sum_overdue_bureau.append(0)
        sum_sum_overdue_bureau.append(0)
        fr_sum_overdue_bureau.append(0)
        nb_prolong_bureau.append(0)
        sum_closed_fx_repaid_bureau.append(0)
        weighted_avg_days_closed_fx_bureau.append(np.nan)
        weighted_avg_days_active_fx_bureau.append(np.nan)
        sum_active_fx_bureau.append(0)
        fr_active_fx_remaining_bureau.append(0)
        sum_active_fx_remaining_bureau.append(0)
        nb_fx_bureau.append(0)
        sum_fx_bureau.append(0)
        fr_sum_remaining_bureau.append(0)
        nb_cc_bureau.append(0)
        nb_cc_active_bureau.append(0)
        sum_debt_cc_bureau.append(0)
        fr_debt_cc_bureau.append(0)
        sum_sum_cc_bureau.append(0)
        # The only debt is the new application itself
        total_debt_bureau_application.append(row["AMT_CREDIT"])
        ratio_income_debt_bureau_application.append(row["AMT_INCOME_TOTAL"] / row["AMT_CREDIT"])


Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%Progress:0.0%

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Progress:100.0%

In [None]:
# Helper that appends the description of a new feature to homecredit_columns_description

def add_description(table, row, description):
    """
    Append one description row to the global homecredit_columns_description.

    The new row is built by column name, so it works regardless of how many
    columns the description table has or in which order they appear; every
    column other than "Table", "Row" and "Description" is filled with NaN.

    :param table: Value for the "Table" column (name of the source table)
    :param row: Value for the "Row" column (name of the described feature)
    :param description: Value for the "Description" column
    :return: The updated homecredit_columns_description DataFrame (modified in place)
    """
    named_values = {"Table": table, "Row": row, "Description": description}
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    list_description = [
        named_values.get(column, np.nan)
        for column in homecredit_columns_description.columns
    ]
    # The new row is labelled with the current row count, which appends a row
    # as long as the index is the default 0..n-1 range.
    homecredit_columns_description.loc[len(homecredit_columns_description)] = list_description
    return homecredit_columns_description

In [None]:
# Attach the new bureau features to application and register their descriptions

# (column name, per-application values, description) for each new feature, in
# the order the columns are added.
# NOTE(review): a few description strings contain typos ("lenght", "Fration");
# they are kept verbatim so the saved description file does not change.
bureau_features = [
    ("nb_past_bureau", nb_past_bureau, "Number of past bureau credits"),
    ("fr_closed_bureau", fr_closed_bureau, "Fraction of closed bureau credits"),
    ("nb_active_bureau", nb_active_bureau, "Number of active bureau credits"),
    ("fr_active_bureau", fr_active_bureau, "Fraction of active bureau credits"),
    ("fr_sold_bureau", fr_sold_bureau, "Fraction of sold bureau credits"),
    ("fr_bd_bureau", fr_bd_bureau, "Fraction of bad debt bureau credits"),
    ("days_most_recent_bureau", days_most_recent_bureau, "Days since most recent credit application"),
    ("days_oldest_bureau", days_oldest_bureau, "Days since oldest credit application"),
    ("average_time_between_bureau", average_time_between_bureau, "Average time between credit applications"),
    ("nb_credit_overdue_bureau", nb_credit_overdue_bureau, "Number of overdue credits (days > 0)"),
    ("total_days_credit_overdue", total_days_credit_overdue, "Total days overdue"),
    ("fr_days_credit_overdue", fr_days_credit_overdue, "Fraction of credit lenght overdue"),
    ("nb_max_overdue_bureau", nb_max_overdue_bureau, "Number of credits with max overdue"),
    ("nb_max_overdue_active_bureau", nb_max_overdue_active_bureau, "Number of active credits with max overdue"),
    ("sum_max_overdue_bureau", sum_max_overdue_bureau, "Sum of max overdue"),
    ("fr_max_overdue_bureau", fr_max_overdue_bureau, "Fration of credit with max overdue"),
    ("nb_sum_overdue_bureau", nb_sum_overdue_bureau, "Number of credits with current overdue"),
    ("sum_sum_overdue_bureau", sum_sum_overdue_bureau, "Sum of credits with current overdue"),
    ("fr_sum_overdue_bureau", fr_sum_overdue_bureau, "Fration of credit with current overdue"),
    ("nb_prolong_bureau", nb_prolong_bureau, "Number of time credits have been prolonged"),
    ("sum_closed_fx_repaid_bureau", sum_closed_fx_repaid_bureau, "Sum of closed fixed term credits"),
    ("weighted_avg_days_closed_fx_bureau", weighted_avg_days_closed_fx_bureau, "Average lenght of closed fixed term credits"),
    ("weighted_avg_days_active_fx_bureau", weighted_avg_days_active_fx_bureau, "Average lenght of active fixed term credits"),
    ("sum_active_fx_bureau", sum_active_fx_bureau, "Sum of active fixed term credits"),
    ("fr_active_fx_remaining_bureau", fr_active_fx_remaining_bureau, "Fraction remaining on active fixed term credits"),
    ("sum_active_fx_remaining_bureau", sum_active_fx_remaining_bureau, "Sum remaining on active fixed term credits"),
    ("nb_fx_bureau", nb_fx_bureau, "Number of fixed term credits"),
    ("sum_fx_bureau", sum_fx_bureau, "Sum of fixed term credits"),
    ("fr_sum_remaining_bureau", fr_sum_remaining_bureau, "Fraction of the fixed term bureau credits remaining to be paid"),
    ("nb_cc_bureau", nb_cc_bureau, "Number of credit cards credits"),
    ("nb_cc_active_bureau", nb_cc_active_bureau, "Number of active credit cards credits"),
    ("sum_debt_cc_bureau", sum_debt_cc_bureau, "Total debt on credit cards credits"),
    ("fr_debt_cc_bureau", fr_debt_cc_bureau, "Fraction of debt over total available on credit cards credits"),
    ("sum_sum_cc_bureau", sum_sum_cc_bureau, "Total available on credit cards credits"),
    ("total_debt_bureau_application", total_debt_bureau_application, "Total debt including active credits and new application"),
    ("ratio_income_debt_bureau_application", ratio_income_debt_bureau_application, "Ratio of income over total debt"),
]

for feature_name, feature_values, feature_description in bureau_features:
    # Add the feature column, then record its description under bureau.csv
    application[feature_name] = feature_values
    add_description("bureau.csv", feature_name, feature_description)

In [None]:
# Sanity check: show the (rows, columns) shape of application

application.shape

In [None]:
# Save application to application.csv (relative path), without writing the index

application.to_csv("application.csv", index=False)

In [None]:
# Save homecredit_columns_description to homecredit_columns_description.csv (relative path), without writing the index

homecredit_columns_description.to_csv("homecredit_columns_description.csv", index=False)