In [1]:
import gc
import json
import os
import subprocess
import sys
from datetime import datetime
from glob import glob
from os import listdir
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl

from tqdm.notebook import tqdm

import warnings
# Silence every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')

# polars display options: "full" float formatting and a float precision of 4.
pl.Config.set_fmt_float("full")
pl.Config.set_float_precision(4)

# NOTE(review): presumably the random seed for later steps — it is not used in the
# part of the notebook visible here; confirm against the cells that follow.
RAND = 10

In [2]:
# Relative locations of the train / test parquet splits.
TRAIN_DIR = "parquet_files/train"
TEST_DIR = "parquet_files/test"

# Overview

Данные взяты из соревнования на Kaggle. Основная информация по данным представлена здесь: https://www.kaggle.com/competitions/home-credit-credit-risk-model-stability/data

Цель - предсказать, какие клиенты с большей вероятностью не смогут выплатить свои кредиты. Оценка должна быть стабильна с течением времени

Основные колонки:
- case_id -  уникальный идентификатор для каждого кредитного кейса. Этот идентификатор понадобится вам для объединения соответствующих таблиц с базовой таблицей.
- date_decision -  дата, когда было принято решение об одобрении кредита.
- WEEK_NUM - номер недели, используемый для агрегирования. Будет использоваться в оценке стабильности по Джини
- **target - целевое значение, определяемое по истечении определенного периода времени в зависимости от того, допустил ли клиент дефолт по конкретному кредитному делу (займу).**
- num_group1 - столбец индексации, используемый для исторических записей case_id как в таблицах depth=1, так и в таблицах depth=2.
- num_group2 - второй столбец индексации для исторических записей case_id в таблицах depth=2. Порядок следования num_group1 и num_group2 важен и будет уточнен в определениях фич.

Определения всех остальных столбцов можно найти в файле feature_definitions.csv. Для таблиц с глубиной=0 (depth=0) записи можно напрямую использовать в качестве фич. Однако для таблиц с глубиной > 0 (depth=1,2) потребуется использовать функции агрегирования, которые будут объединять исторические записи, связанные с каждым case_id, в единый объект. В случае, если num_group1 или num_group2 обозначает индекс пользователя (это ясно из определений фич), нулевой индекс имеет особое значение. Когда num_groupN=0, это заявитель (лицо, подавшее заявку на получение кредита).

Для загрузки и предобработки данных будем использовать polars, т.к. файлы большие и данных много

# Files research

Для начала посмотрим из каких файлов и данных состоит наш датасет

In [3]:
# Lookup built from the rows of feature_definitions.csv: first field -> second field.
# dict() over iter_rows() requires every CSV row to have exactly two fields.
feature_definitions = dict(
    pl.read_csv("feature_definitions.csv").iter_rows()
)

In [7]:
# Per-file overview of the training set: shape and share of missing cells.
shapes, nan_total_count = [], []

# os.listdir returns entries in arbitrary order and may include non-data entries
# (e.g. .ipynb_checkpoints), so keep only parquet files and sort for a stable table.
file_names = sorted(
    name for name in listdir(TRAIN_DIR) if name.endswith(".parquet")
)
for file_name in tqdm(file_names):
    df = pl.read_parquet(f"{TRAIN_DIR}/{file_name}")
    shapes.append(df.shape)
    # Sum the per-column null counts as Python ints; no pandas round-trip needed.
    nan_total_count.append(sum(col.null_count() for col in df.get_columns()))
    # Release the frame before loading the next (potentially large) file.
    del df

# Build the summary in one constructor call; this also works for an empty file list.
train_disk_usage = pd.DataFrame({
    "files": file_names,
    "height": [height for height, _ in shapes],
    "width": [width for _, width in shapes],
    "null_count": nan_total_count,
})
# Share of null cells per file. Despite the "%" in the column name this is a
# 0..1 fraction, kept as in the original so downstream output is unchanged.
train_disk_usage["isna_%"] = train_disk_usage["null_count"] / (
    train_disk_usage["height"] * train_disk_usage["width"]
)
train_disk_usage

  0%|          | 0/32 [00:00<?, ?it/s]

Unnamed: 0,files,height,width,null_count,isna_%
0,train_applprev_1_0.parquet,3887684,41,49311059,0.3094
1,train_applprev_1_1.parquet,2638295,41,34503185,0.319
2,train_applprev_2.parquet,14075487,6,16236709,0.1923
3,train_base.parquet,1526659,5,0,0.0
4,train_credit_bureau_a_1_0.parquet,4108212,79,244050042,0.752
5,train_credit_bureau_a_1_1.parquet,6009192,79,308479374,0.6498
6,train_credit_bureau_a_1_2.parquet,3743810,79,187095066,0.6326
7,train_credit_bureau_a_1_3.parquet,2079323,79,101862954,0.6201
8,train_credit_bureau_a_2_0.parquet,5296031,19,37184092,0.3695
9,train_credit_bureau_a_2_1.parquet,7861809,19,53702386,0.3595


In [21]:
def print_sample_info(file_dir: str,
                      file_name: str,
                      columns_definitions: dict,
                      n_rows: int = 5) -> "pl.DataFrame":
    """
    Load a parquet file, show a preview of it and return the loaded frame.

    Displays the first ``n_rows`` rows and the ``describe()`` summary, then
    prints the definition of every column that has an entry in
    ``columns_definitions``.

    :param file_dir: directory that contains the file
    :param file_name: name of the parquet file inside ``file_dir``
    :param columns_definitions: mapping of column name to its description
    :param n_rows: number of leading rows to display
    :return: the full frame read from the file
    """

    df = pl.read_parquet(f"{file_dir}/{file_name}")
    display(df[:n_rows])
    display(df.describe())
    for column in df.columns:
        # Columns without a known definition are skipped silently.
        if column in columns_definitions:
            print(f"{column}: {columns_definitions[column]}")
    return df

## Base file

In [15]:
# Load and preview the base table.
train_base = print_sample_info(
    file_dir=TRAIN_DIR,
    file_name="train_base.parquet",
    columns_definitions=feature_definitions,
)

case_id,date_decision,MONTH,WEEK_NUM,target
i64,str,i64,i64,i64
0,"""2019-01-03""",201901,0,0
1,"""2019-01-03""",201901,0,0
2,"""2019-01-04""",201901,0,0
3,"""2019-01-03""",201901,0,0
4,"""2019-01-04""",201901,0,1


statistic,case_id,date_decision,MONTH,WEEK_NUM,target
str,f64,str,f64,f64,f64
"""count""",1526659.0,"""1526659""",1526659.0,1526659.0,1526659.0
"""null_count""",0.0,"""0""",0.0,0.0,0.0
"""mean""",1286076.5717,,201936.288,40.769,0.0314
"""std""",718946.5923,,44.736,23.798,0.1745
"""min""",0.0,"""2019-01-01""",201901.0,0.0,0.0
"""25%""",766198.0,,201906.0,23.0,0.0
"""50%""",1357358.0,,201910.0,40.0,0.0
"""75%""",1739023.0,,202001.0,55.0,0.0
"""max""",2703454.0,"""2020-10-05""",202010.0,91.0,1.0


In [16]:
# Class balance of the target column.
train_base.get_column("target").value_counts()

target,count
i64,u32
1,47994
0,1478665


- Пропусков данных в основном файле нет
- Выборка классов несбалансированная

## static_0

Properties: depth=0, internal data source

In [18]:
# Load and preview the first static_0 file.
train_static_0_0 = print_sample_info(
    file_dir=TRAIN_DIR,
    file_name="train_static_0_0.parquet",
    columns_definitions=feature_definitions,
)

case_id,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,avgdbdtollast24m_4525197P,avgdpdtolclosure24_3658938P,avginstallast24m_3658937A,avglnamtstart24m_4525187A,avgmaxdpdlast9m_3716943P,avgoutstandbalancel6m_4187114A,avgpmtlast12m_4525200A,bankacctype_710L,cardtype_51L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_136L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,clientscnt_887L,…,numinstpaidearlyest_4493214L,numinstpaidlastcontr_4325080L,numinstpaidlate1d_3546852L,numinstregularpaid_973L,numinstregularpaidest_4493210L,numinsttopaygr_769L,numinsttopaygrest_4493213L,numinstunpaidmax_3546851L,numinstunpaidmaxest_4493212L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,opencred_647L,paytype1st_925L,paytype_783L,payvacationpostpone_4187118D,pctinstlsallpaidearl3d_427L,pctinstlsallpaidlat10d_839L,pctinstlsallpaidlate1d_3546856L,pctinstlsallpaidlate4d_3546849L,pctinstlsallpaidlate6d_3546844L,pmtnum_254L,posfpd10lastmonth_333P,posfpd30lastmonth_3976960P,posfstqpd30lastmonth_3976962P,previouscontdistrict_112M,price_1097A,sellerplacecnt_915L,sellerplacescnt_216L,sumoutstandtotal_3546847A,sumoutstandtotalest_4493215A,totaldebt_9A,totalsettled_863A,totinstallast1m_4525188A,twobodfilling_608L,typesuite_864L,validfrom_1069D
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,bool,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str
0,,,1917.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,,"""OTHER""","""OTHER""",,,,,,,24.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""",,
1,,,3134.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,,"""OTHER""","""OTHER""",,,,,,,18.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""",,
2,,,4937.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,False,"""OTHER""","""OTHER""",,,,,,,36.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""","""AL""",
3,,,4643.6,0.0,0.0,1.0,0.0,2.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,1.0,False,"""OTHER""","""OTHER""",,,,,,,12.0,0.0,0.0,,"""a55475b1""",,1.0,1.0,,,0.0,0.0,,"""BO""","""AL""",
4,,,3390.2,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,False,"""OTHER""","""OTHER""",,,,,,,24.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""","""AL""",


statistic,case_id,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,avgdbdtollast24m_4525197P,avgdpdtolclosure24_3658938P,avginstallast24m_3658937A,avglnamtstart24m_4525187A,avgmaxdpdlast9m_3716943P,avgoutstandbalancel6m_4187114A,avgpmtlast12m_4525200A,bankacctype_710L,cardtype_51L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_136L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,…,numinstpaidearlyest_4493214L,numinstpaidlastcontr_4325080L,numinstpaidlate1d_3546852L,numinstregularpaid_973L,numinstregularpaidest_4493210L,numinsttopaygr_769L,numinsttopaygrest_4493213L,numinstunpaidmax_3546851L,numinstunpaidmaxest_4493212L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,opencred_647L,paytype1st_925L,paytype_783L,payvacationpostpone_4187118D,pctinstlsallpaidearl3d_427L,pctinstlsallpaidlat10d_839L,pctinstlsallpaidlate1d_3546856L,pctinstlsallpaidlate4d_3546849L,pctinstlsallpaidlate6d_3546844L,pmtnum_254L,posfpd10lastmonth_333P,posfpd30lastmonth_3976960P,posfstqpd30lastmonth_3976962P,previouscontdistrict_112M,price_1097A,sellerplacecnt_915L,sellerplacescnt_216L,sumoutstandtotal_3546847A,sumoutstandtotalest_4493215A,totaldebt_9A,totalsettled_863A,totinstallast1m_4525188A,twobodfilling_608L,typesuite_864L,validfrom_1069D
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str
"""count""",1003757.0,707071.0,574903.0,1003757.0,1003755.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,577626.0,345754.0,218032.0,674485.0,570036.0,69807.0,488464.0,404763.0,196447.0,"""304690""","""125784""",1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,397.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,1003757.0,…,295381.0,466841.0,683434.0,680415.0,295381.0,683433.0,295381.0,683433.0,295381.0,1003757.0,1003757.0,1003757.0,782997.0,"""1002696""","""1002696""","""1467""",679714.0,677866.0,679714.0,678977.0,678791.0,975103.0,984565.0,939963.0,911303.0,"""1003757""",869385.0,1003757.0,1003757.0,687938.0,295381.0,1003755.0,1003755.0,138692.0,"""1003712""","""288520""","""119405"""
"""null_count""",0.0,296686.0,428854.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,426131.0,658003.0,785725.0,329272.0,433721.0,933950.0,515293.0,598994.0,807310.0,"""699067""","""877973""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1003360.0,0.0,0.0,0.0,0.0,0.0,0.0,…,708376.0,536916.0,320323.0,323342.0,708376.0,320324.0,708376.0,320324.0,708376.0,0.0,0.0,0.0,220760.0,"""1061""","""1061""","""1002290""",324043.0,325891.0,324043.0,324780.0,324966.0,28654.0,19192.0,63794.0,92454.0,"""0""",134372.0,0.0,0.0,315819.0,708376.0,2.0,2.0,865065.0,"""45""","""715237""","""884352"""
"""mean""",1216924.0563,0.0544,50692.5451,3875.5976,1348.4987,0.0,0.1262,0.42,1.2389,0.3164,2.5469,29.6159,29.9409,31.9704,44.5292,5117.127,43699.3874,0.7226,44570.3016,5935.0656,,,0.032,0.0148,0.0215,0.0541,0.0937,0.0364,0.0343,0.0529,0.0886,0.0033,0.0812,0.0036,0.012,0.0756,…,14.9711,7.7613,5.1938,26.7122,30.3953,5.9261,5.6273,5.3919,5.1116,0.018,0.0204,0.3215,0.043,,,,0.6038,0.0793,0.1775,0.1134,0.097,17.0641,0.0161,0.0077,0.0289,,33339.9415,0.1538,1.4497,26465.0858,26060.5744,18184.0555,79276.3788,10576.1926,,,
"""std""",696462.4416,8.744,65457.9386,2920.2372,2687.3415,0.0093,0.4458,2.6835,10.3164,2.6481,3.7417,316.8409,363.7942,328.8513,315.5864,6142.6886,43990.7011,4.6399,61632.9498,8646.086,,,0.2772,0.2281,0.2481,0.2884,0.4184,0.1956,0.2453,0.3549,0.5949,0.0608,0.8385,0.0631,0.6568,0.2827,…,20.4929,5.9844,8.3244,29.641,31.0362,10.0501,9.8885,8.8459,8.6761,0.1401,0.1474,1.0312,,,,,0.3587,0.1624,0.2301,0.1916,0.1784,9.5895,0.1257,0.0876,0.1676,,33206.1021,0.4338,1.86,55719.2095,57039.3069,47798.8946,125684.1403,16354.4178,,,
"""min""",0.0,0.0,0.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1176.0,-908.0,-1176.0,0.0,0.0,0.0,0.0,-7588198.5,0.0,"""CA""","""INSTANT""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""","""2018-03-28""",0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,"""P103_91_133""",0.0,0.0,0.0,-2405.0,-2405.0,0.0,0.0,0.222,"""BO""","""AL""","""2018-09-22"""
"""25%""",725987.0,0.0,6453.0,1895.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-11.0,-11.0,-11.0,0.0,2445.0,15646.2,0.0,8864.466,2451.4001,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,2.0,3.0,0.0,7.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.3462,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,,13400.0,0.0,0.0,0.0,0.0,0.0,0.0,3142.4001,,,
"""50%""",1311700.0,0.0,26937.0,3000.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-5.0,-4.0,-5.0,0.0,3894.0,27978.0,0.0,22685.715,4114.8003,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,8.0,6.0,2.0,16.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.625,0.0,0.0833,0.0104,0.0,12.0,0.0,0.0,0.0,,24478.0,0.0,1.0,0.0,0.0,0.0,29780.0,6008.6,,,
"""75%""",1562639.0,0.0,68961.2,4989.8003,1866.6,0.0,0.0,0.0,0.0,0.0,3.0,-1.0,-1.0,-1.0,1.0,6209.6,54445.6,0.0,53911.402,6977.2,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,19.0,12.0,7.0,35.0,40.0,9.0,8.0,8.0,8.0,0.0,0.0,0.0,,,,,0.8571,0.0833,0.2766,0.1538,0.1177,24.0,0.0,0.0,0.0,,43956.0,0.0,2.0,27183.0,25700.916,11967.8,101874.11,11627.2,,,
"""max""",2651092.0,3676.0,992476.5,106007.0,85620.805,5.0,25.0,443.0,247.0,77.0,97.0,4467.0,4467.0,4467.0,4467.0,400000.0,513520.0,236.0,1131135.9,391795.22,"""CA""","""PERSONALIZED""",47.0,47.0,47.0,95.0,102.0,11.0,31.0,4.0,241.0,13.0,345.0,8.0,329.0,8.0,…,300.0,74.0,137.0,356.0,341.0,141.0,99.0,67.0,60.0,4.0,4.0,61.0,1.0,"""OTHER""","""OTHER""","""2019-12-28""",23.0,1.1111,1.0,2.0,1.0,60.0,1.0,1.0,1.0,"""a55475b1""",761867.44,8.0,33.0,1210629.1,1085048.1,1210629.1,7988198.5,794899.2,"""FO""","""AL""","""2019-12-30"""


actualdpdtolerance_344P: DPD of client with tolerance.
amtinstpaidbefduel24m_4187115A: Number of instalments paid before due date in the last 24 months.
annuity_780A: Monthly annuity amount.
annuitynextmonth_57A: Next month's amount of annuity.
applicationcnt_361L: Number of applications associated with the same email address as the client.
applications30d_658L: Number of applications made by the client in the last 30 days.
applicationscnt_1086L: Number of applications associated with the same phone number.
applicationscnt_464L: Number of applications made in the last 30 days by other clients with the same employer as the applicant.
applicationscnt_629L: Number of applications with the same employer in the last 7 days.
applicationscnt_867L: Number of applications associated with the same mobile phone.
avgdbddpdlast24m_3658932P: Average days past or before due of payment during the last 24 months.
avgdbddpdlast3m_4187120P: Average days past or before due of payment during the last 3 mon

Файл содержит внутреннюю банковскую и статистическую информацию по каждому case_id, например:
- Средний размер платежей, выплаченных клиентом за последние 24 месяца 
- Сумма ежемесячного аннуитетного платежа
- Тип банковского счета заявителя
- Тип кредитной карты
- Сумма кредита или лимит кредитной карты

## static_cb_0
Properties: depth=0, external data source

In [19]:
# Load and preview the static_cb_0 file.
train_static_cb_0 = print_sample_info(
    file_dir=TRAIN_DIR,
    file_name="train_static_cb_0.parquet",
    columns_definitions=feature_definitions,
)

case_id,assignmentdate_238D,assignmentdate_4527235D,assignmentdate_4955616D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,dateofbirth_342D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,for3years_128L,for3years_504L,for3years_584L,formonth_118L,formonth_206L,formonth_535L,forquarter_1017L,forquarter_462L,forquarter_634L,fortoday_1092L,forweek_1077L,forweek_528L,forweek_601L,foryear_618L,foryear_818L,foryear_850L,fourthquarter_440L,maritalst_385M,maritalst_893M,numberofqueries_373L,pmtaverage_3A,pmtaverage_4527227A,pmtaverage_4955615A,pmtcount_4527229L,pmtcount_4955617L,pmtcount_693L,pmtscount_423L,pmtssum_45A,requesttype_4525192L,responsedate_1012D,responsedate_4527233D,responsedate_4917613D,riskassesment_302T,riskassesment_940T,secondquarter_766L,thirdquarter_1082L
i64,str,str,str,str,f64,str,str,f64,f64,f64,f64,f64,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,str,f64,f64,f64
357,,,,"""1988-04-01""",,,,,,,,,"""a55475b1""","""a55475b1""","""a55475b1""",,,,,,,,,,,,,,,,,,,"""a55475b1""","""a55475b1""",,,,,,,,6.0,6301.4,,"""2019-01-25""",,,,,,
381,,,,"""1973-11-01""",,,,,,,,,"""a55475b1""","""a55475b1""","""a55475b1""",,,,,,,,,,,,,,,,,,,"""a55475b1""","""a55475b1""",,,,,,,,6.0,4019.6,,"""2019-01-25""",,,,,,
388,,,,"""1989-04-01""",,"""1989-04-01""",,6.0,8.0,2.0,10.0,4.0,"""a55475b1""","""a55475b1""","""a55475b1""",2.0,,,,,,,,,,,,,,,,,6.0,"""a55475b1""","""a55475b1""",10.0,,,,,,,6.0,14548.0,,"""2019-01-28""",,,,,3.0,5.0
405,,,,"""1974-03-01""",,"""1974-03-01""",,0.0,0.0,0.0,1.0,0.0,"""a55475b1""","""a55475b1""","""a55475b1""",0.0,,,,,,,,,,,,,,,,,4.0,"""a55475b1""","""a55475b1""",1.0,,,,,,,6.0,10498.24,,"""2019-01-21""",,,,,2.0,0.0
409,,,,"""1993-06-01""",,"""1993-06-01""",,2.0,3.0,0.0,3.0,1.0,"""a55475b1""","""717ddd49""","""a55475b1""",4.0,,,,,,,,,,,,,,,,,1.0,"""a7fcb6e5""","""a55475b1""",3.0,,,,,,,7.0,6344.8804,,"""2019-01-21""",,,,,0.0,4.0


statistic,case_id,assignmentdate_238D,assignmentdate_4527235D,assignmentdate_4955616D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,dateofbirth_342D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,for3years_128L,for3years_504L,for3years_584L,formonth_118L,formonth_206L,formonth_535L,forquarter_1017L,forquarter_462L,forquarter_634L,fortoday_1092L,forweek_1077L,forweek_528L,forweek_601L,foryear_618L,foryear_818L,foryear_850L,fourthquarter_440L,maritalst_385M,maritalst_893M,numberofqueries_373L,pmtaverage_3A,pmtaverage_4527227A,pmtaverage_4955615A,pmtcount_4527229L,pmtcount_4955617L,pmtcount_693L,pmtscount_423L,pmtssum_45A,requesttype_4525192L,responsedate_1012D,responsedate_4527233D,responsedate_4917613D,riskassesment_302T,riskassesment_940T,secondquarter_766L,thirdquarter_1082L
str,f64,str,str,str,str,f64,str,str,f64,f64,f64,f64,f64,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,str,f64,f64,f64
"""count""",1500476.0,"""136996""","""114978""","""71633""","""607871""",157329.0,"""1385691""","""36500""",1385691.0,1385691.0,1385691.0,1385691.0,1385691.0,"""1500476""","""1500476""","""1500476""",1385691.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,36514.0,1385691.0,"""1500476""","""1500476""",1385691.0,143589.0,114978.0,71845.0,114978.0,71845.0,146406.0,572638.0,572638.0,"""673264""","""720000""","""660327""","""224912""","""53559""",53560.0,1385691.0,1385691.0
"""null_count""",0.0,"""1363480""","""1385498""","""1428843""","""892605""",1343147.0,"""114785""","""1463976""",114785.0,114785.0,114785.0,114785.0,114785.0,"""0""","""0""","""0""",114785.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,1463962.0,114785.0,"""0""","""0""",114785.0,1356887.0,1385498.0,1428631.0,1385498.0,1428631.0,1354070.0,927838.0,927838.0,"""827212""","""780476""","""840149""","""1275564""","""1446917""",1446916.0,114785.0,114785.0
"""mean""",1284031.7228,,,,,641604.4177,,,1.6077,2.3887,0.5177,4.7771,1.2114,,,,2.8606,0.0001,4.3823,0.0077,0.0,0.0004,0.2368,0.001,0.0,0.6139,0.0369,0.0001,0.0901,0.0,0.0001,0.0021,2.4207,2.8512,,,4.7771,9303.1717,10033.5561,17651.7325,6.598,13.0611,5.715,5.8393,13199.936,,,,,,0.226,2.6885,2.9183
"""std""",716088.1235,,,,,980327.2971,,,2.083,2.8911,0.8992,5.1689,1.6559,,,,3.611,0.0091,5.8155,0.0911,0.0,0.0189,0.5355,0.0335,0.0,1.15,0.1892,0.0074,0.2913,0.0,0.0074,0.049,3.5525,3.4317,,,5.1689,5562.387,5455.8436,6871.6423,2.189,1.8552,1.7581,4.1483,18117.2183,,,,,,0.9762,3.3245,3.4239
"""min""",357.0,"""1974-05-30""","""2019-09-13""","""1984-12-05""","""1943-03-01""",0.0,"""1900-01-01""","""1915-01-01""",0.0,0.0,0.0,0.0,0.0,"""2fc785b2""","""39a0853f""","""6b2ae0fa""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""3439d993""","""1a19667c""",0.0,0.0,4.2,4.4,1.0,1.0,0.0,0.0,0.0,"""DEDUCTION_6""","""2019-01-02""","""2019-09-13""","""2020-03-26""","""1% - 1%""",-3.6704,0.0,0.0
"""25%""",768509.0,,,,,78531.95,,,0.0,0.0,0.0,1.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0,6590.6,7192.0,13664.601,6.0,12.0,6.0,3.0,3156.4001,,,,,,-0.228,0.0,0.0
"""50%""",1361879.0,,,,,307282.4,,,1.0,2.0,0.0,3.0,1.0,,,,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,,,3.0,7305.9,7553.0,15765.2,6.0,14.0,6.0,6.0,8392.0,,,,,,0.3718,2.0,2.0
"""75%""",1737010.0,,,,,802114.08,,,2.0,3.0,1.0,7.0,2.0,,,,4.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,,,7.0,13023.9,13464.4,21840.0,6.0,14.0,6.0,7.0,16992.0,,,,,,0.9717,4.0,4.0
"""max""",2703454.0,"""2019-10-22""","""2020-10-19""","""2020-09-22""","""1998-09-01""",31296759.11,"""2020-01-01""","""2015-09-01""",109.0,110.0,22.0,115.0,41.0,"""a55475b1""","""c8e1a1d0""","""c8e1a1d0""",76.0,1.0,57.0,2.0,0.0,1.0,11.0,2.0,0.0,19.0,2.0,1.0,4.0,0.0,1.0,2.0,41.0,66.0,"""ecd83604""","""ecd83604""",115.0,145257.4,205848.61,99085.4,15.0,16.0,66.0,121.0,476843.4,"""SOCIAL_6""","""2019-10-22""","""2020-10-19""","""2020-10-19""","""8% - 11%""",2.1191,109.0,62.0


assignmentdate_238D: Tax authority data - date of assignment.
assignmentdate_4527235D: Tax authority data - Date of assignment.
assignmentdate_4955616D: Tax authority assignment date.
birthdate_574D: Client's date of birth (credit bureau data).
contractssum_5085716L: Total sum of values of contracts retrieved from external credit bureau.
dateofbirth_337D: Client's date of birth.
dateofbirth_342D: Client's date of birth.
days120_123L: Number of credit bureau queries for the last 120 days.
days180_256L: Number of credit bureau queries for last 180 days.
days30_165L: Number of credit bureau queries for the last 30 days.
days360_512L: Number of Credit Bureau queries for last 360 days.
days90_310L: Number of credit bureau queries for the last 90 days.
description_5085714M: Categorization of clients by credit bureau.
education_1103M: Level of education of the client provided by external source.
education_88M: Education level of the client.
firstquarter_103L: Number of results obtained from c

Различная информация по case_id из внешних источников, например:
 - Дата рождения
 - Количество запросов в кредитное бюро за последние N дней
 - Сумма налоговых вычетов для клиента
 - Оценка кредитоспособности клиента

## applprev_1
Properties: depth=1, internal data source

In [20]:
# Load and preview the first applprev_1 file.
train_applprev_1_0 = print_sample_info(
    file_dir=TRAIN_DIR,
    file_name="train_applprev_1_0.parquet",
    columns_definitions=feature_definitions,
)

case_id,actualdpd_943P,annuity_853A,approvaldate_319D,byoccupationinc_3656910L,cancelreason_3545846M,childnum_21L,creationdate_885D,credacc_actualbalance_314A,credacc_credlmt_575A,credacc_maxhisbal_375A,credacc_minhisbal_90A,credacc_status_367L,credacc_transactions_402L,credamount_590A,credtype_587L,currdebt_94A,dateactivated_425D,district_544M,downpmt_134A,dtlastpmt_581D,dtlastpmtallstes_3545839D,education_1138M,employedfrom_700D,familystate_726L,firstnonzeroinstldate_307D,inittransactioncode_279L,isbidproduct_390L,isdebitcard_527L,mainoccupationinc_437A,maxdpdtolerance_577P,num_group1,outstandingdebt_522A,pmtnum_8L,postype_4733339M,profession_152M,rejectreason_755M,rejectreasonclient_4145042M,revolvingaccount_394A,status_219L,tenor_203L
i64,f64,f64,str,f64,str,f64,str,f64,f64,f64,f64,str,f64,f64,str,f64,str,str,f64,str,str,str,str,str,str,str,bool,bool,f64,f64,i64,f64,f64,str,str,str,str,f64,str,f64
2,0.0,640.2,,,"""a55475b1""",0.0,"""2013-04-03""",,0.0,,,,,10000.0,"""CAL""",,,"""P136_108_173""",0.0,,,"""P97_36_170""","""2010-02-15""","""SINGLE""","""2013-05-04""","""CASH""",False,,8200.0,,0,,24.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""D""",24.0
2,0.0,1682.4,,,"""a55475b1""",0.0,"""2013-04-03""",,0.0,,,,,16000.0,"""CAL""",,,"""P136_108_173""",0.0,,,"""P97_36_170""","""2010-02-15""","""SINGLE""","""2013-05-04""","""CASH""",False,,8200.0,,1,,12.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""D""",12.0
3,0.0,6140.0,,,"""P94_109_143""",,"""2019-01-07""",,0.0,,,,,59999.8,"""CAL""",,,"""P131_33_167""",0.0,,,"""P97_36_170""","""2018-05-15""","""MARRIED""","""2019-02-07""","""CASH""",False,,11000.0,,0,,12.0,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",,"""D""",12.0
4,0.0,2556.6,,,"""P24_27_36""",,"""2019-01-08""",,0.0,,,,,40000.0,"""CAL""",,,"""P194_82_174""",0.0,,,"""a55475b1""",,,"""2019-02-08""","""CASH""",False,,16000.0,,0,,24.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""T""",24.0
5,0.0,,,,"""P85_114_140""",,"""2019-01-16""",,,,,,,,,,,"""P54_133_26""",,,,"""a55475b1""",,,,,False,,62000.0,,0,,,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""T""",


statistic,case_id,actualdpd_943P,annuity_853A,approvaldate_319D,byoccupationinc_3656910L,cancelreason_3545846M,childnum_21L,creationdate_885D,credacc_actualbalance_314A,credacc_credlmt_575A,credacc_maxhisbal_375A,credacc_minhisbal_90A,credacc_status_367L,credacc_transactions_402L,credamount_590A,credtype_587L,currdebt_94A,dateactivated_425D,district_544M,downpmt_134A,dtlastpmt_581D,dtlastpmtallstes_3545839D,education_1138M,employedfrom_700D,familystate_726L,firstnonzeroinstldate_307D,inittransactioncode_279L,isbidproduct_390L,isdebitcard_527L,mainoccupationinc_437A,maxdpdtolerance_577P,num_group1,outstandingdebt_522A,pmtnum_8L,postype_4733339M,profession_152M,rejectreason_755M,rejectreasonclient_4145042M,revolvingaccount_394A,status_219L,tenor_203L
str,f64,f64,f64,str,f64,str,f64,str,f64,f64,f64,f64,str,f64,f64,str,f64,str,str,f64,str,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,f64,str,f64
"""count""",3887684.0,3885450.0,3731833.0,"""2121663""",991660.0,"""3887684""",1933791.0,"""3887649""",168178.0,3768614.0,168178.0,168178.0,"""168178""",168178.0,3764355.0,"""3764355""",2617307.0,"""2042982""","""3887684""",3764355.0,"""1027309""","""1453529""","""3887684""","""1706815""","""2642483""","""3522509""","""3764355""",3887649.0,250134.0,3851072.0,2070306.0,3887684.0,2609762.0,3574851.0,"""3887684""","""3887684""","""3887684""","""3887684""",156651.0,"""3887649""",3574851.0
"""null_count""",0.0,2234.0,155851.0,"""1766021""",2896024.0,"""0""",1953893.0,"""35""",3719506.0,119070.0,3719506.0,3719506.0,"""3719506""",3719506.0,123329.0,"""123329""",1270377.0,"""1844702""","""0""",123329.0,"""2860375""","""2434155""","""0""","""2180869""","""1245201""","""365175""","""123329""",35.0,3637550.0,36612.0,1817378.0,0.0,1277922.0,312833.0,"""0""","""0""","""0""","""0""",3731033.0,"""35""",312833.0
"""mean""",1397916.1846,0.0106,3413.1665,,19796.484,,0.8435,,20269.5803,3254.6766,-3288.8872,-6554.7842,,0.5221,38657.8551,,5229.1007,,,457.8899,,,,,,,,0.0563,0.1496,40029.6938,13.2658,3.9103,6994.5878,15.7821,,,,,740354832.4212,,15.7821
"""std""",760159.4198,3.7543,2828.269,,30687.6525,,1.2094,,26002.7817,14061.3495,28086.1133,16888.8624,,2.9517,37544.3362,,19278.4226,,,2697.2258,,,,,,,,,,31396.3667,134.8893,4.1014,29162.4843,10.4621,,,,,54986858.0278,,10.4621
"""min""",2.0,0.0,0.0,"""2005-12-31""",0.0,"""P107_145_100""",0.0,"""2005-12-31""",-114086.0,0.0,-196108.17,-206808.17,"""AC""",0.0,0.0,"""CAL""",0.0,"""2006-01-01""","""P0_149_171""",0.0,"""2008-11-13""","""2008-08-20""","""P106_81_188""","""1961-09-15""","""DIVORCED""","""2006-01-31""","""CASH""",0.0,0.0,0.0,0.0,0.0,0.0,3.0,"""P140_48_169""","""P0_102_76""","""P121_60_164""","""P129_162_80""",540342400.0,"""A""",3.0
"""25%""",1251506.0,0.0,1710.6,,1.0,,0.0,,3.03,0.0,0.0,-1042.4,,0.0,13998.0,,0.0,,,0.0,,,,,,,,,,18000.0,0.0,1.0,0.0,6.0,,,,,740688450.0,,6.0
"""50%""",1451959.0,0.0,2761.2,,5000.0,,0.0,,12586.0,0.0,0.0,0.0,,0.0,27000.0,,0.0,,,0.0,,,,,,,,,,34000.0,0.0,3.0,0.0,12.0,,,,,760248700.0,,12.0
"""75%""",1641585.0,0.0,4396.4,,30000.0,,1.0,,30446.0,0.0,0.0,0.0,,0.0,50000.0,,0.0,,,0.0,,,,,,,,,,52000.0,1.0,6.0,0.0,24.0,,,,,760725000.0,,24.0
"""max""",2651092.0,3676.0,105130.2,"""2019-12-30""",200000.0,"""a55475b1""",20.0,"""2019-12-30""",2540730.0,400000.0,7988198.5,199567.0,"""PO""",155.0,715392.0,"""REL""",507429.72,"""2019-12-30""","""a55475b1""",320400.0,"""2019-12-28""","""2019-12-30""","""a55475b1""","""2019-11-18""","""WIDOWED""","""2020-01-30""","""POS""",1.0,1.0,196000.0,4058.0,19.0,1210629.1,62.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",780865400.0,"""T""",62.0


actualdpd_943P: Days Past Due (DPD) of previous contract (actual).
annuity_853A: Monthly annuity for previous applications.
approvaldate_319D: Approval Date of Previous Application
byoccupationinc_3656910L: Applicant's income from previous applications.
cancelreason_3545846M: Application cancellation reason.
childnum_21L: Number of children in the previous application.
creationdate_885D: Date when previous application was created.
credacc_actualbalance_314A: Actual balance on credit account.
credacc_credlmt_575A: Credit card credit limit provided for previous applications.
credacc_maxhisbal_375A: Maximal historical balance of previous credit account
credacc_minhisbal_90A: Minimum historical balance of previous credit accounts.
credacc_status_367L: Account status of previous credit applications.
credacc_transactions_402L: Number of transactions made with the previous credit account of the applicant.
credamount_590A: Loan amount or card limit of previous applications.
credtype_587L: Cred

История по предыдущим одобрениям/отказам по заявкам:
- Дни просрочки (DPD) по предыдущему контракту (фактические)
- Ежемесячный аннуитетный платеж по предыдущим заявкам
- Дата утверждения предыдущей заявки
- Предыдущий статус заявки

## other_1
Properties: depth=1, internal data source

In [22]:
# Load and preview the other_1 file.
train_other_1 = print_sample_info(
    file_dir=TRAIN_DIR,
    file_name="train_other_1.parquet",
    columns_definitions=feature_definitions,
)

case_id,amtdebitincoming_4809443A,amtdebitoutgoing_4809440A,amtdepositbalance_4809441A,amtdepositincoming_4809444A,amtdepositoutgoing_4809442A,num_group1
i64,f64,f64,f64,f64,f64,i64
43801,12466.601,12291.2,914.2,0.0,304.8,0
43991,3333.4001,3273.4001,0.0,0.0,0.0,0
44001,10000.0,10000.0,0.0,0.0,0.0,0
44053,0.0,0.0,2586.4001,0.0,88.8,0
44130,63.8,60.8,0.0,0.0,0.0,0


statistic,case_id,amtdebitincoming_4809443A,amtdebitoutgoing_4809440A,amtdepositbalance_4809441A,amtdepositincoming_4809444A,amtdepositoutgoing_4809442A,num_group1
str,f64,f64,f64,f64,f64,f64,f64
"""count""",51109.0,51109.0,51109.0,51109.0,51109.0,51109.0,51109.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",1419514.2787,7552.9017,7462.3843,9967.413,2949.3959,3586.8751,0.0
"""std""",924509.4909,34625.7058,35065.2869,89393.4214,41467.7261,48274.9364,0.0
"""min""",43801.0,0.0,0.0,-335718.0,0.0,0.0,0.0
"""25%""",242241.0,0.0,0.0,0.0,0.0,0.0,0.0
"""50%""",1811468.0,0.0,0.0,0.0,0.0,1.8,0.0
"""75%""",1916206.0,8000.0,7740.0,288.0,0.0,5.4,0.0
"""max""",2703453.0,4957852.0,5168004.5,4256314.5,4180150.5,4622917.5,0.0


amtdebitincoming_4809443A: Incoming debit card transactions amount.
amtdebitoutgoing_4809440A: Outgoing debit card transactions amount.
amtdepositbalance_4809441A: Deposit balance of client.
amtdepositincoming_4809444A: Amount of incoming deposits to client's account.
amtdepositoutgoing_4809442A: Amount of outgoing deposits from client's account.


Информация по транзакциям:
- Сумма входящих/исходящих транзакций по дебетовой карте
- Депозитный баланс клиента
- Сумма депозитов, поступающих на счет клиента / исходящих со счета клиента

## tax_registry
tax_registry_a_1
Properties: depth=1, external data source, Tax registry provider A

tax_registry_b_1
Properties: depth=1, external data source, Tax registry provider B
    
tax_registry_c_1
Properties: depth=1, external data source, Tax registry provider C

In [23]:
# Preview the tax registry table from provider A: the helper prints the first
# rows, summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_tax_registry_a_1 = print_sample_info(
    TRAIN_DIR,
    "train_tax_registry_a_1.parquet",
    feature_definitions,
)

case_id,amount_4527230A,name_4527232M,num_group1,recorddate_4527225D
i64,f64,str,i64,str
28631,711.0,"""f980a1ea""",3,"""2019-09-13"""
28631,1946.0,"""f980a1ea""",2,"""2019-09-13"""
28631,2600.0,"""f980a1ea""",1,"""2019-09-13"""
28631,3616.4001,"""f980a1ea""",0,"""2019-09-13"""
28632,400.0,"""5f9b74f5""",6,"""2019-09-13"""


statistic,case_id,amount_4527230A,name_4527232M,num_group1,recorddate_4527225D
str,f64,f64,str,f64,str
"""count""",3275770.0,3275770.0,"""3275770""",3275770.0,"""3275770"""
"""null_count""",0.0,0.0,"""0""",0.0,"""0"""
"""mean""",1341396.6531,2360.4214,,4.0956,
"""std""",649263.4513,3254.8712,,3.9333,
"""min""",28631.0,0.0,"""000025c1""",0.0,"""2019-09-13"""
"""25%""",877291.0,850.0,,1.0,
"""50%""",1571545.0,1400.0,,3.0,
"""75%""",1728466.0,2778.0,,5.0,
"""max""",2702290.0,87115.6,"""ffffa404""",98.0,"""2020-10-19"""


amount_4527230A: Tax deductions amount tracked by the government registry.
name_4527232M: Name of employer.
recorddate_4527225D: Date of tax deduction record.


In [24]:
# Preview the tax registry table from provider B: the helper prints the first
# rows, summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_tax_registry_b_1 = print_sample_info(
    TRAIN_DIR,
    "train_tax_registry_b_1.parquet",
    feature_definitions,
)

case_id,amount_4917619A,deductiondate_4917603D,name_4917606M,num_group1
i64,f64,str,str,i64
49435,6885.0,"""2019-10-16""","""6b730375""",0
49435,6885.0,"""2019-10-16""","""6b730375""",1
49435,6885.0,"""2019-10-16""","""6b730375""",2
49435,6885.0,"""2019-10-16""","""6b730375""",3
49435,6885.0,"""2019-10-16""","""6b730375""",4


statistic,case_id,amount_4917619A,deductiondate_4917603D,name_4917606M,num_group1
str,f64,f64,str,str,f64
"""count""",1107933.0,1107933.0,"""1107933""","""1107933""",1107933.0
"""null_count""",0.0,0.0,"""0""","""0""",0.0
"""mean""",1469876.1218,20104.9657,,,4.1447
"""std""",705344.7771,25201.7451,,,4.108
"""min""",49435.0,0.0,"""2019-09-27""","""00011206""",0.0
"""25%""",997668.0,6885.0,,,1.0
"""50%""",1854645.0,13130.2,,,3.0
"""75%""",1907416.0,24300.0,,,5.0
"""max""",2703452.0,344250.0,"""2020-10-16""","""ffff44de""",100.0


amount_4917619A: Tax deductions amount tracked by the government registry.
deductiondate_4917603D: Tax deduction date.
name_4917606M: Name of employer.


In [25]:
# Preview the tax registry table from provider C: the helper prints the first
# rows, summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_tax_registry_c_1 = print_sample_info(
    TRAIN_DIR,
    "train_tax_registry_c_1.parquet",
    feature_definitions,
)

case_id,employername_160M,num_group1,pmtamount_36A,processingdate_168D
i64,str,i64,f64,str
357,"""c91b12ff""",0,1200.0,"""2019-01-04"""
357,"""c91b12ff""",1,1200.0,"""2018-11-28"""
357,"""c91b12ff""",2,972.8,"""2018-11-01"""
357,"""c91b12ff""",3,628.6,"""2018-10-08"""
357,"""c91b12ff""",4,1200.0,"""2018-09-10"""


statistic,case_id,employername_160M,num_group1,pmtamount_36A,processingdate_168D
str,f64,str,f64,f64,str
"""count""",3343800.0,"""3343800""",3343800.0,3343800.0,"""3343800"""
"""null_count""",0.0,"""0""",0.0,0.0,"""0"""
"""mean""",1161306.3801,,3.8931,2260.5374,
"""std""",657994.9559,,3.7368,3161.2941,
"""min""",357.0,"""000025c1""",0.0,0.0,"""2018-07-11"""
"""25%""",700623.0,,1.0,745.46,
"""50%""",1301411.0,,3.0,1365.454,
"""75%""",1471673.0,,5.0,2632.2,
"""max""",2629815.0,"""ffffaf43""",120.0,87115.6,"""2019-10-22"""


employername_160M: Employer's name.
pmtamount_36A: Tax deductions amount for credit bureau payments.
processingdate_168D: Date when the tax deduction is processed.


Информация по налоговым вычетам от разных провайдеров

## credit_bureau
credit_bureau_a_1
Properties: depth=1, external data source, Credit bureau provider A

credit_bureau_b_1
Properties: depth=1, external data source, Credit bureau provider B

In [26]:
# Preview one file of the credit bureau provider A table (the "_0" file):
# the helper prints the first rows, summary statistics and the definitions of
# the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_credit_bureau_a_1_0 = print_sample_info(
    TRAIN_DIR,
    "train_credit_bureau_a_1_0.parquet",
    feature_definitions,
)

case_id,annualeffectiverate_199L,annualeffectiverate_63L,classificationofcontr_13M,classificationofcontr_400M,contractst_545M,contractst_964M,contractsum_5085717L,credlmt_230A,credlmt_935A,dateofcredend_289D,dateofcredend_353D,dateofcredstart_181D,dateofcredstart_739D,dateofrealrepmt_138D,debtoutstand_525A,debtoverdue_47A,description_351M,dpdmax_139P,dpdmax_757P,dpdmaxdatemonth_442T,dpdmaxdatemonth_89T,dpdmaxdateyear_596T,dpdmaxdateyear_896T,financialinstitution_382M,financialinstitution_591M,instlamount_768A,instlamount_852A,interestrate_508L,lastupdate_1112D,lastupdate_388D,monthlyinstlamount_332A,monthlyinstlamount_674A,nominalrate_281L,nominalrate_498L,num_group1,numberofcontrsvalue_258L,…,numberofoverdueinstlmax_1039L,numberofoverdueinstlmax_1151L,numberofoverdueinstlmaxdat_148D,numberofoverdueinstlmaxdat_641D,numberofoverdueinstls_725L,numberofoverdueinstls_834L,outstandingamount_354A,outstandingamount_362A,overdueamount_31A,overdueamount_659A,overdueamountmax2_14A,overdueamountmax2_398A,overdueamountmax2date_1002D,overdueamountmax2date_1142D,overdueamountmax_155A,overdueamountmax_35A,overdueamountmaxdatemonth_284T,overdueamountmaxdatemonth_365T,overdueamountmaxdateyear_2T,overdueamountmaxdateyear_994T,periodicityofpmts_1102L,periodicityofpmts_837L,prolongationcount_1120L,prolongationcount_599L,purposeofcred_426M,purposeofcred_874M,refreshdate_3813885D,residualamount_488A,residualamount_856A,subjectrole_182M,subjectrole_93M,totalamount_6A,totalamount_996A,totaldebtoverduevalue_178A,totaldebtoverduevalue_718A,totaloutstanddebtvalue_39A,totaloutstanddebtvalue_668A
i64,f64,f64,str,str,str,str,f64,f64,f64,str,str,str,str,str,f64,f64,str,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,str,str,f64,f64,f64,f64,i64,f64,…,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,f64,f64,str,str,f64,f64,f64,f64,f64,f64
388,,,"""ea6782cc""","""a55475b1""","""7241344e""","""a55475b1""",,,135806.0,"""2020-08-06""",,,"""2018-08-06""",,,,"""a55475b1""",0.0,,,8.0,2018.0,,"""a55475b1""","""P204_66_73""",8742.8,,,"""2019-01-11""",,8742.8,,,,1,,…,0.0,,,,0.0,,,,,0.0,0.0,,,,0.0,,,8.0,2018.0,,,,,,"""60c73645""","""a55475b1""",,,114325.805,"""a55475b1""","""a55475b1""",,,,,,
388,,,"""4408ff0f""","""a55475b1""","""7241344e""","""a55475b1""",,,,"""2023-06-20""",,,"""2018-06-20""",,374419.5,0.0,"""a55475b1""",0.0,,,7.0,2018.0,,"""a55475b1""","""55b002a9""",,,,"""2019-01-24""",,7811.4463,,,,0,2.0,…,0.0,,,,0.0,,,260093.7,,0.0,0.0,,,,0.0,,,7.0,2018.0,,,30.0,,,"""96a8fdfe""","""a55475b1""",,,,"""ab3c25cf""","""ab3c25cf""",,268897.62,0.0,0.0,374419.5,0.0
388,,,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,,,,,,,,,,"""a55475b1""",,,,,,,"""a55475b1""","""a55475b1""",,,,,,,,,,2,,…,,,,,,,,,,,,,,,,,,,,,,,,,"""a55475b1""","""a55475b1""","""2019-01-28""",,,"""a55475b1""","""a55475b1""",,,,,,
388,,,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,,,,,,,,,,"""a55475b1""",,,,,,,"""a55475b1""","""a55475b1""",,,,,,,,,,3,,…,,,,,,,,,,,,,,,,,,,,,,,,,"""a55475b1""","""a55475b1""","""2019-01-28""",,,"""a55475b1""","""a55475b1""",,,,,,
388,,,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,,,,,,,,,,"""a55475b1""",,,,,,,"""a55475b1""","""a55475b1""",,,,,,,,,,4,,…,,,,,,,,,,,,,,,,,,,,,,,,,"""a55475b1""","""a55475b1""","""2019-01-28""",,,"""a55475b1""","""a55475b1""",,,,,,


statistic,case_id,annualeffectiverate_199L,annualeffectiverate_63L,classificationofcontr_13M,classificationofcontr_400M,contractst_545M,contractst_964M,contractsum_5085717L,credlmt_230A,credlmt_935A,dateofcredend_289D,dateofcredend_353D,dateofcredstart_181D,dateofcredstart_739D,dateofrealrepmt_138D,debtoutstand_525A,debtoverdue_47A,description_351M,dpdmax_139P,dpdmax_757P,dpdmaxdatemonth_442T,dpdmaxdatemonth_89T,dpdmaxdateyear_596T,dpdmaxdateyear_896T,financialinstitution_382M,financialinstitution_591M,instlamount_768A,instlamount_852A,interestrate_508L,lastupdate_1112D,lastupdate_388D,monthlyinstlamount_332A,monthlyinstlamount_674A,nominalrate_281L,nominalrate_498L,num_group1,…,numberofoverdueinstlmax_1039L,numberofoverdueinstlmax_1151L,numberofoverdueinstlmaxdat_148D,numberofoverdueinstlmaxdat_641D,numberofoverdueinstls_725L,numberofoverdueinstls_834L,outstandingamount_354A,outstandingamount_362A,overdueamount_31A,overdueamount_659A,overdueamountmax2_14A,overdueamountmax2_398A,overdueamountmax2date_1002D,overdueamountmax2date_1142D,overdueamountmax_155A,overdueamountmax_35A,overdueamountmaxdatemonth_284T,overdueamountmaxdatemonth_365T,overdueamountmaxdateyear_2T,overdueamountmaxdateyear_994T,periodicityofpmts_1102L,periodicityofpmts_837L,prolongationcount_1120L,prolongationcount_599L,purposeofcred_426M,purposeofcred_874M,refreshdate_3813885D,residualamount_488A,residualamount_856A,subjectrole_182M,subjectrole_93M,totalamount_6A,totalamount_996A,totaldebtoverduevalue_178A,totaldebtoverduevalue_718A,totaloutstanddebtvalue_39A,totaloutstanddebtvalue_668A
str,f64,f64,f64,str,str,str,str,f64,f64,f64,str,str,str,str,str,f64,f64,str,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,str,str,f64,f64,f64,f64,f64,…,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,f64,f64,str,str,f64,f64,f64,f64,f64,f64
"""count""",4108212.0,36981.0,72337.0,"""4108212""","""4108212""","""4108212""","""4108212""",0.0,64294.0,298053.0,"""658397""","""456718""","""456721""","""658397""","""453846""",334815.0,334815.0,"""4108212""",655589.0,440293.0,440293.0,655589.0,655589.0,440293.0,"""4108212""","""4108212""",295139.0,41056.0,5237.0,"""658397""","""456706""",655289.0,420877.0,244636.0,138408.0,4108212.0,…,658397.0,456721.0,"""136918""","""167959""",655528.0,456198.0,392677.0,360331.0,456313.0,655531.0,658397.0,456721.0,"""135457""","""169610""",658397.0,440768.0,440768.0,658397.0,658397.0,440768.0,359666.0,352760.0,26455.0,5992.0,"""4108212""","""4108212""","""2678254""",63760.0,295139.0,"""4108212""","""4108212""",392791.0,360344.0,337574.0,298602.0,337574.0,298602.0
"""null_count""",0.0,4071231.0,4035875.0,"""0""","""0""","""0""","""0""",4108212.0,4043918.0,3810159.0,"""3449815""","""3651494""","""3651491""","""3449815""","""3654366""",3773397.0,3773397.0,"""0""",3452623.0,3667919.0,3667919.0,3452623.0,3452623.0,3667919.0,"""0""","""0""",3813073.0,4067156.0,4102975.0,"""3449815""","""3651506""",3452923.0,3687335.0,3863576.0,3969804.0,0.0,…,3449815.0,3651491.0,"""3971294""","""3940253""",3452684.0,3652014.0,3715535.0,3747881.0,3651899.0,3452681.0,3449815.0,3651491.0,"""3972755""","""3938602""",3449815.0,3667444.0,3667444.0,3449815.0,3449815.0,3667444.0,3748546.0,3755452.0,4081757.0,4102220.0,"""0""","""0""","""1429958""",4044452.0,3813073.0,"""0""","""0""",3715421.0,3747868.0,3770638.0,3809610.0,3770638.0,3809610.0
"""mean""",1170392.4104,612.0496,84.0508,,,,,,35626.1477,99771.4078,,,,,,198169.0396,1819.273,,12.8633,44.0582,6.498,6.233,2017.9888,2014.0585,,,3259.7674,575.6078,66.4944,,,5226.8227,5896.0027,33.5355,65.4153,6.2505,…,16.5461,51.786,,,8.684,0.0537,4.066,160062.0501,17.7984,932.6236,3275.3281,5635.9606,,,2432.9062,4512.7661,6.5367,6.2602,2017.9765,2014.0109,30.1158,30.317,0.5802,1.0272,,,,0.84,29451.4513,,,79191.4281,205977.3162,1807.5123,104.7931,196536.969,107.9046
"""std""",715611.9713,5408.4986,1860.2024,,,,,,551563.3402,4577044.5901,,,,,,3552509.0211,109560.5174,,127.5709,321.3554,3.4425,3.4072,0.7392,3.7363,,,5205.8892,1800.8025,830.6237,,,42058.7726,107159.2225,153.4283,300.563,5.8532,…,164.5328,553.7247,,,124.9429,4.5239,815.2671,3052952.9913,1847.8761,74356.7359,116946.836,169765.1202,,,91318.3617,91143.6339,3.4675,3.4145,0.7359,3.7119,5.4296,6.8295,2.2572,3.3389,,,,212.0949,97608.6474,,,599747.9128,2558844.2764,108044.733,5031.547,3379200.5433,32373.8368
"""min""",388.0,0.0,0.0,"""00135d9c""","""00135d9c""","""02699f0c""","""02699f0c""",,0.0,0.0,"""2004-05-29""","""1900-01-15""","""1999-09-15""","""1999-09-15""","""1900-01-15""",0.0,0.0,"""0349102c""",0.0,-9.0,1.0,1.0,2015.0,2003.0,"""001f4b0b""","""04d45dcc""",0.0,0.0,0.0,"""2017-09-22""","""2004-08-30""",0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,"""2004-04-12""","""2007-07-15""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""2004-10-01""","""2007-04-30""",0.0,0.0,1.0,1.0,2015.0,2003.0,1.0,1.0,0.0,0.0,"""28bfa260""","""27b6de28""","""2018-11-02""",0.0,0.0,"""0c42a10e""","""0c42a10e""",0.0,5.0,0.0,0.0,0.0,0.0
"""25%""",662301.0,9.13,0.55,,,,,,0.0,0.0,,,,,,12336.4,0.0,,0.0,0.0,3.0,3.0,2017.0,2012.0,,,0.0,0.0,18.0,,,1165.2001,0.0,18.1,18.2,3.0,…,0.0,0.0,,,0.0,0.0,0.0,13903.737,0.0,0.0,0.0,0.0,,,0.0,0.0,4.0,3.0,2017.0,2011.0,30.0,30.0,0.0,0.0,,,,0.0,0.0,,,12639.601,28213.201,0.0,0.0,14662.644,0.0
"""50%""",1290768.0,35.7,22.85,,,,,,12789.8,20000.0,,,,,,54350.105,0.0,,0.0,0.0,7.0,6.0,2018.0,2015.0,,,1131.0,0.0,20.0,,,3167.2,0.0,40.0,43.3,6.0,…,0.0,0.0,,,0.0,0.0,0.0,37362.926,0.0,0.0,0.0,0.0,,,0.0,0.0,7.0,6.0,2018.0,2015.0,30.0,30.0,0.0,0.0,,,,0.0,5040.0,,,27547.201,61076.0,0.0,0.0,57494.324,0.0
"""75%""",1382857.0,96.3,35.88,,,,,,40000.0,53865.8,,,,,,159303.12,0.0,,0.0,1.0,9.0,9.0,2019.0,2017.0,,,4591.6,353.376,24.0,,,6297.2,3292.8,43.3,45.0,9.0,…,1.0,1.0,,,0.0,0.0,0.0,108527.75,0.0,0.0,5.8,695.4,,,0.0,464.4,10.0,9.0,2019.0,2017.0,30.0,30.0,0.0,1.0,,,,0.0,28316.0,,,60000.0,154000.0,0.0,0.0,161289.5,0.0
"""max""",2588481.0,73000.0,73000.0,"""ea6782cc""","""ffee884a""","""fd624e63""","""fec76166""",,100000000.0,1848000100.0,"""2098-01-15""","""2098-01-15""","""2019-07-05""","""2019-07-06""","""2032-03-23""",1688617600.0,49005736.0,"""f8e51f8d""",4877.0,117000.0,12.0,12.0,2019.0,2019.0,"""ffc154eb""","""fd828f59""",376510.5,139346.98,32917.0,"""2019-07-08""","""2019-07-08""",20000000.0,59077588.0,30341.1,30341.1,280.0,…,61133.0,260000.0,"""2019-07-03""","""2019-07-08""",5419.0,868.0,351940.3,1688617600.0,421656.6,49005736.0,49261336.0,60940892.0,"""2019-07-03""","""2019-07-08""",49261336.0,38038588.0,12.0,12.0,2019.0,2019.0,360.0,360.0,96.0,63.0,"""e8f3b178""","""ee7d1eb8""","""2019-07-08""",53555.54,40000000.0,"""daf49a8a""","""daf49a8a""",159574000.0,1391240100.0,49005736.0,433225.0,1688617600.0,16952312.0


annualeffectiverate_199L: Interest rate of the closed contracts.
annualeffectiverate_63L: Interest rate for the active contracts.
classificationofcontr_13M: Classificiation of the active contract.
classificationofcontr_400M: Classificiation of the closed contract.
contractst_545M: Contract status.
contractst_964M: Contract status of terminated credit contract.
contractsum_5085717L: Sum of other contract values.
credlmt_230A: Credit limit of the closed credit contracts from credit bureau.
credlmt_935A: Credit limit for active loan.
dateofcredend_289D: End date of an active credit contract.
dateofcredend_353D: End date of a closed credit contract.
dateofcredstart_181D: Date when the credit contract was closed.
dateofcredstart_739D: Start date of a closed credit contract.
dateofrealrepmt_138D: Date of credit's closure (contract termination date).
debtoutstand_525A: Outstanding amount of existing contract.
debtoverdue_47A: Amount that is currently past due on a client's existing credit con

In [27]:
# Preview the credit bureau provider B table: the helper prints the first
# rows, summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_credit_bureau_b_1 = print_sample_info(
    TRAIN_DIR,
    "train_credit_bureau_b_1.parquet",
    feature_definitions,
)

case_id,amount_1115A,classificationofcontr_1114M,contractdate_551D,contractmaturitydate_151D,contractst_516M,contracttype_653M,credlmt_1052A,credlmt_228A,credlmt_3940954A,credor_3940957M,credquantity_1099L,credquantity_984L,debtpastduevalue_732A,debtvalue_227A,dpd_550P,dpd_733P,dpdmax_851P,dpdmaxdatemonth_804T,dpdmaxdateyear_742T,installmentamount_644A,installmentamount_833A,instlamount_892A,interesteffectiverate_369L,interestrateyearly_538L,lastupdate_260D,maxdebtpduevalodued_3940955A,num_group1,numberofinstls_810L,overdueamountmax_950A,overdueamountmaxdatemonth_494T,overdueamountmaxdateyear_432T,periodicityofpmts_997L,periodicityofpmts_997M,pmtdaysoverdue_1135P,pmtmethod_731M,pmtnumpending_403L,purposeofcred_722M,residualamount_1093A,residualamount_127A,residualamount_3940956A,subjectrole_326M,subjectrole_43M,totalamount_503A,totalamount_881A
i64,f64,str,str,str,str,str,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,i64,f64,f64,f64,f64,str,str,f64,str,f64,str,f64,f64,f64,str,str,f64,f64
467,78000.0,"""ea6782cc""","""2016-10-25""","""2019-10-25""","""7241344e""","""4257cbed""",,,,"""c5a72b57""",,,0.0,26571.969,,,0.0,11.0,2016.0,,,2898.76,,,"""2019-01-10""",0.0,2,36.0,0.0,11.0,2016.0,,"""a0b598e4""",0.0,"""e914c86c""",10.0,"""96a8fdfe""",,,,"""a55475b1""","""a55475b1""",,
467,,"""ea6782cc""","""2011-06-15""","""2031-06-13""","""7241344e""","""724be82a""",3000000.0,10000.0,3000000.0,"""P164_34_168""",2.0,1.0,,,0.0,0.0,,,,0.0,0.0,,,,"""2019-01-20""",,0,,,,,,"""a55475b1""",,"""a55475b1""",,"""96a8fdfe""",0.0,0.0,,"""fa4f56f1""","""ab3c25cf""",3000000.0,10000.0
467,,"""ea6782cc""","""2019-01-04""","""2021-08-04""","""7241344e""","""724be82a""",,,130365.0,"""P164_34_168""",1.0,2.0,,,0.0,0.0,,,,0.0,26571.969,,,,"""2019-01-20""",,1,,,,,,"""a55475b1""",,"""a55475b1""",,"""96a8fdfe""",,,,"""ab3c25cf""","""ab3c25cf""",78000.0,960000.0
1445,12000.0,"""ea6782cc""","""2018-12-31""","""2019-01-29""","""7241344e""","""4257cbed""",,,,"""0aebc0bb""",,,0.0,19066.64,,,0.0,1.0,2019.0,,,19571.412,,,"""2019-01-27""",0.0,2,1.0,0.0,1.0,2019.0,,"""d479a207""",0.0,"""dbcbe8f8""",1.0,"""96a8fdfe""",,,,"""a55475b1""","""a55475b1""",,
1445,31400.0,"""01f63ac8""","""2018-07-25""","""2019-12-25""","""7241344e""","""4257cbed""",,,,"""50babcd4""",,,0.0,23390.16,,,0.0,8.0,2018.0,,,2124.142,,,"""2019-01-28""",0.0,3,17.0,0.0,8.0,2018.0,,"""a0b598e4""",0.0,"""dbcbe8f8""",12.0,"""60c73645""",,,,"""a55475b1""","""a55475b1""",,


statistic,case_id,amount_1115A,classificationofcontr_1114M,contractdate_551D,contractmaturitydate_151D,contractst_516M,contracttype_653M,credlmt_1052A,credlmt_228A,credlmt_3940954A,credor_3940957M,credquantity_1099L,credquantity_984L,debtpastduevalue_732A,debtvalue_227A,dpd_550P,dpd_733P,dpdmax_851P,dpdmaxdatemonth_804T,dpdmaxdateyear_742T,installmentamount_644A,installmentamount_833A,instlamount_892A,interesteffectiverate_369L,interestrateyearly_538L,lastupdate_260D,maxdebtpduevalodued_3940955A,num_group1,numberofinstls_810L,overdueamountmax_950A,overdueamountmaxdatemonth_494T,overdueamountmaxdateyear_432T,periodicityofpmts_997L,periodicityofpmts_997M,pmtdaysoverdue_1135P,pmtmethod_731M,pmtnumpending_403L,purposeofcred_722M,residualamount_1093A,residualamount_127A,residualamount_3940956A,subjectrole_326M,subjectrole_43M,totalamount_503A,totalamount_881A
str,f64,f64,str,str,str,str,str,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,str,str,f64,str,f64,str,f64,f64,f64,str,str,f64,f64
"""count""",85791.0,43681.0,"""85791""","""81899""","""81712""","""85791""","""85791""",27581.0,16130.0,38218.0,"""85791""",53018.0,46228.0,81217.0,43681.0,53018.0,46228.0,81224.0,81224.0,81224.0,46228.0,53018.0,43493.0,9506.0,28825.0,"""81899""",81224.0,85791.0,43493.0,81224.0,81224.0,81224.0,"""2027""","""82519""",81217.0,"""85791""",43680.0,"""85791""",16125.0,27581.0,37519.0,"""85791""","""85791""",53018.0,46228.0
"""null_count""",0.0,42110.0,"""0""","""3892""","""4079""","""0""","""0""",58210.0,69661.0,47573.0,"""0""",32773.0,39563.0,4574.0,42110.0,32773.0,39563.0,4567.0,4567.0,4567.0,39563.0,32773.0,42298.0,76285.0,56966.0,"""3892""",4567.0,0.0,42298.0,4567.0,4567.0,4567.0,"""83764""","""3272""",4574.0,"""0""",42111.0,"""0""",69666.0,58210.0,48272.0,"""0""","""0""",32773.0,39563.0
"""mean""",1218998.1157,214110.4892,,,,,,178935.6498,52317.5229,130360.3435,,1.5447,4.5652,3791.8784,165118.3306,25696.7525,372.2601,35378.9608,6.6103,2018.2262,347.658,156888.8613,9814.8327,504.6124,52.4933,,15.0741,1.0117,30.4963,18.2516,6.6307,2018.2908,,,71.4786,,20.2647,,0.02,57942.2991,43011.0673,,,257008.9193,293763.1638
"""std""",686332.0161,691019.6024,,,,,,5274021.5533,128082.4582,2570305.4879,,0.8642,5.1704,158238.0605,550497.1326,968643.4449,71871.3675,877296.7937,3.4856,1.7835,54110.2623,626662.6669,211973.8004,5337.9281,636.0472,,1036.1068,1.2343,36.0601,1036.7725,3.4948,1.7414,,,4664.5111,,29.0174,,2.5357,110621.4657,93146.1953,,,3876513.3248,1202831.3119
"""min""",467.0,0.2,"""00135d9c""","""2000-01-15""","""2003-04-11""","""04bf6e27""","""07162370""",0.0,0.0,0.0,"""00a59564""",1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1900.0,0.0,0.0,0.0,-1.1,0.0,"""2015-03-30""",0.0,0.0,0.0,0.0,1.0,1900.0,"""В день истечен…","""0a59e5b4""",0.0,"""10984579""",0.0,"""164ee705""",0.0,0.0,0.0,"""15f04f45""","""15f04f45""",0.0,0.0
"""25%""",727216.0,25998.0,,,,,,0.0,0.0,0.0,,1.0,1.0,0.0,12946.0,0.0,0.0,0.0,4.0,2018.0,0.0,6115.6,2372.6,5.35,7.0,,0.0,0.0,12.0,0.0,4.0,2018.0,,,0.0,,5.0,,0.0,0.0,0.0,,,20000.0,25726.0
"""50%""",1413976.0,60000.0,,,,,,36184.0,22600.0,20000.0,,1.0,3.0,0.0,35893.414,0.0,0.0,0.0,7.0,2019.0,0.0,36265.227,4228.2,23.55,39.0,,0.0,1.0,22.0,0.0,7.0,2019.0,,,0.0,,11.0,,0.0,14461.327,7483.786,,,68382.0,84441.2
"""75%""",1778253.0,160000.0,,,,,,121778.0,60000.0,78000.0,,2.0,6.0,0.0,110801.625,0.0,0.0,7073.0,10.0,2019.0,0.0,128582.4,7804.8003,40.59,42.0,,0.2,2.0,36.0,0.4,10.0,2019.0,,,0.0,,24.0,,0.0,70925.4,41323.73,,,200000.0,276099.0
"""max""",2703436.0,54833332.0,"""ea6782cc""","""2020-10-14""","""2045-08-17""","""fd624e63""","""f4e17141""",796800000.0,4420000.0,300000000.0,"""ff11387f""",16.0,146.0,41138710.0,41619050.0,207823776.0,15443942.0,185124192.0,12.0,2020.0,11418603.0,69658536.0,41138710.0,73000.0,46334.1,"""2020-10-19""",147470.61,20.0,358.0,147470.61,12.0,2020.0,"""Полугодовые пл…","""f50a4e2c""",663618.0,"""f6e26148""",300.0,"""e8f3b178""",322.0,2187568.2,2022909.2,"""fa4f56f1""","""fa4f56f1""",796800000.0,139080000.0


amount_1115A: Credit amount of the active contract provided by the credit bureau.
classificationofcontr_1114M: Classificiation of the active contract.
contractdate_551D: Contract date of the active contract
contractmaturitydate_151D: End date of active contract.
contractst_516M: Contract status.
contracttype_653M: Contract Type
credlmt_1052A: Credit limit of an active loan.
credlmt_228A: Credit limit for closed loans.
credlmt_3940954A: Credit limit for active loan.
credor_3940957M: Creditor's name
credquantity_1099L: Number of credits in credit bureau
credquantity_984L: Number of closed credits in credit bureau.
debtpastduevalue_732A: Amount of unpaid debt for existing contracts.
debtvalue_227A: Outstanding amount for existing debt contracts.
dpd_550P: The number of days past due for active loans where a guarantee has been provided.
dpd_733P: Days past due (DPD) for guaranteed loans that were terminated according to credit bureau data.
dpdmax_851P: Maximal past due days for active cont

Информация из кредитного бюро

## deposit_1
Properties: depth=1, internal data source

In [28]:
# Preview the deposit_1 training table: the helper prints the first rows,
# summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_deposit_1 = print_sample_info(
    TRAIN_DIR,
    "train_deposit_1.parquet",
    feature_definitions,
)

case_id,amount_416A,contractenddate_991D,num_group1,openingdate_313D
i64,f64,str,i64,str
225,0.0,,0,"""2016-08-16"""
331,260.374,"""2018-03-18""",0,"""2015-03-19"""
358,0.0,,0,"""2014-09-02"""
390,203.602,"""2017-09-30""",1,"""2015-10-01"""
390,223.68,,2,"""2016-06-08"""


statistic,case_id,amount_416A,contractenddate_991D,num_group1,openingdate_313D
str,f64,f64,str,f64,str
"""count""",145086.0,145086.0,"""65404""",145086.0,"""145086"""
"""null_count""",0.0,0.0,"""79682""",0.0,"""0"""
"""mean""",1466214.0495,8422.3045,,0.5225,
"""std""",886528.9589,86232.1205,,1.621,
"""min""",225.0,-40000.0,"""2002-02-27""",0.0,"""2001-11-19"""
"""25%""",660041.0,0.0,,0.0,
"""50%""",1556939.0,223.658,,0.0,
"""75%""",2530539.0,478.34,,1.0,
"""max""",2703453.0,12213286.0,"""2020-03-18""",64.0,"""2017-07-31"""


amount_416A: Deposit amount.
contractenddate_991D: End date of deposit contract.
openingdate_313D: Deposit account opening date.


Информация по депозитным договорам клиента

## person_1
Properties: depth=1, internal data source

Each credit application can have information about several persons (e.g. the client him/herself, contact references). The role describes the type of connection to the client.

In [29]:
# Preview the person_1 training table: the helper prints the first rows,
# summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_person_1 = print_sample_info(
    TRAIN_DIR,
    "train_person_1.parquet",
    feature_definitions,
)

case_id,birth_259D,birthdate_87D,childnum_185L,contaddr_district_15M,contaddr_matchlist_1032L,contaddr_smempladdr_334L,contaddr_zipcode_807M,education_927M,empl_employedfrom_271D,empl_employedtotal_800L,empl_industry_691L,empladdr_district_926M,empladdr_zipcode_114M,familystate_447L,gender_992L,housetype_905L,housingtype_772L,incometype_1044T,isreference_387L,language1_981M,mainoccupationinc_384A,maritalst_703L,num_group1,personindex_1023L,persontype_1072L,persontype_792L,registaddr_district_1083M,registaddr_zipcode_184M,relationshiptoclient_415T,relationshiptoclient_642T,remitter_829L,role_1084L,role_993L,safeguarantyflag_411L,sex_738L,type_25L
i64,str,str,f64,str,bool,bool,str,str,str,str,str,str,str,str,str,str,str,str,bool,str,f64,str,i64,f64,f64,f64,str,str,str,str,bool,str,str,bool,str,str
0,"""1986-07-01""",,,"""P88_18_84""",False,False,"""P167_100_165""","""P97_36_170""","""2017-09-15""","""MORE_FIVE""","""OTHER""","""P142_57_166""","""P167_100_165""","""MARRIED""",,,,"""SALARIED_GOVT""",,"""P10_39_147""",10800.0,,0,0.0,1.0,1.0,"""P88_18_84""","""P167_100_165""",,,,"""CL""",,True,"""F""","""PRIMARY_MOBILE…"
0,,,,"""a55475b1""",,,"""a55475b1""","""a55475b1""",,,,"""a55475b1""","""a55475b1""",,,,,,,"""a55475b1""",,,1,1.0,1.0,4.0,"""a55475b1""","""a55475b1""","""SPOUSE""",,False,"""EM""",,,,"""PHONE"""
0,,,,"""a55475b1""",,,"""a55475b1""","""a55475b1""",,,,"""a55475b1""","""a55475b1""",,,,,,,"""a55475b1""",,,2,2.0,4.0,5.0,"""a55475b1""","""a55475b1""","""COLLEAGUE""","""SPOUSE""",False,"""PE""",,,,"""PHONE"""
0,,,,"""a55475b1""",,,"""a55475b1""","""a55475b1""",,,,"""a55475b1""","""a55475b1""",,,,,,,"""a55475b1""",,,3,,5.0,,"""a55475b1""","""a55475b1""",,"""COLLEAGUE""",,"""PE""",,,,"""PHONE"""
1,"""1957-08-01""",,,"""P103_93_94""",False,False,"""P176_37_166""","""P97_36_170""","""2008-10-29""","""MORE_FIVE""","""OTHER""","""P49_46_174""","""P160_59_140""","""DIVORCED""",,,,"""SALARIED_GOVT""",,"""P10_39_147""",10000.0,,0,0.0,1.0,1.0,"""P103_93_94""","""P176_37_166""",,,,"""CL""",,True,"""M""","""PRIMARY_MOBILE…"


statistic,case_id,birth_259D,birthdate_87D,childnum_185L,contaddr_district_15M,contaddr_matchlist_1032L,contaddr_smempladdr_334L,contaddr_zipcode_807M,education_927M,empl_employedfrom_271D,empl_employedtotal_800L,empl_industry_691L,empladdr_district_926M,empladdr_zipcode_114M,familystate_447L,gender_992L,housetype_905L,housingtype_772L,incometype_1044T,isreference_387L,language1_981M,mainoccupationinc_384A,maritalst_703L,num_group1,personindex_1023L,persontype_1072L,persontype_792L,registaddr_district_1083M,registaddr_zipcode_184M,relationshiptoclient_415T,relationshiptoclient_642T,remitter_829L,role_1084L,role_993L,safeguarantyflag_411L,sex_738L,type_25L
str,f64,str,str,f64,str,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,f64,str,f64,str,f64,f64,f64,f64,str,str,str,str,f64,str,str,f64,str,str
"""count""",2973991.0,"""1526659""","""24916""",9907.0,"""2973991""",1526218.0,1526218.0,"""2973991""","""2973991""","""566701""","""528315""","""522236""","""2973991""","""2973991""","""728613""","""24916""","""100818""","""9815""","""1526659""",24916.0,"""2973991""",1526659.0,"""11345""",2973991.0,2331708.0,2967874.0,2331708.0,"""2973991""","""2973991""","""805049""","""805942""",805049.0,"""2967874""","""24916""",1526657.0,"""1526659""","""2967874"""
"""null_count""",0.0,"""1447332""","""2949075""",2964084.0,"""0""",1447773.0,1447773.0,"""0""","""0""","""2407290""","""2445676""","""2451755""","""0""","""0""","""2245378""","""2949075""","""2873173""","""2964176""","""1447332""",2949075.0,"""0""",1447332.0,"""2962646""",0.0,642283.0,6117.0,642283.0,"""0""","""0""","""2168942""","""2168049""",2168942.0,"""6117""","""2949075""",1447334.0,"""1447332""","""6117"""
"""mean""",1055195.6115,,,0.616,,0.0,0.0045,,,,,,,,,,,,,0.5,,57707.4835,,0.7965,0.4384,2.0349,2.3157,,,,,0.0,,,0.9466,,
"""std""",724571.3851,,,0.9661,,,,,,,,,,,,,,,,,,33348.3028,,0.9778,0.6597,1.7072,1.8264,,,,,,,,,,
"""min""",0.0,"""1943-03-01""","""1911-12-01""",0.0,"""P0_111_121""",0.0,0.0,"""P0_101_75""","""P106_81_188""","""1963-06-15""","""LESS_ONE""","""AGRICULTURE""","""P0_140_162""","""P0_101_75""","""DIVORCED""","""F""","""COMPANY_FLAT""","""COMPANY_FLAT""","""EMPLOYED""",0.0,"""P10_39_147""",0.0,"""DIVORCED""",0.0,0.0,1.0,1.0,"""P0_111_121""","""P0_101_75""","""CHILD""","""CHILD""",0.0,"""CL""","""FULL""",0.0,"""F""","""ALTERNATIVE_PH…"
"""25%""",637354.0,,,0.0,,,,,,,,,,,,,,,,,,36000.0,,0.0,0.0,1.0,1.0,,,,,,,,,,
"""50%""",890817.0,,,0.0,,,,,,,,,,,,,,,,,,50000.0,,0.0,0.0,1.0,1.0,,,,,,,,,,
"""75%""",1568334.0,,,1.0,,,,,,,,,,,,,,,,,,70000.0,,1.0,1.0,4.0,5.0,,,,,,,,,,
"""max""",2703454.0,"""1999-10-01""","""2015-12-01""",11.0,"""a55475b1""",0.0,1.0,"""a55475b1""","""a55475b1""","""2020-09-15""","""MORE_ONE""","""WELNESS""","""a55475b1""","""a55475b1""","""WIDOWED""","""M""","""STATE_FLAT""","""STATE_FLAT""","""SELFEMPLOYED""",1.0,"""a55475b1""",200000.0,"""WIDOWED""",9.0,6.0,5.0,5.0,"""a55475b1""","""a55475b1""","""SPOUSE""","""SPOUSE""",0.0,"""PE""","""FULL""",1.0,"""M""","""WHATSAPP"""


birth_259D: Date of birth of the person.
birthdate_87D: Birth date of the person.
childnum_185L: Number of children of the applicant.
contaddr_district_15M: Zip code of a contact person's address.
contaddr_matchlist_1032L: Indicates whether the contact address is found in a code list.
contaddr_smempladdr_334L: Indicates whether the contact address is the same as the employment address.
contaddr_zipcode_807M: Zip code of contact address.
education_927M: Education level of the person.
empl_employedfrom_271D: Start date of employment.
empl_employedtotal_800L: Employment length of a person.
empl_industry_691L: Employment Industry of the person.
empladdr_district_926M: District where the employer's address is located.
empladdr_zipcode_114M: Zipcode of employer's address.
familystate_447L: Family state of the person.
gender_992L: Gender of a person.
housetype_905L: House type of the person.
housingtype_772L: Type of housing of the person.
incometype_1044T: Type of income of the person
isrefe

Персональная информация о клиенте - дата рождения, пол, образование и т.д.

**When num_groupN=0 it is the applicant (the person who applied for a loan)**

**Поэтому в данном случае будем фильтровать по num_group1=0 — это и будут данные заявителя (лица, которое берет кредит)**

## debitcard_1
Properties: depth=1, internal data source

In [30]:
# Preview the debitcard_1 training table: the helper prints the first rows,
# summary statistics and the definitions of the table's feature columns.
# NOTE(review): the returned value is presumably the loaded frame — confirm
# against print_sample_info.
train_debitcard_1 = print_sample_info(
    TRAIN_DIR,
    "train_debitcard_1.parquet",
    feature_definitions,
)

case_id,last180dayaveragebalance_704A,last180dayturnover_1134A,last30dayturnover_651A,num_group1,openingdate_857D
i64,f64,f64,f64,i64,str
225,,,,0,"""2016-08-16"""
331,,,,0,"""2015-03-19"""
358,,,,0,"""2014-09-02"""
390,,,,0,"""2014-07-23"""
390,,,,1,"""2015-10-01"""


statistic,case_id,last180dayaveragebalance_704A,last180dayturnover_1134A,last30dayturnover_651A,num_group1,openingdate_857D
str,f64,f64,f64,f64,f64,str
"""count""",157302.0,12216.0,11081.0,11081.0,157302.0,"""144591"""
"""null_count""",0.0,145086.0,146221.0,146221.0,0.0,"""12711"""
"""mean""",1468783.7827,109.6359,38494.5085,4955.3835,0.5493,
"""std""",888331.5764,949.9975,41400.589,19217.7369,1.6391,
"""min""",225.0,-308.7941,-187780.0,-477.506,0.0,"""2001-11-19"""
"""25%""",649173.0,0.0,7878.0,0.0,0.0,
"""50%""",1560138.0,0.0,30000.0,0.0,0.0,
"""75%""",2531591.0,1.0538,60000.0,0.0,1.0,
"""max""",2703453.0,67777.77,1161820.0,390000.0,65.0,"""2017-07-31"""


last180dayaveragebalance_704A: Average balance on debit card in the last 180 days.
last180dayturnover_1134A: Debit card's turnover within the last 180 days.
last30dayturnover_651A: Debit card turnover for the last 30 days.
openingdate_857D: Debit card opening date.


Информация по дебетовой карте:
- Оборот по дебетовой карте за последние N дней
- Средний остаток средств на дебетовой карте за последние 180 дней.

## applprev_2
Properties: depth=2, internal data source

In [31]:
# Inspect the previous-applications table (depth=2) from the train directory.
# print_sample_info is defined earlier in the notebook; judging by this cell's
# output it shows the first rows, summary statistics and the column
# definitions. Its return value is kept in train_applprev_2.
train_applprev_2 = print_sample_info(TRAIN_DIR, "train_applprev_2.parquet",
                                     feature_definitions)

case_id,cacccardblochreas_147M,conts_type_509L,credacc_cards_status_52L,num_group1,num_group2
i64,str,str,str,i64,i64
2,,"""PRIMARY_MOBILE…",,0,0
2,,"""EMPLOYMENT_PHO…",,0,1
2,,"""PRIMARY_MOBILE…",,1,0
2,,"""EMPLOYMENT_PHO…",,1,1
3,,"""PHONE""",,0,0


statistic,case_id,cacccardblochreas_147M,conts_type_509L,credacc_cards_status_52L,num_group1,num_group2
str,f64,str,str,str,f64,f64
"""count""",14075487.0,"""13966238""","""11681431""","""342083""",14075487.0,14075487.0
"""null_count""",0.0,"""109249""","""2394056""","""13733404""",0.0,0.0
"""mean""",1454198.0168,,,,4.5515,0.7403
"""std""",787508.3638,,,,4.3904,0.8026
"""min""",2.0,"""P127_74_114""","""ALTERNATIVE_PH…","""ACTIVE""",0.0,0.0
"""25%""",1237440.0,,,,1.0,0.0
"""50%""",1575626.0,,,,3.0,1.0
"""75%""",1861301.0,,,,7.0,1.0
"""max""",2703454.0,"""a55475b1""","""WHATSAPP""","""UNCONFIRMED""",19.0,11.0


cacccardblochreas_147M: Card blocking reason.
conts_type_509L: Person contact type in previous application.
credacc_cards_status_52L: Card status of the previous credit account.


Информация по предыдущим заявкам
- Причина блокировки карты
- Тип контакта лица в предыдущей заявке
- Статус карты предыдущего кредитного счета

**Сгруппируем по case_id и возьмем наиболее часто встречающееся значение cacccardblochreas_147M.
Остальные колонки пропустим**

## person_2
Properties: depth=2, internal data source

In [32]:
# Inspect the person table (depth=2) from the train directory.
# print_sample_info is defined earlier in the notebook; judging by this cell's
# output it shows the first rows, summary statistics and the column
# definitions. Its return value is kept in train_person_2.
train_person_2 = print_sample_info(TRAIN_DIR, "train_person_2.parquet",
                                   feature_definitions)

case_id,addres_district_368M,addres_role_871L,addres_zip_823M,conts_role_79M,empls_economicalst_849M,empls_employedfrom_796D,empls_employer_name_740M,num_group1,num_group2,relatedpersons_role_762T
i64,str,str,str,str,str,str,str,i64,i64,str
5,"""a55475b1""",,"""a55475b1""","""a55475b1""","""a55475b1""",,"""a55475b1""",0,0,
6,"""P55_110_32""","""CONTACT""","""P10_68_40""","""P38_92_157""","""P164_110_33""",,"""a55475b1""",0,0,
6,"""P55_110_32""","""PERMANENT""","""P10_68_40""","""a55475b1""","""a55475b1""",,"""a55475b1""",0,1,
6,"""P204_92_178""","""CONTACT""","""P65_136_169""","""P38_92_157""","""P164_110_33""",,"""a55475b1""",1,0,"""OTHER_RELATIVE…"
6,"""P191_109_75""","""CONTACT""","""P10_68_40""","""P7_147_157""","""a55475b1""",,"""a55475b1""",1,1,"""OTHER_RELATIVE…"


statistic,case_id,addres_district_368M,addres_role_871L,addres_zip_823M,conts_role_79M,empls_economicalst_849M,empls_employedfrom_796D,empls_employer_name_740M,num_group1,num_group2,relatedpersons_role_762T
str,f64,str,str,str,str,str,str,str,f64,f64,str
"""count""",1643410.0,"""1643410""","""67674""","""1643410""","""1643410""","""1643410""","""5757""","""1643410""",1643410.0,1643410.0,"""28726"""
"""null_count""",0.0,"""0""","""1575736""","""0""","""0""","""0""","""1637653""","""0""",0.0,0.0,"""1614684"""
"""mean""",1264005.0153,,,,,,,,0.1115,0.1237,
"""std""",699545.4755,,,,,,,,0.3224,0.7612,
"""min""",5.0,"""P0_111_121""","""CONTACT""","""P0_101_75""","""P115_147_77""","""P112_86_147""","""1972-08-15""","""P0_112_117""",0.0,0.0,"""CHILD"""
"""25%""",761958.0,,,,,,,,0.0,0.0,
"""50%""",1323516.0,,,,,,,,0.0,0.0,
"""75%""",1695937.0,,,,,,,,0.0,0.0,
"""max""",2703454.0,"""a55475b1""","""TEMPORARY""","""a55475b1""","""a55475b1""","""a55475b1""","""2020-01-19""","""a55475b1""",4.0,31.0,"""SPOUSE"""


addres_district_368M: District of the person's address.
addres_role_871L: Role of person's address.
addres_zip_823M: Zip code of the address.
conts_role_79M: Type of contact role of a person.
empls_economicalst_849M: The economical status of the person (num_group1 - person, num_group2 - employment).
empls_employedfrom_796D: Start of employment (num_group1 - person, num_group2 - employment).
empls_employer_name_740M: Employer's name (num_group1 - person, num_group2 - employment).
relatedpersons_role_762T: Relationship type of a client's related person (num_group1 - person, num_group2 - related person).


Информация о клиентах и их контактных лицах:
- адреса
- информация о работе

**When num_groupN=0 it is the applicant (the person who applied for a loan).
Отфильтруем данные по num_group1=0, сгруппируем по case_id и возьмем наиболее часто встречающееся значение для empls_economicalst_849M**

## credit_bureau

credit_bureau_a_2
Properties: depth=2, external data source, Credit bureau provider A

credit_bureau_b_2
Properties: depth=2, external data source, Credit bureau provider B

In [33]:
# Inspect the first file of the credit bureau A table (depth=2) from the train
# directory. print_sample_info is defined earlier in the notebook; judging by
# this cell's output it shows the first rows, summary statistics and the
# column definitions. Its return value is kept in train_credit_bureau_a_2_0.
train_credit_bureau_a_2_0 = print_sample_info(
    TRAIN_DIR, "train_credit_bureau_a_2_0.parquet", feature_definitions)

case_id,collater_typofvalofguarant_298M,collater_typofvalofguarant_407M,collater_valueofguarantee_1124L,collater_valueofguarantee_876L,collaterals_typeofguarante_359M,collaterals_typeofguarante_669M,num_group1,num_group2,pmts_dpd_1073P,pmts_dpd_303P,pmts_month_158T,pmts_month_706T,pmts_overdue_1140A,pmts_overdue_1152A,pmts_year_1139T,pmts_year_507T,subjectroles_name_541M,subjectroles_name_838M
i64,str,str,f64,f64,str,str,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
388,"""8fd95e4b""","""a55475b1""",0.0,,"""a55475b1""","""c7a5ad39""",0,0,,,2.0,,,,2018.0,,"""a55475b1""","""ab3c25cf"""
388,"""9a0c095e""","""a55475b1""",0.0,,"""a55475b1""","""c7a5ad39""",1,0,,,2.0,,,,2018.0,,"""a55475b1""","""ab3c25cf"""
388,"""a55475b1""","""a55475b1""",,,"""a55475b1""","""a55475b1""",0,1,,,3.0,,,,2018.0,,"""a55475b1""","""a55475b1"""
388,"""a55475b1""","""a55475b1""",,,"""a55475b1""","""a55475b1""",0,2,,,4.0,,,,2018.0,,"""a55475b1""","""a55475b1"""
388,"""a55475b1""","""a55475b1""",,,"""a55475b1""","""a55475b1""",0,3,,,5.0,,,,2018.0,,"""a55475b1""","""a55475b1"""


statistic,case_id,collater_typofvalofguarant_298M,collater_typofvalofguarant_407M,collater_valueofguarantee_1124L,collater_valueofguarantee_876L,collaterals_typeofguarante_359M,collaterals_typeofguarante_669M,num_group1,num_group2,pmts_dpd_1073P,pmts_dpd_303P,pmts_month_158T,pmts_month_706T,pmts_overdue_1140A,pmts_overdue_1152A,pmts_year_1139T,pmts_year_507T,subjectroles_name_541M,subjectroles_name_838M
str,f64,str,str,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
"""count""",5296031.0,"""5296031""","""5296031""",202915.0,0.0,"""5296031""","""5296031""",5296031.0,5296031.0,2489613.0,0.0,5296008.0,0.0,2491674.0,0.0,5296008.0,0.0,"""5296031""","""5296031"""
"""null_count""",0.0,"""0""","""0""",5093116.0,5296031.0,"""0""","""0""",0.0,0.0,2806418.0,5296031.0,23.0,5296031.0,2804357.0,5296031.0,23.0,5296031.0,"""0""","""0"""
"""mean""",1063781.5213,,,1863340.5995,,,,0.7215,14.0234,13.1622,,6.5,,1438.9585,,2018.2643,,,
"""std""",662226.6264,,,33559092.645,,,,1.1026,9.3128,138.7311,,3.4521,,39508.2694,,0.7833,,,
"""min""",388.0,"""06fb9ba8""","""a55475b1""",0.0,,"""a55475b1""","""0e63c0f0""",0.0,0.0,0.0,,1.0,,0.0,,2015.0,,"""a55475b1""","""15f04f45"""
"""25%""",622237.0,,,0.0,,,,0.0,6.0,0.0,,4.0,,0.0,,2018.0,,,
"""50%""",1259890.0,,,0.0,,,,0.0,13.0,0.0,,7.0,,0.0,,2018.0,,,
"""75%""",1284811.0,,,0.0,,,,1.0,21.0,0.0,,9.0,,0.0,,2019.0,,,
"""max""",2548729.0,"""a55475b1""","""a55475b1""",3200000000.0,,"""a55475b1""","""f4d8a027""",46.0,35.0,4877.0,,12.0,,15237162.0,,2020.0,,"""a55475b1""","""daf49a8a"""


collater_typofvalofguarant_298M: Collateral valuation type (active contract).
collater_typofvalofguarant_407M: Collateral valuation type (closed contract).
collater_valueofguarantee_1124L: Value of collateral for active contract.
collater_valueofguarantee_876L: Value of collateral for closed contract.
collaterals_typeofguarante_359M: Type of collateral that was used as a guarantee for a closed contract.
collaterals_typeofguarante_669M: Collateral type for the active contract.
pmts_dpd_1073P: Days past due of the payment for the active contract (num_group1 - existing contract, num_group2 - payment).
pmts_dpd_303P: Days past due of the payment for terminated contract according to credit bureau (num_group1 - terminated contract, num_group2 - payment).
pmts_month_158T: Month of payment for a closed contract (num_group1 - existing contract, num_group2 - payment).
pmts_month_706T: Month of payment for active contract (num_group1 - terminated contract, num_group2 - payment).
pmts_overdue_1140

Согласно https://www.kaggle.com/competitions/home-credit-credit-risk-model-stability/data (в отличие от описаний, выведенных выше):
- pmts_month_158T относится к активному контракту (active contract)
- pmts_month_706T относится к закрытому контракту (closed contract)

In [34]:
# Inspect the credit bureau B table (depth=2) from the train directory.
# print_sample_info is defined earlier in the notebook; judging by this cell's
# output it shows the first rows, summary statistics and the column
# definitions. Its return value is kept in train_credit_bureau_b_2.
train_credit_bureau_b_2 = print_sample_info(TRAIN_DIR,
                                            "train_credit_bureau_b_2.parquet",
                                            feature_definitions)

case_id,num_group1,num_group2,pmts_date_1107D,pmts_dpdvalue_108P,pmts_pmtsoverdue_635A
i64,i64,i64,str,f64,f64
467,0,0,"""2018-11-15""",,
467,0,1,"""2018-12-15""",,
467,1,0,"""2018-12-15""",,
467,2,0,"""2016-10-15""",0.0,0.0
467,2,1,"""2016-11-15""",0.0,0.0


statistic,case_id,num_group1,num_group2,pmts_date_1107D,pmts_dpdvalue_108P,pmts_pmtsoverdue_635A
str,f64,f64,f64,str,f64,f64
"""count""",1286755.0,1286755.0,1286755.0,"""1286755""",1281394.0,1281394.0
"""null_count""",0.0,0.0,0.0,"""0""",5361.0,5361.0
"""mean""",1229443.91,0.7467,12.3197,,24370.4546,11.8595
"""std""",679992.3043,1.1217,10.0171,,574795.5394,455.1762
"""min""",467.0,0.0,0.0,"""2016-01-15""",0.0,0.0
"""25%""",741898.0,0.0,4.0,,0.0,0.0
"""50%""",1416105.0,0.0,10.0,,0.0,0.0
"""75%""",1781534.0,1.0,19.0,,0.0,0.0
"""max""",2703436.0,20.0,36.0,"""2020-10-15""",185124192.0,147470.61


pmts_date_1107D: Payment date for an active contract according to credit bureau (num_group1 - contract, num_group2 - payment).
pmts_dpdvalue_108P: Value of past due payment for active contract (num_group1 - existing contract, num_group2 - payment).
pmts_pmtsoverdue_635A: Active contract that has overdue payments (num_group1 - existing contract, num_group2 - payment).


train_credit_bureau_a_2_0 - Информация по платежам по активным и закрытым кредитным договорам:
- Дней просрочки платежа по активному/закрытому контракту.
- Стоимость обеспечения по действующему/закрытому контракту.
- Вид обеспечения, который был использован в качестве гарантии по действующему/закрытому контракту.

train_credit_bureau_b_2 - Информация по платежам по активным кредитным договорам:
- Сумма просроченного платежа по действующему контракту
- Активный контракт с просроченными платежами

# Columns research

In [35]:
# Name fragments that suggest a column holds integer values.
int_patterns = [
    "num",
    "cnt",
    "quantity",
    "days",
    "count",
    "_month",
    "_year",
    "today",
]
# Name fragments that suggest a column holds numeric values in general.
# The integer check below runs first, so fragments shared with int_patterns
# only ever produce the "Int" label.
numeric_patterns = [
    "num",
    "cnt",
    "quantity",
    "days",
    "count",
    "quarter",
    "year",
    "month",
    "amount",
    "rate",
    "transactions",
    "week",
    "sum",
    "pct",
]

# Print a guessed type for every column whose name ends in "T" or "L".
# NOTE(review): the guess is based on substrings of the column name only, so
# it can mislabel columns (e.g. applications30d_658L is reported as Str even
# though its definition describes a count) - read the output critically.
for col, definition in feature_definitions.items():
    # Columns with any other suffix are not reported by this cell.
    if col[-1] not in ("T", "L"):
        continue

    if any(pattern in col for pattern in int_patterns):
        print(f"Int col: {col} - {definition}")
        continue

    if any(pattern in col for pattern in numeric_patterns):
        print(f"Float col: {col} - {definition}")
        continue

    # No known fragment matched: report the column as a string column.
    print(f"Str col: {col} - {definition}")

Str col: addres_role_871L - Role of person's address.
Float col: annualeffectiverate_199L - Interest rate of the closed contracts.
Float col: annualeffectiverate_63L - Interest rate for the active contracts.
Int col: applicationcnt_361L - Number of applications associated with the same email address as the client.
Str col: applications30d_658L - Number of applications made by the client in the last 30 days.
Int col: applicationscnt_1086L - Number of applications associated with the same phone number.
Int col: applicationscnt_464L - Number of applications made in the last 30 days by other clients with the same employer as the applicant.
Int col: applicationscnt_629L - Number of applications with the same employer in the last 7 days.
Int col: applicationscnt_867L - Number of applications associated with the same mobile phone.
Str col: bankacctype_710L - Type of applicant's bank account.
Str col: byoccupationinc_3656910L - Applicant's income from previous applications.
Str col: cardtype_5