In [1]:
import os
from tqdm import tqdm
from typing import List, Optional

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the previous-applications table from the working directory
# and preview its first two rows.
train = pd.read_csv(filepath_or_buffer="previous_application.csv")
train.head(2)

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,...,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,...,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,...,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0


In [3]:
# Number of rows in the previous-applications table.
len(train)

1670214

In [5]:
# Keep only the previous applications whose SK_ID_CURR is 427000.
client_data = train.loc[train['SK_ID_CURR'].eq(427000)]

In [6]:
# Report when the selection for client 427000 has no rows (or no columns);
# this only prints a message, it does not stop execution.
if 0 in client_data.shape:
    print("Нет данных для клиента с идентификатором 427000.")

In [8]:
# Mean AMT_ANNUITY per SK_ID_CURR within the selected client rows;
# the result is a Series indexed by SK_ID_CURR.
average_payment = client_data.groupby('SK_ID_CURR')['AMT_ANNUITY'].agg('mean')
average_payment

SK_ID_CURR
427000    3898.9125
Name: AMT_ANNUITY, dtype: float64

In [9]:
# Look up the mean for client 427000 by label and round it to two decimal places
rounded_average_payment = round(average_payment.loc[427000], 2)

# Print the result
print(f"Среднее значение платежа для клиента с идентификатором 427000: {rounded_average_payment}")

Среднее значение платежа для клиента с идентификатором 427000: 3898.91


In [11]:
# Restrict the previous applications to those whose contract status is 'Approved'.
approved_applications = train.loc[train['NAME_CONTRACT_STATUS'].eq('Approved')]
approved_applications

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,...,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.430,17145.0,17145.0,0.0,17145.0,SATURDAY,15,...,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,...,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,...,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,...,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
5,1383531,199383,Cash loans,23703.930,315000.0,340573.5,,315000.0,SATURDAY,8,...,XNA,18.0,low_normal,Cash X-Sell: low,365243.0,-654.0,-144.0,-144.0,-137.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670209,2300464,352015,Consumer loans,14704.290,267295.5,311400.0,0.0,267295.5,WEDNESDAY,12,...,Furniture,30.0,low_normal,POS industry with interest,365243.0,-508.0,362.0,-358.0,-351.0,0.0
1670210,2357031,334635,Consumer loans,6622.020,87750.0,64291.5,29250.0,87750.0,TUESDAY,15,...,Furniture,12.0,middle,POS industry with interest,365243.0,-1604.0,-1274.0,-1304.0,-1297.0,0.0
1670211,2659632,249544,Consumer loans,11520.855,105237.0,102523.5,10525.5,105237.0,MONDAY,12,...,Consumer electronics,10.0,low_normal,POS household with interest,365243.0,-1457.0,-1187.0,-1187.0,-1181.0,0.0
1670212,2785582,400317,Cash loans,18821.520,180000.0,191880.0,,180000.0,WEDNESDAY,9,...,XNA,12.0,low_normal,Cash X-Sell: low,365243.0,-1155.0,-825.0,-825.0,-817.0,1.0


In [13]:
# Keep approved applications whose DAYS_LAST_DUE equals 365243.
# NOTE(review): 365243 looks like a placeholder meaning "no last due date yet",
# i.e. the credit is still active — confirm against the data dictionary.
active_credits = approved_applications.loc[approved_applications['DAYS_LAST_DUE'].eq(365243)]
active_credits

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,...,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,...,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,...,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
17,1285768,142748,Revolving loans,9000.000,180000.0,180000.0,,180000.0,FRIDAY,13,...,XNA,0.0,XNA,Card X-Sell,-277.0,-257.0,365243.0,365243.0,365243.0,0.0
21,1182516,267782,Cash loans,20361.600,405000.0,451777.5,,405000.0,SATURDAY,4,...,XNA,30.0,low_normal,Cash X-Sell: low,365243.0,-656.0,214.0,365243.0,365243.0,1.0
34,1629736,255331,Revolving loans,10125.000,202500.0,202500.0,,202500.0,SATURDAY,14,...,XNA,0.0,XNA,Card X-Sell,-265.0,-234.0,365243.0,365243.0,365243.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669924,1568029,442032,Consumer loans,11730.825,123547.5,123547.5,0.0,123547.5,SUNDAY,10,...,Furniture,12.0,low_normal,POS industry with interest,365243.0,-313.0,17.0,365243.0,365243.0,0.0
1669925,2126826,366227,Cash loans,30205.350,450000.0,491580.0,,450000.0,WEDNESDAY,10,...,XNA,24.0,middle,Cash Street: middle,365243.0,-327.0,363.0,365243.0,365243.0,0.0
1669945,1669699,158720,Consumer loans,11204.865,113940.0,113940.0,0.0,113940.0,MONDAY,13,...,Tourism,12.0,middle,POS other with interest,365243.0,-170.0,160.0,365243.0,365243.0,0.0
1670192,2101747,339383,Revolving loans,3375.000,0.0,67500.0,,,FRIDAY,18,...,XNA,0.0,XNA,Card X-Sell,-474.0,-438.0,365243.0,365243.0,365243.0,0.0


In [14]:
# Count the rows selected as active approved credits.
num_active_approved_credits = active_credits.shape[0]
num_active_approved_credits

211221

In [19]:
# Load the main application table from the working directory and display it.
application_train = pd.read_csv(filepath_or_buffer="application_train.csv")
application_train

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307506,456251,0,Cash loans,M,N,N,0,157500.0,254700.0,27558.0,...,0,0,0,0,,,,,,
307507,456252,0,Cash loans,F,N,Y,0,72000.0,269550.0,12001.5,...,0,0,0,0,,,,,,
307508,456253,0,Cash loans,F,N,Y,0,153000.0,677664.0,29979.0,...,0,0,0,0,1.0,0.0,0.0,1.0,0.0,1.0
307509,456254,1,Cash loans,F,N,Y,0,171000.0,370107.0,20205.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# Step 3: aggregate AMT_ANNUITY by SK_ID_CURR.
# Sums the annuity over each client's rows in active_credits and returns a
# two-column frame (SK_ID_CURR, TOTAL_ANNUITY). The rename is done on the
# Series inside the chain rather than with inplace=True afterwards, so no
# object is mutated after it has been created.
aggregated_annuity = (
    active_credits
    .groupby('SK_ID_CURR')['AMT_ANNUITY']
    .sum()
    .rename('TOTAL_ANNUITY')
    .reset_index()
)

In [23]:
# Step 4: join the per-client totals onto the main application table.
# A left join keeps every application row; clients without a row in
# aggregated_annuity get NaN in TOTAL_ANNUITY.
# validate='many_to_one' makes pandas raise MergeError if SK_ID_CURR is not
# unique in aggregated_annuity, so the join can never silently duplicate
# application rows.
merged_data = application_train.merge(
    aggregated_annuity,
    on='SK_ID_CURR',
    how='left',
    validate='many_to_one',
)

In [24]:
# Step 5: fill missing values (clients with no active credits get a total of 0)
merged_data = merged_data.fillna({'TOTAL_ANNUITY': 0})

In [25]:
# Step 6: compute the ratio of TOTAL_ANNUITY to AMT_INCOME_TOTAL (element-wise per row)
merged_data['ANNUITY_TO_INCOME_RATIO'] = merged_data['TOTAL_ANNUITY'].div(merged_data['AMT_INCOME_TOTAL'])

In [26]:
# Step 7: get the value for the client with application number 456254
# (first matching row; raises IndexError if no row matches).
client_456254_ratio = merged_data.loc[
    merged_data['SK_ID_CURR'].eq(456254), 'ANNUITY_TO_INCOME_RATIO'
].iloc[0]

# Round to four decimal places
result = round(client_456254_ratio, 4)

print(result)

0.1249
