In [1]:
!python --version

Python 3.11.10


In [2]:
from IPython.display import display, HTML
# Inject CSS that forces elements with the `.container` class to 100% width
# (presumably to widen the notebook page so wide tables fit - confirm for your front end).
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
#import random
import numpy as np
import pandas as pd

In [4]:
from line_profiler import LineProfiler

In [5]:
import matplotlib.pyplot as plt

In [6]:
%matplotlib inline

In [30]:


# Monthly migration matrix between delinquency (DOD) buckets.
# Row = current bucket, column = bucket in the next month; each row is a
# probability distribution over the six buckets.
# TODO: add states for full and partial early repayment of the loan.
dod_migration = np.array([
    # 0     1+    31+   61+   91+   WOF
    [0.95, 0.05, 0.00, 0.00, 0.00, 0.00],  # 0
    [0.90, 0.05, 0.05, 0.00, 0.00, 0.00],  # 1+
    [0.10, 0.05, 0.05, 0.80, 0.00, 0.00],  # 31+
    [0.05, 0.05, 0.05, 0.05, 0.80, 0.00],  # 61+
    [0.01, 0.01, 0.02, 0.02, 0.04, 0.90],  # 91+
    [0.00, 0.00, 0.00, 0.00, 0.00, 1.00],  # WOF
])
# Sanity check shown as the cell output: every row must sum to 1.
[bucket_row.sum() for bucket_row in dod_migration]

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

In [9]:
class World:
    """Macro-environment of the simulation.

    Per the original description it defines the origin of simulation time and
    is meant to hold macroeconomic laws, regulator / central-bank constraints
    and the rest of the environment. Only the clock is implemented here.
    """

    def __init__(self):
        # World clock starts at zero.
        self.World_Time = 0

In [12]:
class DWH_DB:
    """In-memory stand-in for the data warehouse (DWH).

    Attributes:
        LI: monthly snapshot table, one row per contract per snapshot.
            Columns: CNTR_ID, SD, DOD_ID, MOB, WRTOFF_ID, CLOSED_ID.
            Created empty but with int64 columns so that rows appended later
            keep an integer dtype instead of degrading to ``object``.
        DMContract: contract-level table (CNTR_ID, ISSUE_DT, WRTOFF_DT,
            CLOSED_DT). Created empty and left untyped because the visible
            code never populates it.
    """

    # Column layouts are kept as class constants so writers can reuse them.
    LI_COLUMNS = ['CNTR_ID', 'SD', 'DOD_ID', 'MOB', 'WRTOFF_ID', 'CLOSED_ID']
    DM_CONTRACT_COLUMNS = ['CNTR_ID', 'ISSUE_DT', 'WRTOFF_DT', 'CLOSED_DT']

    def __init__(self):
        # An untyped empty frame has object columns; that dtype would leak into
        # every later concat/merge, so the integer dtype is fixed up front.
        self.LI = pd.DataFrame(columns=self.LI_COLUMNS).astype('int64')
        self.DMContract = pd.DataFrame(columns=self.DM_CONTRACT_COLUMNS)
        


In [31]:
class Contract:
    """A single loan contract whose delinquency bucket evolves as a Markov chain.

    Args:
        cntr_id: contract identifier.
        issue_dt: issue date of the contract.
        duration: contractual term in months.
        dod_migration: square transition matrix (``dod_cnt`` x ``dod_cnt``);
            row ``i`` holds the probabilities of moving from bucket ``i`` to
            each bucket in the next month. May be omitted at construction but
            must be set before ``next_month`` is called.
        rng: optional random source exposing ``choice(a, size, p=...)``
            (e.g. ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used.
    """

    # Human-readable labels of the delinquency buckets.
    dod_dic = {0: '0',
               1: '1+',
               2: '31+',
               3: '61+',
               4: '91+',
               5: 'WOF'
              }
    dod_cnt = 6                   # number of buckets
    dod_states = np.eye(dod_cnt)  # one-hot state vectors (rows of the identity matrix)

    DOD_CURRENT = 0               # bucket index: no days overdue
    DOD_WOF = 5                   # bucket index: write-off
    WRITE_OFF_LAG = 12            # months past `duration` before a WOF contract is flagged written off
    CLOSE_AFTER_WRITE_OFF_LAG = 24  # months past `duration` before a written-off contract is closed

    def __init__(self, cntr_id = 0, issue_dt = 0, duration = 0,
                 dod_migration = None, rng = None):
        self.cntr_id = cntr_id
        self.dod_id = self.DOD_CURRENT     # a contract is issued with no days overdue
        self.dod_state = self.dod_states[self.dod_id]  # one-hot vector of length dod_cnt
        self.dod_migration = dod_migration
        self.rng = rng
        self.issue_dt = issue_dt
        self.mob = 0                       # months on book
        self.duration = duration
        self.closed_id = 0                 # 0 - contract is open, 1 - closed
        self.wrtoff_id = 0                 # 0 - not written off, 1 - written off

    def next_month(self):
        """Advance the contract by one month.

        Draws the next delinquency bucket from the migration row of the current
        bucket, then updates the write-off and closed flags. A contract that is
        already closed is left untouched.

        Returns:
            None.

        Raises:
            ValueError: if no migration matrix was supplied.
        """
        if self.closed_id == 1:
            return None
        if self.dod_migration is None:
            # Fail before mutating any state, with an actionable message.
            raise ValueError("dod_migration must be provided before calling next_month()")

        self.mob += 1

        # Row of the current bucket = distribution of the next bucket. This is
        # what `dod_migration.T.dot(one_hot_state)` evaluates to, without the
        # matrix-vector product.
        p = np.asarray(self.dod_migration)[self.dod_id]
        source = np.random if self.rng is None else self.rng
        self.dod_id = int(source.choice(self.dod_cnt, 1, p=p)[0])
        self.dod_state = self.dod_states[self.dod_id]

        # Repaid, or cured and back on schedule, once the term has elapsed.
        if self.dod_id == self.DOD_CURRENT and self.mob >= self.duration:
            self.closed_id = 1

        # Write-off is only flagged WRITE_OFF_LAG months after the term.
        if self.dod_id == self.DOD_WOF and self.mob >= self.duration + self.WRITE_OFF_LAG:
            self.wrtoff_id = 1

        # A written-off contract is closed CLOSE_AFTER_WRITE_OFF_LAG months after the term.
        if self.wrtoff_id == 1 and self.mob >= self.duration + self.CLOSE_AFTER_WRITE_OFF_LAG:
            self.closed_id = 1
            

In [58]:
class Portfolio:
    """Loan portfolio dynamics: monthly issuance, ageing and DWH snapshots.

    Relies on module-level names that must exist when its methods run:
    ``Contract``, the ``dod_migration`` matrix and the ``DWH`` database object.

    Args:
        N: number of contracts issued when the portfolio is created.
        duration: term in months of the contracts of the first issue.
        start_portfolio_dt: date that anchors the portfolio to world time
            (matters when several portfolios coexist).
    """

    def __init__(self, N = 10, duration = 36, start_portfolio_dt = 0):
        self.cntr_id = 0                                # running contract counter
        self.start_portfolio_dt = start_portfolio_dt    # portfolio creation date
        self.cntr_list = []                             # the portfolio itself: list of open contracts
        self.portfolio_age = 0                          # portfolio age in months

        # First issue initialises the portfolio, then the state is snapshotted.
        self.issue(N, duration)
        self.fix_in_dwh()

    def issue(self, N = 10, duration = 36):
        """Issue ``N`` new contracts of the given ``duration`` in the current month."""
        # Contracts issued in later months are dated at the portfolio's current
        # world time, not at its creation date.
        issue_dt = self.start_portfolio_dt + self.portfolio_age
        for _ in range(N):
            self.cntr_id += 1
            self.cntr_list.append(Contract(cntr_id = self.cntr_id,
                                           issue_dt = issue_dt,
                                           duration = duration,
                                           dod_migration = dod_migration))

    def next_month(self, N = 10, duration = 36):
        """Advance the portfolio by one month and record a snapshot.

        Drops contracts closed in the previous month, ages the remaining ones,
        issues ``N`` new contracts, writes the snapshot to the DWH and prints
        the portfolio age, the number of contracts and the ids just dropped.
        """
        self.portfolio_age += 1

        # Diagnostics: ids of the contracts that are closed at this point.
        closed_ids = [cntr.cntr_id for cntr in self.cntr_list if cntr.closed_id == 1]

        # Keep only the open contracts.
        self.cntr_list = [cntr for cntr in self.cntr_list if cntr.closed_id == 0]

        # Age the existing book first, then issue the new contracts.
        for cntr in self.cntr_list:
            cntr.next_month()

        self.issue(N, duration)

        # Record the snapshot in LI.
        self.fix_in_dwh()

        print('%04i' % self.portfolio_age, len(self.cntr_list), 'out ->',  closed_ids)

    def _snapshot_row(self, cnt):
        """Return one LI row for contract ``cnt`` in DWH.LI column order."""
        return [cnt.cntr_id, self.portfolio_age, cnt.dod_id, cnt.mob,
                cnt.wrtoff_id, cnt.closed_id]

    def fix_in_dwh_old(self): # Example of slow row-by-row insertion
        """Append the snapshot one row at a time (kept as a slow reference)."""
        ix = len(DWH.LI.index)
        for cnt in self.cntr_list:
            # The row must carry all six LI columns, otherwise .loc raises.
            DWH.LI.loc[ix] = self._snapshot_row(cnt)
            ix += 1

    def fix_in_dwh(self):
        """Append the current state of every contract to DWH.LI in one batch."""
        rows = [self._snapshot_row(cnt) for cnt in self.cntr_list]
        if not rows:
            return  # nothing to record this month

        snapshot = pd.DataFrame(data=rows, columns=DWH.LI.columns)
        if DWH.LI.empty:
            # Avoid concatenating with an empty frame: its object dtype would
            # otherwise take part in the dtype resolution of the result.
            DWH.LI = snapshot
        else:
            # ignore_index keeps the row labels unique across monthly snapshots.
            DWH.LI = pd.concat([DWH.LI, snapshot], ignore_index=True)


In [59]:
%time
N_const = 10 # Пусть будут постоянные ежемесячные выдачи 
duration = 24
DWH = DWH_DB()
GP = Portfolio(N_const, duration)

CPU times: user 4 μs, sys: 1 μs, total: 5 μs
Wall time: 9.3 μs


In [77]:
# Advance the portfolio by 100 monthly steps with constant issuance.
# Note: re-running this cell keeps ageing the same portfolio object.
months_to_simulate = 100
for t in range(months_to_simulate):
    GP.next_month(N=N_const, duration=duration)

0101 261 out -> [761, 762, 763, 764, 765, 766, 767, 768, 769, 770]
0102 262 out -> [771, 772, 773, 774, 776, 777, 778, 779, 780]
0103 263 out -> [781, 782, 784, 785, 786, 787, 788, 789, 790]
0104 264 out -> [791, 793, 794, 795, 796, 797, 798, 799, 800]
0105 264 out -> [792, 801, 802, 803, 804, 805, 806, 808, 809, 810]
0106 263 out -> [579, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820]
0107 263 out -> [821, 822, 823, 824, 825, 826, 827, 828, 829, 830]
0108 263 out -> [831, 832, 833, 834, 835, 836, 837, 838, 839, 840]
0109 262 out -> [602, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850]
0110 263 out -> [851, 852, 853, 854, 855, 856, 857, 858, 859]
0111 263 out -> [629, 861, 862, 864, 865, 866, 867, 868, 869, 870]
0112 262 out -> [636, 863, 871, 872, 873, 874, 875, 876, 877, 879, 880]
0113 263 out -> [881, 882, 883, 884, 885, 886, 887, 888, 890]
0114 262 out -> [651, 657, 889, 891, 893, 894, 895, 896, 898, 899, 900]
0115 261 out -> [663, 667, 901, 902, 903, 904, 905, 906, 907, 908, 

In [97]:
# Working copy of the snapshot table with a fresh 0..n-1 row index.
T = DWH.LI.reset_index(drop=True)
print(T.shape)
T.tail(5)

(49211, 6)


Unnamed: 0,CNTR_ID,SD,DOD_ID,MOB,WRTOFF_ID,CLOSED_ID
49206,2006,200,0,0,0,0
49207,2007,200,0,0,0,0
49208,2008,200,0,0,0,0
49209,2009,200,0,0,0,0
49210,2010,200,0,0,0,0


In [79]:
# Month-by-month history of a single contract (id 110).
T.loc[T['CNTR_ID'] == 110]

Unnamed: 0,CNTR_ID,SD,DOD_ID,MOB,WRTOFF_ID,CLOSED_ID
659,110,10,0,0,0,0
769,110,11,0,1,0,0
889,110,12,0,2,0,0
1019,110,13,0,3,0,0
1159,110,14,0,4,0,0
1309,110,15,0,5,0,0
1469,110,16,0,6,0,0
1639,110,17,0,7,0,0
1819,110,18,0,8,0,0
2009,110,19,0,9,0,0


In [80]:
# Display the snapshot table (pandas truncates the middle rows of long frames).
T

Unnamed: 0,CNTR_ID,SD,DOD_ID,MOB,WRTOFF_ID,CLOSED_ID
0,1,0,0,0,0,0
1,2,0,0,0,0,0
2,3,0,0,0,0,0
3,4,0,0,0,0,0
4,5,0,0,0,0,0
...,...,...,...,...,...,...
49206,2006,200,0,0,0,0
49207,2007,200,0,0,0,0
49208,2008,200,0,0,0,0
49209,2009,200,0,0,0,0


In [81]:
# Per snapshot date: number of contracts on book and their mean months-on-book.
G = (
    T.groupby('SD')[['MOB']]
     .agg(['count', 'mean'])
     .reset_index()
)
# Flatten the two-level column header produced by the multi-aggregation.
G.columns = ['SD', 'CNT', 'MEAN']
G

Unnamed: 0,SD,CNT,MEAN
0,0,10,0.0
1,1,20,0.5
2,2,30,1.0
3,3,40,1.5
4,4,50,2.0
...,...,...,...
196,196,262,13.110687
197,197,263,13.201521
198,198,263,13.247148
199,199,261,13.019157


In [83]:
# For each portfolio size: how many snapshot dates had it and the earliest such date
# (last 20 sizes only).
(
    G.groupby('CNT')['SD']
     .agg(['count', 'min'])
     .tail(20)
)

Unnamed: 0_level_0,count,min
CNT,Unnamed: 1_level_1,Unnamed: 2_level_1
240,1,23
250,2,24
252,2,27
253,2,26
254,1,34
255,7,30
256,1,37
257,1,39
258,10,65
259,17,40


In [98]:
m_lag = 1  # roll period, in months

# Row filters for the "from" side of the transition table.
ix_sd   = T['SD'].ge(40)                       # snapshots from SD 40 onward (for loans with a 24-month term)
ix_dod  = T['DOD_ID'].lt(4)                    # not in default
ix_open = T['CLOSED_ID'].eq(0)                 # not closed
ix_mlag = T['SD'].le(T['SD'].max() - m_lag)    # maturation limit: leave m_lag months to observe
D_F = T.loc[ix_sd & ix_dod & ix_mlag & ix_open]

In [99]:
# Display the filtered "from" rows.
D_F

Unnamed: 0,CNTR_ID,SD,DOD_ID,MOB,WRTOFF_ID,CLOSED_ID
7074,164,40,1,24,0,0
7081,171,40,0,23,0,0
7082,172,40,0,23,0,0
7083,173,40,0,23,0,0
7084,174,40,0,23,0,0
...,...,...,...,...,...,...
48945,1996,199,0,0,0,0
48946,1997,199,0,0,0,0
48947,1998,199,0,0,0,0
48948,1999,199,0,0,0,0


In [100]:
# "Next state" table: every snapshot re-keyed to the date it is observed from.
# The shift uses m_lag (not a hard-coded 1) so it stays consistent with the
# maturation filter applied when building D_F.
# SD_OLD keeps the original snapshot date; SD is moved back by m_lag months.
D_N = T.assign(SD_OLD=T['SD'], SD=T['SD'] - m_lag)

In [101]:
# Display the shifted "next state" table.
D_N

Unnamed: 0,CNTR_ID,SD,DOD_ID,MOB,WRTOFF_ID,CLOSED_ID,SD_OLD
0,1,-1,0,0,0,0,0
1,2,-1,0,0,0,0,0
2,3,-1,0,0,0,0,0
3,4,-1,0,0,0,0,0
4,5,-1,0,0,0,0,0
...,...,...,...,...,...,...,...
49206,2006,199,0,0,0,0,200
49207,2007,199,0,0,0,0,200
49208,2008,199,0,0,0,0,200
49209,2009,199,0,0,0,0,200


In [104]:
# Pair each "from" row with the same contract's row one roll period later:
# *_F columns hold the current state, *_N columns the next state.
D = pd.merge(
    D_F,
    D_N,
    how='left',
    on=['CNTR_ID', 'SD'],
    suffixes=('_F', '_N'),
)

In [105]:
# Display the paired from/next table.
D

Unnamed: 0,CNTR_ID,SD,DOD_ID_F,MOB_F,WRTOFF_ID_F,CLOSED_ID_F,DOD_ID_N,MOB_N,WRTOFF_ID_N,CLOSED_ID_N,SD_OLD
0,164,40,1,24,0,0,0,25,0,1,41
1,171,40,0,23,0,0,0,24,0,1,41
2,172,40,0,23,0,0,0,24,0,1,41
3,173,40,0,23,0,0,0,24,0,1,41
4,174,40,0,23,0,0,0,24,0,1,41
...,...,...,...,...,...,...,...,...,...,...,...
37854,1996,199,0,0,0,0,0,1,0,0,200
37855,1997,199,0,0,0,0,0,1,0,0,200
37856,1998,199,0,0,0,0,0,1,0,0,200
37857,1999,199,0,0,0,0,0,1,0,0,200


In [109]:
# Observed transition counts: (state now) -> (state next), pooled over all dates.
# Missing "next" values are replaced by -1 so unmatched rows stay visible.
# Add 'SD' as the first grouping key to get a per-snapshot breakdown.
M = (
    D.fillna(-1)
     .groupby(['DOD_ID_F', 'WRTOFF_ID_F', 'CLOSED_ID_F',
               'DOD_ID_N', 'WRTOFF_ID_N', 'CLOSED_ID_N'])['CNTR_ID']
     .count()
)
M

  M = D.fillna(-1).groupby([#'SD',


DOD_ID_F  WRTOFF_ID_F  CLOSED_ID_F  DOD_ID_N  WRTOFF_ID_N  CLOSED_ID_N
0         0            0            0         0            0              32462
                                                           1               1388
                                    1         0            0               1845
1         0            0            0         0            0               1613
                                                           1                136
                                    1         0            0                 91
                                    2         0            0                103
2         0            0            0         0            0                  9
                                                           1                  1
                                    1         0            0                  5
                                    2         0            0                  5
                                    3         0  