# Модель долгосрочного погашения

<details>
<table align=left border="1">
<tr><td style="color:maroon;text-align:center;text-size:10;font-weight:bold">Загружаем извне</td><td style="color:maroon;text-align:center;text-size:10;font-weight:bold">Загружаем и считаем</td><td style="color:maroon;text-align:center;text-size:10;font-weight:bold">Считаем</td></tr>
<tr><td style="vertical-align:top">
    <table> 
    <tr><td style="color:navy;text-align:center;text-size:9">Переменная</td><td style="color:navy;text-align:center;text-size:9">Значение</td></tr> 
    <tr><td style="text-align:left;font-weight:bold">CPIAv</td><td style="text-align:left">ИПЦ, в среднем за год</td></tr>
    <tr><td style="text-align:left;font-weight:bold">LevelRate</td><td style="text-align:left">Уровень реальных ставок</td></tr>
    <tr><td style="text-align:left;font-weight:bold">loan_rate</td><td style="text-align:left">Средняя ставка по выданным ипотечным ссудам, %</td></tr>
    <tr><td style="text-align:left;font-weight:bold">p_MortgLifeAv_x</td><td style="text-align:left">Средний срок выданных ипотечных ссуд, лет</td></tr>
    <tr><td style="text-align:left;font-weight:bold">loans_and_ref_vol_MKD</td><td style="text-align:left">Объем выданных ипотечных ссуд, млн. руб.</td></tr>
    </table>
</td><td style="vertical-align:top">
    <table> 
    <tr><td style="color:navy;text-align:center;text-size:9">Переменная</td><td style="color:navy;text-align:center;text-size:9">Значение</td></tr> 
    <tr><td style="text-align:left;font-weight:bold">CPR</td><td style="text-align:left">Модель: Досрочные погашения</td></tr>
    </table>
</td>
    <td style="vertical-align:top">
<table> 
    <tr><td style="color:navy;text-align:center;text-size:9">Переменная</td><td style="color:navy;text-align:center;text-size:9">Значение</td></tr> 
    <tr><td style="text-align:left;font-weight:bold">PMT</td><td style="text-align:left">Сумма периодического платежа для аннуитета на основе постоянства сумм платежей и постоянной процентной ставки</td></tr>
    </table>
    </td></tr>
</table>
</details>

In [1]:
from IPython.display import Markdown as md
import pandas as pd
import numpy as np

import patsy
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats.mstats import gmean 
from datetime import datetime as dt

%run ../COMMON/common.ipynb # загрузка общих функций и констант, все, что оттуда, должно иметь префикс common.

# SQLAlchemy engines for the SQLite databases; the file paths are taken from the `common` notebook.
# NOTE(review): `sa` is not imported in this notebook's import cell - presumably it is
# brought into scope by the %run of common.ipynb; confirm.
conWork = sa.create_engine('sqlite+pysqlite:///{db_name}'.format(db_name=common.strYearDBPath)) # connection to the working database
conWorkEx = sa.create_engine('sqlite+pysqlite:///{db_name}'.format(db_name=common.strExYearDBPath)) # connection to the working database of exogenous variables
conWorkExH=sa.create_engine('sqlite+pysqlite:///{db_name}'.format(db_name=common.strExParamDBPath)) # connection to the working database of exogenous parameters
conWorkSvod=sa.create_engine('sqlite+pysqlite:///{db_name}'.format(db_name=common.strSvodDBPath)) # connection to the working SVOD database


# Year bounds used by the cells below: loan cohorts and the derived columns are
# computed for iFirstFactYear..iLastFactYear.
iFirstFactYear=2008
iLastFactYear=2019

# The forecast range starts right after the last fact year.
iFirstForecastYear=iLastFactYear+1
iLastForecastYear=2030

In [2]:
# strU='UPDATE headers SET code2="LevelRate" where code=239'

# with conWork.connect() as connection:
#     result = connection.execute(strU)

In [3]:
class repay_e:
    """Namespace-style holder for the working data frame of the repayment model.

    The class is never instantiated: ``MakeWorkFrame`` is called on the class
    itself and stores its result in the class attribute ``pdfWork``.
    """

    # Working frame; stays None until MakeWorkFrame() has been called.
    pdfWork=None

    # Codes of the exogenous and actual series requested from the databases.
    lstYearCodes=['CPIAv', 'LevelRate', 'loan_rate', 'p_MortgLifeAv_x', 'loans_and_ref_vol_MKD', 'CPR']

    def __init__(self):
        # Raised explicitly instead of via the `assert` statement so that the guard
        # is not stripped when Python runs with -O; exception type and message are unchanged.
        raise AssertionError('you can\'t create variables of repay_e class!')

    @staticmethod
    def _annuity_payment(annual_rate_pct, term_years, principal):
        """Level monthly payment of an annuity loan.

        Replaces ``np.pmt(rate / 1200, years * 12, -principal)``, which is no
        longer available in NumPy >= 1.20.

        Parameters
        ----------
        annual_rate_pct : pd.Series
            Annual interest rate in percent.
        term_years : pd.Series
            Loan term in years.
        principal : pd.Series
            Loan amount.

        Returns
        -------
        pd.Series
            Payment per month; NaN wherever any input is NaN.
        """
        monthly_rate = annual_rate_pct / 1200
        n_months = term_years * 12
        growth = (1 + monthly_rate) ** n_months
        payment = principal * monthly_rate * growth / (growth - 1)
        # With a zero rate the closed form degenerates to 0/0; the payment is then
        # simply the principal spread evenly over the term.
        return payment.where(monthly_rate != 0, principal / n_months)

    @staticmethod
    def MakeWorkFrame():
        ''' Load the source data and build the working frame.

            Source data:
                Actual values - from the year.sqlite3 database;
                Exogenous values - from the exog_year.sqlite3 database;
                Manually set values - from the exog_param.sqlite3 database;
                Results of other models - from the svod.sqlite3 database.

            The three frames are merged with ``combine_first`` (actual data takes
            priority, then exogenous, then manual parameters) and a ``pmt`` column
            with the monthly annuity payment is added.

            Returns the frame, which is also stored in ``repay_e.pdfWork``.
        '''
        strSelect=common.make_SELECT_YEAR_string(repay_e.lstYearCodes)

        # Actual data is passed through common.scale with the listed multipliers
        # (NOTE(review): presumably fractions -> percent and millions -> units; confirm in common.ipynb).
        pdfAct=(pd.read_sql(strSelect, con=conWork)
        .pipe(common.make_frame)
        .pipe(common.scale, list_fields=['LevelRate', 'loan_rate', 'CPR'], multiplier=100)
        .pipe(common.scale, list_fields=['loans_and_ref_vol_MKD',], multiplier=1e6))
        pdfExog=pd.read_sql(strSelect, con=conWorkEx).pipe(common.make_frame)
        pdfExogHandle=pd.read_sql(strSelect, con=conWorkExH).pipe(common.make_frame)

        repay_e.pdfWork=pdfAct.combine_first(pdfExog).combine_first(pdfExogHandle)
        repay_e.pdfWork['pmt'] = repay_e._annuity_payment(repay_e.pdfWork['loan_rate'],
                                                          repay_e.pdfWork['p_MortgLifeAv_x'],
                                                          repay_e.pdfWork['loans_and_ref_vol_MKD'])
        return repay_e.pdfWork

In [4]:
# Build the working frame and show a random sample of 10 rows ordered by index.
# NOTE(review): sample() is called without random_state, so the displayed rows differ between runs.
repay_e.MakeWorkFrame().sample(10).sort_index()

Unnamed: 0_level_0,CPIAv,CPR,LevelRate,loan_rate,loans_and_ref_vol_MKD,p_MortgLifeAv_x,pmt
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008,1.141048,11.127132,-5.975708,12.9,655808.0,17.941667,7833.664403
2009,1.116977,11.014075,-0.387689,14.3,152501.0,16.458333,2011.100001
2011,1.084634,12.22418,-0.343393,11.9,716944.0,14.908333,8577.537766
2016,1.070664,11.701338,3.518333,12.48,1473467.0,15.25,18040.258268
2018,1.028759,15.03184,4.543877,9.56,3013115.0,16.30938,30447.566328
2023,1.042907,,,,,22.171805,
2025,1.040099,,,,,23.5,
2028,1.039971,,,,,25.0,
2029,1.03861,,,,,25.0,
2031,1.039028,,,,,,


## Промежуточные расчеты

Для каждого года рассчитываем подфрейм длительности процедуры погашения поколений ипотечных кредитов:
  - \_prc
  - \_notional	
  - \_ntl	
  - \_debt	
  - \_cpr

Для у - год расчета подфрейма, i - месяц внутри подфрейма   

$
\begin{equation*}
    \_prc_{y,i} =  
     \begin{cases}
       \frac{\large loans\_and\_ref\_vol\_MKD_y * loan\_rate_y}{\large 1200} \quad ; \quad \text{при i=0} \\
       \frac{\large \_debt_{y, i-1} * loan\_rate_y}{\large 1200} \quad ; \quad \text{при i>0} 
     \end{cases}
\end{equation*}
$  

$\_notional_{y, i} = PMT_y - \_prc_{y, i}  $

$
\begin{equation*}
    \_ntl_{y, i} =  
     \begin{cases}
       loans\_and\_ref\_vol\_MKD_y \quad ; \quad \text{при i=0} \\
       \_debt_{y, i-1} - \_notional_{y, i} \quad ; \quad \text{при i>0} 
     \end{cases}
\end{equation*}
$  


$
\begin{equation*}
    \_debt_{y,i} =  
     \begin{cases}
       \_ntl_{y, i} - \_cpr_{y, i} \quad ; \quad \text{при } \_debt_{y, i} > 0 \\
       0 \quad ; \quad \text{при } \_debt_{y, i} <= 0 
     \end{cases}
\end{equation*}
$  


$
\begin{equation*}
    \_cpr_{y,i} =  
     \begin{cases}
       MIN\left( \_ntl_{y, i} * \left( \left(1 + \frac{CPR_y}{100} \right)^{\frac{1}{12}} - 1 \right), \quad \_ntl_{y, i} \right) \quad ; \quad \text{при } \_cpr_{y, i} > 0 \\
       0 \quad ; \quad \text{при } \_cpr_{y, i} <= 0
     \end{cases}
\end{equation*}
$   

**для каждого года расчет выполняется до полного списания долга, то есть до:**   $ \_debt_{y, i}=0$

**В дальнейших расчетах используются данные долга (debt) этих фреймов, усредненные по годам ( $ AverageFrame_{y, i} $)** где y - год, на который рассчитывался фрейм, и i - год внутри фрейма.
Например, фрейм рассчитан на 2008 год, внутри фрейма нам нужен 2010 год - $ AverageFrame_{2008, 2010} $

In [5]:
repay_e.pdfWork.loc[2009]

CPIAv                         1.116977
CPR                          11.014075
LevelRate                    -0.387689
loan_rate                    14.300000
loans_and_ref_vol_MKD    152501.000000
p_MortgLifeAv_x              16.458333
pmt                        2011.100001
Name: 2009, dtype: float64

In [9]:
class _rgml():
    """класс погашения поколений ипотечных кредитов"""
    
    def __init__(self, year_data:pd.Series):
        self._cprpow = (1+year_data['CPR']/100) ** (1/12) - 1
        self._loan_rate_corr=year_data['loan_rate'] / 1200
        self._pmt=year_data['pmt']
        self._debt=[year_data['loans_and_ref_vol_MKD'],]
        self._prc =[0]
        self._notional=[0 ]

        self._ntl=[ 0]
        self._cpr=[0]
        self._year=year_data.name
        self._pdf=self.calc_frame().groupby(pd.Grouper(level=0, freq="Y"))[['debt', 'cpr']].mean()
        
    def print(self):
        print('debt=', len(self._debt))
        print('prc=', len(self._prc))
        print('notional=', len(self._notional))
        print('ntl=', len(self._ntl))
        print('cpr=', len(self._cpr))

    def _calc_cpr(self, ntl):
        return min( ntl * self._cprpow, ntl)
        
    def _calc_prc(self, debt_1):
        return debt_1 * self._loan_rate_corr

    def _calc_ntl(self, debt_1, notional):
        return debt_1 - notional

    def _calc_notional(self, prc):
        return self._pmt-prc

    def _step(self, i):
#         print(i)
        self._prc.append(self._calc_prc(self._debt[i-1]))
        self._notional.append(self._calc_notional(self._prc[-1]))
        self._ntl.append(self._calc_ntl(self._debt[i-1], self._notional[-1]))
        self._cpr.append(self._calc_cpr(self._ntl[-1]))
        self._debt.append(self._ntl[-1] - self._cpr[-1])
#         self.print()

    
    def calc_frame(self):
        i=1
        while self._debt[-1]>0:
            self._step(i)
            i+=1
        self._pdf = pd.DataFrame({'debt':self._debt, 'cpr':self._cpr, 'prc':self._prc, 'notional':self._notional}).iloc[1:-1]

        self._pdf.index=pd.date_range(dt.date(self._year, 1, 1), periods=self._pdf.shape[0], freq='MS')
        self._pdf.index.name='date'
        return self._pdf
    
    @property
    def Frame(self):
        return self._pdf
    
    @property
    def AverageFrame(self):
#         return self._pdf.groupby(pd.Grouper(level=0, freq="Y"))[['debt', 'cpr']].mean()
        return self.Frame

    def AverageYear(self, iYear):
        if self._year>iYear: return 0
#         print(_pdf)
        try:
#             return self._pdf.loc[_pdf.index.year==iYear, 'debt'].values[0]
            return self._pdf.loc[self._pdf.index.year == iYear, 'debt'].values[0]
        except (IndexError, KeyError):
            return 0
    
# One _rgml schedule per cohort year from iFirstFactYear up to (not including)
# iFirstForecastYear, keyed by the row label of repay_e.pdfWork.
_rgml_dic={repay_e.pdfWork.loc[y].name: _rgml(repay_e.pdfWork.loc[y]) for y in range(iFirstFactYear, iFirstForecastYear)}
# _rgml_dic

## Расчет предикторов модели

$$ \_IPCgeo_y = \left( \prod _{i=y-4}^{y} CPIAv_{i}  \right)^{\frac {1}{5}} $$

$$ 
\begin{equation*}
    \_waropml_y = 
    \begin{cases}
       loan\_rate \quad \text{при y=iFirstFactYear} \\ \\
       \large {\frac{loan\_rate_y * loans\_and\_ref\_vol\_MKD_y + \sum_{i=iFirstFactYear}^{y-1} loan\_rate_{i} * AverageFrame_{i, y-1}}{loans\_and\_ref\_vol\_MKD_y + \sum_{i=iFirstFactYear}^{y-1} AverageFrame_{i, y-1}} }
       \quad \small \text{при iFirstFactYear < y <= iLastFactYear} 
        \end{cases}
\end{equation*}
$$




In [8]:
# Rolling geometric mean of CPIAv over windows of 5 rows (scipy gmean applied to each window).
repay_e.pdfWork['_IPCgeo']=repay_e.pdfWork['CPIAv'].rolling(5).apply(gmean)

def _calc_waropml_x1(x):
    """First term of _waropml for one row of the working frame.

    Returns loan_rate * new volume divided by (new volume + the sum of the
    earlier cohorts' average debt returned by _calc_year_average for the row).
    """
    new_volume = x['loans_and_ref_vol_MKD']
    weighted_rate = x['loan_rate'] * new_volume
    return weighted_rate / (new_volume + _calc_year_average(x))

def _calc_year_average(x):
    """Sum of AverageYear(year - 1) over all cohorts from iFirstFactYear to year - 1.

    ``x`` is either a year or a row of the working frame, in which case the
    row's ``name`` is taken as the year.
    """
    if type(x) is pd.Series:
        year = x.name
    else:
        year = x
    total = 0
    for cohort_year in range(iFirstFactYear, year):
        total += _rgml_dic[cohort_year].AverageYear(year-1)
    return total

def _calc_waropml_x2(x):
    """Rate-weighted average debt of the row's own cohort in the row's year.

    The weight is divided by (new volume + the row's '_sum_year_average').
    """
    own_year = x.name
    numerator = x['loan_rate'] * _rgml_dic[own_year].AverageYear(own_year)
    denominator = x['loans_and_ref_vol_MKD'] + x['_sum_year_average']
    return numerator / denominator

def _calc_wrpml_y(x):
    """_waropml for year ``x``: the '_waropml_x1' term plus the earlier cohorts' terms.

    Each earlier cohort (iFirstFactYear .. x-1) contributes its loan_rate times
    its AverageYear(x-1), divided by (new volume of year x + '_sum_year_average' of year x).
    """
    frame = repay_e.pdfWork
    new_volume = frame.loc[x, 'loans_and_ref_vol_MKD']
    carried = 0
    for cohort_year in range(iFirstFactYear, x):
        weighted = frame.loc[cohort_year, 'loan_rate'] * _rgml_dic[cohort_year].AverageYear(x-1)
        carried += weighted / (new_volume + frame.loc[x, '_sum_year_average'])
    return frame.loc[x, '_waropml_x1'] + carried

# Fill the helper columns for the rows iFirstFactYear..iLastFactYear only.
# '_sum_year_average' must be written before '_waropml', because _calc_wrpml_y reads it from the frame.
repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear, '_sum_year_average'] =  repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear].apply(_calc_year_average, axis=1)
repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear, '_waropml_x1'] =  repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear].apply(_calc_waropml_x1, axis=1)
# The first year takes loan_rate directly; later years are computed by _calc_wrpml_y.
repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear, '_waropml']=[repay_e.pdfWork.loc[iFirstFactYear, 'loan_rate']] + [_calc_wrpml_y(y) for y in range(iFirstFactYear+1, iLastFactYear+1)]


# Display the working frame with the new columns.
repay_e.pdfWork

Unnamed: 0_level_0,CPIAv,CPR,LevelRate,loan_rate,loans_and_ref_vol_MKD,p_MortgLifeAv_x,pmt,_IPCgeo,_waropml_x1,_sum_year_average,_waropml
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2004,1.108812,,2.63876,,18500.0,,,,,,
2005,1.127055,,0.274468,14.9,56341.0,14.55,791.320598,,,,
2006,1.09683,,-0.788789,13.7,263561.0,,,,,,
2007,1.089881,,-1.475535,12.6,556489.0,,,,,,
2008,1.141048,11.127132,-5.975708,12.9,655808.0,17.941667,7833.664403,1.112564,12.9,0.0,12.9
2009,1.116977,11.014075,-0.387689,14.3,152501.0,16.458333,2011.100001,1.114198,2.849446,612828.3,13.178967
2010,1.068508,10.625308,1.179214,13.1,380061.0,16.358333,4707.707311,1.102374,4.72015,674735.9,13.161161
2011,1.084634,12.22418,-0.343393,11.9,716944.0,14.908333,8577.537766,1.099911,5.184172,928764.2,12.612582
2012,1.050667,11.408883,3.003275,12.29,1031992.0,14.958333,12591.056712,1.09188,5.12898,1440855.0,12.461264
2013,1.067608,13.210223,0.589188,12.44,1353926.0,14.7,16751.936708,1.077448,4.815333,2143826.0,12.442544


In [444]:
def _calc_wrpml_y(x):
    """_waropml for year ``x``: the '_waropml_x1' term plus the earlier cohorts' terms.

    Each earlier cohort (iFirstFactYear .. x-1) contributes its loan_rate times
    its AverageYear(x-1), divided by (new volume of year x + '_sum_year_average' of year x).

    NOTE(review): this cell re-defines a function that an earlier cell already
    defines with the same logic; one of the two definitions is redundant.
    """
    frame = repay_e.pdfWork
    new_volume = frame.loc[x, 'loans_and_ref_vol_MKD']
    carried = 0
    for cohort_year in range(iFirstFactYear, x):
        weighted = frame.loc[cohort_year, 'loan_rate'] * _rgml_dic[cohort_year].AverageYear(x-1)
        carried += weighted / (new_volume + frame.loc[x, '_sum_year_average'])
    return frame.loc[x, '_waropml_x1'] + carried

# for y in range(iFirstFactYear+1, 2021):
#     lrf=repay_e.pdfWork.loc[y, 'loans_and_ref_vol_MKD']
#     x2=sum([(repay_e.pdfWork.loc[i, 'loan_rate'] * _rgml_dic[i].AverageYear(y-1)) / (lrf + repay_e.pdfWork.loc[y, '_sum_year_average']) for i in range(iFirstFactYear, y)])
#     print(y, repay_e.pdfWork.loc[y, '_waropml_x1'] + x2)

# Recompute '_waropml' for iFirstFactYear..iLastFactYear: the first year takes loan_rate
# directly, later years are computed by _calc_wrpml_y. Requires the '_sum_year_average'
# and '_waropml_x1' columns to be present in the frame already.
repay_e.pdfWork.loc[iFirstFactYear:iLastFactYear, '_waropml']=[repay_e.pdfWork.loc[iFirstFactYear, 'loan_rate']] + [_calc_wrpml_y(y) for y in range(iFirstFactYear+1, iLastFactYear+1)]
# Display the working frame.
repay_e.pdfWork

Unnamed: 0_level_0,CPIAv,CPR,LevelRate,loan_rate,loans_and_ref_vol_MKD,p_MortgLifeAv_x,pmt,_IPCgeo,_sum_year_average,_waropml_x1,_waropml
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2004,1.108812,,2.63876,,18500.0,,,,,,
2005,1.127055,,0.274468,14.9,56341.0,14.55,791.320598,,,,
2006,1.09683,,-0.788789,13.7,263561.0,,,,,,
2007,1.089881,,-1.475535,12.6,556489.0,,,,,,
2008,1.141048,11.127132,-5.975708,12.9,655808.0,17.941667,7833.664403,1.112564,0.0,12.9,12.9
2009,1.116977,11.014075,-0.387689,14.3,152501.0,16.458333,2011.100001,1.114198,612828.3,2.849446,13.178967
2010,1.068508,10.625308,1.179214,13.1,380061.0,16.358333,4707.707311,1.102374,674735.9,4.72015,13.161161
2011,1.084634,12.22418,-0.343393,11.9,716944.0,14.908333,8577.537766,1.099911,928764.2,5.184172,12.612582
2012,1.050667,11.408883,3.003275,12.29,1031992.0,14.958333,12591.056712,1.09188,1440855.0,5.12898,12.461264
2013,1.067608,13.210223,0.589188,12.44,1353926.0,14.7,16751.936708,1.077448,2143826.0,4.815333,12.442544


In [436]:
# Spot check: sum of AverageYear(2008) over the cohorts iFirstFactYear..2008.
_calc_year_average(2009)

2009


612828.2759451084

In [342]:
# Display the working frame.
repay_e.pdfWork


Unnamed: 0_level_0,CPIAv,CPR,LevelRate,loan_rate,loans_and_ref_vol_MKD,p_MortgLifeAv_x,pmt,_IPCgeo,_sum_year_average
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2004,1.108812,,2.63876,,18500.0,,,,
2005,1.127055,,0.274468,14.9,56341.0,14.55,791.320598,,
2006,1.09683,,-0.788789,13.7,263561.0,,,,
2007,1.089881,,-1.475535,12.6,556489.0,,,,
2008,1.141048,11.127132,-5.975708,12.9,655808.0,17.941667,7833.664403,1.112564,0.0
2009,1.116977,11.014075,-0.387689,14.3,152501.0,16.458333,2011.100001,1.114198,612828.3
2010,1.068508,10.625308,1.179214,13.1,380061.0,16.358333,4707.707311,1.102374,674735.9
2011,1.084634,12.22418,-0.343393,11.9,716944.0,14.908333,8577.537766,1.099911,928764.2
2012,1.050667,11.408883,3.003275,12.29,1031992.0,14.958333,12591.056712,1.09188,1440855.0
2013,1.067608,13.210223,0.589188,12.44,1353926.0,14.7,16751.936708,1.077448,2143826.0
