## investigations
Conclusions on mods:

- Freddie data only flags mod on the month it occurs, Fannie keeps the flag for the duration of the mod.
- The mod_sticky_flg unifies the data: it is always Y after a mod
- Mods have significant prior dq

Conclusions on borr_asst_plan:

- Flags are sticky
- Can have multiple episodes of plans
- Some but not a lot of overlap with mods
- F: most had at most months_dq=1 before the plan started
- R: most were 1+ months_dq prior, mostly used by Freddie
- Tend to be short term in nature
- Fannie only reporting since 7/2020

Conclusions on interest rate reductions:

- Almost all ir reductions are marked as mods
- mods involve ir reduction about 60% of the time

Conclusions on Deferrals

- Fannie started reporting 7/2020. Freddie goes back in time.
- Some overlap with borr_asst_plan when defrl_amt > 0 first time

In [57]:
import mortgage_imports.clickhouse_utilities as cu
import pandas as pd
import numpy as np
from muti import chu

In [58]:
# Notebook-wide pandas display settings, applied in the order listed:
# no column limit, wide console, centred column headers, up to 1000 rows.
for _opt_name, _opt_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.colheader_justify', 'center'),
    ('display.max_rows', 1000),
):
    pd.set_option(_opt_name, _opt_value)

# Connection handle that every chu.run_query call below receives.
client = chu.make_connection()

In [59]:
# Loan-month counts for every (src_data, mod_flg, mod_sticky_flg) combination;
# ARRAY JOIN unrolls the nested monthly history to one row per loan-month.
qry = """
SELECT
    src_data,
    m.mod_flg AS mf,
    m.mod_sticky_flg AS msf,
    COUNT(*) AS ln_mon
FROM
    unified.frannie ARRAY JOIN monthly AS m
GROUP BY
  src_data,
  mf,
  msf
ORDER BY
  src_data,
  mf,
  msf;
"""

df1 = chu.run_query(qry, client, return_df=True)
df1.head(100)

Unnamed: 0,src_data,mf,msf,ln_mon
0,fannie,!,!,33987606
1,fannie,!,Y,368597
2,fannie,N,N,2386167365
3,fannie,N,Y,96590
4,fannie,Y,Y,38026197
5,freddie:non-standard,N,N,329102871
6,freddie:non-standard,N,Y,16940777
7,freddie:non-standard,Y,Y,307970
8,freddie:standard,N,N,2002063990
9,freddie:standard,N,Y,25872646


In [60]:
# Relation between mod_flg and delinquency: for each loan that is ever flagged
# mod_flg = 'Y', take the max months_dq from the first month through the first
# 'Y' month (the slice length equals that month's index, so the mod month
# itself is included), cap it at 6 (6 means 6+), and count loans by source.
# Finding: most frequently loans are reset to current at the mod date and were
# 6+ months dq prior.
qry = """
SELECT
  src_data,
  prior_dq > 6 ? 6 : prior_dq AS prior_dq,
  count(*) AS nl
FROM (
    SELECT
        src_data,
        arrayMax(arraySlice(monthly.months_dq, 1, indexOf(monthly.mod_flg, 'Y'))) AS prior_dq
    FROM
        unified.frannie
    WHERE
        has(monthly.mod_flg, 'Y'))
GROUP BY src_data, prior_dq
ORDER BY src_data, prior_dq
"""

df2 = chu.run_query(qry, client, return_df=True)
df2.head(1000)

Unnamed: 0,src_data,prior_dq,nl
0,fannie,0,599
1,fannie,1,3064
2,fannie,2,18248
3,fannie,3,45688
4,fannie,4,74425
5,fannie,5,82729
6,fannie,6,368971
7,freddie:non-standard,0,1102
8,freddie:non-standard,1,5397
9,freddie:non-standard,2,11783


You can see the effect of the sticky mod flag: little effect on Fannie, big effect on Freddie,
where the flag only appears in the month of the mod.

modded loans tend to be very dq in their history

In [61]:
# Relationship between the mod flags and the borrower assistance plan code,
# counted in loan-months.
#
# borr_asst_plan codes:
#   R   = repayment plan
#   F   = forbearance
#   T   = trial
#   N   = no plan
#   7/9 = not applicable/available (fannie)
#
# Findings:
# - repayment plans: Freddie does more, about 1/3 are Y (standard) for
#   mod_sticky_flg. None for fannie.
# - forbearance: the vast majority are not listed as mod -- perhaps
#   coincidental overlap (i.e. forbearance on a loan that happened to be
#   modified).
qry = """
SELECT
    src_data,
    m.mod_flg AS mf,
    m.mod_sticky_flg AS msf,
    m.borr_asst_plan AS bap,
    count(*) AS ln_mon
FROM
    unified.frannie ARRAY JOIN monthly AS m
GROUP BY
  src_data,
  mf,
  msf,
  bap
ORDER BY
  src_data,
  mf,
  msf,
  bap;
"""

df = chu.run_query(qry, client, return_df=True)
# Re-sort so rows are grouped by plan code within each source before display.
(
    df.sort_values(by=['src_data', 'bap', 'mf', 'msf'])
    .loc[:, ['src_data', 'bap', 'mf', 'msf', 'ln_mon']]
    .head(1000)
)

Unnamed: 0,src_data,bap,mf,msf,ln_mon
0,fannie,!,!,!,30623409
2,fannie,!,!,Y,333358
4,fannie,!,N,N,2250206298
10,fannie,!,N,Y,77256
16,fannie,!,Y,Y,35892258
1,fannie,7,!,!,3364197
3,fannie,7,!,Y,35239
5,fannie,7,N,N,129464692
11,fannie,7,N,Y,14887
17,fannie,7,Y,Y,1458903


In [62]:

# Stickiness of borr_asst_plan = 'F' in the Fannie table: for 35 randomly
# ordered loans that ever carry 'F', print the plan codes from the first 'F'
# month to the end of the loan's history.
# Findings: the flag is sticky; code '7' seems to indicate end of forbearance.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'F')) AS bap
FROM
    fannie.final
WHERE
    has(monthly.borr_asst_plan, 'F')
ORDER BY rand32(1)
LIMIT 35
"""

df3 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df3['bap']))

[list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', '7'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', '7', '7'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', '7', '7', '7', '7', '7', '7'])
 list(['F', 'F', 'F', 'F', 'N', 'N', 'N', '7', '7']) list(['F', '7', '7'])
 list(['F', 'F', 'F', '7', '7', '7', '7', '7', '7'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'N', '7', '7'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', '7', '7'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', '7', '7', '7']) list(['F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'N', 'F', 'F', 'F', '7', 'N', '7'])
 list(['F', '7', '7', 'F', '7'])
 list(['F', 'F', 'N', '7', '7', '7', '7', '7', '7'])
 list(['F', 'F', 'F', 'F', 'F', '7', '7', '7', 'N']) list(['F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', '7', '7', '7'])
 list(['F', 'F', 'F', 'F', '7', 'F', 'F', 'F', 'F'])
 list(['F', 'F'

In [63]:

# Stickiness of borr_asst_plan = 'F' in the Freddie table: for 35 randomly
# ordered loans that ever carry 'F', print the plan codes from the first 'F'
# month to the end of the loan's history.
# Findings: the flag is sticky; the field becomes unpopulated after
# forbearance ends.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'F')) AS bap
FROM
    freddie.final
WHERE
    has(monthly.borr_asst_plan, 'F')
ORDER BY rand32(1)
LIMIT 35
"""

df4 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df4['bap']))

[list(['F', 'F', 'F', 'F', 'F', 'F']) list(['F', 'F', '!', '!', '!'])
 list(['F', 'F', 'F', '!', '!', '!', '!', '!'])
 list(['F', 'F', 'F', '!', '!', '!', '!', '!'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['F', 'F', 'F']) list(['F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', '!'])
 list(['F', 'F', '!', '!', '!', '!', '!', '!', '!'])
 list(['F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['F', 'F', 'F', 'F', 'F', 'F', '!'])
 list(['F', 'F', 'F', 'F', 'F', '!', '!', '!', '!'])
 list(['F', 'F', 'F', 'F', '!', '!'])
 list(['F', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['F', 'F', '!']) list(['F', 'F', 'F', 'F', 'F', 'F'

In [64]:

# Stickiness of borr_asst_plan = 'R' in the Fannie table: for 35 randomly
# ordered loans that ever carry 'R', print the plan codes from the first 'R'
# month to the end of the loan's history.
# Findings: the flag is sticky; code '7' seems to indicate end of repayment.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'R')) AS bap
FROM
    fannie.final
WHERE
    has(monthly.borr_asst_plan, 'R')
ORDER BY rand32(1)
LIMIT 35
"""

df5 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df5['bap']))

[list(['R', 'R', 'N', 'N', 'N', 'R', 'R'])
 list(['R', 'R', 'N', 'N', 'N', 'N', 'N', '7'])
 list(['R', 'R', 'R', 'R', 'R', 'R', '7', '7', '7'])
 list(['R', 'N', 'N', '7', '7', '7', '7', '7', '7'])
 list(['R', 'R', 'R', 'R', 'R', '7']) list(['R', 'R', 'N'])
 list(['R', 'R']) list(['R', '7', '7', '7', '7', '7', '7', '7', '7'])
 list(['R', 'F', '7', '7']) list(['R', 'R'])
 list(['R', 'R', 'R', '7', '7', '7', '7', '7', '7'])
 list(['R', 'R', 'N', 'N', 'R', 'R']) list(['R', 'R', 'R', 'R', 'R', 'R'])
 list(['R', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F'])
 list(['R', 'R', 'R', 'R', 'R', '7']) list(['R', 'R', 'R', 'R', 'R', 'N'])
 list(['R', 'R', 'R', 'R', 'R', 'R', '7']) list(['R', 'R', 'R', 'F'])
 list(['R', 'R'])
 list(['R', 'R', 'F', 'F', 'F', 'F', 'F', 'R', 'N', 'T', 'T'])
 list(['R', 'R', 'R', 'R', 'R', 'R']) list(['R', '7']) list(['R', '7'])
 list(['R', 'R', 'R', 'R', 'R', 'R']) list(['R', 'R', 'N', 'N'])
 list(['R', 'R', 'R', 'R', 'R']) list(['R', 'R', 'R', 'R'])
 list(['R', 'N', 'F', 'F

In [65]:

# Stickiness of borr_asst_plan = 'R' in the Freddie table: for 35 randomly
# ordered loans that ever carry 'R', print the plan codes from the first 'R'
# month to the end of the loan's history.
# Finding: the flag is sticky.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'R')) AS bap
FROM
    freddie.final
WHERE
    has(monthly.borr_asst_plan, 'R')
ORDER BY rand32(1)
LIMIT 35
"""

df6 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df6['bap']))

[list(['R', 'R', 'R', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['R', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', '!', '!', '!', '!', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['R', 'R', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['R', 'F', 'F'])
 list(['R', 'R', '!', '!', '!', '!', '!', '!', '!', 

In [66]:

# Stickiness of borr_asst_plan = 'T' in the Fannie table: for 35 randomly
# ordered loans that ever carry 'T', print the plan codes from the first 'T'
# month to the end of the loan's history.
# Findings: the flag is sticky; code '7' seems to indicate end of trial.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'T')) AS bap
FROM
    fannie.final
WHERE
    has(monthly.borr_asst_plan, 'T')
ORDER BY rand32(1)
LIMIT 35
"""

df7 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df7['bap']))

[list(['T', 'N', '7', '7', '7', '7', 'N', 'N', 'N'])
 list(['T', 'T', 'T', 'T', 'T'])
 list(['T', 'T', 'N', 'N', 'N', 'N', 'F', 'F', 'F']) list(['T'])
 list(['T', 'N']) list(['T', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'])
 list(['T', 'T', 'T', 'N', 'N', 'T', 'T', 'T', 'N'])
 list(['T', 'T', 'N', 'N', 'N']) list(['T']) list(['T', 'T', 'T'])
 list(['T', 'T', 'T', 'N', 'N', '7', '7', '7', '7'])
 list(['T', 'T', '7', 'N', 'N', 'N', 'N', 'N', 'N'])
 list(['T', 'N', 'N', '7']) list(['T', 'T', 'T', 'T', 'T'])
 list(['T', 'T', 'T', 'N']) list(['T', 'T', 'N', 'T', 'T'])
 list(['T', 'T', 'N', '7', 'N'])
 list(['T', 'T', 'T', 'N', 'N', '7', '7', '7'])
 list(['T', 'T', 'T', 'N', '7', '7', '7']) list(['T', 'T', 'T', '7'])
 list(['T', '7']) list(['T', 'N'])
 list(['T', 'N', 'T', 'T', 'T', 'N', '7']) list(['T', 'T', 'T', 'T'])
 list(['T', 'N', 'N', '7', '7', 'N', '7', '7', '7'])
 list(['T', 'T', 'T', '7', '7'])
 list(['T', 'T', '7', '7', '7', '7', 'N', '7', 'N'])
 list(['T', 'T', 'T', 'T', 'N', 'N', '

In [67]:

# Stickiness of borr_asst_plan = 'T' in the Freddie table (this query reads
# freddie.final): for 35 randomly ordered loans that ever carry 'T', print the
# plan codes from the first 'T' month to the end of the loan's history.
# Finding: the flag is sticky.
qry = """
SELECT
    arraySlice(monthly.borr_asst_plan, indexOf(monthly.borr_asst_plan, 'T')) AS bap
FROM
    freddie.final
WHERE
    has(monthly.borr_asst_plan, 'T')
ORDER BY rand32(1)
LIMIT 35
"""

df8 = chu.run_query(qry, client, return_df=True)
print(np.asarray(df8['bap']))

[list(['T', 'F', 'F', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['T', 'T', 'T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'T', 'T', 'T', 'T', '!', '!', '!', '!', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'])
 list(['T', 'T', 'T', 'T', 'T', 'T', 'T', '!', '!', '!', '!', '!'])
 list(['T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'R', 'R', '!', '!', '!', '!', '!', '!', '!', 'R', '!', '!', '!', '!', '!', '!', '!'])
 list(['T', 'T', 'T', 'T', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!'

In [68]:
# Loans active in 12/2020: count loans by whether their monthly history
# contains the date 2020-12-01 (dec2020 = 1 when it does).
qry = """
SELECT
    has(monthly.dt, toDate('2020-12-01')) AS dec2020,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY dec2020
"""

df8b = chu.run_query(qry, client, return_df=True)
df8b.head(5)

Unnamed: 0,dec2020,nl
0,0,68031587
1,1,26004490


In [69]:
# Count loans by whether borr_asst_plan is ever 'R' (bap = 1 when it is).
qry = """
SELECT
    has(monthly.borr_asst_plan, 'R') AS bap,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY bap
"""

df8c = chu.run_query(qry, client, return_df=True)
df8c.head(5)

Unnamed: 0,bap,nl
0,0,93877028
1,1,159049


In [70]:

# Loans with bap = F: distribution of the date of the first 'F' month,
# by source, most common dates first within each source.
qry = """
SELECT
    src_data,
    arrayElement(monthly.dt, indexOf(monthly.borr_asst_plan, 'F')) AS dt,
    count(*) AS nl
FROM
    unified.frannie
WHERE
    has(monthly.borr_asst_plan, 'F')
GROUP BY src_data, dt
ORDER BY src_data, nl DESC
"""

df8d = chu.run_query(qry, client, return_df=True)
df8d.head(1000)

Unnamed: 0,src_data,dt,nl
0,fannie,2020-04-01,647614
1,fannie,2020-05-01,161401
2,fannie,2020-06-01,48741
3,fannie,2020-07-01,34086
4,fannie,2020-08-01,26188
5,fannie,2020-09-01,22855
6,fannie,2020-03-01,21363
7,fannie,2020-12-01,19278
8,fannie,2020-10-01,18554
9,fannie,2020-11-01,16413


In [71]:
# Count loans by whether borr_asst_plan is ever 'F' (bap = 1 when it is).
# NOTE: df8b is also assigned by the 12/2020 activity cell earlier in the
# notebook; running this cell overwrites that result.
qry = """
SELECT
    has(monthly.borr_asst_plan, 'F') AS bap,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY bap
"""

df8b = chu.run_query(qry, client, return_df=True)
df8b.head(5)

Unnamed: 0,bap,nl
0,0,92202937
1,1,1833140


In [72]:
# Count loans by whether borr_asst_plan is ever 'T' (bap = 1 when it is).
qry = """
SELECT
    has(monthly.borr_asst_plan, 'T') AS bap,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY bap
"""

df8a = chu.run_query(qry, client, return_df=True)
df8a.head(5)

Unnamed: 0,bap,nl
0,0,93691931
1,1,344146


In [73]:

# Relation between borr_asst_plan = 'F' and delinquency: for each loan that
# ever carries 'F', take the max months_dq from the first month through the
# first 'F' month (that month is included in the slice), cap it at 6
# (6 means 6+), and count loans by source.
qry = """
SELECT
  src_data,
  prior_dq > 6 ? 6 : prior_dq AS prior_dq,
  count(*) AS nl
FROM (
    SELECT
        src_data,
        arrayMax(arraySlice(monthly.months_dq, 1, indexOf(monthly.borr_asst_plan, 'F'))) AS prior_dq
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'F'))
GROUP BY src_data, prior_dq
ORDER BY src_data, prior_dq
"""

df9 = chu.run_query(qry, client, return_df=True)
df9.head(1000)

Unnamed: 0,src_data,prior_dq,nl
0,fannie,0,389598
1,fannie,1,459036
2,fannie,2,65115
3,fannie,3,22216
4,fannie,4,13521
5,fannie,5,12748
6,fannie,6,54371
7,freddie:non-standard,0,3210
8,freddie:non-standard,1,27499
9,freddie:non-standard,2,7696


In [74]:

# Relation between borr_asst_plan = 'R' and delinquency: for each loan that
# ever carries 'R', take the max months_dq from the first month through the
# first 'R' month (that month is included in the slice), cap it at 6
# (6 means 6+), and count loans by source.
# NOTE: df9 is also assigned by the bap = 'F' cell; this overwrites it.
qry = """
SELECT
  src_data,
  prior_dq > 6 ? 6 : prior_dq AS prior_dq,
  count(*) AS nl
FROM (
    SELECT
        src_data,
        arrayMax(arraySlice(monthly.months_dq, 1, indexOf(monthly.borr_asst_plan, 'R'))) AS prior_dq
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'R'))
GROUP BY src_data, prior_dq
ORDER BY src_data, prior_dq
"""

df9 = chu.run_query(qry, client, return_df=True)
df9.head(1000)

Unnamed: 0,src_data,prior_dq,nl
0,fannie,0,638
1,fannie,1,3370
2,fannie,2,5087
3,fannie,3,3896
4,fannie,4,1061
5,fannie,5,1055
6,fannie,6,3455
7,freddie:non-standard,0,19
8,freddie:non-standard,1,2872
9,freddie:non-standard,2,4829


In [75]:
# Average number of months with bap = 'F' per loan, over loans that ever carry
# 'F' and whose last monthly record is not 'F' (so loans still in forbearance
# at the end of their history are left out).
qry = """
SELECT
  avg(num_months)
FROM (
    SELECT
        arrayElement(monthly.borr_asst_plan, length(monthly.borr_asst_plan)) AS last_element,
        countEqual(monthly.borr_asst_plan, 'F') AS num_months
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'F')
        AND last_element != 'F')
"""

df9a = chu.run_query(qry, client, return_df=True)
df9a.head(1000)


Unnamed: 0,avg_num_months_
0,3.872851


In [76]:
# Distribution of the number of months with bap = 'F' per loan, over loans
# that ever carry 'F' and whose last monthly record is not 'F'.
# The count covers every 'F' month of the loan, across all episodes.
qry = """
SELECT
  num_months,
  count(*) AS num_loans
FROM (
    SELECT
        arrayElement(monthly.borr_asst_plan, length(monthly.borr_asst_plan)) AS last_element,
        countEqual(monthly.borr_asst_plan, 'F') AS num_months
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'F')
        AND last_element != 'F')
GROUP BY num_months
ORDER BY num_months
"""

df9a = chu.run_query(qry, client, return_df=True)
df9a.head(1000)


Unnamed: 0,num_months,num_loans
0,1,138364
1,2,178454
2,3,226399
3,4,169206
4,5,132913
5,6,130924
6,7,75306
7,8,38321
8,9,4190
9,10,2179


In [77]:
# Distribution of the number of months with bap = 'R' per loan, over loans
# that ever carry 'R' and whose last monthly record is not 'R'.
# The count covers every 'R' month of the loan, across all episodes.
qry = """
SELECT
  num_months,
  count(*) AS num_loans
FROM (
    SELECT
        arrayElement(monthly.borr_asst_plan, length(monthly.borr_asst_plan)) AS last_element,
        countEqual(monthly.borr_asst_plan, 'R') AS num_months
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'R')
        AND last_element != 'R')
GROUP BY num_months
ORDER BY num_months
"""

df9a = chu.run_query(qry, client, return_df=True)
df9a.head(1000)


Unnamed: 0,num_months,num_loans
0,1,39228
1,2,25574
2,3,19281
3,4,15455
4,5,15901
5,6,11138
6,7,5652
7,8,3811
8,9,2856
9,10,2361


In [78]:
# Distribution of the number of months with bap = 'T' per loan, over loans
# that ever carry 'T' and whose last monthly record is not 'T'.
# The count covers every 'T' month of the loan, across all episodes.
qry = """
SELECT
  num_months,
  count(*) AS num_loans
FROM (
    SELECT
        arrayElement(monthly.borr_asst_plan, length(monthly.borr_asst_plan)) AS last_element,
        countEqual(monthly.borr_asst_plan, 'T') AS num_months
    FROM
        unified.frannie
    WHERE
        has(monthly.borr_asst_plan, 'T')
        AND last_element != 'T')
GROUP BY num_months
ORDER BY num_months
"""

df9a = chu.run_query(qry, client, return_df=True)
df9a.head(1000)



Unnamed: 0,num_months,num_loans
0,1,36330
1,2,52697
2,3,113712
3,4,46518
4,5,22781
5,6,16199
6,7,10603
7,8,7142
8,9,4581
9,10,3050


Conclusions on borr_asst_plan:

- Flags are sticky
- Can have multiple episodes of plans
- Some but not a lot of overlap with mods
- F: most had at most months_dq=1 before the plan started
- R: most were 1+ months_dq prior, mostly used by Freddie
- Tend to be short term in nature

In [79]:

# Relation between the mod flag and interest-rate reductions (FRM loans only).
# delta_ir is ln_orig_ir minus the monthly rate for months where zb_cd = '!',
# and 0 otherwise ('!' presumably marks an unpopulated code -- TODO confirm).
# For loans whose largest reduction exceeds 0.25, read mod_sticky_flg in the
# first month where the reduction exceeds 0.25, then count loans by source and
# that flag.
qry = """
SELECT
    src_data,
    msf,
    COUNT(*) AS ln
FROM (
    SELECT
        ln_id,
        src_data,
        ln_amort_cd,
        arrayMap((ir, zb) -> IF(zb='!', ln_orig_ir - ir, 0.0), monthly.ir, monthly.zb_cd) AS delta_ir,
        arrayMax(delta_ir) AS md,
        arrayFirstIndex(x-> IF(x > 0.25, 1, 0), delta_ir) AS fbig,
        arrayElement(monthly.mod_sticky_flg, fbig) AS msf
    FROM
        unified.frannie
    WHERE
        md > 0.25
        AND ln_amort_cd = 'FRM')
GROUP BY src_data, msf
ORDER BY src_data, msf
"""

dfa = chu.run_query(qry, client, return_df=True)
dfa.head(1000)

Unnamed: 0,src_data,msf,ln
0,fannie,N,6885
1,fannie,Y,367806
2,freddie:non-standard,N,12971
3,freddie:non-standard,Y,134208
4,freddie:standard,N,14897
5,freddie:standard,Y,256644


In [80]:
# Share of modified loans that show an interest-rate reduction (FRM only).
# Among loans whose mod_sticky_flg is ever 'Y', count by source how many have
# a largest reduction (ln_orig_ir minus the monthly rate, taken only where
# zb_cd = '!') above 0.25, and divide by the number of such loans.
qry = """
SELECT
    src_data,
    sum(IF(max_delta_ir > 0.25, 1, 0)) AS ir_reduction,
    count(*) AS ln_mods,
    sum(IF(max_delta_ir > 0.25, 1, 0)) / count(*) AS ir_reduction_rate
FROM (
    SELECT
        ln_id,
        src_data,
        ln_amort_cd,
        arrayMap((ir, zb) -> IF(zb='!', ln_orig_ir - ir, 0.0), monthly.ir, monthly.zb_cd) AS delta_ir,
        arrayMax(delta_ir) AS max_delta_ir
    FROM
        unified.frannie
    WHERE
        has(monthly.mod_sticky_flg, 'Y')
        AND ln_amort_cd = 'FRM')
GROUP BY src_data
ORDER BY src_data
"""

dfb = chu.run_query(qry, client, return_df=True)
dfb.head(1000)

Unnamed: 0,src_data,ir_reduction,ln_mods,ir_reduction_rate
0,fannie,368559,593724,0.620758
1,freddie:non-standard,140750,174341,0.807326
2,freddie:standard,270713,434767,0.622662


Almost all ir reductions are marked as mods
mods involve ir reduction about 60% of the time

freddie/fannie comparisons

In [81]:
# ln_purp_cd: loan counts by source and loan purpose code, plus each code's
# percentage share of its source's loans.
qry = \
"""
SELECT
    src_data,
    ln_purp_cd,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY src_data, ln_purp_cd
ORDER BY src_data, ln_purp_cd
"""

dfx1 = chu.run_query(qry, client, return_df=True)

# Per-source totals. Series.rename returns a new Series, so it has to be part
# of the assignment; the name 'tot' becomes the merged column's name.
dfx = dfx1.groupby('src_data')['nl'].sum().rename('tot')
dfx1 = dfx1.merge(dfx, on='src_data')
dfx1['distr'] = 100.0 * dfx1['nl'] / dfx1['tot']
dfx1[['src_data', 'ln_purp_cd', 'nl', 'distr']].head(n=100)

Unnamed: 0,src_data,ln_purp_cd,nl,distr
0,fannie,C,13563437,27.72044
1,fannie,P,17538824,35.845186
2,fannie,R,17812585,36.404688
3,fannie,U,14525,0.029686
4,freddie:non-standard,C,1284985,19.565635
5,freddie:non-standard,P,3454285,52.596162
6,freddie:non-standard,R,1819708,27.707516
7,freddie:non-standard,U,8583,0.130688
8,freddie:standard,C,9687698,25.137294
9,freddie:standard,P,12879922,33.420363


In [82]:
# ln_hrprog_flg: loan counts by source and ln_hrprog_flg value, plus each
# value's percentage share of its source's loans.
qry = \
"""
SELECT
    src_data,
    ln_hrprog_flg,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY src_data, ln_hrprog_flg
ORDER BY src_data, ln_hrprog_flg
"""

dfx1 = chu.run_query(qry, client, return_df=True)

# Per-source totals. Series.rename returns a new Series, so it has to be part
# of the assignment; the name 'tot' becomes the merged column's name.
dfx = dfx1.groupby('src_data')['nl'].sum().rename('tot')
dfx1 = dfx1.merge(dfx, on='src_data')
dfx1['distr'] = 100.0 * dfx1['nl'] / dfx1['tot']
dfx1[['src_data', 'ln_hrprog_flg', 'nl', 'distr']].head(n=100)

Unnamed: 0,src_data,ln_hrprog_flg,nl,distr
0,fannie,!,37651429,76.950568
1,fannie,N,10688365,21.844477
2,fannie,Y,589577,1.204955
3,freddie:non-standard,H,152004,2.314467
4,freddie:non-standard,N,6415557,97.685533
5,freddie:standard,H,544867,1.413801
6,freddie:standard,N,37994278,98.586199


In [83]:

# prop_type_cd: loan counts by source and property type code, plus each
# code's percentage share of its source's loans.
qry = \
"""
SELECT
    src_data,
    prop_type_cd,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY src_data, prop_type_cd
ORDER BY src_data, prop_type_cd
"""

dfx1 = chu.run_query(qry, client, return_df=True)

# Per-source totals. Series.rename returns a new Series, so it has to be part
# of the assignment; the name 'tot' becomes the merged column's name.
dfx = dfx1.groupby('src_data')['nl'].sum().rename('tot')
dfx1 = dfx1.merge(dfx, on='src_data')
dfx1['distr'] = 100.0 * dfx1['nl'] / dfx1['tot']
dfx1[['src_data', 'prop_type_cd', 'nl', 'distr']].head(n=100)

Unnamed: 0,src_data,prop_type_cd,nl,distr
0,fannie,CO,3989227,8.153031
1,fannie,CP,216089,0.441635
2,fannie,MH,337740,0.69026
3,fannie,PU,9196313,18.795077
4,fannie,SF,35190002,71.919997
5,freddie:non-standard,!,69,0.001051
6,freddie:non-standard,CO,754873,11.493963
7,freddie:non-standard,CP,13501,0.205571
8,freddie:non-standard,MH,31420,0.478412
9,freddie:non-standard,PU,988537,15.051813


In [84]:

# prop_occ_cd: loan counts by source and occupancy code, plus each code's
# percentage share of its source's loans.
qry = \
"""
SELECT
    src_data,
    prop_occ_cd,
    count(*) AS nl
FROM
    unified.frannie
GROUP BY src_data, prop_occ_cd
ORDER BY src_data, prop_occ_cd
"""

dfx1 = chu.run_query(qry, client, return_df=True)

# Per-source totals. Series.rename returns a new Series, so it has to be part
# of the assignment; the name 'tot' becomes the merged column's name.
dfx = dfx1.groupby('src_data')['nl'].sum().rename('tot')
dfx1 = dfx1.merge(dfx, on='src_data')
dfx1['distr'] = 100.0 * dfx1['nl'] / dfx1['tot']
dfx1[['src_data', 'prop_occ_cd', 'nl', 'distr']].head(n=100)

Unnamed: 0,src_data,prop_occ_cd,nl,distr
0,fannie,I,3330499,6.806748
1,fannie,P,43671118,89.253381
2,fannie,S,1927600,3.939556
3,fannie,U,154,0.000315
4,freddie:non-standard,I,329580,5.018301
5,freddie:non-standard,P,5903110,89.882835
6,freddie:non-standard,S,326316,4.968602
7,freddie:non-standard,U,8555,0.130261
8,freddie:standard,I,2129497,5.525543
9,freddie:standard,P,34957733,90.70708


In [85]:


# ln_orig_ltv: min / mean / max by source, using only loans where the value
# is greater than 0.
qry = """
SELECT
    src_data,
    min(ln_orig_ltv),
    avg(ln_orig_ltv),
    max(ln_orig_ltv)
FROM
    unified.frannie
WHERE
    ln_orig_ltv > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_ln_orig_ltv_,avg_ln_orig_ltv_,max_ln_orig_ltv_
0,fannie,1,71.211866,999
1,freddie:non-standard,5,76.806368,145
2,freddie:standard,1,70.777542,809


In [86]:


# ln_orig_cltv: min / mean / max by source, using only loans where the value
# is greater than 0.
qry = """
SELECT
    src_data,
    min(ln_orig_cltv),
    avg(ln_orig_cltv),
    max(ln_orig_cltv)
FROM
    unified.frannie
WHERE
    ln_orig_cltv > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_ln_orig_cltv_,avg_ln_orig_cltv_,max_ln_orig_cltv_
0,fannie,1,72.034021,999
1,freddie:non-standard,5,78.502685,401
2,freddie:standard,1,72.000797,899


In [87]:


# borr_orig_fico: min / mean / max by source, using only loans where the
# value is greater than 0.
qry = """
SELECT
    src_data,
    min(borr_orig_fico),
    avg(borr_orig_fico),
    max(borr_orig_fico)
FROM
    unified.frannie
WHERE
    borr_orig_fico > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_borr_orig_fico_,avg_borr_orig_fico_,max_borr_orig_fico_
0,fannie,300,745.126012,850
1,freddie:non-standard,300,720.790632,850
2,freddie:standard,300,741.210257,850


In [88]:
# borr_dti: min / mean / max by source, using only loans where the value is
# greater than 0.
qry = """
SELECT
    src_data,
    min(borr_dti),
    avg(borr_dti),
    max(borr_dti)
FROM
    unified.frannie
WHERE
    borr_dti > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_borr_dti_,avg_borr_dti_,max_borr_dti_
0,fannie,1,33.504046,64
1,freddie:non-standard,1,34.439928,65
2,freddie:standard,1,33.015878,65


In [89]:
# ln_orig_prin: min / mean / max by source, using only loans where the value
# is greater than 0.
qry = """
SELECT
    src_data,
    min(ln_orig_prin),
    avg(ln_orig_prin),
    max(ln_orig_prin)
FROM
    unified.frannie
WHERE
    ln_orig_prin > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_ln_orig_prin_,avg_ln_orig_prin_,max_ln_orig_prin_
0,fannie,1000.0,202958.224519,1473000.0
1,freddie:non-standard,1000.0,187646.641729,1387000.0
2,freddie:standard,1000.0,192674.690059,1473000.0


In [90]:
# ln_orig_ir: min / mean / max by source, using only loans where the value is
# greater than 0.
qry = """
SELECT
    src_data,
    min(ln_orig_ir),
    avg(ln_orig_ir),
    max(ln_orig_ir)
FROM
    unified.frannie
WHERE
    ln_orig_ir > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_ln_orig_ir_,avg_ln_orig_ir_,max_ln_orig_ir_
0,fannie,1.75,4.965874,16.5
1,freddie:non-standard,0.5,6.015233,14.05
2,freddie:standard,1.75,5.091005,13.95


In [91]:
# ln_mi_pct: min / mean / max by source, using only loans where the value is
# greater than 0.
qry = """
SELECT
    src_data,
    min(ln_mi_pct),
    avg(ln_mi_pct),
    max(ln_mi_pct)
FROM
    unified.frannie
WHERE
    ln_mi_pct > 0
GROUP BY src_data
ORDER BY src_data
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,min_ln_mi_pct_,avg_ln_mi_pct_,max_ln_mi_pct_
0,fannie,0.12,24.026512,65.0
1,freddie:non-standard,1.0,25.300741,53.0
2,freddie:standard,1.0,24.069388,54.0


In [92]:

# defrl_amt: loan-months with a positive deferral amount, counted by source
# and mod_sticky_flg.
qry = """
SELECT
    src_data,
    m.mod_sticky_flg AS msf,
    count(*) as nl
FROM
    unified.frannie ARRAY JOIN monthly AS m
WHERE
    m.defrl_amt > 0
GROUP BY src_data, msf
ORDER BY src_data, msf
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,msf,nl
0,fannie,N,761481
1,fannie,Y,34997
2,freddie:non-standard,N,179113
3,freddie:non-standard,Y,4979734
4,freddie:standard,N,1242125
5,freddie:standard,Y,4896033


In [93]:

# mod_sticky_flg in the first month where defrl_amt > 0: loans counted by
# source and that flag. Loans with no positive deferral month (first_def = 0)
# are filtered out.
qry = """
SELECT
    src_data,
    msf,
    count(*) AS nl
FROM (
SELECT
    src_data,
    arrayFirstIndex(x -> IF(x > 0, 1, 0), monthly.defrl_amt) AS first_def,
    arrayElement(monthly.mod_sticky_flg, first_def) AS msf,
    arrayElement(monthly.borr_asst_plan, first_def) AS bap
FROM
    unified.frannie
WHERE first_def > 0)
GROUP BY src_data, msf
ORDER BY src_data, msf

"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,msf,nl
0,fannie,N,200738
1,fannie,Y,10062
2,freddie:non-standard,N,67254
3,freddie:non-standard,Y,20987
4,freddie:standard,N,240845
5,freddie:standard,Y,27201


In [94]:

# borr_asst_plan in the first month where defrl_amt > 0: loans counted by
# source and that plan code. Loans with no positive deferral month
# (first_def = 0) are filtered out.
qry = """
SELECT
    src_data,
    bap,
    count(*) AS nl
FROM (
SELECT
    src_data,
    arrayFirstIndex(x -> IF(x > 0, 1, 0), monthly.defrl_amt) AS first_def,
    arrayElement(monthly.mod_sticky_flg, first_def) AS msf,
    arrayElement(monthly.borr_asst_plan, first_def) AS bap
FROM
    unified.frannie
WHERE first_def > 0)
GROUP BY src_data, bap
ORDER BY src_data, bap

"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,bap,nl
0,fannie,7,139228
1,fannie,F,54689
2,fannie,N,16708
3,fannie,R,109
4,fannie,T,66
5,freddie:non-standard,!,74805
6,freddie:non-standard,F,9268
7,freddie:non-standard,R,81
8,freddie:non-standard,T,4087
9,freddie:standard,!,113730


In [95]:

# defrl_amt: loan-months with a positive deferral amount, counted by source
# and month, with the busiest months first within each source.
qry = """
SELECT
    src_data,
    m.dt AS dt,
    count(*) as nl
FROM
    unified.frannie ARRAY JOIN monthly AS m
WHERE
    m.defrl_amt > 0
GROUP BY src_data, dt
ORDER BY src_data, nl DESC
"""

dfx1 = chu.run_query(qry, client, return_df=True)
dfx1.head(1000)

Unnamed: 0,src_data,dt,nl
0,fannie,2020-12-01,196148
1,fannie,2020-11-01,180986
2,fannie,2020-10-01,159728
3,fannie,2020-09-01,119703
4,fannie,2020-08-01,84579
5,fannie,2020-07-01,55334
6,freddie:non-standard,2015-08-01,58671
7,freddie:non-standard,2015-07-01,58657
8,freddie:non-standard,2015-09-01,58652
9,freddie:non-standard,2015-06-01,58642


In [96]:

# defrl_amt: pull 10 randomly ordered loans that have a positive deferral
# amount in at least one month, then print their monthly deferral amounts,
# sticky mod flags and dates. The bap column is selected but not printed.
qry = """
SELECT
    src_data,
    arrayMax(monthly.defrl_amt) AS da,
    monthly.borr_asst_plan AS bap,
    monthly.mod_sticky_flg AS msf,
    monthly.defrl_amt AS mad,
    monthly.dt AS dt

FROM
    unified.frannie
WHERE
    da > 0
ORDER BY rand32()
LIMIT 10
"""

dfx1 = chu.run_query(qry, client, return_df=True)
for _col in ('mad', 'msf', 'dt'):
    print(np.asarray(dfx1[_col]))


[list([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1924.43994140625, 1924.43994140625, 1924.43994140625, 1924.43994140625, 1924.43994140625, 1924.43994140625, 1924.43994140625])
 list([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 29059.830078125])
 list([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1842.8499755859375, 1842.8499755859375, 1842.8499755859375, 1842.8499755859375, 1842.8499755859375])
 list([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7832.68017578125, 7832.68017578125, 7832.68017578125, 7832.68017578125, 7832.68017578125])
 list([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0