-
Notifications
You must be signed in to change notification settings - Fork 0
/
dq.gom
150 lines (115 loc) · 5.1 KB
/
dq.gom
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
title: DQ model
outDir: /home/will/goMortgage/dq
buildData: no
buildModel: yes
assessModel: yes
// ************* buildData keys *************
// fields to stratify on at pass 1
strats1: state, aoDt, aoAge, aoDqCap6
// target # of rows in output of pass 1
sampleSize1: 30000000
// at the as-of date, we restrict to active loans with a max dq of 24 months and balance greater than $10k
where1: aoAge >= 0 AND aoDq >= 0 AND aoDq <= 24 AND aoUpb > 10000 AND mon.zb='00'
// fields to stratify on at pass 2
strats2: fcstMonth, trgDt
// target # of rows in output of pass 2
sampleSize2: 3000000
// at the target date, we restrict to dates after the as-of date which are active.
where2: trgZb='00' AND fcstMonth > 0
// fannie mortgage data created by https://pkg.go.dev/github.com/invertedv/fannie
mtgDb: mtg.fannie
// keyword specifies the source of the data
mtgFields: fannie
// non-loan data created by https://pkg.go.dev/github.com/invertedv/assemble
econDb: econGo.final
// the fannie data specifies geo location at a zip3 level
econFields: zip3
// outputs
pass1Strat: tmp.stratDq1
pass1Sample: tmp.sampleDq1
pass2Strat: tmp.stratDq2
pass2Sample: tmp.sampleDq2
// final table
outTable: tmp.modelDq
// key for final table
tableKey: lnId
// ************* buildModel keys *************
// targetDq is an int32 field that takes on values 0,..,12 (current, 1-11 months DQ, 12+ months DQ).
target: targetDq
// We treat targetDq as categorical - which will build a model with a softmax output layer
targetType: cat
// one-hot features. Note, it's fine for this to take up multiple lines
cat: purpose, propType, occ, amType, standard, nsDoc, nsUw, coBorr, hasSecond, aoPrior30, aoPrior60,
aoPrior90p, harp, aoMod, channel, covid, trgFcType, trgDqMax
// Continuous features. Note, these will automatically be normalized.
cts: fico, aoAge, term, y20PropVal, units, dti, trgUnempRate, trgEltv,
trgRefiIncentive, trgLbrGrowth, orgSpread
// Embedded features. The embedding dimension is in the braces.
emb: aoDq{5}; state {4}; fcstMonth{2}; trgAge{2}
// this is the model we're fitting.
layer1: FC(size:40, activation:relu)
layer2: FC(size:20, activation:relu)
layer3: FC(size:10, activation:relu)
layer4: FC(size:13, activation:softmax)
// Other specifications for the model build.
batchSize: 5000
epochs: 2000
earlyStopping: 40
learningRateStart: .0003
learningRateEnd: .0001
// This query pulls the data for fitting the coefficients. The %s will be replaced with the fields we need.
// The bucket field is a hash of the loan number.
modelQuery: SELECT %s FROM tmp.modelDq WHERE bucket < 10
// This query pulls the data for determining early stopping.
validateQuery: SELECT %s FROM tmp.modelDq WHERE bucket in (10,11,12,13,14)
// ************* assessModel keys *************
// This query pulls the data for the assessments.
assessQuery: SELECT %s FROM tmp.modelDq WHERE bucket in (15,16,17,18,19)
// save Assess Data + model output.
// The table will consist of all the fields used during the run plus any set of model outputs you specify.
saveTable: tmp.outDq
// We're saving five fields from the model output.
saveTableTargets: d120p{4,5,6,7,8,9,10,11,12}; d90{3}; d60{2}; d30{1}; current{0}
// Features not in the model we wish to keep and treat as categorical. These can be used in the assessment.
addlCat: aoMaxDq12, vintage, aoDqCap6, numBorr
// Features to keep, either for assessment or to add to the output table.
addlKeep: lnId, aoDt, trgDt
// Additional fields for the assessment.
assessAddl: aoIncome50, ltv, aoMaxDq12, trgUpbExp, state, aoIncome90, msaLoc,
aoPropVal, trgPropVal, vintage
// Run a by-feature assessment that is sliced by aoDqCap6 on the binary output that coalesces targetDq into two groups
// of (0,1,2,3) and (4,5,6,7,8,9,10,11,12). aoDqCap6 is the delinquency status at the as-of date where the
// delinquency levels are 0 through 5 and 6+ months.
assessNameaoDq: DQ 4+ Months
assessTargetaoDq: 4,5,6,7,8,9,10,11,12
assessSliceraoDq: aoDqCap6
// Run another assessment that is sliced by occupancy.
assessNameOcc: DQ 4+ Months
assessTargetOcc: 4,5,6,7,8,9,10,11,12
assessSlicerOcc: occ
// Run a by-curve assessment on the binary output that coalesces targetDq into two groups:
// 0-3 months DQ and 4+ months DQ. The graph will be two curves: model and average rate of 4+ months DQ
// where the average is over the distinct levels of trgYrQtr.
// trgYrQtr is the Year & Quarter of the target date.
curvesNameyrQtrD120: Target Quarter, DQ 4+ Months
curvesTargetyrQtrD120: 4,5,6,7,8,9,10,11,12
curvesSliceryrQtrD120: trgYrQtr
curvesNamevintageD120: Vintage, DQ 4+ Months
curvesTargetvintageD120: 4,5,6,7,8,9,10,11,12
curvesSlicervintageD120: vintage
curvesNamefmD120: Forecast Month, DQ 4+ Months
curvesTargetfmD120: 4,5,6,7,8,9,10,11,12
curvesSlicerfmD120: fcstMonth
curvesNametrgAgeD120: Target Age, DQ 4+ Months
curvesTargettrgAgeD120: 4,5,6,7,8,9,10,11,12
curvesSlicertrgAgeD120: trgAge
curvesNameyrQtrD30: Target Quarter, DQ 1 Month
curvesTargetyrQtrD30: 1
curvesSliceryrQtrD30: trgYrQtr
curvesNameyrQtrCur: Target Quarter, Current
curvesTargetyrQtrCur: 0
curvesSliceryrQtrCur: trgYrQtr
// general
show: no
plotHeight: 1200
plotWidth: 1600