-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepayScore.gom
133 lines (103 loc) · 4.17 KB
/
prepayScore.gom
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
title: Prepay Scoring Model
outDir: /home/will/goMortgage/prepay
buildData: yes
buildModel: yes
assessModel: yes
// ************* buildData keys *************
// fields to stratify on at pass 1
strats1: state, aoDt, aoAge, aoDqCap6
// target # of rows in output of pass 1
sampleSize1: 30000000
// at the as-of date, we restrict to active loans with a max dq of 24 months and balance greater than $10k
where1: aoAge >= 0 AND aoDq >= 0 AND aoDq <= 24 AND aoUpb > 10000 AND mon.zb='00'
// fields to stratify on at pass 2
strats2: trgDt
// target # of rows in output of pass 2
sampleSize2: 3000000
// restrict to loans that, at the end of the window, (a) are still active, (b) have prepaid, or (c) have defaulted
where2: trgZb in ('00', '01', '03', '09')
// Assess performance over the 24 months after the as-of date
window: 24
// fannie mortgage data created by https://pkg.go.dev/github.com/invertedv/fannie
mtgDb: mtg.fannie
// keyword specifies the source of the data
mtgFields: fannie
// non-loan data created by https://pkg.go.dev/github.com/invertedv/assemble
econDb: econGo.final
// the fannie data specifies geo location at a zip3 level
econFields: zip3
// outputs
pass1Strat: tmp.stratPp1
pass1Sample: tmp.samplePp1
pass2Strat: tmp.stratPp2
pass2Sample: tmp.samplePp2
// final table
outTable: tmp.modelPp
// key for final table
tableKey: lnId
// ************* buildModel keys *************
// targetPp is an int32 field that takes on values 0, 1 -- 1 means the loan prepaid in the 24-month window after aoDt
target: targetPp
// We treat targetPp as categorical, which will build a model with a softmax output layer
targetType: cat
// One-hot features. Note, it's fine for this to take up multiple lines.
cat: purpose, propType, occ, amType, standard, nsDoc, nsUw, coBorr, hasSecond, aoPrior30, aoPrior60,
aoPrior90p, harp, aoMod, aoBap, channel, covid, trgFcType, trgDqMax
// Continuous features. Note, these will automatically be normalized.
cts: fico, aoAge, term, y20PropVal, units, dti, trgUnempRate, trgEltv,
trgRefiIncentive, trgLbrGrowth, orgSpread
// Embedded features. The embedding dimension is in the braces.
emb: aoDq{5}; state {4}; trgAge{2}
// this is the model we're fitting.
layer1: FC(size:40, activation:relu)
layer2: FC(size:20, activation:relu)
layer3: FC(size:10, activation:relu)
layer4: FC(size:2, activation:softmax)
// Other specifications for the model build.
batchSize: 5000
epochs: 2000
earlyStopping: 40
learningRateStart: .0003
learningRateEnd: .0001
// This query pulls the data for fitting the coefficients. The %s will be replaced with the fields we need.
// The bucket field is a hash of the loan number.
modelQuery: SELECT %s FROM tmp.modelPp WHERE bucket < 10
// This query pulls the data for determining early stopping.
validateQuery: SELECT %s FROM tmp.modelPp WHERE bucket in (10,11,12,13,14)
// ************* assessModel keys *************
// This query pulls the data for the assessments.
assessQuery: SELECT %s FROM tmp.modelPp WHERE bucket in (15,16,17,18,19)
// save Assess Data + model output.
// The table will consist of all the fields used during the run plus any set of model outputs you specify.
saveTable: tmp.outPp
// We're saving one field (prepay) from the model output.
saveTableTargets: prepay{1}
// Features not in the model we wish to keep and treat as categorical. These can be used in the assessment.
addlCat: aoMaxDq12, vintage, aoDqCap6, numBorr
// Features to keep, either for assessment or to add to the output table.
addlKeep: lnId, aoDt, trgDt
// Additional fields for the assessment.
assessAddl: aoIncome50, ltv, aoMaxDq12, trgUpbExp, state, aoIncome90, msaLoc,
aoPropVal, trgPropVal, vintage
// Run a by-feature assessment that is sliced by aoDqCap6
assessNameaoDq: Prepay
assessTargetaoDq: 1
assessSliceraoDq: aoDqCap6
// Run another assessment that is sliced by occupancy.
assessNameOcc: Prepay
assessTargetOcc: 1
assessSlicerOcc: occ
// Run by-curve assessments
curvesNameyrQtr: Target Quarter, Prepay
curvesTargetyrQtr: 1
curvesSliceryrQtr: trgYrQtr
curvesNamevintage: Vintage, Prepay
curvesTargetvintage: 1
curvesSlicervintage: vintage
curvesNametrgAge: Forecast Month, Prepay
curvesTargettrgAge: 1
curvesSlicertrgAge: trgAge
// general
show: no
plotHeight: 1200
plotWidth: 1600