scripts/prepayScore.gom

title: Prepay Scoring Model
outDir: /home/will/goMortgage/prepay
buildData: yes
buildModel: yes
assessModel: yes

// ************* buildData keys *************

// fields to stratify on at pass 1
strats1: state, aoDt, aoAge, aoDqCap6
// target # of rows in output of pass 1
sampleSize1: 30000000
// at the as-of date, we restrict to active loans with a max dq of 24 months and balance greater than $10k
where1: aoAge >= 0 AND aoDq >= 0 AND aoDq <= 24 AND aoUpb > 10000 AND mon.zb='00'

// fields to stratify on at pass 2
strats2: trgDt
// target # of rows in output of pass 2
sampleSize2: 3000000
// restrict to loans that (a) are still active at the end of the window, (b) prepay, or (c) default
where2: trgZb in ('00', '01', '03', '09')
// Assess performance over the 24 months after the as-of date
window: 24

// fannie mortgage data created by https://pkg.go.dev/github.com/invertedv/fannie
mtgDb: mtg.fannie

// keyword specifies the source of the data
mtgFields: fannie

// non-loan data created by https://pkg.go.dev/github.com/invertedv/assemble
econDb: econGo.final

// the fannie data specifies geo location at a zip3 level
econFields: zip3

// outputs
pass1Strat: tmp.stratPp1
pass1Sample: tmp.samplePp1
pass2Strat: tmp.stratPp2
pass2Sample: tmp.samplePp2
// final table
outTable: tmp.modelPp
// key for final table
tableKey: lnId

// ************* buildModel keys *************

// targetPp is an int32 field that takes on values 0, 1 -- 1 means prepaid in the 24-month window after aoDt
target: targetPp
// We treat targetPp as categorical, which will build a model with a softmax output layer
targetType: cat

// one-hot features.  Note, it's fine for this to take up multiple lines
cat: purpose, propType, occ, amType, standard, nsDoc, nsUw, coBorr, hasSecond, aoPrior30, aoPrior60,
  aoPrior90p, harp, aoMod, aoBap, channel, covid, trgFcType, trgDqMax

// Continuous features.  Note, these will automatically be normalized.
cts: fico, aoAge, term, y20PropVal, units, dti, trgUnempRate, trgEltv,
  trgRefiIncentive, trgLbrGrowth, orgSpread

// Embedded features.  The embedding dimension is in the braces.
emb:  aoDq{5}; state {4}; trgAge{2}

// this is the model we're fitting.
layer1: FC(size:40, activation:relu)
layer2: FC(size:20, activation:relu)
layer3: FC(size:10, activation:relu)
layer4: FC(size:2, activation:softmax)

// Other specifications for the model build.
batchSize: 5000
epochs: 2000
earlyStopping: 40
learningRateStart: .0003
learningRateEnd: .0001

// This query pulls the data for fitting the coefficients.  The %s will be replaced with the fields we need.
// The bucket field is a hash of the loan number.
modelQuery: SELECT %s FROM tmp.modelPp WHERE bucket < 10

// This query pulls the data for determining early stopping.
validateQuery: SELECT %s FROM tmp.modelPp WHERE bucket in (10,11,12,13,14)

// ************* assessModel keys *************

// This query pulls the data for the assessments.
assessQuery: SELECT %s FROM tmp.modelPp WHERE bucket in (15,16,17,18,19)

// save Assess Data + model output.
// The table will consist of all the fields used during the run plus any set of model outputs you specify.
saveTable: tmp.outPp

// We're saving one field, prepay, from the model output.
saveTableTargets: prepay{1}

// Features not in the model we wish to keep and treat as categorical.  These can be used in the assessment.
addlCat: aoMaxDq12, vintage, aoDqCap6, numBorr

// Features to keep, either for assessment or to add to the output table.
addlKeep: lnId, aoDt, trgDt

// Additional fields for the assessment.
assessAddl: aoIncome50, ltv, aoMaxDq12, trgUpbExp, state, aoIncome90, msaLoc,
  aoPropVal, trgPropVal, vintage

// Run a by-feature assessment that is sliced by aoDqCap6
assessNameaoDq: Prepay
assessTargetaoDq: 1
assessSliceraoDq: aoDqCap6

// Run another assessment that is sliced by occupancy.
assessNameOcc: Prepay
assessTargetOcc: 1
assessSlicerOcc: occ

// Run by-curve assessments
curvesNameyrQtr: Target Quarter, Prepay
curvesTargetyrQtr: 1
curvesSliceryrQtr: trgYrQtr

curvesNamevintage: Vintage, Prepay
curvesTargetvintage: 1
curvesSlicervintage: vintage

curvesNametrgAge: Forecast Month, Prepay
curvesTargettrgAge: 1
curvesSlicertrgAge: trgAge

// general
show: no
plotHeight: 1200
plotWidth: 1600