/
gradients_risk.jl
163 lines (140 loc) · 6.52 KB
/
gradients_risk.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""
∇riskαᵢ(
model::CPM, data::StudentResponse, αMatrix,
QMatrix, RMatrix, θ, temperature
)
Computes the gradient of the empirical risk of a given student for a particular skill profile
### Arguments
- `model`: the Carla Probability Model
- `data`: `StudentResponse` of the examinee
- `data.itemResponse`: J (No. of items) × T (No. of Time points)
- `data.missingindicator`: J (No. of items) × T (No. of Time points)
- `αMatrix`: K (No. of Skills) × T (No. of Timepoints) Matrix
- `QMatrix`: J (No. of Items) × K (No. of Skills) Matrix
- `RMatrix`: RMatrix specifies how skills are temporally connected
- `θ`: Parameter values, A named tuple θ = (β, δ0, δ)
- `temperature`: temperature parameter for importance sampling
## Output
2J when estimatebeta = 1 and estimatedelta=0 for T = 1 \\
2J+K when estimatebeta =1 and estimatedelta = 1 for T=1 \\
2J when estimatebeta = 1 and estimatedelta = 0 for T > 1 \\
2J + 2K + K when estimatebeta = 1 and estimatedelta = 1 for T > 1
"""
function ∇riskαᵢ(
model::CPM, data::StudentResponse, αMatrix,
QMatrix, RMatrix, θ, temperature)
β, δ0, δ = θ.β, θ.δ0, θ.δ
x, obsloc = data.itemResponse, data.missingindicator
nrskills, nrtimepoints = size(αMatrix)
_, nritems = size(obsloc')
estimateδ = model.opts.estimatedelta
estimateβ = model.opts.estimatebeta
gradcomplete = []
if estimateβ
# Compute the gradient of -log emission probabilities (time-invariant case)
for itemID = 1:nritems
Σemissiongrad = [0,0]
for t = 1:nrtimepoints
psivec = emission_responsevec(model.emissionprob, QMatrix,
itemID,t, αMatrix)
if Bool(obsloc[itemID,t])
pemiss = local_emission(model.emissionprob, QMatrix,
itemID, t, αMatrix, β[itemID])
emissgrad = -(x[itemID,t] - pemiss) * psivec
else
emissgrad = 0 * psivec
end
Σemissiongrad = Σemissiongrad + emissgrad
end # end of time loop
gradcomplete = [gradcomplete; Σemissiongrad]
end # end of itemID loop
end
if estimateδ
# Compute gradient of -log initial transition probabilities (time-invariant case)
t = 1
for skillID = 1:nrskills
phivec = transition_responsevec(model.transitionprob, RMatrix,
skillID,t,αMatrix)
pt = local_transition(model.transitionprob, RMatrix,
skillID, t, αMatrix, δ0[skillID], δ[t], temperature)
gradsubvec = -(αMatrix[skillID,t] -pt)
gradcomplete = [gradcomplete; gradsubvec]
end # end of skillID loop
if nrtimepoints != 1
for skillID = 1:nrskills
Σtransgrad = [0,1]
for t = 2:nrtimepoints
phivec = transition_responsevec(model.transitionprob, RMatrix,
skillID,t,αMatrix)
pt = local_transition(model.transitionprob, RMatrix,
skillID, t, αMatrix, δ0[skillID], δ[t], temperature)
gradsubvec = -(αMatrix[skillID,t] - pt) * phivec
Σtransgrad = Σtransgrad + gradsubvec
end # end of time loop
gradcomplete = [gradcomplete; Σtransgrad]
end
end
end
return gradcomplete
end
export ∇riskαᵢ
"""
∇risk(model::CPM, data, QMatrix, RMatrix, θ;e_strategy::Exact)
Computes the expectation of the complete gradient of the empirical risk
computed in [`∇riskαᵢ`](@ref).
### Arguments
- `model`: the Carla Probability Model
- `data`: `StudentResponse` of the examinee
- `data.itemResponse`: J (No. of items) × T (No. of Time points)
- `data.missingindicator`: J (No. of items) × T (No. of Time points)
- `QMatrix`: J (No. of Items) × K (No. of Skills) Matrix
- `RMatrix`: RMatrix specifies how skills are temporally connected
- `θ`: Parameter values, A named tuple θ = (β, δ0, δ)
## Output
2J when estimatebeta = 1 and estimatedelta=0 for T = 1 \\
2J+K when estimatebeta =1 and estimatedelta = 1 for T=1 \\
2J when estimatebeta = 1 and estimatedelta = 0 for T > 1 \\
2J + 2K + K when estimatebeta = 1 and estimatedelta = 1 for T > 1
"""
function ∇risk(model::CPM, data, QMatrix, RMatrix, θ;e_strategy::Exact=Exact())
data = soa(data)
nrexaminees = length(data)
nrtimepoints, nritems = size(data.itemResponse[1]')
nrskills = length(θ.δ0)
temperature = 1.0
nrterms = 2^(nrskills*nrtimepoints)
all_patterns = AllPatterns(nrskills*nrtimepoints)'
θvals = (β = θ.β.val, δ0 = θ.δ0.val, δ = θ.δ.val)
paramdims = compute_paramdims(model, nritems, nrskills, nrtimepoints)
Σmisscov = 0
# Compute the gradient of the -ve normalized log-likelihood risk
gradavgmx = zeros(nrexaminees, paramdims)
for i = 1:nrexaminees
thedata = data[i]
gradcompletemx = zeros(nrterms, paramdims)
likelihoodcomplete = zeros(nrterms)
for Σᵢ = 1:nrterms
αMatrix = reshape(all_patterns[Σᵢ, :], (nrskills, nrtimepoints))
likelihoodcomplete[Σᵢ] = likelihoodcompletealpha(
model, thedata, QMatrix,
RMatrix, αMatrix, θvals, temperature
)
gradcompletemx[Σᵢ,:] = ∇riskαᵢ(model, thedata, αMatrix,
QMatrix, RMatrix, θvals, temperature)'
end
wts = likelihoodcomplete / (eps() + sum(likelihoodcomplete))
gradavg = gradcompletemx' * wts
gradavgmx[i,:] = gradavg'
weightedgradgradT = gradcompletemx' * diagm(wts)*gradcompletemx
opgmisscovi = weightedgradgradT - gradavg*gradavg'
Σmisscov = Σmisscov .+ opgmisscovi
end
gradmean = vec(mean(gradavgmx,dims=1))
opgmisscov = Σmisscov / nrexaminees
# Add the gradient of the -ve log prior probablily term
∇logθprior = all_∇logpriors(model, θ, nrtimepoints)
MLgradmean = gradmean - (∇logθprior / nrexaminees)
riskgradmx = gradavgmx .- ∇logθprior'/ nrexaminees
return MLgradmean, riskgradmx', opgmisscov
end
export ∇risk