/
stochasticDescent.m
40 lines (34 loc) · 1.33 KB
/
stochasticDescent.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
function [ theta, J ] = stochasticDescent(X, y, theta, alpha, num_iters)
%STOCHASTICDESCENT Stochastic gradient descent for linear regression.
%   [theta, J] = stochasticDescent(X, y, theta, alpha, num_iters) shuffles
%   the rows of X (together with y) once, then makes num_iters passes over
%   the data, updating theta after every single example.
%
%   Inputs:
%     X         - m-by-n matrix, one training example per row.
%     y         - m-by-1 column of targets (only its first column is used).
%     theta     - accepted for interface compatibility. NOTE: its value is
%                 ignored; optimisation always starts from zeros(n, 1).
%     alpha     - base learning rate; pass r uses a step size of alpha / r.
%     num_iters - number of full passes over the shuffled data.
%
%   Outputs:
%     theta - n-by-1 parameter vector after the last update.
%     J     - row vector of averaged costs: one entry per 100 updates, plus
%             one final entry for any leftover partial window.
%
%   Tuning notes: alpha should start out around 0.01 and shouldn't be higher
%   than 0.3. num_iters can be anywhere from 10 to more than 1500 depending
%   on how many examples X holds; with more than ~100 examples it can be
%   fairly small, as long as num_iters*m is in the thousands. J should drop
%   below 0.1 fairly readily. computeCost is evaluated on the full data set
%   after every update, so for a truly massive data set take a snapshot of
%   the cost only every 10,000 steps or so instead.

    m = size(X, 1);
    n = size(X, 2);
    window = 100;                      % number of updates averaged per J entry

    % Preallocate the cost history instead of growing it inside the loop.
    totalSteps = num_iters * m;
    J = zeros(1, ceil(totalSteps / window));
    numReports = 0;

    Jtemp = zeros(window, 1);          % costs collected for the current window
    theta = zeros(n, 1);               % the incoming theta is deliberately not used

    % Shuffle the examples once, keeping each row of X paired with its target.
    order = randperm(m);
    X = X(order, :);
    y = y(order, 1);

    count = 0;                         % total number of updates performed so far
    for r = 1:num_iters                % one pass over the data per iteration
        tempAlpha = alpha / r;         % step size decays as 1/r; constant within a pass
        for i = 1:m                    % one update per training example
            xi = X(i, :);
            gradient = (xi*theta - y(i))*xi;   % specific to linear regression
            theta = theta - (tempAlpha*gradient)';

            count = count + 1;
            % 1-based slot in 1..window. mod(i, window) would yield the
            % invalid index 0 whenever i is a multiple of window.
            slot = mod(count - 1, window) + 1;
            Jtemp(slot) = computeCost(X, y, theta);

            if slot == window          % window full: record its average cost
                numReports = numReports + 1;
                J(numReports) = mean(Jtemp);
            end
        end
    end

    % Report the trailing partial window, averaging only the slots that were
    % written since the last report.
    filled = mod(count, window);
    if filled > 0
        numReports = numReports + 1;
        J(numReports) = mean(Jtemp(1:filled));
    end
    J = J(1:numReports);