
Commit

updated tex(s)
dmeoli committed Mar 13, 2021
1 parent 90bc05d commit 3cea780
Showing 3 changed files with 29 additions and 21 deletions.
Binary file modified notebooks/ml/tex/CM_report.pdf
Binary file not shown.
20 changes: 11 additions & 9 deletions notebooks/ml/tex/adagrad.tex
@@ -7,18 +7,20 @@ \section{AdaGrad}
 \begin{algorithm}[h!]
 \caption{AdaGrad}
 \label{alg:adagrad}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
 \Require{Offset $\epsilon > 0$ to ensure we do not divide by 0}
-\Procedure{AdaGrad}{}
-\State Initialize weight vector $\textbf{w}_0$
-\State $k \gets 0$
+\Function{AdaGrad}{$\mathcal{L},\eta,\epsilon$}
+\State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and the accumulated squared gradients vector $\textbf{s}_0 = 0$
+\State $t \gets 1$
 \While {$not\_convergence$}
-\State $\textbf{g}_k \gets \nabla \mathcal{L}(\textbf{w}_k)$
-\State $\textbf{s}_k \gets \textbf{s}_{k-1} + \textbf{g}_k^2$
-\State $\textbf{w}_{k+1} \gets \textbf{w}_k - \displaystyle \frac{\eta}{\sqrt{\textbf{s}_k + \epsilon}} \cdot \textbf{g}_k$
-\State $k \gets k + 1$
+\State $\textbf{g}_t \gets \nabla \mathcal{L}(\textbf{w}_t)$
+\State $\textbf{s}_t \gets \textbf{s}_{t-1} + \textbf{g}_t^2$
+\State $\textbf{w}_{t+1} \gets \textbf{w}_t - \displaystyle \frac{\eta}{\sqrt{\textbf{s}_t + \epsilon}} \cdot \textbf{g}_t$
+\State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
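
As a sanity check on the updated pseudocode, a minimal NumPy sketch of the AdaGrad loop above; the `grad` callable and the fixed `n_iters` budget are hypothetical stand-ins for $\nabla \mathcal{L}$ and the $not\_convergence$ test:

import numpy as np

def adagrad(grad, w0, eta=0.01, eps=1e-8, n_iters=1000):
    """AdaGrad sketch: per-coordinate step sizes from accumulated squared gradients."""
    w = np.array(w0, dtype=float)
    s = np.zeros_like(w)                 # accumulated squared gradients, s_0 = 0
    for _ in range(n_iters):             # stand-in for the not_convergence test
        g = grad(w)                      # g_t = grad L(w_t)
        s += g ** 2                      # s_t = s_{t-1} + g_t^2
        w -= eta / np.sqrt(s + eps) * g  # eps inside the sqrt, as in the pseudocode
    return w

For example, adagrad(lambda w: 2 * w, np.ones(3)) drives the quadratic $\|\textbf{w}\|^2$ toward the zero vector.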
30 changes: 18 additions & 12 deletions notebooks/ml/tex/sgd.tex
@@ -3,40 +3,44 @@ \section{Stochastic Gradient Descent}
 \begin{algorithm}[h!]
 \caption{Gradient Descent}
 \label{alg:gd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
-\Procedure{Gradient Descent}{}
+\Function{Gradient Descent}{$\mathcal{L},\eta$}
 \State Initialize weight vector $\textbf{w}_0$
 \State $t \gets 0$
 \While{$not\_convergence$}
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \eta \nabla \mathcal{L}(\textbf{w}_t)$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
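
Since this loop is the core of every variant that follows, a minimal NumPy sketch of plain gradient descent, under the same hypothetical `grad` and `n_iters` assumptions as above:

import numpy as np

def gradient_descent(grad, w0, eta=0.1, n_iters=1000):
    """Plain gradient descent: w_{t+1} = w_t - eta * grad L(w_t)."""
    w = np.array(w0, dtype=float)
    for _ in range(n_iters):  # stand-in for the not_convergence test
        w = w - eta * grad(w)
    return w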

\subsection{Momentum}



-\subsubsection{Standard}
+\subsubsection{Polyak}

 \begin{algorithm}[h!]
-\caption{Momentum Accelerated Gradient Descent}
+\caption{Polyak Momentum Accelerated Gradient Descent or Heavy-Ball Method}
 \label{alg:sgd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
 \Require{Momentum $\beta \in (0,1)$}
-\Procedure{Accelerated Gradient Descent}{}
+\Function{Accelerated Gradient Descent}{$\mathcal{L},\eta,\beta$}
 \State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and velocity vector $\textbf{v}_0 = 0$
 \State $t \gets 1$
 \While{$not\_convergence$}
 \State $\textbf{v}_t \gets \beta \textbf{v}_{t-1} + \eta \nabla \mathcal{L}(\textbf{w}_t)$
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \textbf{v}_t$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
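
A NumPy sketch of the heavy-ball loop under the same assumptions (hypothetical `grad` callable and fixed iteration budget); the only change from plain gradient descent is the velocity term $\beta \textbf{v}_{t-1}$ that carries over past gradients:

import numpy as np

def polyak_momentum(grad, w0, eta=0.1, beta=0.9, n_iters=1000):
    """Heavy-ball sketch: the velocity accumulates an exponentially decaying sum of past gradients."""
    w = np.array(w0, dtype=float)
    v = np.zeros_like(w)              # velocity vector, v_0 = 0
    for _ in range(n_iters):          # stand-in for the not_convergence test
        v = beta * v + eta * grad(w)  # v_t = beta v_{t-1} + eta grad L(w_t)
        w = w - v                     # w_{t+1} = w_t - v_t
    return w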

@@ -45,18 +49,20 @@ \subsubsection{Nesterov}
 \begin{algorithm}[h!]
 \caption{Nesterov Momentum Accelerated Gradient Descent}
 \label{alg:ngd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate $\eta > 0$}
 \Require{Momentum $\beta \in (0,1)$}
-\Procedure{Nesterov Accelerated Gradient Descent}{}
+\Function{Nesterov Accelerated Gradient Descent}{$\mathcal{L},\eta,\beta$}
 \State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and velocity vector $\textbf{v}_0 = 0$
 \State $t \gets 1$
 \While{$not\_convergence$}
-\State $\hat{\textbf{w}}_t \gets \textbf{w}_t - \beta \textbf{v}_{t-1}$
+\State $\hat{\textbf{w}}_t \gets \textbf{w}_t + \beta \textbf{v}_{t-1}$
 \State $\textbf{v}_t \gets \beta \textbf{v}_{t-1} + \eta \nabla \mathcal{L}(\hat{\textbf{w}}_t)$
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \textbf{v}_t$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
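
A NumPy sketch of the Nesterov loop, same hypothetical `grad` and `n_iters` assumptions as above. The sketch computes the look-ahead point as $\textbf{w}_t - \beta \textbf{v}_{t-1}$, the sign consistent with the subtractive update $\textbf{w}_{t+1} = \textbf{w}_t - \textbf{v}_t$ used here; formulations that write $\textbf{v}_t \gets \beta \textbf{v}_{t-1} - \eta \nabla \mathcal{L}(\hat{\textbf{w}}_t)$ and add the velocity use $+\beta \textbf{v}_{t-1}$ instead:

import numpy as np

def nesterov_momentum(grad, w0, eta=0.1, beta=0.9, n_iters=1000):
    """Nesterov sketch: the gradient is evaluated at the look-ahead point, not the current iterate."""
    w = np.array(w0, dtype=float)
    v = np.zeros_like(w)                  # velocity vector, v_0 = 0
    for _ in range(n_iters):              # stand-in for the not_convergence test
        w_hat = w - beta * v              # look-ahead; sign matches w_{t+1} = w_t - v_t
        v = beta * v + eta * grad(w_hat)  # v_t = beta v_{t-1} + eta grad L(w_hat_t)
        w = w - v                         # w_{t+1} = w_t - v_t
    return w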

