
Commit

updated tex(s)
dmeoli committed Mar 13, 2021
1 parent 90bc05d commit 3cea780
Showing 3 changed files with 29 additions and 21 deletions.
Binary file modified notebooks/ml/tex/CM_report.pdf
Binary file not shown.
20 changes: 11 additions & 9 deletions notebooks/ml/tex/adagrad.tex
@@ -7,18 +7,20 @@ \section{AdaGrad}
 \begin{algorithm}[h!]
 \caption{AdaGrad}
 \label{alg:adagrad}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
 \Require{Offset $\epsilon > 0$ to ensure we do not divide by 0}
-\Procedure{AdaGrad}{}
-\State Initialize weight vector $\textbf{w}_0$
-\State $k \gets 0$
+\Function{AdaGrad}{$\mathcal{L},\eta,\epsilon$}
+\State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and the accumulated squared gradients vector $\textbf{s}_0 = 0$
+\State $t \gets 1$
 \While {$not\_convergence$}
-\State $\textbf{g}_k \gets \nabla \mathcal{L}(\textbf{w}_k)$
-\State $\textbf{s}_k \gets \textbf{s}_{k-1} + \textbf{g}_k^2$
-\State $\textbf{w}_{k+1} \gets \textbf{w}_k - \displaystyle \frac{\eta}{\sqrt{\textbf{s}_k + \epsilon}} \cdot \textbf{g}_k$
-\State $k \gets k + 1$
+\State $\textbf{g}_t \gets \nabla \mathcal{L}(\textbf{w}_t)$
+\State $\textbf{s}_t \gets \textbf{s}_{t-1} + \textbf{g}_t^2$
+\State $\textbf{w}_{t+1} \gets \textbf{w}_t - \displaystyle \frac{\eta}{\sqrt{\textbf{s}_t + \epsilon}} \cdot \textbf{g}_t$
+\State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
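
As a sanity check on the updated pseudocode, a minimal NumPy sketch of the AdaGrad loop above; the `grad` callable and the fixed `n_iters` budget are hypothetical stand-ins for $\nabla \mathcal{L}$ and the $not\_convergence$ test:

import numpy as np

def adagrad(grad, w0, eta=0.01, eps=1e-8, n_iters=1000):
    """AdaGrad sketch: per-coordinate step sizes from accumulated squared gradients."""
    w = np.array(w0, dtype=float)
    s = np.zeros_like(w)                 # accumulated squared gradients, s_0 = 0
    for _ in range(n_iters):             # stand-in for the not_convergence test
        g = grad(w)                      # g_t = grad L(w_t)
        s += g ** 2                      # s_t = s_{t-1} + g_t^2
        w -= eta / np.sqrt(s + eps) * g  # eps inside the sqrt, as in the pseudocode
    return w

For example, adagrad(lambda w: 2 * w, np.ones(3)) drives the quadratic $\|\textbf{w}\|^2$ toward the zero vector.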
30 changes: 18 additions & 12 deletions notebooks/ml/tex/sgd.tex
@@ -3,40 +3,44 @@ \section{Stochastic Gradient Descent}
 \begin{algorithm}[h!]
 \caption{Gradient Descent}
 \label{alg:gd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
-\Procedure{Gradient Descent}{}
+\Function{Gradient Descent}{$\mathcal{L},\eta$}
 \State Initialize weight vector $\textbf{w}_0$
 \State $t \gets 0$
 \While{$not\_convergence$}
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \eta \nabla \mathcal{L}(\textbf{w}_t)$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
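
Since this loop is the core of every variant that follows, a minimal NumPy sketch of plain gradient descent, under the same hypothetical `grad` and `n_iters` assumptions as above:

import numpy as np

def gradient_descent(grad, w0, eta=0.1, n_iters=1000):
    """Plain gradient descent: w_{t+1} = w_t - eta * grad L(w_t)."""
    w = np.array(w0, dtype=float)
    for _ in range(n_iters):  # stand-in for the not_convergence test
        w = w - eta * grad(w)
    return w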

\subsection{Momentum}



-\subsubsection{Standard}
+\subsubsection{Polyak}

 \begin{algorithm}[h!]
-\caption{Momentum Accelerated Gradient Descent}
+\caption{Polyak Momentum Accelerated Gradient Descent or Heavy-Ball Method}
 \label{alg:sgd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate or step size $\eta > 0$}
 \Require{Momentum $\beta \in (0,1)$}
-\Procedure{Accelerated Gradient Descent}{}
+\Function{Accelerated Gradient Descent}{$\mathcal{L},\eta,\beta$}
 \State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and velocity vector $\textbf{v}_0 = 0$
 \State $t \gets 1$
 \While{$not\_convergence$}
 \State $\textbf{v}_t \gets \beta \textbf{v}_{t-1} + \eta \nabla \mathcal{L}(\textbf{w}_t)$
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \textbf{v}_t$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
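
A NumPy sketch of the heavy-ball loop under the same assumptions (hypothetical `grad` callable and fixed iteration budget); the only change from plain gradient descent is the velocity term $\beta \textbf{v}_{t-1}$ that carries over past gradients:

import numpy as np

def polyak_momentum(grad, w0, eta=0.1, beta=0.9, n_iters=1000):
    """Heavy-ball sketch: the velocity accumulates an exponentially decaying sum of past gradients."""
    w = np.array(w0, dtype=float)
    v = np.zeros_like(w)              # velocity vector, v_0 = 0
    for _ in range(n_iters):          # stand-in for the not_convergence test
        v = beta * v + eta * grad(w)  # v_t = beta v_{t-1} + eta grad L(w_t)
        w = w - v                     # w_{t+1} = w_t - v_t
    return w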

@@ -45,18 +49,20 @@ \subsubsection{Nesterov}
 \begin{algorithm}[h!]
 \caption{Nesterov Momentum Accelerated Gradient Descent}
 \label{alg:ngd}
-\begin{algorithmic}
+\begin{algorithmic}[1]
 \Require{Function $\mathcal{L}$ to minimize}
 \Require{Learning rate $\eta > 0$}
 \Require{Momentum $\beta \in (0,1)$}
-\Procedure{Nesterov Accelerated Gradient Descent}{}
+\Function{Nesterov Accelerated Gradient Descent}{$\mathcal{L},\eta,\beta$}
 \State Initialize weight vector $\textbf{w}_1 = \textbf{w}_0$ and velocity vector $\textbf{v}_0 = 0$
 \State $t \gets 1$
 \While{$not\_convergence$}
-\State $\hat{\textbf{w}}_t \gets \textbf{w}_t - \beta \textbf{v}_{t-1}$
+\State $\hat{\textbf{w}}_t \gets \textbf{w}_t + \beta \textbf{v}_{t-1}$
 \State $\textbf{v}_t \gets \beta \textbf{v}_{t-1} + \eta \nabla \mathcal{L}(\hat{\textbf{w}}_t)$
 \State $\textbf{w}_{t+1} \gets \textbf{w}_t - \textbf{v}_t$
 \State $t \gets t + 1$
 \EndWhile
-\EndProcedure
+\State \Return $\textbf{w}_t$
+\EndFunction
 \end{algorithmic}
 \end{algorithm}
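
A NumPy sketch of the Nesterov loop, same hypothetical `grad` and `n_iters` assumptions as above. The sketch computes the look-ahead point as $\textbf{w}_t - \beta \textbf{v}_{t-1}$, the sign consistent with the subtractive update $\textbf{w}_{t+1} = \textbf{w}_t - \textbf{v}_t$ used here; formulations that write $\textbf{v}_t \gets \beta \textbf{v}_{t-1} - \eta \nabla \mathcal{L}(\hat{\textbf{w}}_t)$ and add the velocity use $+\beta \textbf{v}_{t-1}$ instead:

import numpy as np

def nesterov_momentum(grad, w0, eta=0.1, beta=0.9, n_iters=1000):
    """Nesterov sketch: the gradient is evaluated at the look-ahead point, not the current iterate."""
    w = np.array(w0, dtype=float)
    v = np.zeros_like(w)                  # velocity vector, v_0 = 0
    for _ in range(n_iters):              # stand-in for the not_convergence test
        w_hat = w - beta * v              # look-ahead; sign matches w_{t+1} = w_t - v_t
        v = beta * v + eta * grad(w_hat)  # v_t = beta v_{t-1} + eta grad L(w_hat_t)
        w = w - v                         # w_{t+1} = w_t - v_t
    return w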

