Implement SGD
-----

Based on this [package](https://hackage.haskell.org/package/sgd)

Create a stochastic gradient descent (SGD) implementation that uses mutable vectors to update the parameter vector efficiently.

The user is given an immutable view of the parameter vector, so the gradient can be computed outside of the IO monad.

```haskell
{-# LANGUAGE RecordWildCards #-}
```

```haskell
module Numeric.SGD
( SgdArgs (..)
, sgdArgsDefault
, Para
, sgd
, module Numeric.SGD.Grad
, module Numeric.SGD.Dataset
) where
```

```haskell
import           Control.Monad (forM_)
import qualified System.Random as R
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as UM
import qualified Control.Monad.Primitive as Prim

import           Numeric.SGD.Grad
import           Numeric.SGD.Dataset
```



```haskell
-- | SGD parameters controlling the learning process.
--
-- The fields are brought into scope in 'sgd' via @RecordWildCards@; see
-- 'sgdArgsDefault' for a ready-made set of values.
data SgdArgs = SgdArgs
    { -- | Size of the batch, i.e. the number of dataset elements sampled
      -- for every single update of the parameters.
      batchSize :: Int
    -- | Regularization variance.
    -- NOTE(review): not referenced by the 'sgd' body shown in this file;
    -- confirm whether regularization is applied elsewhere.
    , regVar    :: Double
    -- | Number of iterations over the entire dataset.  'sgd' compares it
    -- against the (fractional) number of completed passes,
    -- @k * batchSize / size dataset@, which is why it is a 'Double'
    -- rather than an 'Int'.
    , iterNum   :: Double
    -- | Initial gain parameter
    , gain0     :: Double
    -- | After how many iterations over the entire dataset
    -- the gain parameter is halved
    , tau       :: Double }

```

```haskell
-- | Baseline 'SgdArgs': batches of 30 elements, regularization variance 10,
-- 10 iterations over the dataset, initial gain 1 and a gain half-life
-- ('tau') of 5 iterations.  Override individual fields with record update
-- syntax, e.g. @sgdArgsDefault { batchSize = 100 }@.
sgdArgsDefault :: SgdArgs
sgdArgsDefault =
    SgdArgs
        { batchSize = 30
        , regVar = 10
        , iterNum = 10
        , gain0 = 1
        , tau = 5
        }

```

----

```haskell
-- | A stochastic gradient descent method.
--
-- Starting from the given point, the function repeatedly samples a batch of
-- 'batchSize' elements from the dataset, combines the gradients computed
-- for the batch elements, scales the result by the current gain and applies
-- it to the parameters.  The loop ends once the number of completed passes
-- over the dataset exceeds 'iterNum'.
--
-- A notification function can be used to provide user with information
-- about the progress of the learning.  It is run with the current
-- parameters and the update counter before every update, and one more time
-- with the final result.
--
-- The random generator is always seeded with @0@.
sgd
    :: SgdArgs                  -- ^ SGD parameter values
    -> (Para -> Int -> IO ())   -- ^ Notification run every update
    -> (Para -> x -> Grad)      -- ^ Gradient for dataset element
    -> Dataset x                -- ^ Dataset
    -> Para                     -- ^ Starting point
    -> IO Para                  -- ^ SGD result
sgd SgdArgs{..} notify mkGrad dataset x0 = do
    -- Scratch buffer for the update; 'UM.new' leaves it uninitialised, so
    -- 'addUp' is responsible for filling it in on every iteration.
    u <- UM.new (U.length x0)
    -- 'U.thaw' copies, so the caller's starting point is never mutated.
    doIt u 0 (R.mkStdGen 0) =<< U.thaw x0
  where
    -- Gain in k-th iteration.  Equal to 'gain0' before any data has been
    -- seen and to @gain0 / 2@ after 'tau' passes over the dataset.
    gain k = (gain0 * tau) / (tau + done k)

    -- Number of completed iterations over the full dataset (fractional).
    done :: Int -> Double
    done k
        = fromIntegral (k * batchSize)
        / fromIntegral (size dataset)

    doIt u k stdGen x
      | done k > iterNum = do
        frozen <- U.unsafeFreeze x
        notify frozen k
        return frozen
      | otherwise = do
        (batch, stdGen') <- sample stdGen batchSize dataset

        -- Freeze mutable vector of parameters. The frozen version is
        -- then supplied to external mkGrad function provided by user.
        -- 'U.unsafeFreeze' does not copy: the vector handed to 'notify'
        -- and 'mkGrad' shares memory with the parameters and must not be
        -- retained past this iteration.
        frozen <- U.unsafeFreeze x
        notify frozen k

        -- Combine the per-element gradients of the batch, then turn them
        -- into the update stored in @u@.
        let grad = parUnions (map (mkGrad frozen) batch)
        addUp grad u
        scale (gain k) u

        -- Recover the mutable view (again without copying) and apply the
        -- update in place.  @frozen@ must not be used after this point.
        x' <- U.unsafeThaw frozen
        apply u x'
        doIt u (k+1) stdGen' x'
```

------

```haskell
-- | Add up all gradients and store results in normal domain.
--
-- The target vector is overwritten: it is first reset to zero and then
-- every gradient component is added at its index.
addUp :: Grad -> MVect -> IO ()
addUp grad v = do
    -- The buffer passed in by 'sgd' is allocated with 'UM.new' (contents
    -- unspecified) and reused between updates, so it has to be cleared
    -- before anything is accumulated into it.
    UM.set v 0
    -- NOTE(review): assumes "Numeric.SGD.Grad" exports
    -- @toList :: Grad -> [(Int, Double)]@ (index, value in normal domain),
    -- as the referenced Hackage package does; confirm against the local
    -- module.
    forM_ (toList grad) $ \(i, x) -> do
        -- Bounds-checked access: the indices originate from the
        -- user-supplied gradient function.
        y <- UM.read v i
        UM.write v i (x + y)
```

<br>
<br> 
<br>

----