In [53]:
:e ImportQualifiedPost
:e FlexibleContexts
:e BlockArguments
:e TupleSections
:e FlexibleContexts
:e OverloadedStrings
:e LambdaCase

import Data.Map qualified as M
import qualified Data.Text as T
import Control.Arrow (first,second)
import Control.Monad
import Graphics.Vega.VegaLite hiding (density)
import qualified Graphics.Vega.VegaLite as VL
import IHaskell.Display.Hvega (vlShow)
import Control.Monad.State

:l Plotting.hs
:l ../src/Control/Monad/Bayes/Class.hs
:l ../src/Control/Monad/Bayes/Sampler.hs
:l ../src/Control/Monad/Bayes/Free.hs
:l ../src/Control/Monad/Bayes/Weighted.hs
:l ../src/Control/Monad/Bayes/Traced/Common.hs
:l ../src/Control/Monad/Bayes/Traced/Named.hs


We'll start with a simple linear regression example.

In [54]:
-- Prior over the parameters of the simple regression model.
-- Each draw is wrapped in `traced` under its own label ("slope",
-- "intercept", "noise"); the result is the triple (slope, intercept, noise).
paramPriorRegression =
    (,,)
        <$> traced "slope" (normal 0 2)
        <*> traced "intercept" (normal 0 2)
        <*> traced "noise" (gamma 4 4)


-- regressionData :: (MonadSample m, Traversable t) => t Double -> m (t (Double, Double))
-- Generates synthetic regression data: draws one parameter triple from
-- `paramPriorRegression`, then for every input x samples
-- y from `normal (x*slope + intercept) (sqrt noise)` and pairs it with x.
-- The output has the same container shape as the input.
regressionData xs = do
    (slope, intercept, noise) <- paramPriorRegression
    traverse (\x -> (x,) <$> normal (x * slope + intercept) (sqrt noise)) xs

In [55]:
-- Input grid: -10 to 10 in nominal steps of 0.1.
range = [-10, -9.9 .. 10] :: [Double]
-- One synthetic data set, drawn from the model over the grid.
regressionSamples <- sampleIOfixed (lowerToSampler (regressionData range))
-- Every point is plotted with the same label text (the shown form of "N/A").
plotVega [ (point, T.pack (show "N/A")) | point <- regressionSamples ]


In [56]:
-- regression :: MonadInfer m => [Double] -> [Double] -> m (Double, Double, Double)
-- Regression model conditioned on observations.
--
-- Draws (slope, intercept, noise) from `paramPriorRegression`, then for each
-- paired (x, y) adds a `factor` of
-- `normalPdf (slope * x + intercept) (sqrt noise) y`.
-- Returns the drawn parameter triple.
--
-- `xs` and `ys` are paired positionally; if their lengths differ, the extra
-- elements of the longer list are ignored.
regression xs ys = do
    params@(slope, intercept, noise) <- paramPriorRegression
    zipWithM_
        (\x y -> factor $ normalPdf (slope * x + intercept) (sqrt noise) y)
        xs
        ys
    return params


In [57]:
import Statistics.Distribution.Normal (normalDistr)
import qualified Statistics.Distribution as S (quantile, cumulative)
import Data.Maybe (fromMaybe)

-- Random-walk proposal over a map of trace values.
--
-- Picks one (key, value) entry of `c` via `uniformD`, draws a new value from
-- `normal old 0.05`, clamps it to the interval [1e-10, 1], and returns `c`
-- with that entry replaced.
--
-- The entry is drawn from `M.toList c` directly, so no separate lookup (and
-- no `undefined` fallback) is needed to recover the old value.
-- NOTE(review): behaviour on an empty map depends on what `uniformD` does
-- with an empty list -- confirm against its definition.
prop c = do
  (key, old) <- uniformD $ M.toList c
  new <- max 0.0000000001 . min 1 <$> normal old 0.05
  return $ M.insert key new c

-- Run `mh` with the `prop` proposal and step argument 200 on the regression
-- model, conditioned on the y-values of the synthetic data generated above.
mhRunsRegression <-
    sampleIOfixed
        (prior (mh prop 200 (regression range (map snd regressionSamples))))
-- Plot the (slope, intercept) of the first element of the result over the grid.
plotVega (range, case head mhRunsRegression of (s, i, _) -> (s, i))

This is a sample from the MCMC walk. Since this is an easy inference problem, it wasn't hard to generate good samples.

We can also view the posterior predictive, as follows:

In [58]:
-- posteriorPredictive :: MonadInfer m => [Double] -> [Double] -> m [Double]
-- Posterior predictive model: obtains (slope, intercept, noise) from
-- `regression xs ys`, then for every x in `xs` samples a new y from
-- `normal (x * slope + intercept) (sqrt noise)`.
posteriorPredictive xs ys = do
    (slope, intercept, noise) <- regression xs ys
    traverse (\x -> normal (x * slope + intercept) (sqrt noise)) xs


-- First element of an `mh` run (proposal `prop`, step argument 100) over the
-- posterior predictive model, conditioned on the synthetic y-values.
predictive <-
    fmap
        head
        (sampleIOfixed
            (prior (mh prop 100 (posteriorPredictive range (map snd regressionSamples)))))
-- Pair each grid point with its predicted y; every point gets the same label
-- text (the shown form of "N/A").
plotVega [ (point, T.pack (show "N/A")) | point <- zip range predictive ]


# Traced proposal for linear regression with outliers

Now onto the harder problem, where a customized proposal is more useful.

In [59]:
-- Prior over the parameters of the regression-with-outliers model.
-- Each draw is wrapped in `traced` under its own label; the result is the
-- 4-tuple (slope, intercept, noise, prob_outlier).
paramPrior =
    (,,,)
        <$> traced "slope" (normal 0 2)
        <*> traced "intercept" (normal 0 2)
        <*> traced "noise" (gamma 1 1)
        <*> traced "prob_outlier" (uniform 0 0.5)

-- Per-datapoint step of the outlier model.
--
-- Draws an outlier flag from `bernoulli probOutlier` and returns the pair of
-- arguments to hand to `normal` for this point, together with the flag:
--   * flag True  -> (0, 20)
--   * flag False -> (x*slope + intercept, sqrt noise)
forward (slope, intercept, noise, probOutlier) x = do
    isOutlier <- bernoulli probOutlier
    return (likelihoodParams isOutlier, isOutlier)
  where
    likelihoodParams True = (0, 20)
    likelihoodParams False = (x * slope + intercept, sqrt noise)

-- regressionWithOutliersData :: (MonadSample m, Traversable t) => t Double -> m (t ((Double, Double), Bool))
-- (The commented signature above mentions `Traversable t`, but the inputs are
-- zipped with an index list below, so `xs` has to be a list.)
--
-- Generates synthetic data with outliers: draws the parameters from
-- `paramPrior`, then for the i-th input x (counting from 0) runs
-- `forward params x` under the trace label `show i` and samples y from the
-- resulting `normal`. Each result is ((x, y), isOutlier).
regressionWithOutliersData xs = do
    params <- paramPrior
    zipWithM
        (\x i -> do
            ((mu, std), isOutlier) <- traced (T.pack (show i)) (forward params x)
            y <- normal mu std
            return ((x, y), isOutlier))
        xs
        [0 ..]

In [60]:
-- Input grid: -10 to 10 in nominal steps of 0.1.
range = [-10, -9.9 .. 10] :: [Double]
-- One synthetic data set with outlier flags.
samples <- sampleIOfixed (lowerToSampler (regressionWithOutliersData range))
-- Plot each point labelled with the shown form of its outlier flag.
plotVega [ (point, T.pack (show isOutlier)) | (point, isOutlier) <- samples ]

In [61]:
-- regressionWithOutliers :: (MonadSample m, MonadCond m) =>
--     [Double] -> [Double] -> m ((Double, Double, Double, Double), [Bool])
-- Outlier model conditioned on observations.
--
-- Draws the parameters from `paramPrior`; then for the i-th (x, y) pair
-- (counting from 0) runs `forward params x` under the trace label `show i`
-- and adds a `factor` of `normalPdf mu std y`. Returns the parameters together
-- with the per-point outlier flags.
regressionWithOutliers xs ys = do
    params <- paramPrior
    fmap (params,) $ forM (zip3 xs ys [0 ..]) \(x, y, i) -> do
        ((mu, std), isOutlier) <- traced (T.pack (show i)) (forward params x)
        isOutlier <$ factor (normalPdf mu std y)

In [62]:


-- Per-position summary of the outlier flags across a collection of samples.
--
-- `s` holds pairs whose second component is a list of Bool flags (the first
-- component is ignored). For every position the fold counts how many samples
-- flag it True and how many flag it False, and the result at that position is
--     log (falseCount / trueCount)
-- Note that, despite the name, this is a log ratio and not a probability.
--
-- The fold starts from an infinite list of (0, 0) counters and each step
-- truncates to the length of that sample's flag list, so the result is as
-- long as the shortest flag list (and infinite when `s` is empty).
outlierProb s = map logRatio (foldr tally (Prelude.repeat (0, 0)) s)
  where
    -- log of (times flagged False) over (times flagged True)
    logRatio (trueCount, falseCount) =
        log (fromIntegral falseCount / fromIntegral trueCount)
    -- fold one sample's flags into the running per-position counters
    tally (_, flags) counters = zipWith bump flags counters
    bump True (trueCount, falseCount) = (trueCount + 1, falseCount)
    bump False (trueCount, falseCount) = (trueCount, falseCount + 1)


In [63]:
import Data.Maybe (fromMaybe)
import Debug.Trace

-- Proposal for the outlier model, acting on a map from trace keys to values.
--
-- With probability 0.1 (a `bernoulli 0.1` draw) it overwrites one of the
-- entries ["slope"], ["intercept"] or ["noise"] with a fresh `random` draw.
-- Otherwise it picks an index i and overwrites (or creates) the entry keyed
-- by [show i] with a fresh `random` draw.
--
-- NOTE(review): "prob_outlier" is traced in `paramPrior` but is never chosen
-- here -- confirm whether that is intentional.
-- NOTE(review): the index range below is inclusive of `M.size oldProposal`,
-- and that size also counts the parameter entries, so indices can be picked
-- that have no matching `traced` site in the models in this file (their
-- sites are labelled from 0 up to the number of data points minus one).
-- Confirm the intended range before relying on this proposal.
simpleProposal oldProposal = do
    updateGlobal <- bernoulli 0.1
    if updateGlobal
        then do
            key <- uniformD ["slope", "intercept", "noise"]
            new <- random
            return $ M.insert [key] new oldProposal
        else do
            i <- uniformD [0 .. M.size oldProposal]
            val <- random
            -- Inserting is the same as the left-biased union with a singleton map.
            return $ M.insert [T.pack $ show i] val oldProposal

In [64]:
-- `mh` run with `simpleProposal` and step argument 5000 on the outlier model,
-- conditioned on the y-values of the synthetic data set `samples`.
mhRuns =
    sampleSTfixed
        (prior
            (mh simpleProposal 5000
                (regressionWithOutliers range (map (snd . fst) samples))))

-- Pair every data point with its `outlierProb` summary over the run and plot
-- (at most 5000 points).
plotVega (take 5000 (zip (map fst samples) (outlierProb mhRuns)))

In [40]:
-- import Numeric.Log

-- countOutliersWithWeight :: [((a, [Bool]), Log Double)] -> [(Double, Double)]
-- countOutliersWithWeight = foldr 
--     (\((_,lb),w) li -> 
--         [ if b then (num1+ 1, num2) else (num1,num2+ 1) | ((b),(num1, num2)) <- zip lb li]) 
--     (Prelude.repeat (0,0))

-- predData = baseData . 
--   dataColumn "Outlier Prediction" 
--       -- (Booleans $ (\((_,s),_) -> s) (maximumBy (compare `on` (snd)) smcRuns))
--       (Booleans ((\(x, y) -> ( if x > y then False else True) )
--         <$> (countOutliers (fst <$> smcRuns))))
  
-- predEncoding = baseEncoding . color [ MName "Outlier Prediction", VL.MmType VL.Quantitative]
-- showPlot predEncoding predData

: 