2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.stack-work/
.DS_Store
1 change: 1 addition & 0 deletions aima-haskell.cabal
@@ -39,6 +39,7 @@ Library
array,
gnuplot,
hmatrix,
hmatrix-gsl,
QuickCheck
Exposed-Modules: AI.Core.Agents
AI.Search.Core
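The new hmatrix-gsl dependency reflects the hmatrix 0.16 package split: the GSL bindings, presumably including the numerical optimizers the learning modules call, moved out of hmatrix into their own package.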
5 changes: 3 additions & 2 deletions src/AI/Learning/Bootstrap.hs
@@ -1,3 +1,4 @@
{-# LANGUAGE FlexibleContexts #-}
module AI.Learning.Bootstrap where

import Control.Monad.Random
@@ -20,9 +21,9 @@ genBootstrapSample sz = go sz []
i <- getRandomR (0,sz-1)
go (n - 1) (i:accum)

sampleVector :: (Storable a, RandomGen g) => Vector a -> Rand g (Vector a)
sampleVector :: (Storable a, Container Vector a, RandomGen g) => Vector a -> Rand g (Vector a)
sampleVector v = do
idx <- genBootstrapSample (dim v)
idx <- genBootstrapSample (size v)
return (v `subRefVec` idx)

sampleMatrixRows :: (Element a, RandomGen g) => Matrix a -> Rand g (Matrix a)
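The dim-to-size rename above comes from the same hmatrix 0.16+ update: vector length is now queried with the polymorphic size. A minimal standalone sketch of the resampling idiom, with illustrative names that are not the module's own:

import Control.Monad (replicateM)
import Control.Monad.Random (evalRandIO, getRandomR)
import Numeric.LinearAlgebra

-- Draw a bootstrap sample (resample with replacement) from a vector.
bootstrapSketch :: Vector Double -> IO (Vector Double)
bootstrapSketch v = do
  let n = size v                        -- was 'dim v' in older hmatrix
  idx <- evalRandIO (replicateM n (getRandomR (0, n - 1)))
  return (fromList [v `atIndex` i | i <- idx])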
16 changes: 9 additions & 7 deletions src/AI/Learning/CrossValidation.hs
@@ -1,8 +1,10 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
module AI.Learning.CrossValidation where

import Control.Monad.Random
import Foreign.Storable (Storable)
import Numeric.LinearAlgebra
import Numeric.LinearAlgebra hiding (Indexable)
import qualified Data.List as L

import AI.Util.Matrix
@@ -16,16 +18,16 @@ class Indexable c where
index :: c -> Index -> c
nobs :: c -> Int

instance Storable a => Indexable (Vector a) where
instance (Storable a, Container Vector a) => Indexable (Vector a) where
index = subRefVec
nobs = dim
nobs = size

instance Element a => Indexable (Matrix a) where
index = subRefRows
nobs = rows

instance Indexable [a] where
index = map . (!!)
index = map . (!!)
nobs = length

-- |Indexes are lists of 'Int'. Should refactor this to use something more
@@ -38,7 +40,7 @@ data CVPartition = CVPartition [(Index, Index)]
-- |Specify what type of cross-validation you want to do.
data CVType = LeaveOneOut
| KFold Int

-- |Prediction function. A prediction function should take a training and a test
-- set, and use the training set to build a model whose performance is
-- evaluated on the test set, returning a final score as a 'Double'.
@@ -79,7 +81,7 @@ cvp n k = do
-- |Perform k-fold cross-validation. Given a 'CVPartition' containing a list
-- of training and test sets, we repeatedly fit a model on the training set
-- and test its performance on the test set.
kFoldCV_ :: (Indexable a, Indexable b) =>
kFoldCV_ :: (Indexable a, Indexable b) =>
CVPartition
-> PredFun a b
-> a
-- the predictions match the target.
type EvalFun = Vector Double -- Target
-> Vector Double -- Predictions
-> Double -- Score (e.g. MSE, MCR, likelihood)
-> Double -- Score (e.g. MSE, MCR, likelihood)
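For intuition, here is the partitioning behind KFold as a hypothetical standalone helper; unlike the module's cvp it skips the random shuffle provided by Control.Monad.Random:

-- Split observation indexes 0..n-1 into k folds; each fold serves once
-- as the test set, with the remaining indexes forming the training set.
kFoldIndexesSketch :: Int -> Int -> [([Int], [Int])]
kFoldIndexesSketch n k =
  [ ( [i | i <- [0 .. n - 1], i `mod` k /= f]   -- training indexes
    , [i | i <- [0 .. n - 1], i `mod` k == f] ) -- test indexes
  | f <- [0 .. k - 1] ]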
14 changes: 9 additions & 5 deletions src/AI/Learning/DecisionTree.hs
@@ -37,15 +37,19 @@ data DTree a i b = Result b

instance Show b => Show (DTree a i b) where
show (Result b) = show b
show (Decision att _ ts) =
show (Decision att _ ts) =
"Decision " ++ show att ++ " " ++ show (M.elems ts)

instance Functor (DTree a i) where
fmap f (Result b) = Result (f b)
fmap f (Decision att i branches) = Decision att i (fmap (fmap f) branches)

instance Applicative (DTree a i) where
pure = Result
(<*>) = ap

instance Monad (DTree a i) where
return b = Result b
return = pure
Result b >>= f = f b
Decision att i ts >>= f = Decision att i (fmap (>>=f) ts)

@@ -145,7 +149,7 @@ maxDecisions _ r = r
-- |Prune decisions using a predicate.
prune :: (b -> Bool) -> DTree a b b -> DTree a b b
prune _ (Result b) = Result b
prune p (Decision att i ts) =
prune p (Decision att i ts) =
if p i
then Result i
else Decision att i (fmap (prune p) ts)
@@ -161,10 +165,10 @@ mcr :: Eq b =>
-> [a] -- List of elements to be classified
-> [b] -- List of correct classifications
-> Double -- Misclassification rate
mcr predfun as bs =
mcr predfun as bs =
let bsPred = map predfun as
numCorrect = countIf id (zipWith (==) bs bsPred)
numTotal = length as
in fromIntegral (numTotal - numCorrect) / fromIntegral numTotal

predfun xtrain ytrain xtest ytest = undefined
predfun xtrain ytrain xtest ytest = undefined
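The new Applicative instance is the standard Applicative-Monad Proposal boilerplate: from GHC 7.10, Applicative is a superclass of Monad, so every Monad needs an Applicative instance, return collapses to pure, and <*> can be recovered with ap. The same pattern, sketched for a trivial wrapper type:

import Control.Monad (ap)

newtype Wrap a = Wrap a

instance Functor Wrap where
  fmap f (Wrap a) = Wrap (f a)

instance Applicative Wrap where
  pure  = Wrap
  (<*>) = ap          -- recovered from the Monad instance below

instance Monad Wrap where
  return = pure       -- 'return' is now just 'pure'
  Wrap a >>= f = f a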
25 changes: 12 additions & 13 deletions src/AI/Learning/LinearRegression.hs
@@ -4,7 +4,6 @@ module AI.Learning.LinearRegression where

import Data.List (foldl')
import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.Util (ones)

import AI.Util.Matrix

@@ -27,7 +26,7 @@ A linear model should contain the following information:
* PRESS Statistic
* R squared
* adjusted r squared
*
*

-}

@@ -110,7 +109,7 @@ lmWith kind opts x y = LM { coefs = beta
lmPrepare :: LMOpts
-> Matrix Double
-> (Matrix Double, Vector Double, Vector Double)
lmPrepare opts x = (x3,mu,sigma)
lmPrepare opts x = (x3,mu,sigma)
where
(x1,mu,sigma) = standardize x
x2 = if standardizeRegressors opts then x1 else x
@@ -122,7 +121,7 @@ lmPrepare opts x = (x3,mu,sigma)

-- |Make predictions from a linear model.
lmPredict :: LinearModel -> Matrix Double -> Vector Double
lmPredict model x = x2 <> beta
lmPredict model x = x2 #> beta
where
beta = coefs model
xbar = lmMean model
@@ -137,7 +136,7 @@ lmPredict model x = x2 <> beta
-- |Calculate statistics for a linear regression.
lmStats :: LinearModel -> Matrix Double -> Vector Double -> LMStats
lmStats model x y =
let ybar = constant (mean y) (dim y)
let ybar = constant (mean y) (size y)
yhat = lmPredict model x
residuals = y - yhat
covBeta = Nothing
@@ -171,11 +170,11 @@ regress :: Matrix Double -- X
-> Vector Double -- y
-> Vector Double -- beta
regress x y
| rows x /= dim y = error "Inconsistent dimensions -- REGRESS"
| rows x /= size y = error "Inconsistent dimensions -- REGRESS"
| otherwise = let (_,n) = size x
(_,r) = qr x
rr = takeRows n r
in (trans rr <> rr) <\> trans x <> y
in (tr' rr <> rr) <\> tr' x #> y

-- |Ridge regression. This adds a penalty term to OLS regression, which
-- discourages large coefficient values to prevent overfitting.
@@ -185,14 +184,14 @@ ridgeRegress :: Matrix Double -- X
-> Double -- lambda
-> Vector Double -- beta
ridgeRegress x y useConst lambda
| rows x /= dim y = error "Inconsistent dimensions -- RIDGEREGRESS"
| rows x /= size y = error "Inconsistent dimensions -- RIDGEREGRESS"
| otherwise = let (_,n) = size x
(_,r) = qr x
rr = takeRows n r
ww = if useConst
then diag $ join [0, constant 1 (n-1)]
then diag $ vjoin [0, constant 1 (n-1)]
else ident n
in (trans rr <> rr + lambda `scale` ww) <\> trans x <> y
in (tr' rr <> rr + lambda `scale` ww) <\> tr' x #> y

-----------------
---- Utilities --
@@ -214,7 +213,7 @@ standardize m = (eachRow (\x -> (x - mu) / sigma) m, mu, sigma)
-- the function discards the mean and standard deviation vectors, only
-- returning the standardized sample.
standardize_ :: Matrix Double -> Matrix Double
standardize_ x = a where (a,_,_) = standardize x
standardize_ x = a where (a,_,_) = standardize x

-------------------------
-- Mean, Variance etc. --
@@ -231,10 +230,10 @@ class Floating b => Variance a b | a -> b where
std x = sqrt $ var x

instance Mean (Vector Double) Double where
mean v = sumVector v / fromIntegral (dim v)
mean v = sumVector v / fromIntegral (size v)

instance Variance (Vector Double) Double where
var v = mean $ (v - constant vbar (dim v)) ^ 2
var v = mean $ (v - constant vbar (size v)) ^ 2
where vbar = mean v

instance Mean (Matrix Double) (Vector Double) where
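The operator changes in this file follow the hmatrix 0.16+ split of the old all-purpose <>: it is now matrix-matrix only, #> is matrix-vector, <# is vector-matrix, and tr' replaces trans. A minimal sketch of the normal equations (X'X) beta = X'y in the new spelling, with made-up data:

import Numeric.LinearAlgebra

-- Fit y = a + b*t through (0,1), (1,3), (2,5); exact solution is [1,2].
olsSketch :: Vector Double
olsSketch = (tr' x <> x) <\> (tr' x #> y)
  where
    x = (3><2) [ 1, 0
               , 1, 1
               , 1, 2 ]       -- constant column plus one regressor
    y = fromList [1, 3, 5]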
13 changes: 6 additions & 7 deletions src/AI/Learning/LogisticRegression.hs
@@ -1,7 +1,6 @@
module AI.Learning.LogisticRegression where

import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.Util

import AI.Learning.Core
import AI.Util.Matrix
@@ -15,7 +14,7 @@ import AI.Util.Matrix
-- Typically the values in the vector /y/ are either boolean (i.e. 0/1) or they
-- represent frequency of observations, i.e. they are values between 0.0 and
-- 1.0.
--
--
-- The function fits /theta/ by numerically maximizing the likelihood function.
-- It may be subject to overfit or non-convergence in the case where the number
-- of observations is small or the predictors are highly correlated.
@@ -56,9 +55,9 @@ lrLogLikelihood :: Vector Double -- targets (y)
lrLogLikelihood y x theta = (cost, grad)
where
m = fromIntegral (rows x) -- For computing average
h = sigmoid (x <> theta) -- Predictions for y
h = sigmoid (x #> theta) -- Predictions for y
cost = sumVector (y * log h + (1-y) * log (1-h)) / m
grad = (1/m) `scale` (y - h) <> x
grad = (1/m) `scale` (y - h) <# x

-- |Cost function and derivative for regularized logistic regression. This is
-- maximized when fitting parameters for the regression.
@@ -72,8 +71,8 @@ lrLogLikRegularized y x useConst lambda theta = (cost, grad)
where
m = fromIntegral (rows x)
(c,g) = lrLogLikelihood y x theta
theta' = if useConst then join [0, dropVector 1 theta] else theta
cost = c - (lambda / (2 * m)) * norm theta' ^ 2
theta' = if useConst then vjoin [0, dropVector 1 theta] else theta
cost = c - (lambda / (2 * m)) * norm_2 theta' ^ 2
grad = g - (lambda / m) `scale` theta'

-------------
@@ -84,7 +83,7 @@ test n k lambda = do
x <- randn n k -- design matrix
e <- flatten `fmap` randn n 1 -- errors
let theta = fromList $ 1 : replicate (k-1) 0
h = sigmoid $ x <> theta + e
h = sigmoid $ (x #> theta) + e
y = (\i -> if i > 0.5 then 1 else 0) `mapVector` h
theta_est1 = lr y x
theta_est2 = lrRegularized y x False lambda
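Two renames carry this file: join became vjoin, and the old Numeric.LinearAlgebra.Util norm became norm_2. A sketch of the intercept-skipping L2 penalty they build in lrLogLikRegularized, written as a hypothetical standalone helper:

import Numeric.LinearAlgebra

-- L2 penalty over all coefficients except the first (the constant term).
l2PenaltySketch :: Double -> Int -> Vector Double -> Double
l2PenaltySketch lambda m theta =
  (lambda / (2 * fromIntegral m)) * norm_2 theta' ^ 2
  where
    -- zero the intercept so it is never penalized;
    -- 'vjoin' (formerly 'join') concatenates vectors
    theta' = vjoin [0, subVector 1 (size theta - 1) theta]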
23 changes: 12 additions & 11 deletions src/AI/Learning/NeuralNetwork.hs
@@ -12,7 +12,6 @@ module AI.Learning.NeuralNetwork

import Control.Monad.Random hiding (fromList)
import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.Util
import System.IO.Unsafe

import AI.Learning.Core
@@ -47,7 +46,7 @@ fromVector (k,h,l) vec = NN theta0 theta1
theta1 = reshape l $ dropVector ((k + 1) * h) vec

toVector :: Matrix Double -> Matrix Double -> Vector Double
toVector theta0 theta1 = join [flatten theta0, flatten theta1]
toVector theta0 theta1 = vjoin [flatten theta0, flatten theta1]

----------------------
-- NN Train/Predict --
@@ -117,9 +116,9 @@ nnBackProp :: NeuralNetwork -- neural net
nnBackProp (NN _ theta1) y (a0,a1,a2) = (dropColumns 1 delta0, delta1)
where
d2 = a2 - y
d1 = (d2 <> trans theta1) * a1 * (1 - a1)
delta0 = trans a0 <> d1
delta1 = trans a1 <> d2
d1 = (d2 <> tr' theta1) * a1 * (1 - a1)
delta0 = tr' a0 <> d1
delta1 = tr' a1 <> d2

-- |Perform back and forward propagation through a neural network, returning the
-- final predictions (variable /a2/) and the gradient matrices (variables
@@ -153,7 +152,9 @@ nnCostGradient shape y x lambda vec = (cost, grad)
grad2 = lambda `scale` toVector (insertNils theta0) (insertNils theta1)

normMatrix m = sumMatrix $ (dropRows 1 m) ^ 2
insertNils m = vertcat [0, dropRows 1 m]
-- TODO: make sure this is correct behavior.
-- Instead of overloading 0, we now explicitly create a single row of zeros.
insertNils m = vertcat [fromLists [replicate (cols m) 0], dropRows 1 m]

nnCost shape y x lambda = fst . nnCostGradient shape y x lambda
nnGrad shape y x lambda = snd . nnCostGradient shape y x lambda
@@ -165,7 +166,7 @@ nnGradApprox :: NNShape -> Matrix Double -> Matrix Double -> Double -> Vector Do
nnGradApprox shape y x lambda vec = fromList $ g `map` [0..n-1]
where
h = 1e-6
n = dim vec
n = size vec
f v = nnCost shape y x lambda v
g i = (f (vec + e i) - f (vec - e i)) / (2*h)
e i = fromList $ replicate i 0 ++ [h] ++ replicate (n-i-1) 0
@@ -191,7 +192,7 @@ testFwdProp = do
let y = nnPredict nn x
putStrLn "Predictions (should be roughly 0.0, 0.5, 1.0)"
disp 2 y

testBackProp :: IO ()
testBackProp = do
putStrLn "***\nCompare back propagation to the MATLAB implementation.\n"
@@ -224,7 +225,7 @@ test n lambda = do
putStrLn "***\nLearning XOR function.\n"
x <- rand n 2
e <- fmap (0.01*) (rand n 1)
let y = mapMatrix (\x -> if x > 0.5 then 1.0 else 0.0) (xor x)
let y = cmap (\x -> if x > 0.5 then 1.0 else 0.0) (xor x)
nn <- nnTrainIO 4 y x lambda
let ypred = nnPredict nn x
-- Show predictions
@@ -237,7 +238,7 @@ test n lambda = do
yy = nnPredict nn xx
putStrLn "Exclusive or:"
disp 2 $ horzcat [xx,yy]

xor :: Matrix Double -> Matrix Double
xor x = let [u,v] = toColumns x in asColumn (u + v - 2 * u * v)
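Here mapMatrix gives way to cmap, the single polymorphic elementwise map in newer hmatrix (it subsumes the old mapVector as well). The thresholding above, as a standalone sketch:

import Numeric.LinearAlgebra

-- Elementwise threshold; 'cmap' replaces the old mapMatrix/mapVector.
thresholdSketch :: Matrix Double -> Matrix Double
thresholdSketch = cmap (\v -> if v > 0.5 then 1 else 0)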

5 changes: 2 additions & 3 deletions src/AI/Learning/Perceptron.hs
@@ -1,10 +1,9 @@
module AI.Learning.Perceptron where

import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.Util

perceptronPredict :: Vector Double -> Matrix Double -> Vector Double
perceptronPredict weights x = step (x <> weights)
perceptronPredict weights x = step (x #> weights)

perceptronCost :: Vector Double -> Vector Double -> Vector Double -> Double
perceptronCost y yhat = undefined
@@ -23,7 +22,7 @@ gradientDescent g x0 alpha tol = go x0 (fun x0)
go x x' = if converged x x'
then x'
else go x' (fun x')
converged a b = norm b / norm a - 1 < tol
converged a b = norm_2 b / norm_2 a - 1 < tol
fun x = gradientDescentStep g alpha x

gradientDescentStep :: (Vector Double -> Vector Double) -> Double -> Vector Double -> Vector Double
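The convergence test above now uses norm_2, the explicit Euclidean norm. For a feel of the underlying iteration, a bounded-iteration sketch of gradient descent on the bowl f(x) = ||x||^2 / 2, whose gradient is x itself (all names illustrative):

import Numeric.LinearAlgebra

-- Take a fixed number of gradient steps x <- x - alpha * g x.
descendSketch :: (Vector Double -> Vector Double) -> Double -> Int
              -> Vector Double -> Vector Double
descendSketch g alpha steps = go steps
  where
    go 0 x = x
    go n x = go (n - 1) (x - scale alpha (g x))

-- e.g. descendSketch id 0.1 100 (fromList [1, -2]) approaches [0, 0].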
4 changes: 1 addition & 3 deletions src/AI/Logic/Core.hs
@@ -12,7 +12,7 @@ module AI.Logic.Core (
, LogicError(..)
) where

import Control.Monad.Error
import Control.Monad.Except
import Control.Monad.State
import Data.Map (Map)

@@ -29,8 +29,6 @@ data LogicError = ParseError
| UnknownCommand
| DefaultError deriving (Show)

instance Error LogicError where noMsg = DefaultError

-----------------
-- Expressions --
-----------------
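Finally, the mtl migration: Control.Monad.Error was deprecated in mtl 2.2 (and later removed) in favour of Control.Monad.Except, which has no Error class, so the instance Error LogicError line simply disappears. A minimal sketch, with primed names to avoid clashing with the module's own types:

import Control.Monad.Except

data LogicError' = ParseError' | DefaultError' deriving Show

-- No 'Error' instance required: any type can fill the error slot.
parseSketch :: String -> Except LogicError' Int
parseSketch s = case reads s of
  [(n, "")] -> return n
  _         -> throwError ParseError'

-- runExcept (parseSketch "42")   == Right 42
-- runExcept (parseSketch "oops") == Left ParseError'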