
Generalize linear regression algorithms to multiple target variables in preparation for linear classification
1 parent 5323a94 commit fdd4ac5a6d97c4ab8d28d937197aa2c4be51dd73 @batterseapower committed Sep 12, 2008
@@ -1,5 +1,3 @@
-{-# LANGUAGE FlexibleInstances #-}
-
-- | The "framework" provides all the core classes and types used ubiquitously by
-- the machine learning algorithms.
module Algorithms.MachineLearning.Framework where
@@ -70,50 +68,74 @@ instance Vectorable (Vector Double) where
-- Labelled data set
--
-data DataSet = DataSet {
+data DataSet input target = DataSet {
ds_inputs :: Matrix Double, -- One row per sample, one column per input variable
- ds_targets :: Vector Target -- One row per sample, each value being a single target variable
+ ds_targets :: Matrix Target -- One row per sample, one column per target variable
}
-dataSetFromSampleList :: Vectorable input => [(input, Target)] -> DataSet
+dataSetFromSampleList :: (Vectorable input, Vectorable target) => [(input, target)] -> DataSet input target
dataSetFromSampleList elts
= DataSet {
ds_inputs = fromRows $ map (toVector . fst) elts,
- ds_targets = fromList $ map snd elts
+ ds_targets = fromRows $ map (toVector . snd) elts
}
-dataSetToSampleList :: Vectorable input => DataSet -> [(input, Target)]
+dataSetToSampleList :: (Vectorable input, Vectorable target) => DataSet input target -> [(input, target)]
dataSetToSampleList ds = zip (dataSetInputs ds) (dataSetTargets ds)
-dataSetInputs :: Vectorable input => DataSet -> [input]
+dataSetInputs :: Vectorable input => DataSet input target -> [input]
dataSetInputs ds = map fromVector $ toRows $ ds_inputs ds
-dataSetTargets :: DataSet -> [Target]
-dataSetTargets ds = toList $ ds_targets ds
+dataSetTargets :: Vectorable target => DataSet input target -> [target]
+dataSetTargets ds = map fromVector $ toRows $ ds_targets ds
-dataSetInputLength :: DataSet -> Int
+dataSetInputLength :: DataSet input target -> Int
dataSetInputLength ds = cols (ds_inputs ds)
-dataSetSize :: DataSet -> Int
+dataSetSize :: DataSet input target -> Int
dataSetSize ds = rows (ds_inputs ds)
-binDataSet :: StdGen -> Int -> DataSet -> [DataSet]
-binDataSet gen bins ds = map dataSetFromSampleList $ chunk bin_size shuffled_samples
+binDataSet :: StdGen -> Int -> DataSet input target -> [DataSet input target]
+binDataSet gen bins = transformDataSetAsVectors binDataSet'
where
- shuffled_samples = shuffle gen (dataSetToSampleList ds :: [(Vector Double, Target)])
- bin_size = ceiling $ (fromIntegral $ dataSetSize ds :: Double) / (fromIntegral bins)
+ binDataSet' ds = map dataSetFromSampleList $ chunk bin_size shuffled_samples
+ where
+ shuffled_samples = shuffle gen (dataSetToSampleList ds)
+ bin_size = ceiling $ (fromIntegral $ dataSetSize ds :: Double) / (fromIntegral bins)
+
+sampleDataSet :: StdGen -> Int -> DataSet input target -> DataSet input target
+sampleDataSet gen n = unK . transformDataSetAsVectors (K . dataSetFromSampleList . sample gen n . dataSetToSampleList)
+
+transformDataSetAsVectors :: Functor f => (DataSet (Vector Double) (Vector Double) -> f (DataSet (Vector Double) (Vector Double))) -> DataSet input target -> f (DataSet input target)
+transformDataSetAsVectors transform input = fmap castDataSet (transform (castDataSet input))
+ where
+ castDataSet :: DataSet input1 target1 -> DataSet input2 target2
+ castDataSet ds = DataSet {
+ ds_inputs = ds_inputs ds,
+ ds_targets = ds_targets ds
+ }
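
The input and target parameters of DataSet are phantom types: they only tag the untyped matrices inside, so castDataSet is a safe re-tagging that never touches the data. A minimal sketch of using transformDataSetAsVectors with an ordinary one-result transform, wrapped in the identity-like K functor from the Utilities module (dropFirstSample is a hypothetical helper, not part of this commit):

    dropFirstSample :: DataSet input target -> DataSet input target
    dropFirstSample = unK . transformDataSetAsVectors
        (K . dataSetFromSampleList . tail . dataSetToSampleList)
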
+
+--
+-- Metric spaces
+--
+
+class MetricSpace a where
+ distance :: a -> a -> Double
+
+instance MetricSpace Double where
+ distance x y = abs (x - y)
-sampleDataSet :: StdGen -> Int -> DataSet -> DataSet
-sampleDataSet gen n ds = dataSetFromSampleList (sample gen n (dataSetToSampleList ds :: [(Vector Double, Target)]))
+instance MetricSpace (Vector Double) where
+ distance x y = sqrt ((x - y) <.> (x - y)) -- Euclidean distance: the raw dot product is not a metric
--
-- Models
--
-class Model model where
- predict :: Vectorable input => model input -> input -> Target
+class (Vectorable input, Vectorable target) => Model model input target | model -> input target where
+ predict :: model -> input -> target
-modelSumSquaredError :: (Model model, Vectorable input) => model input -> DataSet -> Double
-modelSumSquaredError model ds = error_vector <.> error_vector
+modelSumSquaredError :: (Model model input target, MetricSpace target) => model -> DataSet input target -> Double
+modelSumSquaredError model ds = sum [sample_error * sample_error | sample_error <- sample_errors]
where
- error_vector = ds_targets ds - fromList (map (predict model) (dataSetInputs ds))
+ sample_errors = zipWith (\x y -> x `distance` y) (dataSetTargets ds) (map (predict model) (dataSetInputs ds))
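
As an illustration of the generalised framework, a hypothetical two-target data set (assuming the Vectorable instances for Double and Vector Double defined in this module, and hmatrix's fromList):

    twoTargetData :: DataSet (Vector Double) (Vector Double)
    twoTargetData = dataSetFromSampleList
        [ (fromList [0, 0], fromList [0, 1])
        , (fromList [0, 1], fromList [1, 0]) ]

For any model m with matching input and target types, modelSumSquaredError m twoTargetData sums the squared metric-space distances between each target row and the corresponding prediction.
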
@@ -66,4 +66,15 @@ sumColumns m = constant 1 (rows m) <> m
-- | Create a constant matrix of the given dimension, analogously to 'constant'.
constantM :: Element a => a -> Int -> Int -> Matrix a
-constantM elt row_count col_count = reshape row_count (constant elt (row_count * col_count))
+constantM elt row_count col_count = reshape col_count (constant elt (row_count * col_count))
+
+matrixToVector :: Element a => Matrix a -> Vector a
+matrixToVector m
+ | rows m == 1 -- Row vector
+ || cols m == 1 -- Column vector
+ = flatten m
+ | otherwise
+ = error "matrixToVector: matrix is neither a row or column vector"
+
+trace :: Element a => Matrix a -> a
+trace = vectorSum . takeDiag
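
Two quick sanity checks on the new helpers, assuming hmatrix's ident and the vectorSum defined in this module:

    trace (ident 3) == (3.0 :: Double)          -- sum of the unit diagonal
    matrixToVector (ident 1) == fromList [1.0]  -- a 1x1 matrix is both a row and a column vector
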
@@ -1,5 +1,3 @@
-{-# LANGUAGE PatternSignatures #-}
-
-- | Linear regression models, as discussed in chapter 3 of Bishop.
module Algorithms.MachineLearning.LinearRegression (
LinearModel,
@@ -12,22 +10,23 @@ import Algorithms.MachineLearning.LinearAlgebra
import Algorithms.MachineLearning.Utilities
-data LinearModel input = LinearModel {
- lm_basis_fns :: [input -> Target],
- lm_weights :: Vector Weight
+data LinearModel input target = LinearModel {
+ lm_basis_fns :: [input -> Double],
+ lm_weights :: Matrix Weight -- One column per target variable,
+ -- one row per basis function output
}
-instance Show (LinearModel input) where
+instance Show (LinearModel input target) where
show model = "Weights: " ++ show (lm_weights model)
-instance Model LinearModel where
- predict model input = (lm_weights model) <.> phi_app_x
+instance (Vectorable input, Vectorable target) => Model (LinearModel input target) input target where
+ predict model input = fromVector $ (trans $ lm_weights model) <> phi_app_x
where
phi_app_x = applyVector (lm_basis_fns model) input
data BayesianVarianceModel input = BayesianVarianceModel {
- bvm_basis_fns :: [input -> Target],
+ bvm_basis_fns :: [input -> Double],
bvm_inv_hessian :: Matrix Weight, -- Equivalent to the weight distribution covariance matrix
bvm_beta :: Precision
}
@@ -36,13 +35,13 @@ instance Show (BayesianVarianceModel input) where
show model = "Inverse Hessian: " ++ show (bvm_inv_hessian model) ++ "\n" ++
"Beta: " ++ show (bvm_beta model)
-instance Model BayesianVarianceModel where
- predict model input = recip (bvm_beta model) + (phi_app_x <> bvm_inv_hessian model) <.> phi_app_x --phi_app_x <.> (bvm_inv_hessian model <> phi_app_x)
+instance (Vectorable input) => Model (BayesianVarianceModel input) input Variance where
+ predict model input = recip (bvm_beta model) + (phi_app_x <> bvm_inv_hessian model) <.> phi_app_x
where
phi_app_x = applyVector (bvm_basis_fns model) input
-regressDesignMatrix :: (Vectorable input) => [input -> Target] -> Matrix Double -> Matrix Double
+regressDesignMatrix :: (Vectorable input) => [input -> Double] -> Matrix Double -> Matrix Double
regressDesignMatrix basis_fns inputs
= applyMatrix (map (. fromVector) basis_fns) inputs -- One row per sample, one column per basis function
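
As a concrete sketch of the new prediction shape, consider a hypothetical one-input, two-target model over the basis functions {1, x} (this assumes hmatrix's (><) constructor and the Model instance above):

    demoModel :: LinearModel Double (Vector Double)
    demoModel = LinearModel {
            lm_basis_fns = [const 1, id],
            lm_weights   = (2><2) [ 0, 1,    -- one row per basis function output
                                    1, 0 ]   -- one column per target variable
        }

Here phi(3) = (1, 3), so predict demoModel 3 yields fromList [3.0, 1.0]: the first weight column encodes the target x, the second the constant 1.
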
@@ -68,7 +67,7 @@ regularizedPrePinv alpha beta phi = inv $ (alpha .* (ident (cols phi))) + (beta
--
-- Equation 3.15 in Bishop.
regressLinearModel
- :: (Vectorable input) => [input -> Target] -> DataSet -> LinearModel input
+ :: (Vectorable input) => [input -> Double] -> DataSet input target -> LinearModel input target
regressLinearModel basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_weights = weights }
where
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
@@ -84,15 +83,15 @@ regressLinearModel basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_wei
--
-- Equation 3.28 in Bishop.
regressRegularizedLinearModel
- :: (Vectorable input) => RegularizationCoefficient -> [input -> Target] -> DataSet -> LinearModel input
+ :: (Vectorable input) => RegularizationCoefficient -> [input -> Double] -> DataSet input target -> LinearModel input target
regressRegularizedLinearModel lambda basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_weights = weights }
where
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
weights = regularizedPinv lambda design_matrix <> ds_targets ds
-- | Determine the mean weight and inverse hessian matrix given alpha, beta, the design matrix and the targets.
-bayesianPosteriorParameters :: Precision -> Precision -> Matrix Double -> Vector Double -> (Vector Double, Matrix Double)
+bayesianPosteriorParameters :: Precision -> Precision -> Matrix Double -> Matrix Double -> (Matrix Double, Matrix Double)
bayesianPosteriorParameters alpha beta design_matrix targets = (weights, inv_hessian)
where
inv_hessian = regularizedPrePinv alpha beta design_matrix
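
Because ds_targets is now an N x K matrix, the single expression regularizedPinv lambda design_matrix <> ds_targets ds solves all K least-squares problems in one multiplication: column k of the weight matrix is the ordinary single-target solution for target k. A usage sketch on made-up data, fitting both components of a two-target data set in one call:

    fitted :: LinearModel Double (Vector Double)
    fitted = regressRegularizedLinearModel 0.1 [const 1, id] twoTargetData
      where
        twoTargetData = dataSetFromSampleList
            [ (x, fromList [2 * x, 1 - x]) | x <- [0, 0.25 .. 1] ]
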
@@ -106,12 +105,16 @@ bayesianPosteriorParameters alpha beta design_matrix targets = (weights, inv_hes
-- lambda = alpha / beta. However, the twist is that we can use our knowledge of the prior to also make an estimate
-- for the variance of the true value about any input point.
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.53, 3.54 and 3.59 in Bishop.
regressBayesianLinearModel
:: (Vectorable input)
=> Precision -- ^ Precision of Gaussian weight prior
-> Precision -- ^ Precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input)
regressBayesianLinearModel alpha beta basis_fns ds
= (LinearModel { lm_basis_fns = basis_fns, lm_weights = weights },
BayesianVarianceModel { bvm_basis_fns = basis_fns, bvm_inv_hessian = inv_hessian, bvm_beta = beta })
@@ -130,12 +133,16 @@ regressBayesianLinearModel alpha beta basis_fns ds
--
-- As a bonus, this function returns gamma, the effective number of parameters used by the regressed model.
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.87, 3.92 and 3.95 in Bishop.
regressEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> Precision -- ^ Initial estimate of Gaussian weight prior
-> Precision -- ^ Initial estimate for precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
regressEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= convergeOnEMBayesianLinearModel loopWorker design_matrix initial_alpha initial_beta basis_fns ds
where
@@ -164,12 +171,16 @@ regressEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
-- that all the parameters are determined, the returned gamma is always just the number of basis functions (and
-- hence weights).
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.98 and 3.99 in Bishop.
regressFullyDeterminedEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> Precision -- ^ Initial estimate of Gaussian weight prior
-> Precision -- ^ Initial estimate for precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
regressFullyDeterminedEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= convergeOnEMBayesianLinearModel loopWorker design_matrix initial_alpha initial_beta basis_fns ds
where
@@ -183,14 +194,14 @@ regressFullyDeterminedEMBayesianLinearModel initial_alpha initial_beta basis_fns
loopWorker _ _ = (n, m)
convergeOnEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> (Precision -> Precision -> (Double, EffectiveNumberOfParameters)) -- ^ Loop worker: given alpha and beta, return new beta numerator and gamma
-> Matrix Double -- ^ Design matrix
-> Precision -- ^ Initial alpha
-> Precision -- ^ Initial beta
- -> [input -> Target] -- ^ Basis functions
- -> DataSet
- -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -- ^ Basis functions
+ -> DataSet input target
+ -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
convergeOnEMBayesianLinearModel loop_worker design_matrix initial_alpha initial_beta basis_fns ds
= loop eps initial_alpha initial_beta False
where
@@ -203,5 +214,15 @@ convergeOnEMBayesianLinearModel loop_worker design_matrix initial_alpha initial_
(beta_numerator, gamma) = loop_worker alpha beta
- alpha' = gamma / (weights <.> weights)
+ -- This alpha computation is not the most efficient way to get the result, but it is idiomatic.
+ -- This is the modification to the algorithm in Bishop that generalises the result to the case
+ -- of multiple target variables, but to prove that this is the right thing to do I had to make the
+ -- naive Bayesian assumption.
+ --
+ -- The reason this is correct is that, under naive Bayes,
+ --
+ --   dE(W)/d\alpha = \sum_{k=1}^K (W_k^T W_k) = Tr (W^T W)
+ --
+ -- where W_k is the k-th column of the weight matrix.
+ alpha' = gamma / (trace $ (trans weights) <> weights)
beta' = beta_numerator / modelSumSquaredError linear_model ds
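
A quick check of the trace identity used for alpha': for any weight matrix W, trace (trans W <> W) is the sum of the squares of all its entries, which collapses to the old weights <.> weights in the single-target case. For example, assuming hmatrix's (><):

    let w = (2><1) [3, 4] in trace (trans w <> w) == (25.0 :: Double)
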
@@ -12,8 +12,8 @@ import Algorithms.MachineLearning.Framework
-- @
--
-- Source: http://research.microsoft.com/~cmbishop/PRML/webdatasets/curvefitting.txt
-sinDataSet :: DataSet
-sinDataSet = dataSetFromSampleList ([
+sinDataSet :: DataSet Double Double
+sinDataSet = dataSetFromSampleList [
(0.000000, 0.349486),
(0.111111, 0.830839),
(0.222222, 1.007332),
@@ -24,4 +24,4 @@ sinDataSet = dataSetFromSampleList ([
(0.777778, -0.445686),
(0.888889, -0.563567),
(1.000000, 0.261502)
- ] :: [(Double, Double)])
+ ]
@@ -28,9 +28,9 @@ sumOfSquaresError targetsAndPredictions = sum $ map (abs . uncurry (-)) targetsA
sampleFunction :: (Double -> Double) -> [(Double, Double)]
sampleFunction f = map (\(x :: Rational) -> let x' = rationalToDouble x in (x', f x')) [0,0.01..1.0]
-evaluate :: (Model model, Show (model Double)) => model Double -> DataSet -> IO ()
+evaluate :: (Model model Double Double, Show model) => model -> DataSet Double Double -> IO ()
evaluate model true_data = do
- putStrLn $ "Target Mean = " ++ show (vectorMean (ds_targets true_data))
+ putStrLn $ "Target Mean = " ++ show (vectorMean (head $ toRows $ ds_targets true_data))
putStrLn $ "Error = " ++ show (modelSumSquaredError model true_data)
plot :: [[(Double, Target)]] -> IO ()
@@ -54,4 +54,4 @@ main = do
--putStrLn $ "Gamma = " ++ show gamma
-- Show some graphical information about the model
- plot [dataSetToSampleList used_data, sampleFunction $ predict model, sampleFunction $ (sqrt . predict variance_model)
+ plot [dataSetToSampleList used_data, sampleFunction $ predict model, sampleFunction $ (sqrt . predict variance_model)]
@@ -8,6 +8,12 @@ import Data.Ord
import System.Random
+newtype K a = K { unK :: a }
+
+instance Functor K where
+ fmap f (K x) = K (f x)
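
K is the identity functor in all but name: unK . fmap f . K is just f, as the one-liner below checks. It exists so that one-result transforms (like the one inside sampleDataSet) can be threaded through the Functor-polymorphic transformDataSetAsVectors, which binDataSet instead instantiates at the list functor:

    unK (fmap (+ 1) (K (41 :: Int))) == 42
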
+
+
square :: Num a => a -> a
square x = x * x
@@ -36,6 +36,11 @@ Library
Build-Depends: base < 3
Extensions: PatternSignatures
+ MultiParamTypeClasses
+ FunctionalDependencies
+ TypeSynonymInstances
+ FlexibleInstances
+ FlexibleContexts
Ghc-Options: -O2 -fvia-C -Wall
Executable machine-learning-tests
@@ -50,6 +55,11 @@ Executable machine-learning-tests
Build-Depends: base < 3
Extensions: PatternSignatures
+ MultiParamTypeClasses
+ FunctionalDependencies
+ TypeSynonymInstances
+ FlexibleInstances
+ FlexibleContexts
Ghc-Options: -O2 -fvia-C -Wall
if !flag(tests)
