
Generalize linear regression algorithms to multiple target variables in preparation for linear classification
1 parent 5323a94 commit fdd4ac5a6d97c4ab8d28d937197aa2c4be51dd73 @batterseapower committed Sep 12, 2008
@@ -1,5 +1,3 @@
-{-# LANGUAGE FlexibleInstances #-}
-
-- | The "framework" provides all the core classes and types used ubiquitously by
-- the machine learning algorithms.
module Algorithms.MachineLearning.Framework where
@@ -70,50 +68,74 @@ instance Vectorable (Vector Double) where
-- Labelled data set
--
-data DataSet = DataSet {
+data DataSet input target = DataSet {
ds_inputs :: Matrix Double, -- One row per sample, one column per input variable
- ds_targets :: Vector Target -- One row per sample, each value being a single target variable
+ ds_targets :: Matrix Target -- One row per sample, one column per target variable
}
-dataSetFromSampleList :: Vectorable input => [(input, Target)] -> DataSet
+dataSetFromSampleList :: (Vectorable input, Vectorable target) => [(input, target)] -> DataSet input target
dataSetFromSampleList elts
= DataSet {
ds_inputs = fromRows $ map (toVector . fst) elts,
- ds_targets = fromList $ map snd elts
+ ds_targets = fromRows $ map (toVector . snd) elts
}
-dataSetToSampleList :: Vectorable input => DataSet -> [(input, Target)]
+dataSetToSampleList :: (Vectorable input, Vectorable target) => DataSet input target -> [(input, target)]
dataSetToSampleList ds = zip (dataSetInputs ds) (dataSetTargets ds)
-dataSetInputs :: Vectorable input => DataSet -> [input]
+dataSetInputs :: Vectorable input => DataSet input target -> [input]
dataSetInputs ds = map fromVector $ toRows $ ds_inputs ds
-dataSetTargets :: DataSet -> [Target]
-dataSetTargets ds = toList $ ds_targets ds
+dataSetTargets :: Vectorable target => DataSet input target -> [target]
+dataSetTargets ds = map fromVector $ toRows $ ds_targets ds
-dataSetInputLength :: DataSet -> Int
+dataSetInputLength :: DataSet input target -> Int
dataSetInputLength ds = cols (ds_inputs ds)
-dataSetSize :: DataSet -> Int
+dataSetSize :: DataSet input target -> Int
dataSetSize ds = rows (ds_inputs ds)
-binDataSet :: StdGen -> Int -> DataSet -> [DataSet]
-binDataSet gen bins ds = map dataSetFromSampleList $ chunk bin_size shuffled_samples
+binDataSet :: StdGen -> Int -> DataSet input target -> [DataSet input target]
+binDataSet gen bins = transformDataSetAsVectors binDataSet'
where
- shuffled_samples = shuffle gen (dataSetToSampleList ds :: [(Vector Double, Target)])
- bin_size = ceiling $ (fromIntegral $ dataSetSize ds :: Double) / (fromIntegral bins)
+ binDataSet' ds = map dataSetFromSampleList $ chunk bin_size shuffled_samples
+ where
+ shuffled_samples = shuffle gen (dataSetToSampleList ds)
+ bin_size = ceiling $ (fromIntegral $ dataSetSize ds :: Double) / (fromIntegral bins)
+
+sampleDataSet :: StdGen -> Int -> DataSet input target -> DataSet input target
+sampleDataSet gen n = unK . transformDataSetAsVectors (K . dataSetFromSampleList . sample gen n . dataSetToSampleList)
+
+transformDataSetAsVectors :: Functor f => (DataSet (Vector Double) (Vector Double) -> f (DataSet (Vector Double) (Vector Double))) -> DataSet input target -> f (DataSet input target)
+transformDataSetAsVectors transform input = fmap castDataSet (transform (castDataSet input))
+ where
+ castDataSet :: DataSet input1 target1 -> DataSet input2 target2
+ castDataSet ds = DataSet {
+ ds_inputs = ds_inputs ds,
+ ds_targets = ds_targets ds
+ }
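
The input and target parameters of DataSet are phantom types: they only tag the untyped matrices inside, so castDataSet is a safe re-tagging that never touches the data. A minimal sketch of using transformDataSetAsVectors with an ordinary one-result transform, wrapped in the identity-like K functor from the Utilities module (dropFirstSample is a hypothetical helper, not part of this commit):

    dropFirstSample :: DataSet input target -> DataSet input target
    dropFirstSample = unK . transformDataSetAsVectors
        (K . dataSetFromSampleList . tail . dataSetToSampleList)
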
+
+--
+-- Metric spaces
+--
+
+class MetricSpace a where
+ distance :: a -> a -> Double
+
+instance MetricSpace Double where
+ distance x y = abs (x - y)
-sampleDataSet :: StdGen -> Int -> DataSet -> DataSet
-sampleDataSet gen n ds = dataSetFromSampleList (sample gen n (dataSetToSampleList ds :: [(Vector Double, Target)]))
+instance MetricSpace (Vector Double) where
+ distance x y = sqrt ((x - y) <.> (x - y)) -- Euclidean distance: the raw dot product is not a metric
--
-- Models
--
-class Model model where
- predict :: Vectorable input => model input -> input -> Target
+class (Vectorable input, Vectorable target) => Model model input target | model -> input target where
+ predict :: model -> input -> target
-modelSumSquaredError :: (Model model, Vectorable input) => model input -> DataSet -> Double
-modelSumSquaredError model ds = error_vector <.> error_vector
+modelSumSquaredError :: (Model model input target, MetricSpace target) => model -> DataSet input target -> Double
+modelSumSquaredError model ds = sum [sample_error * sample_error | sample_error <- sample_errors]
where
- error_vector = ds_targets ds - fromList (map (predict model) (dataSetInputs ds))
+ sample_errors = zipWith (\x y -> x `distance` y) (dataSetTargets ds) (map (predict model) (dataSetInputs ds))
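
As an illustration of the generalised framework, a hypothetical two-target data set (assuming the Vectorable instances for Double and Vector Double defined in this module, and hmatrix's fromList):

    twoTargetData :: DataSet (Vector Double) (Vector Double)
    twoTargetData = dataSetFromSampleList
        [ (fromList [0, 0], fromList [0, 1])
        , (fromList [0, 1], fromList [1, 0]) ]

For any model m with matching input and target types, modelSumSquaredError m twoTargetData sums the squared metric-space distances between each target row and the corresponding prediction.
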
@@ -66,4 +66,15 @@ sumColumns m = constant 1 (rows m) <> m
-- | Create a constant matrix of the given dimension, analogously to 'constant'.
constantM :: Element a => a -> Int -> Int -> Matrix a
-constantM elt row_count col_count = reshape row_count (constant elt (row_count * col_count))
+constantM elt row_count col_count = reshape col_count (constant elt (row_count * col_count))
+
+matrixToVector :: Element a => Matrix a -> Vector a
+matrixToVector m
+ | rows m == 1 -- Row vector
+ || cols m == 1 -- Column vector
+ = flatten m
+ | otherwise
+ = error "matrixToVector: matrix is neither a row or column vector"
+
+trace :: Element a => Matrix a -> a
+trace = vectorSum . takeDiag
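
Two quick sanity checks on the new helpers, assuming hmatrix's ident and the vectorSum defined in this module:

    trace (ident 3) == (3.0 :: Double)          -- sum of the unit diagonal
    matrixToVector (ident 1) == fromList [1.0]  -- a 1x1 matrix is both a row and a column vector
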
@@ -1,5 +1,3 @@
-{-# LANGUAGE PatternSignatures #-}
-
-- | Linear regression models, as discussed in chapter 3 of Bishop.
module Algorithms.MachineLearning.LinearRegression (
LinearModel,
@@ -12,22 +10,23 @@ import Algorithms.MachineLearning.LinearAlgebra
import Algorithms.MachineLearning.Utilities
-data LinearModel input = LinearModel {
- lm_basis_fns :: [input -> Target],
- lm_weights :: Vector Weight
+data LinearModel input target = LinearModel {
+ lm_basis_fns :: [input -> Double],
+ lm_weights :: Matrix Weight -- One column per target variable,
+ -- one row per basis function output
}
-instance Show (LinearModel input) where
+instance Show (LinearModel input target) where
show model = "Weights: " ++ show (lm_weights model)
-instance Model LinearModel where
- predict model input = (lm_weights model) <.> phi_app_x
+instance (Vectorable input, Vectorable target) => Model (LinearModel input target) input target where
+ predict model input = fromVector $ (trans $ lm_weights model) <> phi_app_x
where
phi_app_x = applyVector (lm_basis_fns model) input
data BayesianVarianceModel input = BayesianVarianceModel {
- bvm_basis_fns :: [input -> Target],
+ bvm_basis_fns :: [input -> Double],
bvm_inv_hessian :: Matrix Weight, -- Equivalent to the weight distribution covariance matrix
bvm_beta :: Precision
}
@@ -36,13 +35,13 @@ instance Show (BayesianVarianceModel input) where
show model = "Inverse Hessian: " ++ show (bvm_inv_hessian model) ++ "\n" ++
"Beta: " ++ show (bvm_beta model)
-instance Model BayesianVarianceModel where
- predict model input = recip (bvm_beta model) + (phi_app_x <> bvm_inv_hessian model) <.> phi_app_x --phi_app_x <.> (bvm_inv_hessian model <> phi_app_x)
+instance (Vectorable input) => Model (BayesianVarianceModel input) input Variance where
+ predict model input = recip (bvm_beta model) + (phi_app_x <> bvm_inv_hessian model) <.> phi_app_x
where
phi_app_x = applyVector (bvm_basis_fns model) input
-regressDesignMatrix :: (Vectorable input) => [input -> Target] -> Matrix Double -> Matrix Double
+regressDesignMatrix :: (Vectorable input) => [input -> Double] -> Matrix Double -> Matrix Double
regressDesignMatrix basis_fns inputs
= applyMatrix (map (. fromVector) basis_fns) inputs -- One row per sample, one column per basis function
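
As a concrete sketch of the new prediction shape, consider a hypothetical one-input, two-target model over the basis functions {1, x} (this assumes hmatrix's (><) constructor and the Model instance above):

    demoModel :: LinearModel Double (Vector Double)
    demoModel = LinearModel {
            lm_basis_fns = [const 1, id],
            lm_weights   = (2><2) [ 0, 1,    -- one row per basis function output
                                    1, 0 ]   -- one column per target variable
        }

Here phi(3) = (1, 3), so predict demoModel 3 yields fromList [3.0, 1.0]: the first weight column encodes the target x, the second the constant 1.
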
@@ -68,7 +67,7 @@ regularizedPrePinv alpha beta phi = inv $ (alpha .* (ident (cols phi))) + (beta
--
-- Equation 3.15 in Bishop.
regressLinearModel
- :: (Vectorable input) => [input -> Target] -> DataSet -> LinearModel input
+ :: (Vectorable input) => [input -> Double] -> DataSet input target -> LinearModel input target
regressLinearModel basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_weights = weights }
where
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
@@ -84,15 +83,15 @@ regressLinearModel basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_wei
--
-- Equation 3.28 in Bishop.
regressRegularizedLinearModel
- :: (Vectorable input) => RegularizationCoefficient -> [input -> Target] -> DataSet -> LinearModel input
+ :: (Vectorable input) => RegularizationCoefficient -> [input -> Double] -> DataSet input target -> LinearModel input target
regressRegularizedLinearModel lambda basis_fns ds = LinearModel { lm_basis_fns = basis_fns, lm_weights = weights }
where
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
weights = regularizedPinv lambda design_matrix <> ds_targets ds
-- | Determine the mean weight and inverse hessian matrix given alpha, beta, the design matrix and the targets.
-bayesianPosteriorParameters :: Precision -> Precision -> Matrix Double -> Vector Double -> (Vector Double, Matrix Double)
+bayesianPosteriorParameters :: Precision -> Precision -> Matrix Double -> Matrix Double -> (Matrix Double, Matrix Double)
bayesianPosteriorParameters alpha beta design_matrix targets = (weights, inv_hessian)
where
inv_hessian = regularizedPrePinv alpha beta design_matrix
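
Because ds_targets is now an N x K matrix, the single expression regularizedPinv lambda design_matrix <> ds_targets ds solves all K least-squares problems in one multiplication: column k of the weight matrix is the ordinary single-target solution for target k. A usage sketch on made-up data, fitting both components of a two-target data set in one call:

    fitted :: LinearModel Double (Vector Double)
    fitted = regressRegularizedLinearModel 0.1 [const 1, id] twoTargetData
      where
        twoTargetData = dataSetFromSampleList
            [ (x, fromList [2 * x, 1 - x]) | x <- [0, 0.25 .. 1] ]
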
@@ -106,12 +105,16 @@ bayesianPosteriorParameters alpha beta design_matrix targets = (weights, inv_hes
-- lambda = alpha / beta. However, the twist is that we can use our knowledge of the prior to also make an estimate
-- for the variance of the true value about any input point.
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.53, 3.54 and 3.59 in Bishop.
regressBayesianLinearModel
:: (Vectorable input)
=> Precision -- ^ Precision of Gaussian weight prior
-> Precision -- ^ Precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input)
regressBayesianLinearModel alpha beta basis_fns ds
= (LinearModel { lm_basis_fns = basis_fns, lm_weights = weights },
BayesianVarianceModel { bvm_basis_fns = basis_fns, bvm_inv_hessian = inv_hessian, bvm_beta = beta })
@@ -130,12 +133,16 @@ regressBayesianLinearModel alpha beta basis_fns ds
--
-- As a bonus, this function returns gamma, the effective number of parameters used by the regressed model.
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.87, 3.92 and 3.95 in Bishop.
regressEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> Precision -- ^ Initial estimate of Gaussian weight prior
-> Precision -- ^ Initial estimate for precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
regressEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= convergeOnEMBayesianLinearModel loopWorker design_matrix initial_alpha initial_beta basis_fns ds
where
@@ -164,12 +171,16 @@ regressEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
-- that all the parameters are determined, the returned gamma is always just the number of basis functions (and
-- hence weights).
--
+-- For the case of multiple target variables, this function makes the naive Bayesian assumption that the probability
+-- distributions over the output variables are independent, and takes as its error metric the unweighted sum-squared
+-- error over all the targets. The variance model is common to all the target variables.
+--
-- Equations 3.98 and 3.99 in Bishop.
regressFullyDeterminedEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> Precision -- ^ Initial estimate of Gaussian weight prior
-> Precision -- ^ Initial estimate for precision of noise on samples
- -> [input -> Target] -> DataSet -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -> DataSet input target -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
regressFullyDeterminedEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= convergeOnEMBayesianLinearModel loopWorker design_matrix initial_alpha initial_beta basis_fns ds
where
@@ -183,14 +194,14 @@ regressFullyDeterminedEMBayesianLinearModel initial_alpha initial_beta basis_fns
loopWorker _ _ = (n, m)
convergeOnEMBayesianLinearModel
- :: (Vectorable input)
+ :: (Vectorable input, Vectorable target, MetricSpace target)
=> (Precision -> Precision -> (Double, EffectiveNumberOfParameters)) -- ^ Loop worker: given alpha and beta, return new beta numerator and gamma
-> Matrix Double -- ^ Design matrix
-> Precision -- ^ Initial alpha
-> Precision -- ^ Initial beta
- -> [input -> Target] -- ^ Basis functions
- -> DataSet
- -> (LinearModel input, BayesianVarianceModel input, EffectiveNumberOfParameters)
+ -> [input -> Double] -- ^ Basis functions
+ -> DataSet input target
+ -> (LinearModel input target, BayesianVarianceModel input, EffectiveNumberOfParameters)
convergeOnEMBayesianLinearModel loop_worker design_matrix initial_alpha initial_beta basis_fns ds
= loop eps initial_alpha initial_beta False
where
@@ -203,5 +214,15 @@ convergeOnEMBayesianLinearModel loop_worker design_matrix initial_alpha initial_
(beta_numerator, gamma) = loop_worker alpha beta
- alpha' = gamma / (weights <.> weights)
+ -- This alpha computation is not the most efficient way to get the result, but it is idiomatic.
+ -- This is the modification to the algorithm in Bishop that generalises the result to the case
+ -- of multiple target variables, but to prove that this is the right thing to do I had to make the
+ -- naive Bayesian assumption.
+ --
+ -- The reason this is correct is that, under naive Bayes,
+ --
+ --   dE(W)/d\alpha = \sum_{k=1}^K (W_k^T W_k) = Tr (W^T W)
+ --
+ -- where W_k is the k-th column of the weight matrix.
+ alpha' = gamma / (trace $ (trans weights) <> weights)
beta' = beta_numerator / modelSumSquaredError linear_model ds
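
A quick check of the trace identity used for alpha': for any weight matrix W, trace (trans W <> W) is the sum of the squares of all its entries, which collapses to the old weights <.> weights in the single-target case. For example, assuming hmatrix's (><):

    let w = (2><1) [3, 4] in trace (trans w <> w) == (25.0 :: Double)
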
@@ -12,8 +12,8 @@ import Algorithms.MachineLearning.Framework
-- @
--
-- Source: http://research.microsoft.com/~cmbishop/PRML/webdatasets/curvefitting.txt
-sinDataSet :: DataSet
-sinDataSet = dataSetFromSampleList ([
+sinDataSet :: DataSet Double Double
+sinDataSet = dataSetFromSampleList [
(0.000000, 0.349486),
(0.111111, 0.830839),
(0.222222, 1.007332),
@@ -24,4 +24,4 @@ sinDataSet = dataSetFromSampleList ([
(0.777778, -0.445686),
(0.888889, -0.563567),
(1.000000, 0.261502)
- ] :: [(Double, Double)])
+ ]
@@ -28,9 +28,9 @@ sumOfSquaresError targetsAndPredictions = sum $ map (abs . uncurry (-)) targetsA
sampleFunction :: (Double -> Double) -> [(Double, Double)]
sampleFunction f = map (\(x :: Rational) -> let x' = rationalToDouble x in (x', f x')) [0,0.01..1.0]
-evaluate :: (Model model, Show (model Double)) => model Double -> DataSet -> IO ()
+evaluate :: (Model model Double Double, Show model) => model -> DataSet Double Double -> IO ()
evaluate model true_data = do
- putStrLn $ "Target Mean = " ++ show (vectorMean (ds_targets true_data))
+ putStrLn $ "Target Mean = " ++ show (vectorMean (head $ toRows $ ds_targets true_data))
putStrLn $ "Error = " ++ show (modelSumSquaredError model true_data)
plot :: [[(Double, Target)]] -> IO ()
@@ -54,4 +54,4 @@ main = do
--putStrLn $ "Gamma = " ++ show gamma
-- Show some graphical information about the model
- plot [dataSetToSampleList used_data, sampleFunction $ predict model, sampleFunction $ (sqrt . predict variance_model)
+ plot [dataSetToSampleList used_data, sampleFunction $ predict model, sampleFunction $ (sqrt . predict variance_model)]
@@ -8,6 +8,12 @@ import Data.Ord
import System.Random
+newtype K a = K { unK :: a }
+
+instance Functor K where
+ fmap f (K x) = K (f x)
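
K is the identity functor in all but name: unK . fmap f . K is just f, as the one-liner below checks. It exists so that one-result transforms (like the one inside sampleDataSet) can be threaded through the Functor-polymorphic transformDataSetAsVectors, which binDataSet instead instantiates at the list functor:

    unK (fmap (+ 1) (K (41 :: Int))) == 42
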
+
+
square :: Num a => a -> a
square x = x * x
@@ -36,6 +36,11 @@ Library
Build-Depends: base < 3
Extensions: PatternSignatures
+ MultiParamTypeClasses
+ FunctionalDependencies
+ TypeSynonymInstances
+ FlexibleInstances
+ FlexibleContexts
Ghc-Options: -O2 -fvia-C -Wall
Executable machine-learning-tests
@@ -50,6 +55,11 @@ Executable machine-learning-tests
Build-Depends: base < 3
Extensions: PatternSignatures
+ MultiParamTypeClasses
+ FunctionalDependencies
+ TypeSynonymInstances
+ FlexibleInstances
+ FlexibleContexts
Ghc-Options: -O2 -fvia-C -Wall
if !flag(tests)
