Permalink
Browse files

Improved DataSet API

  • Loading branch information...
1 parent b10cfae commit 68f34e616ce5b9b9d6a155f9d0fd78ddfe170820 @batterseapower committed Aug 17, 2008
@@ -91,11 +91,20 @@ dataSetInputs ds = map fromVector $ toRows $ ds_inputs ds
-- | List the target values held in a data set's 'ds_targets' field.
dataSetTargets :: DataSet -> [Target]
dataSetTargets = toList . ds_targets
-binDS :: StdGen -> Int -> DataSet -> [DataSet]
-binDS gen bins ds = map dataSetFromSampleList $ chunk (ceiling $ (fromIntegral $ length samples :: Double) / (fromIntegral bins)) shuffled_samples
+-- | Number of columns in the data set's input matrix ('ds_inputs').
+dataSetInputLength :: DataSet -> Int
+dataSetInputLength = cols . ds_inputs
+
+-- | Number of rows in the data set's input matrix ('ds_inputs'), i.e. the number of samples.
+dataSetSize :: DataSet -> Int
+dataSetSize = rows . ds_inputs
+
-- | Pass the samples of a data set through 'shuffle' with the given generator,
-- cut the result into consecutive chunks of @bin_size@ samples with 'chunk',
-- and rebuild one 'DataSet' per chunk. @bin_size@ is the data set size divided
-- by @bins@, rounded up.
-- NOTE(review): @bins@ is used as a divisor, so it is presumably expected to be positive — confirm with callers.
+binDataSet :: StdGen -> Int -> DataSet -> [DataSet]
+binDataSet gen bins ds = map dataSetFromSampleList $ chunk bin_size shuffled_samples
where
- samples = zip (toRows $ ds_inputs ds) (toList $ ds_targets ds)
- shuffled_samples = shuffle gen samples
+ shuffled_samples = shuffle gen (dataSetToSampleList ds :: [(Vector Double, Target)])
+ bin_size = ceiling $ (fromIntegral $ dataSetSize ds :: Double) / (fromIntegral bins)
+
+-- | Build a new 'DataSet' from the @n@ samples that 'sample' picks out of @ds@ using generator @gen@.
+sampleDataSet :: StdGen -> Int -> DataSet -> DataSet
+sampleDataSet gen n ds = dataSetFromSampleList chosen
+  where
+    all_samples :: [(Vector Double, Target)]
+    all_samples = dataSetToSampleList ds
+    chosen = sample gen n all_samples
--
-- Models
@@ -139,7 +139,7 @@ regressEMBayesianLinearModel
regressEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= loop initial_alpha initial_beta eps False
where
- n = fromIntegral $ rows (ds_inputs ds) -- Number of samples
+ n = fromIntegral $ dataSetSize ds
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
(unscaled_eigenvalues, _) = eigSH (trans design_matrix <> design_matrix)
@@ -180,7 +180,7 @@ regressFullyDeterminedEMBayesianLinearModel
regressFullyDeterminedEMBayesianLinearModel initial_alpha initial_beta basis_fns ds
= loop initial_alpha initial_beta eps False
where
- n = fromIntegral $ rows (ds_inputs ds) -- Number of samples
+ n = fromIntegral $ dataSetSize ds
m = fromIntegral $ length basis_fns
design_matrix = regressDesignMatrix basis_fns (ds_inputs ds)
@@ -22,8 +22,8 @@ basisFunctions = const 1 : map (\mean -> gaussianBasis (rationalToDouble mean) 0
-- | Sum of the squared differences between the two components of each pair.
--
-- Each element is a @(target, prediction)@ pair; an empty list gives 0.
-- The residual is squared (not merely made absolute) so that the result is
-- the sum-of-squares error the name promises.
sumOfSquaresError :: [(Double, Double)] -> Double
sumOfSquaresError targetsAndPredictions = sum [ residual * residual | (target, prediction) <- targetsAndPredictions, let residual = target - prediction ]
-sample :: (Double -> Double) -> [(Double, Double)]
-sample f = map (\(x :: Rational) -> let x' = rationalToDouble x in (x', f x')) [0,0.01..1.0]
+-- | Tabulate @f@ as @(x, f x)@ pairs at the grid points @[0, 0.01 .. 1.0]@.
+--
+-- The grid is enumerated over 'Rational' and each point is converted with 'rationalToDouble'.
+sampleFunction :: (Double -> Double) -> [(Double, Double)]
+sampleFunction f = [ (x, f x) | x <- map rationalToDouble grid ]
+  where
+    grid :: [Rational]
+    grid = [0, 0.01 .. 1.0]
evaluate :: (Model model, Show (model Double)) => model Double -> DataSet -> IO ()
evaluate model true_data = do
@@ -42,12 +42,13 @@ plot sampless = do
-- | Program entry point: takes a sample of 'sinDataSet', fits a model to it with
-- 'regressEMBayesianLinearModel', prints statistics and plots the results.
main :: IO ()
main = do
-- Fresh random generator, used below to pick the samples the model is fitted to
gen <- newStdGen
- let used_data = head $ binDS gen 2 sinDataSet
+ let --used_data = sinDataSet
+ used_data = sampleDataSet gen 5 sinDataSet
(model, variance_model, gamma) = regressEMBayesianLinearModel 5 (1 / 0.3) basisFunctions used_data
-- Show some model statistics
evaluate model used_data
print $ "Gamma = " ++ show gamma
-- Show some graphical information about the model
- plot [dataSetToSampleList used_data, sample $ predict model, sample $ predict variance_model]
+ plot [dataSetToSampleList used_data, sampleFunction $ predict model, sampleFunction $ predict variance_model]
@@ -35,5 +35,8 @@ chunk n xs = this : chunk n rest
where
(this, rest) = splitAt n xs
+-- | Keep the first @n@ elements of @xs@ after it has been passed through 'shuffle' with @gen@.
+sample :: StdGen -> Int -> [a] -> [a]
+sample gen n = take n . shuffle gen
+
-- | Test whether two numbers differ by strictly less than @jitter@.
eqWithin :: Double -> Double -> Double -> Bool
eqWithin jitter left right = distance < jitter
  where
    distance = abs (left - right)

0 comments on commit 68f34e6

Please sign in to comment.