Skip to content

Commit

Permalink
Properly namespace the base oml library and make its archive correct.
Browse files Browse the repository at this point in the history
  • Loading branch information
dbuenzli committed Oct 10, 2016
1 parent 0ae946c commit 53c2978
Show file tree
Hide file tree
Showing 56 changed files with 123 additions and 86 deletions.
7 changes: 5 additions & 2 deletions .merlin
@@ -1,5 +1,8 @@
PKG kaputt
S ./src/lib/**
PKG lacaml lbfgs ocephes
S src/**
S src-full/**
S test
S scripts
B _build/**
B _driver/**
PKG lacaml lbfgs ocephes
4 changes: 2 additions & 2 deletions src-full/cls/descriminant.ml
Expand Up @@ -4,7 +4,7 @@ open Lacaml.D
open Oml_util
module LU = Lacaml_util

module LDA(Data : Cls_intf.Continuous_encoded_data) = struct
module LDA(Data : Oml_cls_intf.Continuous_encoded_data) = struct
type feature = Data.feature
type clas = Data.clas

Expand Down Expand Up @@ -100,7 +100,7 @@ module LDA(Data : Cls_intf.Continuous_encoded_data) = struct

end (* LDA *)

module QDA(Data : Cls_intf.Continuous_encoded_data) = struct
module QDA(Data : Oml_cls_intf.Continuous_encoded_data) = struct
type feature = Data.feature
type clas = Data.clas

Expand Down
8 changes: 4 additions & 4 deletions src-full/cls/descriminant.mli
Expand Up @@ -5,17 +5,17 @@
then used to model a {{:https://en.wikipedia.org/wiki/Multivariate_normal_distribution}
Multivariate normal distribution}. These per-class distributions are used
in Bayes's rule for classification. *)
module LDA(D: Cls_intf.Continuous_encoded_data) : sig
include Cls_intf.Generative with type feature = D.feature
module LDA(D: Oml_cls_intf.Continuous_encoded_data) : sig
include Oml_cls_intf.Generative with type feature = D.feature
and type clas = D.clas
and type feature_probability = float

val opt : ?shrinkage:float -> unit -> opt

end

module QDA(D: Cls_intf.Continuous_encoded_data) : sig
include Cls_intf.Generative with type feature = D.feature
module QDA(D: Oml_cls_intf.Continuous_encoded_data) : sig
include Oml_cls_intf.Generative with type feature = D.feature
and type clas = D.clas
and type feature_probability = float

Expand Down
2 changes: 1 addition & 1 deletion src-full/cls/logistic_regression.ml
Expand Up @@ -16,7 +16,7 @@
*)

open Oml_util
open Cls_intf
open Oml_cls_intf

module LrCommon(Data: Continuous_encoded_data) = struct

Expand Down
8 changes: 4 additions & 4 deletions src-full/cls/logistic_regression.mli
Expand Up @@ -30,9 +30,9 @@
A constant [1] is added to all encoded features by [estimate],
there is no need to add one with
{{!val:Oml_cls_intf.Continuous_encoded_data.encoding}encoding}. *)
module Binary(D: Cls_intf.Continuous_encoded_data) :
module Binary(D: Oml_cls_intf.Continuous_encoded_data) :
sig
include Cls_intf.Classifier with type feature = D.feature
include Oml_cls_intf.Classifier with type feature = D.feature
and type clas = D.clas

(** [opt ~lambda ~tolerance ()] a constructor for the optional arguments,
Expand Down Expand Up @@ -67,9 +67,9 @@ module Binary(D: Cls_intf.Continuous_encoded_data) :
A constant [1] is added to all encoded features by [estimate],
there is no need to add one with
{{!val:Oml_cls_intf.Continuous_encoded_data.encoding}encoding}. *)
module Multiclass(D: Cls_intf.Continuous_encoded_data) :
module Multiclass(D: Oml_cls_intf.Continuous_encoded_data) :
sig
include Cls_intf.Classifier with (* type opt := opt
include Oml_cls_intf.Classifier with (* type opt := opt
and*) type feature = D.feature
and type clas = D.clas

Expand Down
10 changes: 5 additions & 5 deletions src-full/cls/oml_naive_bayes.ml
Expand Up @@ -18,10 +18,10 @@
include Oml_lite_naive_bayes
open Oml_util

module Gaussian(Data: Cls_intf.Continuous_encoded_data) = struct
module Gaussian(Data: Oml_cls_intf.Continuous_encoded_data) = struct

module D = Distributions
module O = Online
module O = Oml_online

type samples = (Data.clas * Data.feature) list

Expand Down Expand Up @@ -51,11 +51,11 @@ module Gaussian(Data: Cls_intf.Continuous_encoded_data) = struct
let to_prior (prior, _) = prior in
let to_likelihood (_, lkhd) =
let indices = safe_encoding feature in
Common_naive_bayes.prod_arr2 (fun (mean,std) y ->
Oml_common_naive_bayes.prod_arr2 (fun (mean,std) y ->
if std = 0. then 1. else D.normal_pdf ~mean ~std y)
lkhd indices
in
Common_naive_bayes.eval ~to_prior ~to_likelihood table
Oml_common_naive_bayes.eval ~to_prior ~to_likelihood table

type opt = unit
let default = ()
Expand All @@ -80,7 +80,7 @@ module Gaussian(Data: Cls_intf.Continuous_encoded_data) = struct
let attr_params = Array.map select rsarr in
(c, (class_prior, attr_params)))
in
Common_naive_bayes.estimate "Gaussian"
Oml_common_naive_bayes.estimate "Gaussian"
init update incorporate (module Cm)

end (* Gaussian *)
4 changes: 2 additions & 2 deletions src-full/cls/oml_naive_bayes.mli
Expand Up @@ -22,8 +22,8 @@ include module type of Oml_lite_naive_bayes
Gaussian Naive Bayes} by estimating mean and standard deviations
for each of the quantitative features in the
{{!modtype:Oml_cls_intf.Continuous_encoded_data}encoded data}. *)
module Gaussian(D: Cls_intf.Continuous_encoded_data) : sig
include Cls_intf.Generative with type feature := D.feature
module Gaussian(D: Oml_cls_intf.Continuous_encoded_data) : sig
include Oml_cls_intf.Generative with type feature := D.feature
and type clas := D.clas
and type feature_probability = float array
end
2 changes: 1 addition & 1 deletion src-full/oml_full.ml
Expand Up @@ -20,7 +20,7 @@ module Classification = struct
module Naive_bayes = Oml_naive_bayes
module Logistic_regression = Logistic_regression
module Descriminant = Descriminant
module Performance = Performance
module Performance = Oml.Classification.Performance
end

module Regression = struct
Expand Down
3 changes: 1 addition & 2 deletions src-full/rgr/oml_univariate.ml
Expand Up @@ -15,7 +15,7 @@
limitations under the License.
*)

include Common_univariate
include Oml_common_univariate

let confidence_interval, prediction_interval =
let interval a lrm ~alpha x =
Expand Down Expand Up @@ -49,4 +49,3 @@ let beta_test ?(null=0.0) t =

let coefficient_tests ?null t =
[| alpha_test ?null t ; beta_test ?null t |]

2 changes: 1 addition & 1 deletion src-full/stats/hypothesis_test.ml
Expand Up @@ -16,7 +16,7 @@
*)

open Oml_util
open Descriptive
open Oml_descriptive
open Distributions
module F = Oml_functions

Expand Down
6 changes: 2 additions & 4 deletions src-full/stats/hypothesis_test.mli
Expand Up @@ -26,7 +26,7 @@
we can still create a prediction interval based off of basic sampled
statistics and Student's distribution.
*)
val prediction_interval : Descriptive.summary -> float -> float * float
val prediction_interval : Oml_descriptive.summary -> float -> float * float

(** A hypothesis test. *)
type t =
Expand Down Expand Up @@ -54,7 +54,7 @@ type null_hypothesis =
hypothesis, where [d] is the difference between population parameter and
the observed value, [e] is the standard error of the observed value, and
[k] is the degrees of freedom in the statistical procedure.
One may think of this as a principled way to test the ratio of signal (diff)
to noise (error) seen in a sample of data. *)
val t_test : null_hypothesis -> degrees_of_freedom:int -> diff:float
Expand All @@ -81,5 +81,3 @@ val means_different_variance_test : null_hypothesis -> float array
(** [variance_ratio_test sample1 sample2] tests whether the data in [sample1]
and [sample2] have the same variance, based on an F-test. *)
val variance_ratio_test : float array -> float array -> t


7 changes: 4 additions & 3 deletions src-full/stats/oml_functions.ml
Expand Up @@ -52,15 +52,16 @@ let rec regularized_beta ~alpha:a ~beta:b ?epsilon ?max_iterations =
((a +. (2. *. m)) *. (a +. (2. *. m) +. 1.0)) in
let get_a _n _x = 1.0 in
let log_beta = ln_beta a b in
let fraction = Continued_fraction.init ~get_a ~get_b in fun x ->
let fraction = Oml_continued_fraction.init ~get_a ~get_b in fun x ->
if Oml_util.is_nan x || Oml_util.is_nan a || Oml_util.is_nan b ||
x < 0.0 || x > 1.0 || a <= 0.0 || b <= 0.0 then nan
else if (x > (a +. 1.) /. (2. +. b +. a) &&
1. -. x <= (b +. 1.) /. (2. +. b +. a))
then 1. -. regularized_beta ~alpha:b ~beta:a ?epsilon ?max_iterations (1. -. x)
else exp ((a *. log x) +. (b *. log1p (-.x)) -.
log a -. log_beta) *.
1.0 /. Continued_fraction.evaluate fraction ?epsilon ?max_iterations x
log a -. log_beta) *.
1.0 /.
Oml_continued_fraction.evaluate fraction ?epsilon ?max_iterations x

let chi_square_less num_observations chi_square =
regularized_lower_gamma ~a:((float num_observations) /. 2.0) (chi_square /. 2.0)
Expand Down
2 changes: 1 addition & 1 deletion src/cls/cls_intf.ml → src/cls/oml_cls_intf.ml
Expand Up @@ -95,7 +95,7 @@ module type Classifier = sig

(** [eval classifier feature] assign {{!Probabilities.t}probabilities} to the
possible {{!type:Data.clas}classes} based upon [feature]. *)
val eval : t -> feature -> clas Probabilities.t
val eval : t -> feature -> clas Oml_probabilities.t

(** Representing training data. *)
type samples = (clas * feature) list
Expand Down
File renamed without changes.
23 changes: 12 additions & 11 deletions src/cls/oml_lite_naive_bayes.ml
Expand Up @@ -17,7 +17,7 @@

open Oml_util

module Binomial(Data: Cls_intf.Dummy_encoded_data) = struct
module Binomial(Data: Oml_cls_intf.Dummy_encoded_data) = struct

type feature = Data.feature
type clas = Data.clas
Expand Down Expand Up @@ -51,16 +51,16 @@ module Binomial(Data: Cls_intf.Dummy_encoded_data) = struct
let idx = safe_encoding b in
if nb.e_bernoulli then
let set = Array.to_list idx in
Common_naive_bayes.prod_arr (fun i ->
Oml_common_naive_bayes.prod_arr (fun i ->
if List.mem i ~set then
class_probs.(i)
else
(1.0 -. class_probs.(i)))
(Array.init Data.size (fun x -> x))
else
Common_naive_bayes.prod_arr (fun i -> class_probs.(i)) idx
Oml_common_naive_bayes.prod_arr (fun i -> class_probs.(i)) idx
in
Common_naive_bayes.eval ~to_prior ~to_likelihood nb.table
Oml_common_naive_bayes.eval ~to_prior ~to_likelihood nb.table

module Cm = Map.Make(struct type t = clas let compare = compare end)

Expand All @@ -75,7 +75,7 @@ module Binomial(Data: Cls_intf.Dummy_encoded_data) = struct
arr
in
let incorporate all num_classes totalf =
let to_prob = Common_naive_bayes.smoothing_to_prob opt.smoothing in
let to_prob = Oml_common_naive_bayes.smoothing_to_prob opt.smoothing in
List.map all ~f:(fun (cl, attr_count) ->
let prior_count = float attr_count.(Data.size) in
let likelihood =
Expand All @@ -88,7 +88,7 @@ module Binomial(Data: Cls_intf.Dummy_encoded_data) = struct
cl, likelihood)
in
let table =
Common_naive_bayes.estimate "Binomial"
Oml_common_naive_bayes.estimate "Binomial"
init update incorporate (module Cm) ?classes data
in
{table ; e_bernoulli = opt.bernoulli}
Expand All @@ -103,7 +103,7 @@ module Binomial(Data: Cls_intf.Dummy_encoded_data) = struct

end (* Binomial *)

module Categorical(Data: Cls_intf.Category_encoded_data) = struct
module Categorical(Data: Oml_cls_intf.Category_encoded_data) = struct

type feature = Data.feature
type clas = Data.clas
Expand Down Expand Up @@ -140,9 +140,10 @@ module Categorical(Data: Cls_intf.Category_encoded_data) = struct
let to_prior (prior, _) = prior in
let to_likelihood (_, ftr_prob) =
let indices = safe_encoding feature in
Common_naive_bayes.prod_arr2 (fun i lk_arr -> lk_arr.(i)) indices ftr_prob
Oml_common_naive_bayes.prod_arr2
(fun i lk_arr -> lk_arr.(i)) indices ftr_prob
in
Common_naive_bayes.eval ~to_prior ~to_likelihood table
Oml_common_naive_bayes.eval ~to_prior ~to_likelihood table

module Cm = Map.Make(struct type t = clas let compare = compare end)

Expand All @@ -154,7 +155,7 @@ module Categorical(Data: Cls_intf.Category_encoded_data) = struct
(c + 1, arr)
in
let incorporate all num_classes totalf =
let to_prob = Common_naive_bayes.smoothing_to_prob opt in
let to_prob = Oml_common_naive_bayes.smoothing_to_prob opt in
List.map all ~f:(fun (cl, (class_count, attr_count)) ->
let prior = to_prob (float class_count) totalf (float num_classes) in
let likelihood =
Expand All @@ -167,7 +168,7 @@ module Categorical(Data: Cls_intf.Category_encoded_data) = struct
in
cl, (prior, likelihood))
in
Common_naive_bayes.estimate "Categorical" init update incorporate
Oml_common_naive_bayes.estimate "Categorical" init update incorporate
(module Cm)

end (* Categorical *)
8 changes: 4 additions & 4 deletions src/cls/oml_lite_naive_bayes.mli
Expand Up @@ -19,8 +19,8 @@
{{:https://en.wikipedia.org/wiki/Naive_Bayes_classifier}Naive Bayes}
classifier on data encoded using
{{!modtype:Oml_cls_intf.Dummy_encoded_data}Dummy variables.} *)
module Binomial(D: Cls_intf.Dummy_encoded_data) : sig
include Cls_intf.Generative with type feature = D.feature
module Binomial(D: Oml_cls_intf.Dummy_encoded_data) : sig
include Oml_cls_intf.Generative with type feature = D.feature
and type clas = D.clas
and type feature_probability = float array

Expand All @@ -45,8 +45,8 @@ end
{{:https://en.wikipedia.org/wiki/Naive_Bayes_classifier}Naive Bayes}
classifier on data encoded using
{{!modtype:Oml_cls_intf.Category_encoded_data}Categorical variables.} *)
module Categorical(D: Cls_intf.Category_encoded_data) : sig
include Cls_intf.Generative with type feature = D.feature
module Categorical(D: Oml_cls_intf.Category_encoded_data) : sig
include Oml_cls_intf.Generative with type feature = D.feature
and type clas = D.clas
and type feature_probability = float array

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 9 additions & 9 deletions src/oml.ml
Expand Up @@ -3,24 +3,24 @@ module Util = Oml_util
module Uncategorized = Oml_lite_uncategorized

module Statistics = struct
module Continued_fraction = Continued_fraction
module Continued_fraction = Oml_continued_fraction
module Functions = Oml_lite_functions
module Sampling = Sampling
module Descriptive = Descriptive
module Measures = Measures
module Sampling = Oml_sampling
module Descriptive = Oml_descriptive
module Measures = Oml_measures
end

module Online = Online
module Online = Oml_online

module Classification = struct
module Intf = Cls_intf
module Probabilities = Probabilities
module Intf = Oml_cls_intf
module Probabilities = Oml_probabilities
module Naive_bayes = Oml_lite_naive_bayes
module Performance = Performance
module Performance = Oml_performance
end

module Regression = struct
module Intf = Oml_lite_intf
module Univariate = Oml_lite_univariate
module Interpolate = Interpolate
module Interpolate = Oml_interpolate
end
20 changes: 11 additions & 9 deletions src/oml.mli
Expand Up @@ -12,29 +12,31 @@ end

(** Inference, parameter estimation.*)
module Statistics : sig
module Continued_fraction : sig include module type of Continued_fraction end
module Continued_fraction : sig include
module type of Oml_continued_fraction
end
module Functions : sig include module type of Oml_lite_functions end
module Sampling : sig include module type of Sampling end
module Descriptive : sig include module type of Descriptive end
module Measures : sig include module type of Measures end
module Sampling : sig include module type of Oml_sampling end
module Descriptive : sig include module type of Oml_descriptive end
module Measures : sig include module type of Oml_measures end
end

(** Compute running statistics using recurrence equations. *)
module Online : sig include module type of Online end
module Online : sig include module type of Oml_online end

(** Classify data based on features. *)
module Classification : sig
module Intf : sig include module type of Cls_intf end
module Probabilities : sig include module type of Probabilities end
module Intf : sig include module type of Oml_cls_intf end
module Probabilities : sig include module type of Oml_probabilities end
module Naive_bayes : sig
include module type of Oml_lite_naive_bayes
end
module Performance : sig include module type of Performance end
module Performance : sig include module type of Oml_performance end
end

(** Model relationship between variables. *)
module Regression : sig
module Intf : sig include module type of Oml_lite_intf end
module Univariate : sig include module type of Oml_lite_univariate end
module Interpolate : sig include module type of Interpolate end
module Interpolate : sig include module type of Oml_interpolate end
end

0 comments on commit 53c2978

Please sign in to comment.