In [8]:
open Core_kernel

let build_counts () =
  In_channel.fold_lines In_channel.stdin ~init:[] ~f:(fun counts line ->
    let count =
      match List.Assoc.find ~equal:String.equal counts line with
      | None -> 0
      | Some x -> x
    in
    List.Assoc.add ~equal:String.equal counts line (count + 1)
  )

let () =
  build_counts ()
  |> List.sort ~cmp:(fun (_,x) (_,y) -> Int.descending x y)
  |> (fun l -> List.take l 10)
  |> List.iter ~f:(fun (line,count) -> printf "%3d: %s\n" count line)

val build_counts :
  unit -> (Core_kernel.String.t, int) Core_kernel.List.Assoc.t = <fun>


In [5]:
let build_counts () = 
In_channel.fold_lines stdin ~init:[] ~f:(fun counts line -> 
let count = 
match List.Assoc.find counts line with
| None -> 0
| Some x -> x
in
List.Assoc.add counts line (count + 1)
)

File "[5]", line 2, characters 22-27:
[since 2016-04] Use [In_channel.stdin]


error: compile_error

In [6]:
let assoc = [("one", 1); ("two", 2); ("three",3)]

val assoc : (string * int) list = [("one", 1); ("two", 2); ("three", 3)]


In [7]:
List.assoc.find "four" 4

error: compile_error

In [10]:
List.Assoc.add assoc "two"  4 (* overwrite an existing key *) ;;

- : equal:(string -> string -> bool) ->
    (string, int) Core_kernel.List.Assoc.t
= <fun>


In [13]:
let () = print_string "something"

In [14]:
()

- : unit = ()


In [16]:
print_string "few"

- : unit = ()


let is the keyword used to define new variables, like in the following construct:

let pattern = expr

assigns the value 2 to the name a. (Note this is not a way to assign a value to an already existing variable, but this is another topic).

But the pattern to the left of the = sign can be more than just a name. For instance

In [17]:
let (a,b) = (42,"foo")

val a : int = 42
val b : string = "foo"


defines both a and b, to be respectively 42 and "foo". Of course, the types on both sides must match. Which is the case here: both sides are of type int * string.

The expressions to the right of the = sign can also be elaborated, for instance

defines both a and b, to be respectively 42 and "foo". Of course, the types on both sides must match. Which is the case here: both sides are of type int * string.

The expressions to the right of the = sign can also be elaborated, for instance

In [18]:
let foo =
  let temp = String.make 10 'a' in
  temp.[2] <- 'b';
  temp

File "[18]", line 3, characters 2-17:
[since 2017-10] Use [Bytes.set] instead


val foo : Core_kernel.String.t = "aabaaaaaaa"


defines foo as the string "aabaaaaaaa". (As a side note, it also ensures that temp is local to this code snippet).

Now, let's use both: on the left, a pattern matching values of type unit, and on the right, an expression of type unit:

In [19]:
let () = Printf.printf "Hello world!\n"

Which explains the let () = construct. Now, about the let _, one simply needs to know that _ can be used in a pattern as a wildcard: it matches values of any type and does not bind any name. For instance

In [20]:
let (a,_) = (42,"foo")

val a : int = 42


defines a as 42, and discards the value "foo". _ means "I know there is something here and I explicitly say I will not use it, so I don't name it". Here _ was used to match values of type string, but it can match value of any type, like int * string:

which does not define any variable and is not very useful. Such constructs are useful when the right hand side has side effects, like this:

In [21]:
let _ = Printf.printf "Hello world!\n"

- : unit = ()


which explains the second part of the question.

Both are used and it's rather a matter of taste whether to use one or the other.

let () = is slightly safer as it has the compiler check that the right hand side is of type unit. A value of any other type than unit is often a bug.

let _ = is slightly shorter (I've seen this argument). (Note that with an editor that automatically closes parenthesizes, the number of keystrokes is the same ;-)

与`C`不同，`Ocaml`中程序并没有一个唯一的`main`函数。执行`Ocaml`程序的时候，实现文件中的所有语句会暗器链接的顺序进行计算。这些实现文件可能包含任意的表达式，而不只是函数定义。在这个例子。在这个例子中，以`let()=`开头的声明就扮演着`main`函数的角色，启动了处理过程。不过，整个文件都会在启动时计算，所以在某种程度来看，整个代码基都是一个庞大的`main`函数。

写`let ()=`是一个惯用法，看上去有点奇怪，不过这样有一个目的。这里的`let`绑定是对一个`unit`类型值的模式匹配，它是为了确保右边的表达式返回`unit`，对于主要为得到副作用的函数来说，这个用法很常见。

`Ocaml`附带了两个编译器：`ocamlc`字节码编译器和`ocamlopt`原生代码编译器。用`ocamlc`编译的程序将由一个虚拟机解释，而用`ocamlopt`编译的程序会编译为在一个特定的操作系统和处理器体系结构上运行的原生机器代码。使用`ocamlbuild`便宜时，以`byte`结尾的目标会构建为字节码可执行程序，以`.native`结尾的目标将构建为原生代码。

除了性能以外，这两个编译器生成的可执行程序的行为基本上完全相同。有几点需要注意：首先，字节码编译器可以在更多体系上使用，而且字节码编译器的一些工具并不适用原生代码。例如，Ocaml调试工具只能处理字节码[不过，(gdb)确实可以处理Ocaml原生代码应用]。字节码编译器比原生代码编译器速度更快。另外，要运行一个字节码可执行程序，通过需要在当前系统上安装Ocaml。不过，这并不是一个严格的要求，因为通过`-custom`编译器标志，也可以用一个嵌入式运行时系统构建字节码可执行程序。

一般来说，产品可执行陈旭通常应当用原生艾玛编译器来构建，不过有时也可以使用字节码完成开发构建。当然，如果原生代码编译器不支持所面向的平台，字节码编译器就很有意义。

`Ocaml`中的源文件都关联到模块系统，每个文件编译为一个模块，模块名有文件名得出。之前已经见过模块了，如我们一斤使用过`List.Assoc`模块中的`find`和`add`等函数。考虑最简单的模块，可以认为模块就是存储在一个命名空间中的定义结合。

文件`counter.ml`将编译为一个名为`Counter`的模块，模块名由文件名自动得出。这个模块名首字母大写（尽管文件名的首字母并不大写）。实际上，模块名都为首字母大写。

In [22]:
let build_counts() = In_channel.fold_lines stdin ~init:[] ~f:Counter.touch

File "[22]", line 1, characters 43-48:
[since 2016-04] Use [In_channel.stdin]


error: compile_error

尽管我们把一些逻辑放在了`Counter`模块中，但`freq.ml`中的代码仍要依赖`Counter`实现的具体细节。实际上，如果查看`build_counts`的定义，你会看到它依赖与这样一个事实：空的评述集合表示为一个空列表。我们希望避免这种依赖性，从而可以改变`Counter`的实现而不需要改变客户代码。

模块的实现细节可以通过接口（`interface`）来影藏

In [23]:
open Core

In [24]:
var touch: (string * int) list -> string -> (string * int) list

error: compile_error

# 签名中的具体类型

假设我们希望为`Counter`增加一个函数，返回评述为中位数的行。如果行数为偶数，那么并没有准确的中位数，，这个函数要返回聘书在中位数前后的行。我们将使用一个定制类型来表示有两个可能返回值的情况。

In [25]:
type median = | Median of string
              | Before_and_after of string * string
var median: t -> median


error: compile_error

确定一个给定类型是抽象类型还是具体类型非常重要。抽象类型允许你对如何创建和访问值有更多的控制，除了类型本省世家的限制外，可以更容易的确保不定式：具体类型则允许以一种轻量级的方式为客户代码提供更多细节和结构。如何做出正确的选择，这很大程度上取决于所在的具体上下文。

在一个子模块中创建这样一个抽象类型如下所示：

In [26]:
module Username: sig
type t
val of_string : string -> t
val to_string : t -> string
end = struct
type t= string
let of_string x = x
let to_string x = x
end

module Username :
  sig type t val of_string : string -> t val to_string : t -> string end


模块声明的基本结构是

In [27]:
module <name> : <signature> = <implementation>

error: compile_error

还可以采用稍有不同的写法，为签名指定自己的顶层模块类型声明，从而能以一种轻量级的方式创建多个有相同底层实现的不同类型

In [37]:
module type ID = sig
type t
val of_string : string -> t
val to_string : t -> string
end

module String_id = struct
type t = string
let of_string x = x
let to_string x = x
end

module Username : ID = String_id
module Hostname : ID = String_id

type session_info = {
user: Username.t;
host: Hostname.t;
when_started: Time.t;
}
let sessions_have_same_user s1 s2 = 
s1.user = s2.host

module type ID =
  sig type t val of_string : string -> t val to_string : t -> string end


module String_id :
  sig type t = string val of_string : 'a -> 'a val to_string : 'a -> 'a end


module Username : ID


module Hostname : ID


type session_info = {
  user : Username.t;
  host : Hostname.t;
  when_started : Core.Time.t;
}


error: compile_error

In [None]:
前面的代码有一个`bug`，它将一个会话中的用户名与另一个会话中的主机名进行比较，实际上本来应该比较两个会话的用户名

In [31]:
module M = struct let fooo = 3 end

module M : sig val fooo : int end


In [38]:
fooo

- : int = 3


一般的，打开一个模块会把该模块中的所有内容增加到一个环境中，编译器要在这个环境中查找各个标识符的定义

In [34]:
open M

In [36]:
fooo

- : int = 3


如果确需要打开模块，最好局部打开。局部打开有两种语法。例如，可以写为：

In [39]:
let average x y = let open Int64 in
x + y / of_int 2;;

val average : Core.Int64.t -> Core.Int64.t -> Core.Int64.t = <fun>


在这里，`of_int`来自于`Int64`模块

还有一种更轻量级的语法来完成局部打开

In [40]:
let average x y = 
Int64.(x + y / of_int 2)

val average : Core.Int64.t -> Core.Int64.t -> Core.Int64.t = <fun>


局部`open`还有一种替代方法，可以让你的代码更简洁，而不会损失明确性。这种方法就是局部重新绑定一个模块名，所以，使用`Counter.median`的时候，可以不这样写。

In [41]:
let print_median m =
match m with 
| Counter.median string -> printf "True median:\n %s \n" string
| Counter.Before_and_after (before, after) ->
printf "Before and after median:\n %s \n %s\n" before after

error: compile_error

而是写为

In [42]:
let print_median m = 
let module C = Counter in
match m with
| C.median string -> printf...

error: compile_error

由于模块名C值2存在于一个很小的作用域中，所以很容易阅读并记住C代表什么。如果在模块的顶层将模块重新绑定到一个非常短的名字，这通常是错误的做法

打开一个模块会影响用来搜索标识符的环境，而包含一个模块则是要为模块增加新的标识符。考虑下面这个简单的模块，它提供一组整数值

In [47]:
module Interval  = struct
type t = | Interval of int * int
         | Empty

error: compile_error

In [46]:
let create low high = 
if high < low then Empty else Interval (low, high)

error: compile_error

可以使用`include`指令来创建一个扩展版本的新的`Interval`模块

In [48]:
module Extended_interval = struct
 include Interval
 
 let contains t x = 
 match t with 
 |Empty -> false
 | Interval (low, high) -> x >= low && x <= high
 end;;

error: compile_error

In [49]:
module Extended_interval:
sig
 type t = Interval.t  = Interval of int * int | Empty
 val create : int -> int -> t
 val contains : t -> int -> bool
 end

error: compile_error

在OCaml中， 每一段代码都被包成一个模块。一个模块可以选择性地作为另外一个模块的子模块，很像文件系统中的目录-但是我们不经常这样做。

当你写一个程序使用两个文件amodule.ml和bmodule.ml，它们中的每一个都自动定义一个模块，名字叫Amodule和Bmodule，模块的内容就是你写到文件中的东西。

这里是文件amodule.ml里面的代码：

let hello () = print_endline "Hello"

还有bmodule.ml里面的：

Amodule.hello ()

通常文件一个一个编译，让我们来编译：

ocamlopt -c amodule.ml
ocamlopt -c bmodule.ml
ocamlopt -o hello amodule.cmx bmodule.cmx

现在我们有一个很好的可执行文件用来打印 “Hello”。如你所见，如果你要访问一个给定模块的任何东西，你要用模块的名字（通常是大写字母开头）后面跟一个点号，然后是你要用的东西。可能是一个值，一个类型构造器，或者是给定模块能提供的任何东西。

不管怎样，用不用open是个人选择的问题。一些模块使用了很多很普遍的名字。List模块就是这样的例子。通常我们不用open List。像Printf的其他模块，提供通常不受冲突的名字，比如printf。为了避免到处写Printf.printf，在文件开头放一句open Printf是有道理的。

In [50]:
 open Printf
  let my_data = [ "a"; "beautiful"; "day" ]
  let () = List.iter (fun s -> printf "%s\n" s) my_data;;

val my_data : string list = ["a"; "beautiful"; "day"]


error: compile_error

We saw that one example.ml file results automatically in one module implementation named Example. Its module signature is automatically derived and is the broadest possible, or can be restricted by writing an example.mli file.

That said, a given module can also be defined explicitly from within a file. That makes it a submodule of the current module. Let's consider this example.ml file:
```
module Hello = struct
  let message = "Hello"
  let hello () = print_endline message
end
let goodbye () = print_endline "Goodbye"
let hello_goodbye () =
  Hello.hello ();
  goodbye ()
```
From another file, it is clear that we now have two levels of modules. We can write:
```
let () =
  Example.Hello.hello ();
  Example.goodbye ()
  ```

In [51]:
List.rev [1; 2; 3]

- : int Core.List.t = [3; 2; 1]


In [52]:
Array.append [| 1; 2 |] [| 3; 4; 5 |]

- : int Core.Array.t = [|1; 2; 3; 4; 5|]


In [53]:
[| 42; 51; 32 |].(2) 

- : int = 32


In [54]:
fst (42, "John")

- : int = 42


In [55]:
let xl = ref []

val xl : '_a list Core.ref = {contents = []}


In [57]:
for i = 1 to 10 do
  xl := i :: !xl;
done;
!xl

- : int list = [10; 9; 8; 7; 6; 5; 4; 3; 2; 1; 10; 9; 8; 7; 6; 5; 4; 3; 2; 1]


In [58]:
for i = 10 downto 1 do
 xl := i :: !xl
done;
!xl

- : int list =
[1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 10; 9; 8; 7; 6; 5; 4; 3; 2; 1; 10; 9; 8; 7;
 6; 5; 4; 3; 2; 1]


To test equality, you can use =, and for inequality, there is <>. For example:
```
1 = 1

1 <> 1

"1" = 1
```
Oops, OCaml does not allow you to compare values with different types, so we must be more careful:
```
"1" = string_of_int 1
```

In [59]:
let a = 1 and b = 2

val a : int = 1
val b : int = 2


In [63]:
let z = if a < 100 then begin
    print_string "return at least 100 !";
    print_newline ();
    100
 end else a

File "[63]", line 3, characters 4-17:
[since 2016-04] Use [Out_channel.newline stdout]


return at least 100 !


val z : int = 100


In [64]:
let x = ref 1

val x : int Core.ref = {contents = 1}


In [65]:
while !x > 20 do
  print_int !x; print_newline ();
  x := !x - 2
done

File "[65]", line 2, characters 2-11:
[since 2016-04] Use [Out_channel.output_string stdout]
File "[65]", line 2, characters 16-29:
[since 2016-04] Use [Out_channel.newline stdout]


- : unit = ()
