Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Monitor PRs on repository, and add branch information on Postgres #25

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
169 changes: 93 additions & 76 deletions pipeline.ml
Original file line number Diff line number Diff line change
Expand Up @@ -28,86 +28,110 @@ let dockerfile ~base =

let weekly = Current_cache.Schedule.v ~valid_for:(Duration.of_day 7) ()

let pipeline ~github ~repo ?slack_path ?docker_cpu ?docker_numa_node
~docker_shm_size ~commit ~conninfo () =
let head = Github.Api.head_commit github repo in
let src = Git.fetch (Current.map Github.Api.Commit.id head) in
let dockerfile =
let+ base = Docker.pull ~schedule:weekly "ocaml/opam2" in
`Contents (dockerfile ~base)
let process_pipeline ~docker_cpu ~docker_numa_node ~docker_shm_size ~slack_path ~conninfo
~name ~repo =
let+ refs =
Current.component "Get PRs"
|> let> api, repo = repo in
Github.Api.refs api repo
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use ci_refs ? It would make this code easier to read.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can;t get PR numbers or any branch information in ci_refs hence using refs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, you only get the commits with ci_refs.

in
let image = Docker.build ~pool ~pull:false ~dockerfile (`Git src) in
let docker_cpuset_cpus =
match docker_cpu with
| Some i -> [ "--cpuset-cpus"; string_of_int i ]
| None -> []
in
let docker_cpuset_mems =
match docker_numa_node with
| Some i -> [ "--cpuset-mems"; string_of_int i ]
| None -> []
in
let tmpfs =
match docker_numa_node with
| Some i ->
[
"--tmpfs";
Fmt.str "/dev/shm:rw,noexec,nosuid,size=%dG,mpol=bind:%d"
docker_shm_size i;
]
| None ->
[
"--tmpfs";
Fmt.str "/dev/shm:rw,noexec,nosuid,size=%dG" docker_shm_size;
]
in
let s =
let run_args =
[ "--security-opt"; "seccomp=./aslr_seccomp.json" ]
@ tmpfs
@ docker_cpuset_cpus
@ docker_cpuset_mems
in
let+ output =
Docker.pread ~run_args image
~args:
[
"/usr/bin/setarch";
"x86_64";
"--addr-no-randomize";
"_build/default/bench/bench.exe";
"-d";
"/dev/shm";
"--json";
]
in
let content =
Utils.merge_json repo.name commit (Yojson.Basic.from_string output)
in
let () = Utils.populate_postgres conninfo commit content in
match slack_path with Some p -> Some (p, content) | None -> None
in
s
|> Current.option_map (fun p ->
Current.component "post"
|> let** path, _ = p in
let channel = read_channel_uri path in
Slack.post channel ~key:"output" (Current.map snd p))
Github.Api.Ref_map.fold
(fun key head _ ->
match key with
| `Ref _ -> Current.return () (* Skip branches, only check PRs *)
| `PR num ->
let dockerfile =
let+ base = Docker.pull ~schedule:weekly "ocaml/opam2" in
`Contents (dockerfile ~base)
in
let docker_cpuset_cpus =
match docker_cpu with
| Some i -> [ "--cpuset-cpus"; string_of_int i ]
| None -> []
in
let docker_cpuset_mems =
match docker_numa_node with
| Some i -> [ "--cpuset-mems"; string_of_int i ]
| None -> []
in
let tmpfs =
match docker_numa_node with
| Some i ->
[
"--tmpfs";
Fmt.str "/dev/shm:rw,noexec,nosuid,size=%dG,mpol=bind:%d"
docker_shm_size i;
]
| None ->
[
"--tmpfs";
Fmt.str "/dev/shm:rw,noexec,nosuid,size=%dG" docker_shm_size;
]
in
let s =
let run_args =
[ "--security-opt"; "seccomp=./aslr_seccomp.json" ]
@ tmpfs
@ docker_cpuset_cpus
@ docker_cpuset_mems
in
let+ output =
let src =
Git.fetch
(Current.map Github.Api.Commit.id (Current.return head))
in
let image =
Docker.build ~pool ~pull:false ~dockerfile (`Git src)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know what is the default value for pool? As it is an optional argument and when you create a pool with a single job, which could be the default.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! pool is 1. We intend it to be always 1 so we are not contending for resources.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, can you add a comment about it in the code?

in
Docker.pread ~run_args image
~args:
[
"/usr/bin/setarch";
"x86_64";
"--addr-no-randomize";
"_build/default/bench/bench.exe";
"--nb-entries";
"1000";
"-d";
"/dev/shm";
"--json";
]
in
let commit = Github.Api.Commit.hash head in
let content =
Utils.merge_json name commit (Yojson.Basic.from_string output)
in
let () = Utils.populate_postgres conninfo commit content num in
match slack_path with Some p -> Some (p, content) | None -> None
in
s
|> Current.option_map (fun p ->
Current.component "post"
|> let** path, _ = p in
let channel = read_channel_uri path in
Slack.post channel ~key:"output" (Current.map snd p))
|> Current.ignore_value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can get rid of this line if you replace https://github.com/gs0510/index-benchmarks/pull/25/files#diff-bb69bf8bfba5fa2eb0e602d7891536ecR41 with Current.return None

refs

let pipeline ~github ~(repo : Github.Repo_id.t) ?slack_path ?docker_cpu
?docker_numa_node ~docker_shm_size ~conninfo () =
let name = repo.name in
let repo = Current.return (github, repo) in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's a bit weird that you construct a term Current.t from a pair which then you deconstruct in https://github.com/gs0510/index-benchmarks/pull/25/files#diff-bb69bf8bfba5fa2eb0e602d7891536ecR33.
Why not pass both arguments to the function process_pipeline ?
I don't understand current well enough, maybe you need to construct this pair to ensure re-evaluation?

process_pipeline ~docker_cpu ~docker_numa_node ~docker_shm_size ~conninfo ~name ~repo
~slack_path
Comment on lines +120 to +121
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
process_pipeline ~docker_cpu ~docker_numa_node ~docker_shm_size ~conninfo ~name ~repo
~slack_path
process_pipeline ?docker_cpu ?docker_numa_node ~docker_shm_size ~conninfo ~name ~repo
?slack_path

also it's good practice to keep the same order of arguments when calling internal functions

|> Current.ignore_value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can also remove this line, by ensuring that process_pipeline return unit Current.t. I think it should be feasible as Slack.post returns unit Current.t and that's the last function you call.


let webhooks = [ ("github", Github.webhook) ]

type token = { token_file : string; token_api_file : Github.Api.t }

let main config mode github_token (repo : Current_github.Repo_id.t) slack_path
docker_cpu docker_numa_node docker_shm_size conninfo user () =
let token = Utils.read_file github_token.token_file in
let commit = Utils.get_commit repo.name repo.owner user token in
let main config mode github_token (repo : Github.Repo_id.t) slack_path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason to have type annotations for repo?

docker_cpu docker_numa_node docker_shm_size conninfo () =
let github = github_token.token_api_file in
let engine =
Current.Engine.create ~config
(pipeline ~github ~repo ?slack_path ?docker_cpu ?docker_numa_node
~docker_shm_size ~commit ~conninfo)
~docker_shm_size ~conninfo)
in
let routes =
Routes.((s "webhooks" / s "github" /? nil) @--> Github.webhook)
Expand Down Expand Up @@ -152,12 +176,6 @@ let repo =
@@ Arg.info ~doc:"The GitHub repository (owner/name) to monitor." ~docv:"REPO"
[]

let user =
Arg.required
@@ Arg.opt Arg.(some string) None
@@ Arg.info ~doc:"User for which the OAuth token is given." ~docv:"USER"
[ "oauth-user" ]

let setup_log =
let init style_renderer level = Logging.init ?style_renderer ?level () in
Term.(const init $ Fmt_cli.style_renderer () $ Logs_cli.level ())
Expand Down Expand Up @@ -196,7 +214,6 @@ let cmd =
$ docker_numa_node
$ docker_shm_size
$ conninfo
$ user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you removed this? Does it mean that a GitHub token is no longer needed to run the pipeline?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A note just to remember to update the readme https://github.com/gs0510/index-benchmarks/blob/main/README.md#running-the-pipeline if --oauth-user is no longer needed.

$ setup_log),
Term.info "github" ~doc )

Expand Down
6 changes: 4 additions & 2 deletions postgres/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ GRANT ALL PRIVILEGES ON DATABASE docker TO docker;
CREATE TABLE benchmarks(
repositories varchar(256),
commits varchar(50) NOT NULL UNIQUE,
json_data jsonb
json_data jsonb,
branch varchar(256)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
branch varchar(256)
branch varchar(256)

);

CREATE TABLE benchmarksrun (
Expand All @@ -12,7 +13,8 @@ CREATE TABLE benchmarksrun (
time float8,
ops_per_sec float8,
mbs_per_sec float8,
timestamp float8
timestamp float8,
branch varchar(256)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need branch in both tables. The commit can be used to retrieve which branch is used from the benchmarks table.

);


22 changes: 15 additions & 7 deletions utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ let read_file path =

open Yojson.Basic.Util

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of adding utils.mli? You can keep better track of unused functions and also add some documentation.

let format_benchmark_data commit name time mbs_per_sec ops_per_sec timestamp =
let format_benchmark_data commit name time mbs_per_sec ops_per_sec timestamp pr
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you rename name into bench_name ? I confused it with the repo name, for which sometimes you use the variable name.

=
"('"
^ commit
^ "', '"
Expand All @@ -52,11 +53,13 @@ let format_benchmark_data commit name time mbs_per_sec ops_per_sec timestamp =
^ ops_per_sec
^ ","
^ timestamp
^ ","
^ pr
^ ") "

let get_repo json = Yojson.Basic.from_string json |> member "repo" |> to_string

let get_data_from_json commit json =
let get_data_from_json commit json pr =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's a bit confusing to have the function get_data_from_json add the pr string, as it is not extracted from the json. I would add pr to the result separately.

let bench_objects =
Yojson.Basic.from_string json
|> member "result"
Expand All @@ -74,32 +77,37 @@ let get_data_from_json commit json =
(metrics |> member "time" |> to_float |> string_of_float)
(metrics |> member "mbs_per_sec" |> to_float |> string_of_float)
(metrics |> member "ops_per_sec" |> to_float |> string_of_float))
(string_of_float (Unix.time ())))
(string_of_float (Unix.time ()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(string_of_float (Unix.time ()))
Unix.time () |> string_of_float

so that it's similar to the lines above

pr)
bench_objects bench_names
in
String.concat "," result_string

open! Postgresql

let populate_postgres conninfo commit json_string =
let populate_postgres conninfo commit json_string num =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use pr or pr_number instead of num

try
let repository = get_repo json_string in
let c = new connection ~conninfo () in
let pr = Printf.sprintf "%s/%d" repository num in
let _ =
c#exec ~expect:[ Command_ok ]
( "insert into benchmarks(repositories, commits, json_data) values ( '"
( "insert into benchmarks(repositories, commits, json_data, branch) \
values ( '"
^ repository
^ "', '"
^ commit
^ "', '"
^ json_string
^ "', '"
^ pr
^ "' )" )
in
let data_to_insert = get_data_from_json commit json_string in
let data_to_insert = get_data_from_json commit json_string pr in
let _ =
c#exec ~expect:[ Command_ok ]
( "insert into benchmarksrun(commits, name, time, mbs_per_sec, \
ops_per_sec, timestamp) values "
ops_per_sec, timestamp, branch) values "
^ data_to_insert )
in
c#finish
Expand Down
7 changes: 7 additions & 0 deletions utils.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
val read_file : string -> string

val read_fpath : Fpath.t -> string

val merge_json : string -> string -> Yojson.Basic.t -> string

val populate_postgres : string -> string -> string -> int -> unit