From 2960410da622670c1124065fb5abbb6c37585095 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Wed, 7 May 2025 09:12:25 -0700 Subject: [PATCH 01/51] Experimenting with integrating patterns from git-lfs --- cmd/add/main.go | 28 ++++++++ cmd/filterprocess/main.go | 59 +++++++++++++++++ cmd/list/main.go | 4 +- cmd/root.go | 14 ++-- cmd/track/main.go | 21 ++++++ docs/README-git-plugin-dev.md | 25 +++++++ drs/README.md | 119 ++++++++++++++++++++++++++++++++++ drs/object.go | 44 +++++++++++++ drs/util.go | 53 +++++++++++++++ git-gen3.go => git-drs.go | 6 +- go.mod | 29 ++++++++- go.sum | 84 ++++++++++++++++++++++++ utils/common.go | 5 ++ utils/util.go | 34 ++++++++++ 14 files changed, 514 insertions(+), 11 deletions(-) create mode 100644 cmd/add/main.go create mode 100644 cmd/filterprocess/main.go create mode 100644 cmd/track/main.go create mode 100644 docs/README-git-plugin-dev.md create mode 100644 drs/README.md create mode 100644 drs/object.go create mode 100644 drs/util.go rename git-gen3.go => git-drs.go (59%) create mode 100644 utils/common.go create mode 100644 utils/util.go diff --git a/cmd/add/main.go b/cmd/add/main.go new file mode 100644 index 0000000..3ea7201 --- /dev/null +++ b/cmd/add/main.go @@ -0,0 +1,28 @@ +package add + +import ( + "fmt" + "path/filepath" + + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "add", + Short: "Add a file", + Long: ``, + Args: cobra.MinimumNArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + for _, fileArg := range args { + matches, err := filepath.Glob(fileArg) + if err == nil { + for _, f := range matches { + + fmt.Printf("Adding %s\n", f) + } + } + } + return nil + }, +} diff --git a/cmd/filterprocess/main.go b/cmd/filterprocess/main.go new file mode 100644 index 0000000..70fda3b --- /dev/null +++ b/cmd/filterprocess/main.go @@ -0,0 +1,59 @@ +package filterprocess + +import ( + "fmt" + "io" + "log" + "os" + + "github.com/git-lfs/git-lfs/v3/git" + 
"github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "filter-process", + Short: "filter proces", + Long: ``, + Args: cobra.MinimumNArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + s := git.NewFilterProcessScanner(os.Stdin, os.Stdout) + err := s.Init() + if err != nil { + return err + } + + caps, err := s.NegotiateCapabilities() + if err != nil { + return err + } + log.Printf("Caps: %#v\n", caps) + log.Printf("Running filter-process: %s\n", args) + + for s.Scan() { + req := s.Request() + switch req.Header["command"] { + case "clean": + log.Printf("Request to clean %#v %s\n", req.Payload, req.Header["pathname"]) + + clean(os.Stdout, req.Payload, req.Header["pathname"], -1) + + case "smudge": + log.Printf("Request to smudge %s %s\n", req.Payload, req.Header["pathname"]) + case "list_available_blobs": + log.Printf("Request for list_available_blobs\n") + + default: + return fmt.Errorf("don't know what to do: %s", req.Header["command"]) + } + log.Printf("Request: %#v\n", req) + } + + return nil + }, +} + +func clean(to io.Writer, from io.Reader, fileName string, fileSize int64) error { + + return nil +} diff --git a/cmd/list/main.go b/cmd/list/main.go index f6deccb..27f36ae 100644 --- a/cmd/list/main.go +++ b/cmd/list/main.go @@ -5,7 +5,7 @@ import ( "os" "path/filepath" - "github.com/bmeg/git-gen3/git" + "github.com/bmeg/git-drs/utils" "github.com/spf13/cobra" ) @@ -17,7 +17,7 @@ var Cmd = &cobra.Command{ Long: ``, Args: cobra.MinimumNArgs(0), RunE: func(cmd *cobra.Command, args []string) error { - gitTop, err := git.GitTopLevel() + gitTop, err := utils.GitTopLevel() if err != nil { fmt.Printf("Error: %s\n", err) return err diff --git a/cmd/root.go b/cmd/root.go index 9afb30e..7c87057 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,16 +3,18 @@ package cmd import ( "os" - "github.com/bmeg/git-gen3/cmd/initialize" - "github.com/bmeg/git-gen3/cmd/list" - "github.com/bmeg/git-gen3/cmd/pull" - 
"github.com/bmeg/git-gen3/cmd/push" + "github.com/bmeg/git-drs/cmd/add" + "github.com/bmeg/git-drs/cmd/filterprocess" + "github.com/bmeg/git-drs/cmd/initialize" + "github.com/bmeg/git-drs/cmd/list" + "github.com/bmeg/git-drs/cmd/pull" + "github.com/bmeg/git-drs/cmd/push" "github.com/spf13/cobra" ) // RootCmd represents the root command var RootCmd = &cobra.Command{ - Use: "git-gen3", + Use: "git-drs", SilenceErrors: true, SilenceUsage: true, PersistentPreRun: func(cmd *cobra.Command, args []string) { @@ -25,6 +27,8 @@ func init() { RootCmd.AddCommand(push.Cmd) RootCmd.AddCommand(pull.Cmd) RootCmd.AddCommand(list.Cmd) + RootCmd.AddCommand(add.Cmd) + RootCmd.AddCommand(filterprocess.Cmd) RootCmd.AddCommand(genBashCompletionCmd) } diff --git a/cmd/track/main.go b/cmd/track/main.go new file mode 100644 index 0000000..31d1964 --- /dev/null +++ b/cmd/track/main.go @@ -0,0 +1,21 @@ +package track + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "track", + Short: "Set a file track filter", + Long: ``, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + for i := range args { + fmt.Printf("Track %s\n", args[i]) + } + return nil + }, +} diff --git a/docs/README-git-plugin-dev.md b/docs/README-git-plugin-dev.md new file mode 100644 index 0000000..5ab7885 --- /dev/null +++ b/docs/README-git-plugin-dev.md @@ -0,0 +1,25 @@ + +# Notes about the development of git plugins + + +To attach the plugin into the configutation. In the global config `~/.gitconfig` add the lines: +``` +[filter "drs"] + clean = git-drs clean -- %f + smudge = git-drs smudge -- %f + process = git-drs filter-process + required = true +``` + +Then to add tracking in a project, add entries to `.gitattributes` in the working directory. 
Example: +``` +*.tsv filter=drs diff=drs merge=drs -text +``` + +For when `git status` or `git diff` are invoked on `*.tsv` file, the process `git-drs filter-process` will be +invoked. The communication between git and the subprocess is outlined in (gitprotocol-common)[https://git-scm.com/docs/gitprotocol-common]. A library for parsing this event stream is part of the git-lfs code base https://github.com/git-lfs/git-lfs/blob/main/git/filter_process_scanner.go +An example of responding to these requests can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_filter_process.go + +My understanding: The main set of command the the filter-process command responds to are `clean` and `smudge`. +The `clean` process cleans an input document before running diff, things like run auto formatting before committing. This is where the change from the file to the remote data pointer could take place. An example of the +clean process can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_clean.go#L27 \ No newline at end of file diff --git a/drs/README.md b/drs/README.md new file mode 100644 index 0000000..f18d028 --- /dev/null +++ b/drs/README.md @@ -0,0 +1,119 @@ + + +DRS OpenAPI definition + +```yaml +type: object +required: + - id + - self_uri + - size + - created_time + - checksums +properties: + id: + type: string + description: An identifier unique to this `DrsObject` + name: + type: string + description: |- + A string that can be used to name a `DrsObject`. + This string is made up of uppercase and lowercase letters, decimal digits, hyphen, period, and underscore [A-Za-z0-9.-_]. See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_282[portable filenames]. + self_uri: + type: string + description: |- + A drs:// hostname-based URI, as defined in the DRS documentation, that tells clients how to access this object. 
+ The intent of this field is to make DRS objects self-contained, and therefore easier for clients to store and pass around. For example, if you arrive at this DRS JSON by resolving a compact identifier-based DRS URI, the `self_uri` presents you with a hostname and properly encoded DRS ID for use in subsequent `access` endpoint calls. + example: + drs://drs.example.org/314159 + size: + type: integer + format: int64 + description: |- + For blobs, the blob size in bytes. + For bundles, the cumulative size, in bytes, of items in the `contents` field. + created_time: + type: string + format: date-time + description: |- + Timestamp of content creation in RFC3339. + (This is the creation time of the underlying content, not of the JSON object.) + updated_time: + type: string + format: date-time + description: >- + Timestamp of content update in RFC3339, identical to `created_time` in systems + that do not support updates. + (This is the update time of the underlying content, not of the JSON object.) + version: + type: string + description: >- + A string representing a version. + + (Some systems may use checksum, a RFC3339 timestamp, or an incrementing version number.) + mime_type: + type: string + description: A string providing the mime-type of the `DrsObject`. + example: + application/json + checksums: + type: array + minItems: 1 + items: + $ref: './Checksum.yaml' + description: >- + The checksum of the `DrsObject`. At least one checksum must be provided. + + For blobs, the checksum is computed over the bytes in the blob. + + For bundles, the checksum is computed over a sorted concatenation of the + checksums of its top-level contained objects (not recursive, names not included). + The list of checksums is sorted alphabetically (hex-code) before concatenation + and a further checksum is performed on the concatenated checksum value. 
+ + For example, if a bundle contains blobs with the following checksums: + + md5(blob1) = 72794b6d + + md5(blob2) = 5e089d29 + + Then the checksum of the bundle is: + + md5( concat( sort( md5(blob1), md5(blob2) ) ) ) + + = md5( concat( sort( 72794b6d, 5e089d29 ) ) ) + + = md5( concat( 5e089d29, 72794b6d ) ) + + = md5( 5e089d2972794b6d ) + + = f7a29a04 + access_methods: + type: array + minItems: 1 + items: + $ref: './AccessMethod.yaml' + description: |- + The list of access methods that can be used to fetch the `DrsObject`. + Required for single blobs; optional for bundles. + contents: + type: array + description: >- + If not set, this `DrsObject` is a single blob. + + If set, this `DrsObject` is a bundle containing the listed `ContentsObject` s (some of which may be further nested). + items: + $ref: './ContentsObject.yaml' + description: + type: string + description: A human readable description of the `DrsObject`. + aliases: + type: array + items: + type: string + description: >- + A list of strings that can be used to find other metadata + about this `DrsObject` from external metadata sources. These + aliases can be used to represent secondary + accession numbers or external GUIDs. 
+``` \ No newline at end of file diff --git a/drs/object.go b/drs/object.go new file mode 100644 index 0000000..56072cc --- /dev/null +++ b/drs/object.go @@ -0,0 +1,44 @@ +package drs + +type Checksum struct { + Checksum string `json:"checksum"` + Type string `json:"type"` +} + +type AccessURL struct { + URL string `json:"url"` + Headers []string `json:"headers"` +} + +type Authorizations struct { + //This structue is not stored in the file system +} + +type AccessMethod struct { + Type string `json:"type"` + AccessURL AccessURL `json:"access_url"` + AccessID string `json:"access_id"` + Cloud string `json:"cloud"` + Region string `json:"region"` + Avalible string `json:"available"` + Authorizations Authorizations `json:"Authorizations"` +} + +type Contents struct { +} + +type DRSObject struct { + Id string `json:"id"` + Name string `json:"name"` + SelfURL string `json:"self_url"` + Size int64 `json:"size"` + CreatedTime string `json:"created_time"` + UpdatedTime string `json:"updated_time"` + Version string `json:"version"` + MimeType string `json:"mime_type"` + Checksums []Checksum `json:"checksum"` + AccessMethods []AccessMethod `json:"access_methods"` + Contents []Contents `json:"contents"` + Description string `json:"description"` + Aliases []string `json:"aliases"` +} diff --git a/drs/util.go b/drs/util.go new file mode 100644 index 0000000..8bde784 --- /dev/null +++ b/drs/util.go @@ -0,0 +1,53 @@ +package drs + +import ( + "encoding/json" + "io/fs" + "os" + "path/filepath" + + "github.com/bmeg/git-drs/utils" +) + +const DRS_DIR = ".drs" + +type DrsWalkFunc func(path string, d *DRSObject) error + +func BaseDir() (string, error) { + gitTopLevel, err := utils.GitTopLevel() + if err != nil { + return "", err + } + return filepath.Join(gitTopLevel, DRS_DIR), nil +} + +type dirWalker struct { + baseDir string + userFunc DrsWalkFunc +} + +func (d *dirWalker) call(path string, dir fs.DirEntry, cErr error) error { + data, err := os.ReadFile(path) + if err != nil { + 
return nil + } + obj := DRSObject{} + err = json.Unmarshal(data, &obj) + if err != nil { + return err + } + relPath, err := filepath.Rel(d.baseDir, path) + if err != nil { + return err + } + return d.userFunc(relPath, &obj) +} + +func ObjectWalk(f DrsWalkFunc) error { + baseDir, err := BaseDir() + if err != nil { + return err + } + ud := dirWalker{baseDir, f} + return filepath.WalkDir(baseDir, ud.call) +} diff --git a/git-gen3.go b/git-drs.go similarity index 59% rename from git-gen3.go rename to git-drs.go index a6b9838..237cfd1 100644 --- a/git-gen3.go +++ b/git-drs.go @@ -1,15 +1,15 @@ package main import ( - "fmt" + "log" "os" - "github.com/bmeg/git-gen3/cmd" + "github.com/bmeg/git-drs/cmd" ) func main() { if err := cmd.RootCmd.Execute(); err != nil { - fmt.Println("Error:", err.Error()) + log.Println("Error:", err.Error()) os.Exit(1) } } diff --git a/go.mod b/go.mod index e9c6287..4e11a3f 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,36 @@ -module github.com/bmeg/git-gen3 +module github.com/bmeg/git-drs go 1.24.0 require ( + github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 // indirect + github.com/avast/retry-go v2.4.2+incompatible // indirect + github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 // indirect + github.com/git-lfs/git-lfs/v3 v3.6.1 // indirect + github.com/git-lfs/gitobj/v2 v2.1.1 // indirect + github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a // indirect + github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect + github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect + github.com/hashicorp/go-uuid v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jcmturner/aescts/v2 v2.0.0 // indirect + github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect + github.com/jcmturner/gofork v1.0.0 // indirect + github.com/jcmturner/goidentity/v6 v6.0.1 // indirect + github.com/jcmturner/gokrb5/v8 v8.4.2 // indirect + github.com/jcmturner/rpc/v2 v2.0.3 // indirect + 
github.com/jmhodges/clock v1.2.0 // indirect + github.com/leonelquinteros/gotext v1.5.0 // indirect + github.com/mattn/go-isatty v0.0.4 // indirect + github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 // indirect + github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 // indirect + github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect github.com/spf13/cobra v1.9.1 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/ssgelm/cookiejarparser v1.0.1 // indirect + golang.org/x/crypto v0.21.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/sync v0.1.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect ) diff --git a/go.sum b/go.sum index ffae55e..f79b8f3 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,94 @@ +github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= +github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= +github.com/avast/retry-go v2.4.2+incompatible h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= +github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= +github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= +github.com/git-lfs/git-lfs/v3 v3.6.1 h1:0RA2HzkMVl69KE5zCGY1PxqkDSbd/f/O7Du6CNkTYtY= +github.com/git-lfs/git-lfs/v3 v3.6.1/go.mod h1:1YO3nafGw2wKBR5LTZ7/LXJ7U7ELdvIGvcCBrLt6mfM= +github.com/git-lfs/gitobj/v2 v2.1.1 
h1:tf/VU6zL1kxa3he+nf6FO/syX+LGkm6WGDsMpfuXV7Q= +github.com/git-lfs/gitobj/v2 v2.1.1/go.mod h1:q6aqxl6Uu3gWsip5GEKpw+7459F97er8COmU45ncAxw= +github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a h1:6pskVZacdMUL93pCpMAYnMDLjH1yDFhssPYGe32sjdk= +github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a/go.mod h1:70O4NAtvWn1jW8V8V+OKrJJYcxDLTmIozfi2fmSz5SI= +github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d5raz9t3+IzoR1i1wqxE1kZC6dY+U= +github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= +github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= +github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= +github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= 
+github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.2 h1:6ZIM6b/JJN0X8UM43ZOM6Z4SJzla+a/u7scXFJzodkA= +github.com/jcmturner/gokrb5/v8 v8.4.2/go.mod h1:sb+Xq/fTY5yktf/VxLsE3wlfPqQjp0aWNYyvBVK62bc= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= +github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= +github.com/leonelquinteros/gotext v1.5.0 h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914FOauSkBM= +github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= +github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= +github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= +github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= +github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 h1:chPfVn+gpAM5CTpTyVU9j8J+xgRGwmoDlNDLjKnJiYo= +github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= +github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod 
h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= +github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191027093000-83d349e8ac1a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git 
a/utils/common.go b/utils/common.go new file mode 100644 index 0000000..cd016f2 --- /dev/null +++ b/utils/common.go @@ -0,0 +1,5 @@ +package utils + +const ( + DRS_DIR = ".drs" +) diff --git a/utils/util.go b/utils/util.go new file mode 100644 index 0000000..3f6d71a --- /dev/null +++ b/utils/util.go @@ -0,0 +1,34 @@ +package utils + +import ( + "bytes" + "os/exec" + "path/filepath" + "strings" +) + +func GitTopLevel() (string, error) { + path, err := SimpleRun([]string{"git", "rev-parse", "--show-toplevel"}) + path = strings.TrimSuffix(path, "\n") + return path, err +} + +func SimpleRun(cmds []string) (string, error) { + exePath, err := exec.LookPath(cmds[0]) + if err != nil { + return "", err + } + buf := &bytes.Buffer{} + cmd := exec.Command(exePath, cmds[1:]...) + cmd.Stdout = buf + err = cmd.Run() + return buf.String(), err +} + +func DrsTopLevel() (string, error) { + base, err := GitTopLevel() + if err != nil { + return "", err + } + return filepath.Join(base, DRS_DIR), nil +} From 6e251ce98d59048d00210e218afced244ebe4667 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Thu, 8 May 2025 16:41:32 -0700 Subject: [PATCH 02/51] Adding DRS query test to code --- cmd/query/main.go | 37 ++++++++++++++++++++++++++++++ cmd/root.go | 2 ++ docs/README-git-plugin-dev.md | 4 ++-- drs/client.go | 43 +++++++++++++++++++++++++++++++++++ drs/object.go | 32 +++++++++++++------------- 5 files changed, 100 insertions(+), 18 deletions(-) create mode 100644 cmd/query/main.go create mode 100644 drs/client.go diff --git a/cmd/query/main.go b/cmd/query/main.go new file mode 100644 index 0000000..189a29d --- /dev/null +++ b/cmd/query/main.go @@ -0,0 +1,37 @@ +package query + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var server string = "https://calypr.ohsu.edu/ga4gh" + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "query", + Short: "Query server for DRS ID", + Long: ``, + Args: cobra.MinimumNArgs(1), + RunE: 
func(cmd *cobra.Command, args []string) error { + + client, err := drs.NewClient(server) + if err != nil { + return err + } + + obj, err := client.GetObject(args[0]) + if err != nil { + return err + } + out, err := json.MarshalIndent(*obj, "", " ") + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + return nil + }, +} diff --git a/cmd/root.go b/cmd/root.go index 7c87057..e0b2b56 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -9,6 +9,7 @@ import ( "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/pull" "github.com/bmeg/git-drs/cmd/push" + "github.com/bmeg/git-drs/cmd/query" "github.com/spf13/cobra" ) @@ -29,6 +30,7 @@ func init() { RootCmd.AddCommand(list.Cmd) RootCmd.AddCommand(add.Cmd) RootCmd.AddCommand(filterprocess.Cmd) + RootCmd.AddCommand(query.Cmd) RootCmd.AddCommand(genBashCompletionCmd) } diff --git a/docs/README-git-plugin-dev.md b/docs/README-git-plugin-dev.md index 5ab7885..f33fa15 100644 --- a/docs/README-git-plugin-dev.md +++ b/docs/README-git-plugin-dev.md @@ -17,9 +17,9 @@ Then to add tracking in a project, add entries to `.gitattributes` in the workin ``` For when `git status` or `git diff` are invoked on `*.tsv` file, the process `git-drs filter-process` will be -invoked. The communication between git and the subprocess is outlined in (gitprotocol-common)[https://git-scm.com/docs/gitprotocol-common]. A library for parsing this event stream is part of the git-lfs code base https://github.com/git-lfs/git-lfs/blob/main/git/filter_process_scanner.go +invoked. The communication between git and the subprocess is outlined in [gitprotocol-common](https://git-scm.com/docs/gitprotocol-common). 
A library for parsing this event stream is part of the git-lfs code base https://github.com/git-lfs/git-lfs/blob/main/git/filter_process_scanner.go An example of responding to these requests can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_filter_process.go My understanding: The main set of command the the filter-process command responds to are `clean` and `smudge`. The `clean` process cleans an input document before running diff, things like run auto formatting before committing. This is where the change from the file to the remote data pointer could take place. An example of the -clean process can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_clean.go#L27 \ No newline at end of file +clean process can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_clean.go#L27 diff --git a/drs/client.go b/drs/client.go new file mode 100644 index 0000000..0b7bac7 --- /dev/null +++ b/drs/client.go @@ -0,0 +1,43 @@ +package drs + +import ( + "encoding/json" + "io" + "net/http" + "net/url" + "path/filepath" +) + +type Client struct { + base *url.URL +} + +func NewClient(base string) (*Client, error) { + baseURL, err := url.Parse(base) + return &Client{baseURL}, err +} + +func (cl *Client) GetObject(id string) (*DRSObject, error) { + + a := *cl.base + a.Path = filepath.Join(a.Path, "drs/v1/objects", id) + + response, err := http.Get(a.String()) + if err != nil { + return nil, err + } + + body, err := io.ReadAll(response.Body) + if err != nil { + return nil, err + } + //log.Printf("Getting URL %s\n", a.String()) + //fmt.Printf("%s\n", string(body)) + + out := DRSObject{} + err = json.Unmarshal(body, &out) + if err != nil { + return nil, err + } + return &out, nil +} diff --git a/drs/object.go b/drs/object.go index 56072cc..2199e3c 100644 --- a/drs/object.go +++ b/drs/object.go @@ -15,13 +15,13 @@ type Authorizations struct { } type AccessMethod struct { - Type string `json:"type"` - AccessURL AccessURL 
`json:"access_url"` - AccessID string `json:"access_id"` - Cloud string `json:"cloud"` - Region string `json:"region"` - Avalible string `json:"available"` - Authorizations Authorizations `json:"Authorizations"` + Type string `json:"type"` + AccessURL AccessURL `json:"access_url"` + AccessID string `json:"access_id,omitempty"` + Cloud string `json:"cloud,omitempty"` + Region string `json:"region,omitempty"` + Avalible string `json:"available,omitempty"` + Authorizations *Authorizations `json:"Authorizations,omitempty"` } type Contents struct { @@ -30,15 +30,15 @@ type Contents struct { type DRSObject struct { Id string `json:"id"` Name string `json:"name"` - SelfURL string `json:"self_url"` + SelfURL string `json:"self_url,omitempty"` Size int64 `json:"size"` - CreatedTime string `json:"created_time"` - UpdatedTime string `json:"updated_time"` - Version string `json:"version"` - MimeType string `json:"mime_type"` - Checksums []Checksum `json:"checksum"` + CreatedTime string `json:"created_time,omitempty"` + UpdatedTime string `json:"updated_time,omitempty"` + Version string `json:"version,omitempty"` + MimeType string `json:"mime_type,omitempty"` + Checksums []Checksum `json:"checksums"` AccessMethods []AccessMethod `json:"access_methods"` - Contents []Contents `json:"contents"` - Description string `json:"description"` - Aliases []string `json:"aliases"` + Contents []Contents `json:"contents,omitempty"` + Description string `json:"description,omitempty"` + Aliases []string `json:"aliases,omitempty"` } From ab2437fadbe421f2726218fad46202be83956cb7 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Mon, 12 May 2025 14:22:06 -0700 Subject: [PATCH 03/51] Starting to outline the DRS/indexd client support --- client/config.go | 58 ++++++++++++++++++++++++++++++++++++++++++++ client/indexd.go | 55 +++++++++++++++++++++++++++++++++++++++++ client/interface.go | 14 +++++++++++ cmd/add/main.go | 1 - cmd/download/main.go | 37 ++++++++++++++++++++++++++++ cmd/query/main.go | 
16 ++++++++---- cmd/register/main.go | 34 ++++++++++++++++++++++++++ cmd/root.go | 4 +++ drs/client.go | 43 -------------------------------- go.mod | 25 +++++-------------- go.sum | 26 +++++++------------- 11 files changed, 228 insertions(+), 85 deletions(-) create mode 100644 client/config.go create mode 100644 client/indexd.go create mode 100644 client/interface.go create mode 100644 cmd/download/main.go create mode 100644 cmd/register/main.go delete mode 100644 drs/client.go diff --git a/client/config.go b/client/config.go new file mode 100644 index 0000000..08be1e4 --- /dev/null +++ b/client/config.go @@ -0,0 +1,58 @@ +package client + +import ( + "io" + "log" + "os" + "path/filepath" + + "github.com/bmeg/git-drs/utils" + "sigs.k8s.io/yaml" +) + +type Server struct { + BaseURL string `json:"baseURL"` + ExtensionType string `json:"type,omitempty"` +} + +type Config struct { + QueryServer Server `json:"queryServer"` + WriteServer Server `json:"writeServer"` +} + +const ( + DRS_CONFIG = ".drsconfig" +) + +func LoadConfig() (*Config, error) { + //look in Git base dir and find .drsconfig file + + topLevel, err := utils.GitTopLevel() + + if err != nil { + return nil, err + } + + configPath := filepath.Join(topLevel, DRS_CONFIG) + + log.Printf("Looking for %s", configPath) + //check if config exists + reader, err := os.Open(configPath) + if err != nil { + return nil, err + } + + b, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + + conf := Config{} + err = yaml.Unmarshal(b, &conf) + if err != nil { + return nil, err + } + + log.Printf("Config: %s %#v", string(b), conf) + return &conf, nil +} diff --git a/client/indexd.go b/client/indexd.go new file mode 100644 index 0000000..d0e90a2 --- /dev/null +++ b/client/indexd.go @@ -0,0 +1,55 @@ +package client + +import ( + "encoding/json" + "io" + "net/http" + "net/url" + "path/filepath" + + "github.com/bmeg/git-drs/drs" +) + +type IndexDClient struct { + base *url.URL +} + +func NewIndexDClient(base 
string) (ObjectStoreClient, error) { + baseURL, err := url.Parse(base) + return &IndexDClient{baseURL}, err +} + +// DownloadFile implements ObjectStoreClient. +func (cl *IndexDClient) DownloadFile(id string, dstPath string) (*drs.DRSObject, error) { + panic("unimplemented") +} + +// RegisterFile implements ObjectStoreClient. +func (cl *IndexDClient) RegisterFile(path string, name string) (*drs.DRSObject, error) { + panic("unimplemented") +} + +func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { + + a := *cl.base + a.Path = filepath.Join(a.Path, "drs/v1/objects", id) + + response, err := http.Get(a.String()) + if err != nil { + return nil, err + } + + body, err := io.ReadAll(response.Body) + if err != nil { + return nil, err + } + //log.Printf("Getting URL %s\n", a.String()) + //fmt.Printf("%s\n", string(body)) + + out := drs.DRSObject{} + err = json.Unmarshal(body, &out) + if err != nil { + return nil, err + } + return &out, nil +} diff --git a/client/interface.go b/client/interface.go new file mode 100644 index 0000000..97cce01 --- /dev/null +++ b/client/interface.go @@ -0,0 +1,14 @@ +package client + +import "github.com/bmeg/git-drs/drs" + +type ObjectStoreClient interface { + //Given a DRS string ID, retrieve the object describing it + QueryID(id string) (*drs.DRSObject, error) + + //Put file into object storage and obtain a DRS record pointing to it + RegisterFile(path string, name string) (*drs.DRSObject, error) + + //Download file given a DRS ID + DownloadFile(id string, dstPath string) (*drs.DRSObject, error) +} diff --git a/cmd/add/main.go b/cmd/add/main.go index 3ea7201..0a63c26 100644 --- a/cmd/add/main.go +++ b/cmd/add/main.go @@ -18,7 +18,6 @@ var Cmd = &cobra.Command{ matches, err := filepath.Glob(fileArg) if err == nil { for _, f := range matches { - fmt.Printf("Adding %s\n", f) } } diff --git a/cmd/download/main.go b/cmd/download/main.go new file mode 100644 index 0000000..b9b8972 --- /dev/null +++ b/cmd/download/main.go @@ -0,0 
+1,37 @@ +package download + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/client" + "github.com/spf13/cobra" +) + +var server string = "https://calypr.ohsu.edu/ga4gh" + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "download", + Short: "Query server for DRS ID", + Long: ``, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + + client, err := client.NewIndexDClient(server) + if err != nil { + return err + } + + obj, err := client.QueryID(args[0]) + if err != nil { + return err + } + out, err := json.MarshalIndent(*obj, "", " ") + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + return nil + }, +} diff --git a/cmd/query/main.go b/cmd/query/main.go index 189a29d..8aef738 100644 --- a/cmd/query/main.go +++ b/cmd/query/main.go @@ -4,12 +4,10 @@ import ( "encoding/json" "fmt" - "github.com/bmeg/git-drs/drs" + "github.com/bmeg/git-drs/client" "github.com/spf13/cobra" ) -var server string = "https://calypr.ohsu.edu/ga4gh" - // Cmd line declaration var Cmd = &cobra.Command{ Use: "query", @@ -18,12 +16,20 @@ var Cmd = &cobra.Command{ Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - client, err := drs.NewClient(server) + cfg, err := client.LoadConfig() + if err != nil { + return err + } + + //fix this later + baseURL := cfg.QueryServer.BaseURL + + client, err := client.NewIndexDClient(baseURL) if err != nil { return err } - obj, err := client.GetObject(args[0]) + obj, err := client.QueryID(args[0]) if err != nil { return err } diff --git a/cmd/register/main.go b/cmd/register/main.go new file mode 100644 index 0000000..2f8bd41 --- /dev/null +++ b/cmd/register/main.go @@ -0,0 +1,34 @@ +package register + +import ( + "log" + "path/filepath" + + "github.com/bmeg/git-drs/client" + "github.com/spf13/cobra" +) + +var server string = "https://calypr.ohsu.edu/ga4gh" + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "register", + Short: "", + Long: 
``, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + log.Printf("Registering file %s", args[0]) + client, err := client.NewIndexDClient(server) + if err != nil { + return err + } + + //upload the file, name would probably be relative to the base of the git repo + client.RegisterFile(args[0], filepath.Base(args[0])) + + //remove later + _ = client + + return nil + }, +} diff --git a/cmd/root.go b/cmd/root.go index e0b2b56..cb463eb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,12 +4,14 @@ import ( "os" "github.com/bmeg/git-drs/cmd/add" + "github.com/bmeg/git-drs/cmd/download" "github.com/bmeg/git-drs/cmd/filterprocess" "github.com/bmeg/git-drs/cmd/initialize" "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/pull" "github.com/bmeg/git-drs/cmd/push" "github.com/bmeg/git-drs/cmd/query" + "github.com/bmeg/git-drs/cmd/register" "github.com/spf13/cobra" ) @@ -31,6 +33,8 @@ func init() { RootCmd.AddCommand(add.Cmd) RootCmd.AddCommand(filterprocess.Cmd) RootCmd.AddCommand(query.Cmd) + RootCmd.AddCommand(register.Cmd) + RootCmd.AddCommand(download.Cmd) RootCmd.AddCommand(genBashCompletionCmd) } diff --git a/drs/client.go b/drs/client.go deleted file mode 100644 index 0b7bac7..0000000 --- a/drs/client.go +++ /dev/null @@ -1,43 +0,0 @@ -package drs - -import ( - "encoding/json" - "io" - "net/http" - "net/url" - "path/filepath" -) - -type Client struct { - base *url.URL -} - -func NewClient(base string) (*Client, error) { - baseURL, err := url.Parse(base) - return &Client{baseURL}, err -} - -func (cl *Client) GetObject(id string) (*DRSObject, error) { - - a := *cl.base - a.Path = filepath.Join(a.Path, "drs/v1/objects", id) - - response, err := http.Get(a.String()) - if err != nil { - return nil, err - } - - body, err := io.ReadAll(response.Body) - if err != nil { - return nil, err - } - //log.Printf("Getting URL %s\n", a.String()) - //fmt.Printf("%s\n", string(body)) - - out := DRSObject{} - err = json.Unmarshal(body, 
&out) - if err != nil { - return nil, err - } - return &out, nil -} diff --git a/go.mod b/go.mod index 4e11a3f..c58f9ee 100644 --- a/go.mod +++ b/go.mod @@ -3,34 +3,21 @@ module github.com/bmeg/git-drs go 1.24.0 require ( - github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 // indirect + github.com/git-lfs/git-lfs/v3 v3.6.1 + github.com/spf13/cobra v1.9.1 + sigs.k8s.io/yaml v1.4.0 +) + +require ( github.com/avast/retry-go v2.4.2+incompatible // indirect - github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 // indirect - github.com/git-lfs/git-lfs/v3 v3.6.1 // indirect github.com/git-lfs/gitobj/v2 v2.1.1 // indirect - github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a // indirect github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect - github.com/hashicorp/go-uuid v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jcmturner/aescts/v2 v2.0.0 // indirect - github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect - github.com/jcmturner/gofork v1.0.0 // indirect - github.com/jcmturner/goidentity/v6 v6.0.1 // indirect - github.com/jcmturner/gokrb5/v8 v8.4.2 // indirect - github.com/jcmturner/rpc/v2 v2.0.3 // indirect - github.com/jmhodges/clock v1.2.0 // indirect github.com/leonelquinteros/gotext v1.5.0 // indirect - github.com/mattn/go-isatty v0.0.4 // indirect - github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 // indirect github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 // indirect github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect - github.com/spf13/cobra v1.9.1 // indirect github.com/spf13/pflag v1.0.6 // indirect - github.com/ssgelm/cookiejarparser v1.0.1 // indirect - golang.org/x/crypto v0.21.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sync v0.1.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect ) diff --git a/go.sum 
b/go.sum index f79b8f3..7e48ed3 100644 --- a/go.sum +++ b/go.sum @@ -3,7 +3,7 @@ github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1L github.com/avast/retry-go v2.4.2+incompatible h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= @@ -17,8 +17,8 @@ github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= -github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= -github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -45,6 +45,7 @@ 
github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2 github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 h1:chPfVn+gpAM5CTpTyVU9j8J+xgRGwmoDlNDLjKnJiYo= github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= @@ -55,40 +56,31 @@ github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto 
v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191027093000-83d349e8ac1a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.14.0 
h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= From 28179ff1c71e28c018ff9d6035b4b91052678523 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 14 May 2025 14:00:34 -0700 Subject: [PATCH 04/51] 1st draft DRS query and download, make sure to setup .drsconfig --- .drsconfig | 9 ++++ client/config.go | 1 + client/indexd.go | 106 +++++++++++++++++++++++++++++++++++++++++-- client/interface.go | 2 +- cmd/download/main.go | 13 ++++-- go.mod | 13 ++++++ go.sum | 20 ++++++++ 7 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 .drsconfig diff --git a/.drsconfig b/.drsconfig new file mode 100644 index 0000000..13553b9 --- /dev/null +++ b/.drsconfig @@ -0,0 +1,9 @@ +{ + "queryServer": { + "baseURL": "https://calypr.ohsu.edu/ga4gh" + }, + "writeServer": { + "baseURL": "https://calypr.ohsu.edu/ga4gh" + }, + "gen3Profile": "" +} diff --git a/client/config.go 
b/client/config.go index 08be1e4..12845db 100644 --- a/client/config.go +++ b/client/config.go @@ -18,6 +18,7 @@ type Server struct { type Config struct { QueryServer Server `json:"queryServer"` WriteServer Server `json:"writeServer"` + Gen3Profile string `json:"gen3Profile"` } const ( diff --git a/client/indexd.go b/client/indexd.go index d0e90a2..e6694f5 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -2,26 +2,116 @@ package client import ( "encoding/json" + "fmt" "io" "net/http" "net/url" + "os" "path/filepath" "github.com/bmeg/git-drs/drs" + "github.com/uc-cdis/gen3-client/gen3-client/jwt" ) +var conf jwt.Configure +var profileConfig jwt.Credential + type IndexDClient struct { base *url.URL } func NewIndexDClient(base string) (ObjectStoreClient, error) { baseURL, err := url.Parse(base) + // print baseURL + if err != nil { + return nil, err + } + fmt.Printf("Base URL: %s\n", baseURL.String()) + return &IndexDClient{baseURL}, err } // DownloadFile implements ObjectStoreClient. 
-func (cl *IndexDClient) DownloadFile(id string, dstPath string) (*drs.DRSObject, error) { - panic("unimplemented") +func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string, dstPath string) (*drs.AccessURL, error) { + + // get file from indexd + a := *cl.base + a.Path = filepath.Join(a.Path, "drs/v1/objects", id, "access", access_id) + // a.Path = filepath.Join("https://calypr.ohsu.edu/user/data/download/", id) + + fmt.Print("Getting URL: ", a.String(), "\n") + + // unmarshal response + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + return nil, err + } + // extract accessToken from config and insert into header of request + profileConfig = conf.ParseConfig(profile) + if profileConfig.AccessToken == "" { + return nil, fmt.Errorf("access token not found in profile config") + } + + // Add headers to the request + authStr := fmt.Sprintf("Bearer %s", profileConfig.AccessToken) + fmt.Printf("Authorization header: %s\n", authStr) + req.Header.Set("Authorization", authStr) + + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + return nil, err + } + defer response.Body.Close() + + body, err := io.ReadAll(response.Body) + if err != nil { + return nil, err + } + + // print body + fmt.Printf("Response body: %s\n", string(body)) + + out := drs.AccessURL{} + err = json.Unmarshal(body, &out) + if err != nil { + return nil, err + } + + // Extract the signed URL from the response + signedURL := out.URL // Assuming `out.url` contains the signed URL + if signedURL == "" { + return nil, fmt.Errorf("signed URL not found in response") + } + + fmt.Print("Signed URL: ", signedURL, "\n") + + // Download the file using the signed URL + fileResponse, err := http.Get(signedURL) + if err != nil { + return nil, err + } + defer fileResponse.Body.Close() + + fmt.Printf("File response status: %s\n", fileResponse.Status) + + // Create the destination file + dstFile, err := os.Create(dstPath) + if err != nil { + return 
nil, err + } + defer dstFile.Close() + + // print file response as string + fmt.Printf("File response contents: %s\n", fileResponse.Body) + + // Write the file content to the destination file + _, err = io.Copy(dstFile, fileResponse.Body) + if err != nil { + return nil, err + } + + return &out, nil } // RegisterFile implements ObjectStoreClient. @@ -34,10 +124,20 @@ func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { a := *cl.base a.Path = filepath.Join(a.Path, "drs/v1/objects", id) - response, err := http.Get(a.String()) + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + return nil, err + } + // Add headers to the request + req.Header.Set("Authorization", "Bearer ") + req.Header.Set("Custom-Header", "HeaderValue") + + client := &http.Client{} + response, err := client.Do(req) if err != nil { return nil, err } + defer response.Body.Close() body, err := io.ReadAll(response.Body) if err != nil { diff --git a/client/interface.go b/client/interface.go index 97cce01..208e779 100644 --- a/client/interface.go +++ b/client/interface.go @@ -10,5 +10,5 @@ type ObjectStoreClient interface { RegisterFile(path string, name string) (*drs.DRSObject, error) //Download file given a DRS ID - DownloadFile(id string, dstPath string) (*drs.DRSObject, error) + DownloadFile(id string, access_id string, profile string, dstPath string) (*drs.AccessURL, error) } diff --git a/cmd/download/main.go b/cmd/download/main.go index b9b8972..4ce4d80 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -13,7 +13,7 @@ var server string = "https://calypr.ohsu.edu/ga4gh" // Cmd line declaration var Cmd = &cobra.Command{ Use: "download", - Short: "Query server for DRS ID", + Short: "Download file using s3", Long: ``, Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { @@ -23,11 +23,18 @@ var Cmd = &cobra.Command{ return err } - obj, err := client.QueryID(args[0]) + // // get file name from DRS object + // drs_obj, err := 
client.QueryID(args[0]) + // if err != nil { + // return err + // } + + access_url, err := client.DownloadFile(args[0], "s3", "cbds-prod", "./file.txt") if err != nil { return err } - out, err := json.MarshalIndent(*obj, "", " ") + + out, err := json.MarshalIndent(*access_url, "", " ") if err != nil { return err } diff --git a/go.mod b/go.mod index c58f9ee..1743ff5 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.24.0 require ( github.com/git-lfs/git-lfs/v3 v3.6.1 github.com/spf13/cobra v1.9.1 + github.com/uc-cdis/gen3-client v0.0.23 sigs.k8s.io/yaml v1.4.0 ) @@ -13,11 +14,23 @@ require ( github.com/git-lfs/gitobj/v2 v2.1.1 // indirect github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect + github.com/google/go-github v17.0.0+incompatible // indirect + github.com/google/go-querystring v1.1.0 // indirect + github.com/hashicorp/go-version v1.4.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/leonelquinteros/gotext v1.5.0 // indirect + github.com/mattn/go-runewidth v0.0.13 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 // indirect + github.com/rivo/uniseg v0.2.0 // indirect github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e // indirect + golang.org/x/net v0.23.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect + gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect + gopkg.in/ini.v1 v1.66.3 // indirect ) + +replace github.com/uc-cdis/gen3-client => ../cdis-data-client diff --git a/go.sum b/go.sum index 7e48ed3..e94a1a5 100644 --- a/go.sum +++ b/go.sum @@ -17,10 +17,17 @@ github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod 
h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= +github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= +github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= +github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.4.0 h1:aAQzgqIrRKRa7w75CKpbBxYsmUoPjzVm1W59ca1L0J4= +github.com/hashicorp/go-version v1.4.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= @@ -41,12 +48,18 @@ github.com/leonelquinteros/gotext v1.5.0 h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914F github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= +github.com/mattn/go-runewidth 
v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 h1:chPfVn+gpAM5CTpTyVU9j8J+xgRGwmoDlNDLjKnJiYo= github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -59,6 +72,8 @@ github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e h1:IWllFTiDjjLIf2oeKxpIUmtiDV5sn71VgeQgg6vcE7k= +github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e/go.mod h1:d7u6HkTYKSv5m6MCKkOQlHwaShTMl3HjqSGW3XtVhXM= golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= @@ -78,8 +93,13 @@ golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= +gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= +gopkg.in/ini.v1 v1.66.3 h1:jRskFVxYaMGAMUbN0UZ7niA9gzL9B49DOqE78vg0k3w= +gopkg.in/ini.v1 v1.66.3/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= From e6fb636e16673d97def746cf1a844ba887dfc20a Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 14 May 2025 17:07:18 -0700 Subject: [PATCH 05/51] make DownloadFile more loosely coupled --- .gitignore | 3 +++ client/README.md | 14 +++++++++++ client/indexd.go | 59 ++++++++++++++++++++++++++------------------ client/interface.go | 2 +- cmd/download/main.go | 49 ++++++++++++++++++++++-------------- cmd/query/main.go | 6 ++--- 6 files changed, 87 insertions(+), 
46 deletions(-) create mode 100644 .gitignore create mode 100644 client/README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0088ced --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +.DS_Store +/tmp \ No newline at end of file diff --git a/client/README.md b/client/README.md new file mode 100644 index 0000000..45e7cf8 --- /dev/null +++ b/client/README.md @@ -0,0 +1,14 @@ +# Git DRS Client + +## Getting Started + +1. Configure gen3 with your credentials ([docs](https://aced-idp.github.io/requirements/#1-download-gen3-client)) +2. Edit platform URL and gen3 profile in `.drsconfig` +3. Build from source + ```bash + go build + ``` +4. Access through command line + ```bash + ./git-drs --help + ``` diff --git a/client/indexd.go b/client/indexd.go index e6694f5..1d0a592 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -17,7 +17,8 @@ var conf jwt.Configure var profileConfig jwt.Credential type IndexDClient struct { - base *url.URL + base *url.URL + profile string } func NewIndexDClient(base string) (ObjectStoreClient, error) { @@ -26,35 +27,44 @@ func NewIndexDClient(base string) (ObjectStoreClient, error) { if err != nil { return nil, err } + + cfg, err := LoadConfig() + if err != nil { + return nil, err + } + + // get the gen3Profile from the config + profile := cfg.Gen3Profile + if profile == "" { + return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") + } + fmt.Printf("Base URL: %s\n", baseURL.String()) + fmt.Printf("Profile: %s\n", profile) - return &IndexDClient{baseURL}, err + return &IndexDClient{baseURL, profile}, err } -// DownloadFile implements ObjectStoreClient. 
-func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string, dstPath string) (*drs.AccessURL, error) { - +// DownloadFile implements ObjectStoreClient +func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) { // get file from indexd a := *cl.base a.Path = filepath.Join(a.Path, "drs/v1/objects", id, "access", access_id) // a.Path = filepath.Join("https://calypr.ohsu.edu/user/data/download/", id) - fmt.Print("Getting URL: ", a.String(), "\n") - // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) if err != nil { return nil, err } - // extract accessToken from config and insert into header of request - profileConfig = conf.ParseConfig(profile) + // extract accessToken from gen3 profile and insert into header of request + profileConfig = conf.ParseConfig(cl.profile) if profileConfig.AccessToken == "" { return nil, fmt.Errorf("access token not found in profile config") } // Add headers to the request - authStr := fmt.Sprintf("Bearer %s", profileConfig.AccessToken) - fmt.Printf("Authorization header: %s\n", authStr) + authStr := "Bearer " + profileConfig.AccessToken req.Header.Set("Authorization", authStr) client := &http.Client{} @@ -69,9 +79,6 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string return nil, err } - // print body - fmt.Printf("Response body: %s\n", string(body)) - out := drs.AccessURL{} err = json.Unmarshal(body, &out) if err != nil { @@ -79,13 +86,11 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string } // Extract the signed URL from the response - signedURL := out.URL // Assuming `out.url` contains the signed URL + signedURL := out.URL if signedURL == "" { - return nil, fmt.Errorf("signed URL not found in response") + return nil, fmt.Errorf("signed URL not found in response.") } - fmt.Print("Signed URL: ", signedURL, "\n") - // Download the file using the signed URL fileResponse, err := 
http.Get(signedURL) if err != nil { @@ -93,7 +98,16 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string } defer fileResponse.Body.Close() - fmt.Printf("File response status: %s\n", fileResponse.Status) + // Check if the response status is OK + if fileResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) + } + + // Create the destination directory if it doesn't exist + err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) + if err != nil { + return nil, err + } // Create the destination file dstFile, err := os.Create(dstPath) @@ -102,15 +116,14 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, profile string } defer dstFile.Close() - // print file response as string - fmt.Printf("File response contents: %s\n", fileResponse.Body) - // Write the file content to the destination file _, err = io.Copy(dstFile, fileResponse.Body) if err != nil { return nil, err } + fmt.Printf("File written to %s\n", dstFile.Name()) + return &out, nil } @@ -143,8 +156,6 @@ func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { if err != nil { return nil, err } - //log.Printf("Getting URL %s\n", a.String()) - //fmt.Printf("%s\n", string(body)) out := drs.DRSObject{} err = json.Unmarshal(body, &out) diff --git a/client/interface.go b/client/interface.go index 208e779..8b6bba1 100644 --- a/client/interface.go +++ b/client/interface.go @@ -10,5 +10,5 @@ type ObjectStoreClient interface { RegisterFile(path string, name string) (*drs.DRSObject, error) //Download file given a DRS ID - DownloadFile(id string, access_id string, profile string, dstPath string) (*drs.AccessURL, error) + DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) } diff --git a/cmd/download/main.go b/cmd/download/main.go index 4ce4d80..067d642 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -1,44 +1,57 @@ package download import ( - 
"encoding/json" - "fmt" - "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" "github.com/spf13/cobra" ) -var server string = "https://calypr.ohsu.edu/ga4gh" +var ( + server string + dstPath string + drsObj *drs.DRSObject +) +// Cmd line declaration // Cmd line declaration var Cmd = &cobra.Command{ - Use: "download", - Short: "Download file using s3", - Long: ``, - Args: cobra.MinimumNArgs(1), + Use: "download ", + Short: "Download file using DRS ID and access ID", + Long: "Download file using DRS ID and access ID. The access ID is the access method used to download the file.", + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { - - client, err := client.NewIndexDClient(server) + drsId := args[0] + accessId := args[1] + cfg, err := client.LoadConfig() if err != nil { return err } - // // get file name from DRS object - // drs_obj, err := client.QueryID(args[0]) - // if err != nil { - // return err - // } + baseURL := cfg.QueryServer.BaseURL - access_url, err := client.DownloadFile(args[0], "s3", "cbds-prod", "./file.txt") + client, err := client.NewIndexDClient(baseURL) if err != nil { return err } - out, err := json.MarshalIndent(*access_url, "", " ") + if dstPath == "" { + + drsObj, err = client.QueryID(drsId) + if err != nil { + return err + } + dstPath = drsObj.Name + } + + _, err = client.DownloadFile(drsId, accessId, dstPath) if err != nil { return err } - fmt.Printf("%s\n", string(out)) + return nil }, } + +func init() { + Cmd.Flags().StringVarP(&dstPath, "dstPath", "d", "", "Optional destination file path") +} diff --git a/cmd/query/main.go b/cmd/query/main.go index 8aef738..b9463d7 100644 --- a/cmd/query/main.go +++ b/cmd/query/main.go @@ -10,9 +10,9 @@ import ( // Cmd line declaration var Cmd = &cobra.Command{ - Use: "query", - Short: "Query server for DRS ID", - Long: ``, + Use: "query ", + Short: "Query DRS server by DRS ID", + Long: "Query DRS server by DRS ID", Args: cobra.MinimumNArgs(1), RunE: func(cmd 
*cobra.Command, args []string) error { From 08425c33879f5d3764cc018df881c9d0e90e1fec Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 19 May 2025 11:44:55 -0700 Subject: [PATCH 06/51] drafted README requirements --- docs/README-requirements.md | 128 ++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 docs/README-requirements.md diff --git a/docs/README-requirements.md b/docs/README-requirements.md new file mode 100644 index 0000000..316e4dc --- /dev/null +++ b/docs/README-requirements.md @@ -0,0 +1,128 @@ +# Clarifying Requirements + +## Requirements +_Inspiration from ([system design interview docs](https://www.hellointerview.com/learn/system-design/in-a-hurry/delivery))_ + +### Functional Requirements +- Users should be able to **create a new data project** +- Users should be able to **add references to files** that are either **local**, **symlinked**, or **external to the current machine** +- Users should be able to **pull down a subset of the files** from an existing project +- User should be able to **add data files to an existing project** +- User A should be able to **resolve version conflicts** in the project: eg if User B makes changes that aren't on User A's local version +- Users should be able to **add multiple file paths** referring to the same file +- Users should be able to **transfer local files to a remote bucket** +- Users should be able to see their **files updated on the gen3 platform** +- Users should be able to **associate files with other FHIR entities** (patients, specimens, etc) +- Users should be able to **associate files with other FHIR metadata** + +### Non-Functional Requirements +- The system should be able to handle over **100k files** in a single project +- The system should be able to handle over **___ TB of data ingestion** for a single project +- The system should be able to handle over **___ TB of data transfer** for a single project +- The system should be + +### Areas of Work +*(adapted 
from [main README](./README.md#-proposed-modular-architecture)), **bold** for new ones* + +1. Project Management (both auth and file version control) +2. **File Transfer** (this is only to upload files) +3. **File Indexing** (this is changing file paths, uploading files, etc) +4. Metadata Management (associate files with entities; tag files with metadata) +5. **Gen3 Integration** (sync git project with gen3 data systems) + +### Technical Details based on Requirements + +- Users should be able to **create a new data project** as a Git repo + - an executable containing custom git and gen3 dependencies + - some setup for a `git install` + - way to create repo: only `git clone` [template](./README-gitlfs-template-project.md) for setup? +- Users should be able to **add references to files** that are either **local**, **symlinked**, or **outside of the current machine** + - All of these references require you to implement a custom clean / smudge so that when data files go into Git's working directory (`.git/`), pointers are created as opposed to non-pointers + - **clean / smudge for a local file**: file is localized, pointer (hash, filesize, urls) is created from local file + - **clean for a symlinked file**: file is still local to file, grabbed from that disk and processed, stored as a pointer how is it stored? + - **clean for a external file**: + - - **QW TODO:** still need to add all other combos between clean v smudge and file type +- Users should be able to **pull down a subset of the files** from an existing project + - Pulling down no files by default (at most only the pointers) (eg `GIT_LFS_SKIP_SMUDGE=1` by default for git lfs) + - Ability to view and select files that need to be pulled down (**QW TODO:** remind me the use case why do we need to pull down files? Why do we need to edit files?) 
+- User should be able to **add data files to an existing project** +- User A should be able to **resolve version conflicts** in the project: eg if User B makes changes that aren't on User A's local version +- Users should be able to **add multiple file paths** referring to the same file +- Users should be able to **transfer local files to a remote bucket** +- Privileged users should be able to **grant access** of their project **to other users** +- Users should be able to have **different roles** (read only vs read and write vs read write and approve) +- Users should be able to see their **files updated on the gen3 platform** +- Users should be able to **associate files with other FHIR entities** (patients, specimens, etc) +- Users should be able to **associate files with other FHIR metadata** + +### User-Facing Design Concerns + +- Who is in charge of executing "custom code": whether it should be... + 1. automatically triggered by git hooks (`.git/hooks`) + 2. automatically triggered for specific files (`.gitattributes`) + 3. manually triggered by unique CLI commands (eg `git drs `) +- How a user interacts with DRS: is DRS the file pointer, an additional metadata store, or something else? +- Expectations of git vs expectations of git drs + +### General Design Concerns + +- At what level are we interfacing with git? Similarly, at what level are we making use of git lfs? In decreasing order of code reuse, are we... + 1. using git hooks before it gets to git lfs (eg to address the sha limitations *before* it hits git-lfs) + 2. using git lfs extensions to interact with git lfs after the fact (idts, git lfs I think will fail if we don't give it file contents in the file isn't localized case...) + 3. 
Using only git lfs source code and customizing it at will +- **QW TODO:** some of these answers might be in most recent commit ([be0294c](https://github.com/bmeg/git-drs/commit/be0294c1aac7aa74dade90c8166bbf1c5e1066f6)) +- For files that "cannot be localized", ie S3 bucket, how are they cleaned and smudged? Updated? +- Project vs program distinction + +### Use Cases +- As an OHSU user, I need to transfer files from outside the firewall into OHSU so that they are localized to internal OHSU resources (eg Evotypes) + - Why not use Globus? +- As an external user, I need to pull down OHSU-internal files so that I can do further processing, downstream analysis, etc on said files (eg Cambridge pulls down OHSU-processed files) + - This says external user as opposed to OHSU analyst, as a user doesn't need to go through us / gen3 to localize files if it's all internal to OHSU right? +- As an OHSU analyst, I need to index read-only S3 system files to so that ... (eg Jordan multimodal) +- As an OHSU analyst, I need to index files in my directory so that ... (is this a real use case?) +- As an FHIR-aware user, I need to upload FHIR metadata that +- As a OHSU user, I need to index files that exists on multiple buckets AND make each file downloadable from the right bucket so that I can consolidate my image files in a single project (eg imaging analysis?) + +### [WIP] User Types on CALIPER + +1. **Data steward**: creating data project(s) and ensuring that everything is up to date. Enabling access for folks within their project / program (eg Allison for SMMART datasets) +2. **Data submitter:** adding and editing files to data project, maybe also adding metadata. +3. **Data analyst**: Pulling down relevant files for processing, QA, downstream analysis. Viewing the results of the data project on CALYPR (eg Isabel) + +We will assume that our initial users will some level of Git familiarity and computational ability. + + +------ + +## Misc + +### Example Project (with Use Cases) + +1. 
**Initial file transfer**: I want to use gen3 to transfer my files from a remote server into OHSU premises +2. **Initial file tracking** (Data submitter): I want to create a project and upload files to it. How: + 1. Create a Github repo + 2. Setup of the git drs client (install cli + git hooks) + 3. of interest +3. **Initial File Upload**: Likely done along with 1, user needs + +### Enumerated List of Use Cases + +A list of use cases according to the inputs, output, and project states mentioned by the team in the past. + +Out-of-scope: +- "multiple inputs": combinatorial inputs of the below (eg pushing local and external files) + +### Inputs and Outputs + +- **Input**: Input data stored in different locations + - **Locally stored files within the project directory** (eg Isabel creating bams from fastqs: you have control over the directory where the files are, project dir initialized in a parent directory) + - **Locally stored files in a shared volume** (eg Jordan /mnt ARC use case, no control over directory where files are) + - **Externally stored files** in an inaccessible bucket (eg external file management of SMMART files with not access to them) + - **Locally stored FHIR metadata** (eg SMMART deep FHIR graph queries) +- **Output**: Where to write files to + - **gen3-registered bucket** (eg: Isabel Evotypes output analysis files shared) + - **non-gen3-registered bucket** (eg SMMART where we file paths only want to index what’s available to us) +- **[Extra] Project State**: whether project is new or existing + - **new project** (eg data steward initializing a project from scratch) + - **existing project** (eg EvoTypes collaborators writing new files onto Evotypes output project) \ No newline at end of file From 9493117ad17823312fdbf27e3eb87a03b478f48f Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 19 May 2025 11:45:19 -0700 Subject: [PATCH 07/51] typos --- cmd/filterprocess/main.go | 2 +- docs/README-git-plugin-dev.md | 2 +- 2 files changed, 2 insertions(+), 
2 deletions(-) diff --git a/cmd/filterprocess/main.go b/cmd/filterprocess/main.go index 70fda3b..688d4f9 100644 --- a/cmd/filterprocess/main.go +++ b/cmd/filterprocess/main.go @@ -13,7 +13,7 @@ import ( // Cmd line declaration var Cmd = &cobra.Command{ Use: "filter-process", - Short: "filter proces", + Short: "filter process", Long: ``, Args: cobra.MinimumNArgs(0), RunE: func(cmd *cobra.Command, args []string) error { diff --git a/docs/README-git-plugin-dev.md b/docs/README-git-plugin-dev.md index f33fa15..2eae427 100644 --- a/docs/README-git-plugin-dev.md +++ b/docs/README-git-plugin-dev.md @@ -2,7 +2,7 @@ # Notes about the development of git plugins -To attach the plugin into the configutation. In the global config `~/.gitconfig` add the lines: +To attach the plugin into the configuration. In the global config `~/.gitconfig` add the lines: ``` [filter "drs"] clean = git-drs clean -- %f From 5912b71ce4d2058c2ff629885c0bf189832909db Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 20 May 2025 09:39:58 -0700 Subject: [PATCH 08/51] clarify requirements, provide git lfs vs git drs comparison --- docs/README-requirements.md | 172 ++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 98 deletions(-) diff --git a/docs/README-requirements.md b/docs/README-requirements.md index 316e4dc..2199a5f 100644 --- a/docs/README-requirements.md +++ b/docs/README-requirements.md @@ -3,76 +3,39 @@ ## Requirements _Inspiration from ([system design interview docs](https://www.hellointerview.com/learn/system-design/in-a-hurry/delivery))_ +We will assume that our initial users will have some level of Git familiarity and computational ability. 
+ ### Functional Requirements -- Users should be able to **create a new data project** -- Users should be able to **add references to files** that are either **local**, **symlinked**, or **external to the current machine** -- Users should be able to **pull down a subset of the files** from an existing project -- User should be able to **add data files to an existing project** -- User A should be able to **resolve version conflicts** in the project: eg if User B makes changes that aren't on User A's local version -- Users should be able to **add multiple file paths** referring to the same file -- Users should be able to **transfer local files to a remote bucket** -- Users should be able to see their **files updated on the gen3 platform** -- Users should be able to **associate files with other FHIR entities** (patients, specimens, etc) -- Users should be able to **associate files with other FHIR metadata** +**General** +- Users should be able to use mostly conventional git to handle their repositories and only need a minimal set of non-git commands + +**File transfer** +- Users should be able to transfer files from outside the firewall into OHSU systems +- Users should be able to pull a subset of files onto their machine + +**File indexing** +- A user should be able to upload a set of data files to a common file repository +- Users should be able to update an existing file repository with new files +- Users should be able to refer to the same file even while it is in multiple file paths +- A user should be able to pull in changes on the repository that another user made + +**Metadata / Access Control** +- Users should be able to manage permissions for their projects +- Users should be able to associate data files with metadata about other important entities (patients, specimens, etc) ### Non-Functional Requirements - The system should be able to handle over **100k files** in a single project - The system should be able to handle over **___ TB of data ingestion** for 
a single project - The system should be able to handle over **___ TB of data transfer** for a single project - The system should be +### Categories of Functionality +*(adapted from [main README](./README.md#-proposed-modular-architecture), *unlinked* are new ones) -### Areas of Work -*(adapted from [main README](./README.md#-proposed-modular-architecture)), **bold** for new ones* - -1. Project Management (both auth and file version control) -2. **File Transfer** (this is only to upload files) -3. **File Indexing** (this is changing file paths, uploading files, etc) -4. Metadata Management (associate files with entities; tag files with metadata) -5. **Gen3 Integration** (sync git project with gen3 data systems) - -### Technical Details based on Requirements - -- Users should be able to **create a new data project** as a Git repo - - an executable containing custom git and gen3 dependencies - - some setup for a `git install` - - way to create repo: only `git clone` [template](./README-gitlfs-template-project.md) for setup? -- Users should be able to **add references to files** that are either **local**, **symlinked**, or **outside of the current machine** - - All of these references require you to implement a custom clean / smudge so that when data files go into Git's working directory (`.git/`), pointers are created as opposed to non-pointers - - **clean / smudge for a local file**: file is localized, pointer (hash, filesize, urls) is created from local file - - **clean for a symlinked file**: file is still local to file, grabbed from that disk and processed, stored as a pointer how is it stored? 
- - **clean for a external file**: - - - **QW TODO:** still need to add all other combos between clean v smudge and file type -- Users should be able to **pull down a subset of the files** from an existing project - - Pulling down no files by default (at most only the pointers) (eg `GIT_LFS_SKIP_SMUDGE=1` by default for git lfs) - - Ability to view and select files that need to be pulled down (**QW TODO:** remind me the use case why do we need to pull down files? Why do we need to edit files?) -- User should be able to **add data files to an existing project** -- User A should be able to **resolve version conflicts** in the project: eg if User B makes changes that aren't on User A's local version -- Users should be able to **add multiple file paths** referring to the same file -- Users should be able to **transfer local files to a remote bucket** -- Privileged users should be able to **grant access** of their project **to other users** -- Users should be able to have **different roles** (read only vs read and write vs read write and approve) -- Users should be able to see their **files updated on the gen3 platform** -- Users should be able to **associate files with other FHIR entities** (patients, specimens, etc) -- Users should be able to **associate files with other FHIR metadata** - -### User-Facing Design Concerns - -- Who is in charge of executing "custom code": whether it should be... - 1. automatically triggered by git hooks (`.git/hooks`) - 2. automatically triggered for specific files (`.gitattributes`) - 3. manually triggered by unique CLI commands (eg `git drs `) -- How a user interacts with DRS: is DRS the file pointer, an additional metadata store, or something else? -- Expectations of git vs expectations of git drs - -### General Design Concerns - -- At what level are we interfacing with git? Similarly, at what level are we making use of git lfs? In decreasing order of code reuse, are we... - 1. 
using git hooks before it gets to git lfs (eg to address the sha limitations *before* it hits git-lfs) - 2. using git lfs extensions to interact with git lfs after the fact (idts, git lfs I think will fail if we don't give it file contents in the file isn't localized case...) - 3. Using only git lfs source code and customizing it at will -- **QW TODO:** some of these answers might be in most recent commit ([be0294c](https://github.com/bmeg/git-drs/commit/be0294c1aac7aa74dade90c8166bbf1c5e1066f6)) -- For files that "cannot be localized", ie S3 bucket, how are they cleaned and smudged? Updated? -- Project vs program distinction +1. **[Project Management](https://github.com/bmeg/git-drs/blob/feature/documentation/docs/README.md#1-project-management-utility)** (both permissions management and project version control) +2. **[File Transfer](https://github.com/bmeg/git-drs/blob/feature/documentation/docs/README.md#2-file-transfer-utility)** (upload/download files) +3. ***File Indexing*** (changing file paths, indexing files with pointers, etc) +4. **[Metadata Management](https://github.com/bmeg/git-drs/blob/feature/documentation/docs/README.md#3-metadata-management-utility)** (associate files with entities; tag files with metadata) +5. ***Gen3 Integration*** (sync git project with gen3 data systems) ### Use Cases - As an OHSU user, I need to transfer files from outside the firewall into OHSU so that they are localized to internal OHSU resources (eg Evotypes) @@ -84,45 +47,58 @@ _Inspiration from ([system design interview docs](https://www.hellointerview.com - As an FHIR-aware user, I need to upload FHIR metadata that - As a OHSU user, I need to index files that exists on multiple buckets AND make each file downloadable from the right bucket so that I can consolidate my image files in a single project (eg imaging analysis?) -### [WIP] User Types on CALIPER - -1. **Data steward**: creating data project(s) and ensuring that everything is up to date. 
Enabling access for folks within their project / program (eg Allison for SMMART datasets) -2. **Data submitter:** adding and editing files to data project, maybe also adding metadata. -3. **Data analyst**: Pulling down relevant files for processing, QA, downstream analysis. Viewing the results of the data project on CALYPR (eg Isabel) - -We will assume that our initial users will some level of Git familiarity and computational ability. - - ------- - -## Misc +### Testing: Inputs and Outputs -### Example Project (with Use Cases) - -1. **Initial file transfer**: I want to use gen3 to transfer my files from a remote server into OHSU premises -2. **Initial file tracking** (Data submitter): I want to create a project and upload files to it. How: - 1. Create a Github repo - 2. Setup of the git drs client (install cli + git hooks) - 3. of interest -3. **Initial File Upload**: Likely done along with 1, user needs - -### Enumerated List of Use Cases - -A list of use cases according to the inputs, output, and project states mentioned by the team in the past. 
- -Out-of-scope: -- "multiple inputs": combinatorial inputs of the below (eg pushing local and external files) - -### Inputs and Outputs - -- **Input**: Input data stored in different locations +- **Data Input**: Input data stored in different locations - **Locally stored files within the project directory** (eg Isabel creating bams from fastqs: you have control over the directory where the files are, project dir initialized in a parent directory) - **Locally stored files in a shared volume** (eg Jordan /mnt ARC use case, no control over directory where files are) - - **Externally stored files** in an inaccessible bucket (eg external file management of SMMART files with not access to them) + - **Externally stored files** in an inaccessible bucket (eg external file management of SMMART files with no access to them) - **Locally stored FHIR metadata** (eg SMMART deep FHIR graph queries) + - **Project State**: whether project is new or existing + - **new project** (eg data steward initializing a project from scratch) + - **existing project** (eg Isabel adding analysis files onto an Evotypes project) - **Output**: Where to write files to - **gen3-registered bucket** (eg: Isabel Evotypes output analysis files shared) - - **non-gen3-registered bucket** (eg SMMART where we file paths only want to index what’s available to us) -- **[Extra] Project State**: whether project is new or existing - - **new project** (eg data steward initializing a project from scratch) - - **existing project** (eg EvoTypes collaborators writing new files onto Evotypes output project) \ No newline at end of file + - **no bucket** (eg SMMART where we have file paths but no access / only want to index what’s available to us) + +## Comparing LFS-based vs DRS-based design + +### Comparison of LFS-based vs DRS-based design +expanded table from [original git-gen3 vs git LFS table](https://github.com/bmeg/git-drs/pull/3#issuecomment-2835614773) + +Feature | git-gen3 | Git LFS | git-drs +-- | -- | -- | -- 
+Purpose | Manage external document references in research projects (esp. Gen3/DRS/Genomics data) | Manage large binary files directly attached to git repositories | manage external document references using DRS-compliant indexd server +Tracking Method | Metadata about files (e.g., path, etag, MD5, SHA256, multiple remote locations) | LFS pointer files (.gitattributes, .git/lfs/objects) point to large file storage | pointer files like LFS, but DRS ID / subset / entire DRS object +Download on Clone | No automatic download; metadata only on clone. Explicit git drs pull needed to retrieve files. | Automatically downloads necessary objects when needed, or lazily during checkout | no automatic download; only pointers +State Management | Tracks file states: Remote (R), Local (L), Modified (M), Untracked (U), Git-tracked (G) | Files either exist in repo checkout or not; no explicit remote vs. local state tracking | Localizes all project-specific DRS objects; optional download +Adding Files | Add files to metadata index (git drs add), choose between upload, symlink, external S3 or DRS refs. 
| git lfs track files, then git add to push objects into LFS server (gen3 backend via client side `transport customization`) | git lfs track for certain files +Remote Options | Supports multiple remote backends: Gen3 DRS, S3, local filesystems, others | Client side `transport customization` required to redirect to alternate backends | support multiple remote backends as well +Push Behavior | Push uploads only modified files; unchanged references remain metadata-only | Push uploads any committed LFS objects | push uploads any DRS objects, even if pointing to remote files +Symlink Support | Native symlink references supported (git drs add -l) | No native symlink tracking; must be handled manually | no native symlink support (blocked by git not git lfs) +Flexibility with External Sources | Easy to reference existing DRS URIs, S3 paths, shared file paths | Requires a) large objects to be added locally or b) separate handling for existing references, `transport customization` | only DRS, all file paths are referenced within DRS objects +Intended Usage Domain | Scientific data, genomics workflows, distributed datasets | General-purpose large file versioning (source code, game assets, media files, etc.) | Scientific data, genomics workflows, distributed datasets +Integration with Git Tools | Acts as a git plugin (git drs), not a transparent layer | Fully integrated into Git plumbing; transparent after setup | Ideally, fully integrated, plugin may be required +Maturity & Ecosystem | Early stage, focused on Calypr and Gen3 integrations | Mature, standardized, wide tooling ecosystem | Early stage +Integration with clinical metadata | requires integration | requires integration | requires integration + +### Pros and Cons +Since the auth-sync, project upload, and metadata tracking are common problems to solve, I'll list pros and cons more focused on the file indexing and file transfer use cases. 
+ +git LFS | git DRS +-- | -- +[-] less flexible pointers | [+] pointers are customizable +[+] code is all written, just need to extend it | [-] have to manually copy and edit from source +[+] able to pull in changes from upstream LFS | [-] pulling in updates must be manual +[-] only sha can be used for diffing files | [+] greater control of checksum usage +[-] pulls files by default | [+] can pull only the pointers +[-] not compliant with DRS spec | [+] tool can be refactored to interop with other platforms (eg Terra) using DRS +... | [+] BOTH: enforce a unique pointer for a file +[-] BOTH: need to implement symlinking external to git | ...[-] BOTH: how to manage remote file uploads, especially how to diff, validate, store remote files. As well as integrating it to the user | ... +[-] handlings our + +## Common Technical Questions +- **Handling the source of truth**: In the "easy" use case, when a user has no access to gen3/OHSU object stores directly, we populate a git repo, then indexd, then the bucket in that order. What about when a user has access to the underlying bucket and makes updates there? How do we keep the git repo and indexd up to date? + - How do we ensure that our file metadata is up to date? +- Diff'ing + file changes: how do we know when a file has changed if multiple checksums are being used? Do we have to validate them each time? 
+- Determining how to track remote files (+ clarifying what even this use case means) \ No newline at end of file From 66a370f651da190b878d46b036d95f5c483e61d4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Sun, 8 Jun 2025 18:32:05 -0700 Subject: [PATCH 09/51] 1st draft: create hash -> drs map w/ precommit --- mvp/pre-commit-map.go | 118 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 mvp/pre-commit-map.go diff --git a/mvp/pre-commit-map.go b/mvp/pre-commit-map.go new file mode 100644 index 0000000..6100d04 --- /dev/null +++ b/mvp/pre-commit-map.go @@ -0,0 +1,118 @@ +package main + +import ( + "crypto/sha256" + "encoding/json" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// output of git lfs ls-files +type LfsLsOutput struct { + Files []struct { + Name string `json:"name"` + Size int64 `json:"size"` + Checkout bool `json:"checkout"` + Downloaded bool `json:"downloaded"` + OidType string `json:"oid_type"` + Oid string `json:"oid"` + Version string `json:"version"` + } `json:"files"` +} + +const ( + LFS_OBJS_PATH = ".git/lfs/objects" + DRS_MAP_FILE_NAME = "drs-map.json" +) + +var ( + lfsFiles LfsLsOutput + drsMap = make(map[string]string) + drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) +) + +func main() { + // Check if path exists and is a directory + info, err := os.Stat(LFS_OBJS_PATH) + if err != nil || !info.IsDir() { + fmt.Println("No LFS objects tracked in this repository.") + os.Exit(0) + } + + // Get all LFS file and info using json + // FIXME: use git-lfs internally instead of exec? 
+ cmd := exec.Command("git", "lfs", "ls-files", "--long", "--json") + out, err := cmd.Output() + if err != nil { + log.Fatalf("error running git lfs ls-files: %v", err) + } + + err = json.Unmarshal(out, &lfsFiles) + if err != nil { + log.Fatalf("error unmarshalling git lfs ls-files output: %v", err) + } + + // get the name of repository + repoName, err := getRepoNameFromGit() + if err != nil { + log.Fatalf("error: %v", err) + } + fmt.Println("Repo Name:", repoName) + + // for each LFS file, calculate the UUID using repoName and the oid + for _, file := range lfsFiles.Files { + // Example: UUID = sha256(repoName + ":" + oid) + hashStr := fmt.Sprintf("%s:%s", repoName, file.Oid) + hash := sha256.New() + hash.Write([]byte(hashStr)) + drsId := fmt.Sprintf("%x", hash.Sum(nil)) + + // If the oid exists in drsMap, check if it matches the calculated uuid + if existing, ok := drsMap[file.Oid]; ok { + if existing != drsId { + fmt.Printf("Warning: OID %s has mismatched UUID. Updating.\n", file.Oid) + drsMap[file.Oid] = drsId + } + } else { + // Add new mapping + drsMap[file.Oid] = drsId + } + } + + // write drsMap to json at drsMapPath + drsMapBytes, err := json.Marshal(drsMap) + if err != nil { + log.Fatalf("error marshalling drs-map.json: %v", err) + } + + err = os.WriteFile(drsMapFilePath, drsMapBytes, 0644) + if err != nil { + log.Fatalf("error writing drs-map.json: %v", err) + } + + fmt.Println("Updated drs-map.json with", len(drsMap), "entries.") + + // stage the drsMap file + cmd = exec.Command("git", "add", drsMapFilePath) + _, err = cmd.Output() + if err != nil { + log.Fatalf("error adding drs-map.json to git: %v", err) + } +} + +func getRepoNameFromGit() (string, error) { + // FIXME: change to call git config directly? 
+ cmd := exec.Command("git", "config", "--get", "remote.origin.url") + out, err := cmd.Output() + if err != nil { + return "", err + } + + remoteURL := strings.TrimSpace(string(out)) + repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") + return repoName, nil +} From 5a50247967f73e8846d70c2fe0d5be9943de08ac Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 9 Jun 2025 11:52:55 -0700 Subject: [PATCH 10/51] use uuid --- go.mod | 1 + go.sum | 2 ++ mvp/pre-commit-map.go | 26 +++++++++++++++++--------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 1743ff5..cbcc631 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/git-lfs/git-lfs/v3 v3.6.1 github.com/spf13/cobra v1.9.1 github.com/uc-cdis/gen3-client v0.0.23 + github.com/google/uuid v1.6.0 sigs.k8s.io/yaml v1.4.0 ) diff --git a/go.sum b/go.sum index e94a1a5..672dbe7 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,8 @@ github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4r github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.4.0 h1:aAQzgqIrRKRa7w75CKpbBxYsmUoPjzVm1W59ca1L0J4= diff --git a/mvp/pre-commit-map.go b/mvp/pre-commit-map.go index 6100d04..c34c91e 100644 --- a/mvp/pre-commit-map.go +++ b/mvp/pre-commit-map.go @@ -1,7 +1,6 @@ package main import ( - "crypto/sha256" "encoding/json" "fmt" "log" @@ -9,6 +8,8 @@ import ( 
"os/exec" "path/filepath" "strings" + + "github.com/google/uuid" ) // output of git lfs ls-files @@ -30,9 +31,10 @@ const ( ) var ( - lfsFiles LfsLsOutput - drsMap = make(map[string]string) - drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) + lfsFiles LfsLsOutput + drsMap = make(map[string]string) + // drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) + drsMapFilePath = DRS_MAP_FILE_NAME ) func main() { @@ -45,6 +47,8 @@ func main() { // Get all LFS file and info using json // FIXME: use git-lfs internally instead of exec? + // eg use git-lfs git.GetTrackedFiles + // https://github.com/git-lfs/git-lfs/blob/main/git/git.go/#L1515 cmd := exec.Command("git", "lfs", "ls-files", "--long", "--json") out, err := cmd.Output() if err != nil { @@ -63,13 +67,11 @@ func main() { } fmt.Println("Repo Name:", repoName) - // for each LFS file, calculate the UUID using repoName and the oid + // for each LFS file, calculate the DRS ID using repoName and the oid for _, file := range lfsFiles.Files { - // Example: UUID = sha256(repoName + ":" + oid) + // Example: DRS ID = sha1(repoName + ":" + oid) hashStr := fmt.Sprintf("%s:%s", repoName, file.Oid) - hash := sha256.New() - hash.Write([]byte(hashStr)) - drsId := fmt.Sprintf("%x", hash.Sum(nil)) + drsId := V5UUID(hashStr).String() // If the oid exists in drsMap, check if it matches the calculated uuid if existing, ok := drsMap[file.Oid]; ok { @@ -97,6 +99,7 @@ func main() { fmt.Println("Updated drs-map.json with", len(drsMap), "entries.") // stage the drsMap file + // FIXME: should this be in th pre-commit hook as opposed to the Go code? cmd = exec.Command("git", "add", drsMapFilePath) _, err = cmd.Output() if err != nil { @@ -116,3 +119,8 @@ func getRepoNameFromGit() (string, error) { repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") return repoName, nil } + +func V5UUID(data string) uuid.UUID { + // FIXME: use different UUID method? 
Used same method as g3t + return uuid.NewSHA1(uuid.NameSpaceURL, []byte(data)) +} From a537b3f06ab51aa50c109c6a21902a9fb98cd269 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Thu, 12 Jun 2025 15:58:02 -0700 Subject: [PATCH 11/51] working draft for indexd writing --- .drsconfig | 4 +- client/config.go | 5 +- client/drs-map.go | 207 +++++++++++++++++++++++++++++ client/indexd.go | 271 +++++++++++++++++++++++++++++++++++--- client/interface.go | 2 +- client/logger.go | 37 ++++++ client/pre-commit/main.go | 31 +++++ cmd/download/main.go | 11 ++ cmd/register/main.go | 3 +- go.mod | 4 +- go.sum | 8 +- transfer/transfer.go | 186 ++++++++++++++++++++++++++ 12 files changed, 737 insertions(+), 32 deletions(-) create mode 100644 client/drs-map.go create mode 100644 client/logger.go create mode 100644 client/pre-commit/main.go create mode 100644 transfer/transfer.go diff --git a/.drsconfig b/.drsconfig index 13553b9..8f4aa8e 100644 --- a/.drsconfig +++ b/.drsconfig @@ -1,9 +1,9 @@ { "queryServer": { - "baseURL": "https://calypr.ohsu.edu/ga4gh" + "baseURL": "https://caliper-training.ohsu.edu" }, "writeServer": { - "baseURL": "https://calypr.ohsu.edu/ga4gh" + "baseURL": "https://caliper-training.ohsu.edu" }, "gen3Profile": "" } diff --git a/client/config.go b/client/config.go index 12845db..1abe24e 100644 --- a/client/config.go +++ b/client/config.go @@ -2,7 +2,6 @@ package client import ( "io" - "log" "os" "path/filepath" @@ -36,7 +35,7 @@ func LoadConfig() (*Config, error) { configPath := filepath.Join(topLevel, DRS_CONFIG) - log.Printf("Looking for %s", configPath) + // log.Printf("Looking for %s", configPath) //check if config exists reader, err := os.Open(configPath) if err != nil { @@ -54,6 +53,6 @@ func LoadConfig() (*Config, error) { return nil, err } - log.Printf("Config: %s %#v", string(b), conf) + // log.Printf("Config: %s %#v", string(b), conf) return &conf, nil } diff --git a/client/drs-map.go b/client/drs-map.go new file mode 100644 index 0000000..19bdf73 --- 
/dev/null +++ b/client/drs-map.go @@ -0,0 +1,207 @@ +package client + +import ( + "encoding/json" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/google/uuid" +) + +// output of git lfs ls-files +type LfsLsOutput struct { + Files []struct { + Name string `json:"name"` + Size int64 `json:"size"` + Checkout bool `json:"checkout"` + Downloaded bool `json:"downloaded"` + OidType string `json:"oid_type"` + Oid string `json:"oid"` + Version string `json:"version"` + } `json:"files"` +} + +const ( + LFS_OBJS_PATH = ".git/lfs/objects" + DRS_MAP_FILE_NAME = "drs-map.json" +) + +var ( + lfsFiles LfsLsOutput + drsMap = make(map[string]IndexdRecord) + // drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) + drsMapFilePath = DRS_MAP_FILE_NAME +) + +func UpdateDrsMap() error { + // f, err := os.OpenFile("transfer.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + // if err != nil { + // // fallback to stderr + // log.SetOutput(os.Stderr) + // } else { + // log.SetOutput(f) + // defer f.Close() + // } + + logger, err := NewLogger("") + if err != nil { + log.Fatalf("Failed to open log file: %v", err) + } + defer logger.Close() // Ensures cleanup + logger.Log("updateDrsMap started") + + // [naive method] Get all LFS file and info using json + // and replace the drsMap file with the new data + // FIXME: use git-lfs internally instead of exec? (eg git.GetTrackedFiles) + // https://github.com/git-lfs/git-lfs/blob/main/git/git.go/#L1515 + // or get diff directly in the commit ie git cat-files (if pointer info is stored there)? 
+ cmd := exec.Command("git", "lfs", "ls-files", "--json") + out, err := cmd.Output() + if err != nil { + return fmt.Errorf("error running git lfs ls-files: %v", err) + } + logger.Log("git lfs ls-files output: %s", string(out)) + + err = json.Unmarshal(out, &lfsFiles) + if err != nil { + return fmt.Errorf("error unmarshaling git lfs ls-files output: %v", err) + } + + // get the name of repository + repoName, err := GetRepoNameFromGit() + if err != nil { + return fmt.Errorf("error: %v", err) + } + logger.Log("Repo Name: %s", repoName) + + // for each LFS file, calculate the DRS ID using repoName and the oid + for _, file := range lfsFiles.Files { + // make sure file is both checked out and downloaded + if !file.Checkout || !file.Downloaded { + logger.Log("Skipping file: %s (checked out: %v, downloaded: %v)", file.Name, file.Checkout, file.Downloaded) + continue + } + + drsId := DrsUUID(repoName, file.Oid) + logger.Log("Working with file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) + + // stat the file to use modification time later + path := GetObjectPath(file.Oid) + if _, err := os.Stat(path); os.IsNotExist(err) { + return fmt.Errorf("Error: File %s does not exist in LFS objects path %s. Aborting.", file.Name, path) + } + // fileInfo, err := os.Stat(path) + // if err != nil { + // return fmt.Errorf("error getting file info: %v", err) + // } + + // If the oid exists in drsMap, check if it matches the calculated uuid + // FIXME: naive method, where only the first file with the same oid is stored + // need to handle multiple files with the same oid + if existing, ok := drsMap[file.Name]; ok { + if existing.Did != drsId { + return fmt.Errorf("Error: OID %s for file %s has mismatched UUID (existing: %s, calculated: %s). 
Aborting.", file.Oid, file.Name, existing.Did, drsId) + } + } else { + // Add new mapping from the file name to the IndexdRecord with the correct DRS ID and OID + drsMap[file.Oid] = IndexdRecord{ + Did: drsId, + FileName: file.Name, + URLs: []string{file.Name}, // FIXME: This should be the URL to the file in the bucket + Hashes: HashInfo{SHA256: file.Oid}, + Size: file.Size, + Authz: []string{repoName}, + // CreatedDate: fileInfo.ModTime().Format("2025-05-07T21:29:09.585275"), // created date per RFC3339? + } + logger.Log("Adding to drsMap: %s -> %s", file.Name, drsMap[file.Name].Did) + } + } + + // write drsMap to json at drsMapPath + drsMapBytes, err := json.Marshal(drsMap) + if err != nil { + logger.Log("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) + return fmt.Errorf("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) + } + logger.Log("Writing drsMap to %s", drsMapFilePath) + + err = os.WriteFile(drsMapFilePath, drsMapBytes, 0644) + if err != nil { + return fmt.Errorf("error writing %s: %v", DRS_MAP_FILE_NAME, err) + } + logger.Log("Updated %s with %d entries", DRS_MAP_FILE_NAME, len(drsMap)) + + // stage the drsMap file + // FIXME: should this be in the pre-commit hook (.git/hooks/pre-commit) as opposed to the Go code? + cmd = exec.Command("git", "add", drsMapFilePath) + _, err = cmd.Output() + if err != nil { + return fmt.Errorf("error adding %s to git: %v", DRS_MAP_FILE_NAME, err) + } + + return nil +} + +func GetRepoNameFromGit() (string, error) { + // FIXME: change to retrieve from git config directly? Or use go-git? + cmd := exec.Command("git", "config", "--get", "remote.origin.url") + out, err := cmd.Output() + if err != nil { + return "", err + } + + remoteURL := strings.TrimSpace(string(out)) + repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") + return repoName, nil +} + +func DrsUUID(repoName string, hash string) string { + // FIXME: use different UUID method? 
Used same method as g3t + hashStr := fmt.Sprintf("%s:%s", repoName, hash) + return uuid.NewSHA1(uuid.NameSpaceURL, []byte(hashStr)).String() +} + +func loadDrsMap() (map[string]IndexdRecord, error) { + // Load the DRSMap json file + // FIXME: need to load the committed version as opposed to the working directory version + // see https://github.com/copilot/c/c56f0baa-66d0-4d33-924f-27ca701591e5 + if _, err := os.Stat(drsMapFilePath); os.IsNotExist(err) { + return nil, fmt.Errorf("%s does not exist at %s", DRS_MAP_FILE_NAME, drsMapFilePath) + } + data, err := os.ReadFile(drsMapFilePath) + if err != nil { + return nil, fmt.Errorf("error reading %s: %v", DRS_MAP_FILE_NAME, err) + } + var drsMap map[string]IndexdRecord + err = json.Unmarshal(data, &drsMap) + if err != nil { + return nil, fmt.Errorf("error unmarshaling %s: %v", DRS_MAP_FILE_NAME, err) + } + return drsMap, nil +} + +func DrsInfoFromOid(oid string) (IndexdRecord, error) { + drsMap, err := loadDrsMap() + if err != nil { + return IndexdRecord{}, fmt.Errorf("error loading %s: %v", DRS_MAP_FILE_NAME, err) + } + + // Check if the oid exists in the drsMap + if indexdObj, ok := drsMap[oid]; ok { + return indexdObj, nil + } + return IndexdRecord{}, fmt.Errorf("DRS object not found for oid %s in %s", oid, DRS_MAP_FILE_NAME) +} + +func GetObjectPath(oid string) string { + // check that oid is a valid sha256 hash + if len(oid) != 64 { + return fmt.Sprintf("Error: %s is not a valid sha256 hash", oid) + } + + return filepath.Join(LFS_OBJS_PATH, oid[:2], oid[2:4], oid) +} diff --git a/client/indexd.go b/client/indexd.go index 1d0a592..58d09b3 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -1,16 +1,22 @@ package client import ( + "bytes" "encoding/json" + "errors" "fmt" "io" + "log" "net/http" "net/url" "os" "path/filepath" + "strconv" "github.com/bmeg/git-drs/drs" + "github.com/uc-cdis/gen3-client/gen3-client/commonUtils" "github.com/uc-cdis/gen3-client/gen3-client/jwt" + 
"github.com/uc-cdis/gen3-client/gen3-client/logs" ) var conf jwt.Configure @@ -21,6 +27,60 @@ type IndexDClient struct { profile string } +// subset of the OpenAPI spec for the InputInfo object in indexd +// https://github.com/uc-cdis/indexd/blob/master/openapis/swagger.yaml +// TODO: use VersionInputInfo and indexd/ instead to allow writes to content_created_date +type IndexdRecord struct { + // Unique identifier for the record (UUID) + Did string `json:"did"` + + // Human-readable file name + FileName string `json:"file_name,omitempty"` + + // List of URLs where the file can be accessed + URLs []string `json:"urls"` + + // Hashes of the file (e.g., md5, sha256) + Size int64 `json:"size"` + + // List of access control lists (ACLs) + ACL []string `json:"acl,omitempty"` + + // List of authorization policies + Authz []string `json:"authz,omitempty"` + + Hashes HashInfo `json:"hashes,omitempty"` + + // Additional metadata as key-value pairs + Metadata map[string]string `json:"metadata,omitempty"` + + // Version of the record (optional) + Version string `json:"version,omitempty"` + + // // Created timestamp (RFC3339 format) + // CreatedDate string `json:"created_date,omitempty"` + + // // Updated timestamp (RFC3339 format) + // UpdatedDate string `json:"updated_date,omitempty"` +} + +// HashInfo represents file hash information as per OpenAPI spec +// Patterns are documented for reference, but not enforced at struct level +// md5: ^[0-9a-f]{32}$ +// sha: ^[0-9a-f]{40}$ +// sha256: ^[0-9a-f]{64}$ +// sha512: ^[0-9a-f]{128}$ +// crc: ^[0-9a-f]{8}$ +// etag: ^[0-9a-f]{32}(-\d+)?$ +type HashInfo struct { + MD5 string `json:"md5,omitempty"` + SHA string `json:"sha,omitempty"` + SHA256 string `json:"sha256,omitempty"` + SHA512 string `json:"sha512,omitempty"` + CRC string `json:"crc,omitempty"` + ETag string `json:"etag,omitempty"` +} + func NewIndexDClient(base string) (ObjectStoreClient, error) { baseURL, err := url.Parse(base) // print baseURL @@ -39,8 +99,8 @@ func 
NewIndexDClient(base string) (ObjectStoreClient, error) { return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") } - fmt.Printf("Base URL: %s\n", baseURL.String()) - fmt.Printf("Profile: %s\n", profile) + // fmt.Printf("Base URL: %s\n", baseURL.String()) + // fmt.Printf("Profile: %s\n", profile) return &IndexDClient{baseURL, profile}, err } @@ -49,23 +109,23 @@ func NewIndexDClient(base string) (ObjectStoreClient, error) { func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) { // get file from indexd a := *cl.base - a.Path = filepath.Join(a.Path, "drs/v1/objects", id, "access", access_id) + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id, "access", access_id) // a.Path = filepath.Join("https://calypr.ohsu.edu/user/data/download/", id) + fmt.Printf("using API: %s\n", a.String()) + // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) if err != nil { return nil, err } - // extract accessToken from gen3 profile and insert into header of request - profileConfig = conf.ParseConfig(cl.profile) - if profileConfig.AccessToken == "" { - return nil, fmt.Errorf("access token not found in profile config") + + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } - // Add headers to the request - authStr := "Bearer " + profileConfig.AccessToken - req.Header.Set("Authorization", authStr) + fmt.Printf("added auth header") client := &http.Client{} response, err := client.Do(req) @@ -74,6 +134,8 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string } defer response.Body.Close() + fmt.Printf("got a response") + body, err := io.ReadAll(response.Body) if err != nil { return nil, err @@ -82,9 +144,11 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string out := drs.AccessURL{} err = json.Unmarshal(body, &out) if err != nil { 
- return nil, err + return nil, fmt.Errorf("unable to unmarshal response into drs.AccessURL, response looks like: %s", body) } + fmt.Printf("unmarshaled response into AccessURL struct") + // Extract the signed URL from the response signedURL := out.URL if signedURL == "" { @@ -98,6 +162,8 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string } defer fileResponse.Body.Close() + fmt.Printf("file download response status: %s\n", fileResponse.Status) + // Check if the response status is OK if fileResponse.StatusCode != http.StatusOK { return nil, fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) @@ -122,28 +188,88 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, err } - fmt.Printf("File written to %s\n", dstFile.Name()) + // fmt.Printf("File written to %s\n", dstFile.Name()) return &out, nil } // RegisterFile implements ObjectStoreClient. -func (cl *IndexDClient) RegisterFile(path string, name string) (*drs.DRSObject, error) { - panic("unimplemented") +// This function registers a file with gen3 indexd, writes the file to the bucket, +// and returns the successful DRS object. +// This is done atomically, so a failed upload will not leave a record in indexd. 
+func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { + myLogger, err := NewLogger("") + if err != nil { + // Handle error (e.g., print to stderr and exit) + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() // Ensures cleanup + myLogger.Log("register file started for oid:%s", oid) + + drsObj, err := cl.registerIndexdRecord(*myLogger, oid) + if err != nil { + myLogger.Log("error registering indexd record: %s", err) + return nil, fmt.Errorf("error registering indexd record: %v", err) + } + + // // TODO: upload file to bucket using gen3-client code + // // pulled from gen3-client/g3cmd/upload.go + // // https://github.com/uc-cdis/cdis-data-client/blob/df9c0820ab30e25ba8399c2cc6cccbecc2f0407a/gen3-client/g3cmd/upload.go/#L106-L150 + // filePath := GetObjectPath(oid) + + // file, _ := os.Open(filePath) + // if fi, _ := file.Stat(); !fi.IsDir() { + // fmt.Println("\t" + filePath) + // } + // defer file.Close() + + // myLogger.Log("file path: %s", filePath) + + // uploadPath := filePath + // includeSubDirName := true + // hasMetadata := false + + // fileInfo, err := g3cmd.ProcessFilename(uploadPath, filePath, includeSubDirName, hasMetadata) + // if err != nil { + // logs.AddToFailedLog(filePath, filepath.Base(filePath), commonUtils.FileMetadata{}, "", 0, false, true) + // log.Println("Process filename error for file: " + err.Error()) + // } + // // The following flow is for singlepart upload flow + // gen3Interface := g3cmd.NewGen3Interface() + // bucketName := "cbds" + // respURL, guid, err := g3cmd.GeneratePresignedURL(gen3Interface, fileInfo.Filename, fileInfo.FileMetadata, bucketName) + // if err != nil { + // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) + // log.Println(err.Error()) + // } + // // update failed log with new guid + // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) + + // furObject := 
commonUtils.FileUploadRequestObject{FilePath: drsObj.Name, Filename: drsObj.Name, GUID: drsObj.Id, PresignedURL: respURL} + // furObject, err = g3cmd.GenerateUploadRequest(gen3Interface, furObject, file) + // if err != nil { + // log.Printf("Error occurred during request generation: %s\n", err.Error()) + // } + // err = uploadFile(furObject, 0) + // if err != nil { + // log.Println(err.Error()) + // } else { + // logs.IncrementScore(0) + // } + + // TODO: if upload unsuccessful, delete record from indexd + return drsObj, nil } func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { a := *cl.base - a.Path = filepath.Join(a.Path, "drs/v1/objects", id) + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id) req, err := http.NewRequest("GET", a.String(), nil) if err != nil { return nil, err } - // Add headers to the request - req.Header.Set("Authorization", "Bearer ") - req.Header.Set("Custom-Header", "HeaderValue") client := &http.Client{} response, err := client.Do(req) @@ -164,3 +290,112 @@ func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { } return &out, nil } + +func addGen3AuthHeader(req *http.Request, profile string) error { + // extract accessToken from gen3 profile and insert into header of request + profileConfig = conf.ParseConfig(profile) + if profileConfig.AccessToken == "" { + return fmt.Errorf("access token not found in profile config") + } + + // Add headers to the request + authStr := "Bearer " + profileConfig.AccessToken + req.Header.Set("Authorization", authStr) + + return nil +} + +func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs.DRSObject, error) { + // (get indexd object using drs map) + indexdObj, err := DrsInfoFromOid(oid) + if err != nil { + return nil, fmt.Errorf("error getting indexd object for oid %s: %v", oid, err) + } + + // create indexd object the long way + var data map[string]interface{} + var tempIndexdObj, _ = json.Marshal(indexdObj) + json.Unmarshal(tempIndexdObj, 
&data) + data["form"] = "object" + data["authz"] = []string{"/programs/cbds/projects/qw"} + + jsonBytes, _ := json.Marshal(data) + myLogger.Log("retrieved IndexdObj: %s", string(jsonBytes)) + + // register DRS object via /index POST + // (setup post request to indexd) + a := *cl.base + a.Path = filepath.Join(a.Path, "index", "index") + + req, err := http.NewRequest("POST", a.String(), bytes.NewBuffer(jsonBytes)) + if err != nil { + return nil, err + } + // set Content-Type header for JSON + req.Header.Set("accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + // add auth token + // err = addGen3AuthHeader(req, cl.profile) + // if err != nil { + // return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) + // } + profileConfig = conf.ParseConfig(cl.profile) + if profileConfig.AccessToken == "" { + myLogger.Log("access token not found in profile config") + } + + // Add headers to the request + authStr := "Bearer " + profileConfig.AccessToken + req.Header.Set("Authorization", authStr) + + myLogger.Log("POST request created for Indexd:%s", a.String()) + + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + return nil, err + } + defer response.Body.Close() + + // check and see if the response status is OK + drsId := indexdObj.Did + if response.StatusCode != http.StatusOK { + body, _ := io.ReadAll(response.Body) + return nil, fmt.Errorf("failed to register DRS ID %s: %s", drsId, body) + } + myLogger.Log("POST successful: %s", response.Status) + + // query and return DRS object + drsObj, err := cl.QueryID(indexdObj.Did) + if err != nil { + return nil, fmt.Errorf("error querying DRS ID %s: %v", drsId, err) + } + myLogger.Log("GET for DRS ID successful: %s", drsObj.Id) + return drsObj, nil +} + +// copied from +// https://github.com/uc-cdis/cdis-data-client/blob/master/gen3-client/g3cmd/utils.go#L540 +func uploadFile(furObject commonUtils.FileUploadRequestObject, retryCount int) error { + 
log.Println("Uploading data ...") + furObject.Bar.Start() + + client := &http.Client{} + resp, err := client.Do(furObject.Request) + if err != nil { + logs.AddToFailedLog(furObject.FilePath, furObject.Filename, furObject.FileMetadata, furObject.GUID, retryCount, false, true) + furObject.Bar.Finish() + return errors.New("Error occurred during upload: " + err.Error()) + } + if resp.StatusCode != 200 { + logs.AddToFailedLog(furObject.FilePath, furObject.Filename, furObject.FileMetadata, furObject.GUID, retryCount, false, true) + furObject.Bar.Finish() + return errors.New("Upload request got a non-200 response with status code " + strconv.Itoa(resp.StatusCode)) + } + furObject.Bar.Finish() + log.Printf("Successfully uploaded file \"%s\" to GUID %s.\n", furObject.FilePath, furObject.GUID) + logs.DeleteFromFailedLog(furObject.FilePath, true) + logs.WriteToSucceededLog(furObject.FilePath, furObject.GUID, false) + return nil +} diff --git a/client/interface.go b/client/interface.go index 8b6bba1..652acdc 100644 --- a/client/interface.go +++ b/client/interface.go @@ -7,7 +7,7 @@ type ObjectStoreClient interface { QueryID(id string) (*drs.DRSObject, error) //Put file into object storage and obtain a DRS record pointing to it - RegisterFile(path string, name string) (*drs.DRSObject, error) + RegisterFile(oid string) (*drs.DRSObject, error) //Download file given a DRS ID DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) diff --git a/client/logger.go b/client/logger.go new file mode 100644 index 0000000..3e86385 --- /dev/null +++ b/client/logger.go @@ -0,0 +1,37 @@ +package client + +import ( + "fmt" + "log" + "os" +) + +// Logger wraps a log.Logger and the file it writes to. +type Logger struct { + file *os.File + logger *log.Logger +} + +// NewLogger opens the log file and returns a Logger. 
+func NewLogger(filename string) (*Logger, error) { + if filename == "" { + filename = "transfer.log" + } + + file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return nil, err + } + logger := log.New(file, "", log.LstdFlags) // Standard log flags + return &Logger{file: file, logger: logger}, nil +} + +// Log writes a formatted message to the log file. +func (l *Logger) Log(format string, args ...interface{}) { + l.logger.Println(fmt.Sprintf(format, args...)) +} + +// Close closes the log file, flushing all writes. +func (l *Logger) Close() error { + return l.file.Close() +} diff --git a/client/pre-commit/main.go b/client/pre-commit/main.go new file mode 100644 index 0000000..8ebfcc0 --- /dev/null +++ b/client/pre-commit/main.go @@ -0,0 +1,31 @@ +package main + +import ( + "fmt" + "log" + + "github.com/bmeg/git-drs/client" +) + +// should this be a main method or a separate command? +// TODO: might need to split this up into command and indexd-specific client code +func main() { + myLogger, err := client.NewLogger("") + if err != nil { + // Handle error (e.g., print to stderr and exit) + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() // Ensures cleanup + + myLogger.Log("~~~~~~~~~~~~~ START: pre-commit ~~~~~~~~~~~~~") + myLogger.Log(" started") + + err = client.UpdateDrsMap() + + // reopen log file + if err != nil { + fmt.Println("updateDrsMap failed:", err) + log.Fatalf("updateDrsMap failed: %v", err) + } + myLogger.Log("~~~~~~~~~~~~~ COMPLETED: pre-commit ~~~~~~~~~~~~~") +} diff --git a/cmd/download/main.go b/cmd/download/main.go index 067d642..8b30bc1 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -1,6 +1,8 @@ package download import ( + "fmt" + "github.com/bmeg/git-drs/client" "github.com/bmeg/git-drs/drs" "github.com/spf13/cobra" @@ -29,11 +31,16 @@ var Cmd = &cobra.Command{ baseURL := cfg.QueryServer.BaseURL + // print random string to stdout + fmt.Println("Using 
server:", cfg.QueryServer.BaseURL) + client, err := client.NewIndexDClient(baseURL) if err != nil { return err } + fmt.Println("created indexd client:", cfg.QueryServer.BaseURL) + if dstPath == "" { drsObj, err = client.QueryID(drsId) @@ -43,11 +50,15 @@ var Cmd = &cobra.Command{ dstPath = drsObj.Name } + fmt.Println("downloading file:", drsObj.Name) + _, err = client.DownloadFile(drsId, accessId, dstPath) if err != nil { return err } + fmt.Println("file downloaded") + return nil }, } diff --git a/cmd/register/main.go b/cmd/register/main.go index 2f8bd41..77e36a9 100644 --- a/cmd/register/main.go +++ b/cmd/register/main.go @@ -2,7 +2,6 @@ package register import ( "log" - "path/filepath" "github.com/bmeg/git-drs/client" "github.com/spf13/cobra" @@ -24,7 +23,7 @@ var Cmd = &cobra.Command{ } //upload the file, name would probably be relative to the base of the git repo - client.RegisterFile(args[0], filepath.Base(args[0])) + client.RegisterFile(args[0]) //remove later _ = client diff --git a/go.mod b/go.mod index cbcc631..c6772bc 100644 --- a/go.mod +++ b/go.mod @@ -4,9 +4,9 @@ go 1.24.0 require ( github.com/git-lfs/git-lfs/v3 v3.6.1 + github.com/google/uuid v1.6.0 github.com/spf13/cobra v1.9.1 github.com/uc-cdis/gen3-client v0.0.23 - github.com/google/uuid v1.6.0 sigs.k8s.io/yaml v1.4.0 ) @@ -28,7 +28,7 @@ require ( github.com/spf13/pflag v1.0.6 // indirect github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e // indirect golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect + golang.org/x/sys v0.29.0 // indirect golang.org/x/text v0.14.0 // indirect gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect gopkg.in/ini.v1 v1.66.3 // indirect diff --git a/go.sum b/go.sum index 672dbe7..e45c977 100644 --- a/go.sum +++ b/go.sum @@ -48,8 +48,8 @@ github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= github.com/leonelquinteros/gotext v1.5.0 
h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914FOauSkBM= github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= -github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= @@ -88,8 +88,8 @@ golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= diff --git a/transfer/transfer.go b/transfer/transfer.go new file mode 100644 index 0000000..7cfe5f9 --- /dev/null +++ b/transfer/transfer.go @@ -0,0 +1,186 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "os" + + "github.com/bmeg/git-drs/client" +) 
+ +// InitMessage represents the structure of the initiation data +type InitMessage struct { + Event string `json:"event"` // Always "init" to identify this message + Operation string `json:"operation"` // "upload" or "download" depending on transfer direction + Remote string `json:"remote"` // Git remote name or URL + Concurrent bool `json:"concurrent"` // Reflects lfs.customtransfer..concurrent + ConcurrentTransfers int `json:"concurrenttransfers"` // Reflects lfs.concurrenttransfers value +} + +// CompleteMessage is a minimal response to signal transfer is "complete" +type CompleteMessage struct { + Event string `json:"event"` + Oid string `json:"oid,omitempty"` + Path string `json:"path,omitempty"` +} + +// UploadMessage represents a request to upload an object. +type UploadMessage struct { + Event string `json:"event"` // "upload" + Oid string `json:"oid"` // Object ID (SHA-256 hash) + Size int64 `json:"size"` // Size in bytes + Path string `json:"path"` // Local path to file + Action *Action `json:"action"` // Transfer action details (optional, may be omitted) +} + +// DownloadMessage represents a request to download an object. +type DownloadMessage struct { + Event string `json:"event"` // "download" + Oid string `json:"oid"` // Object ID (SHA-256 hash) + Size int64 `json:"size"` // Size in bytes + Action *Action `json:"action"` // Transfer action details (optional, may be omitted) + Path string `json:"path"` // Where to store the downloaded file +} + +// TerminateMessage is sent when the agent should terminate. +type TerminateMessage struct { + Event string `json:"event"` // "terminate" +} + +// ErrorResponse is sent when an error occurs during a transfer. 
+type ErrorResponse struct { + Event string `json:"event"` // "error" + Oid string `json:"oid"` // Object ID involved in the error + Code int `json:"code"` // Error code (standard or custom) + Message string `json:"message"` // Human-readable error message +} + +// ProgressResponse provides progress updates for an object transfer. +type ProgressResponse struct { + Event string `json:"event"` // "progress" + Oid string `json:"oid"` // Object ID being transferred + BytesSoFar int64 `json:"bytesSoFar"` // Bytes transferred so far + BytesSinceLast int64 `json:"bytesSinceLast"` // Bytes transferred since last progress message +} + +// TerminateResponse signals the agent has completed termination. +type TerminateResponse struct { + Event string `json:"event"` // "terminate" +} + +// Action is an optional struct representing transfer actions (upload/download URLs, etc.) +type Action struct { + Href string `json:"href"` + Header map[string]string `json:"header,omitempty"` + ExpiresIn int `json:"expires_in,omitempty"` +} + +var ( + req InitMessage + drsClient client.ObjectStoreClient + operation string // "upload" or "download", set by the init message +) + +func main() { + // f, err := os.OpenFile("transfer.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + // if err != nil { + // // fallback to stderr + // log.SetOutput(os.Stderr) + // } else { + // log.SetOutput(f) + // defer f.Close() + // } + // log.Println("~~~~~~~~~~~~~ custom transfer started ~~~~~~~~~~~~~") + + //setup logging to file for debugging + myLogger, err := client.NewLogger("") + if err != nil { + // Handle error (e.g., print to stderr and exit) + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() // Ensures cleanup + + myLogger.Log("~~~~~~~~~~~~~ START: custom transfer ~~~~~~~~~~~~~") + + scanner := bufio.NewScanner(os.Stdin) + encoder := json.NewEncoder(os.Stdout) + + for scanner.Scan() { + var msg map[string]interface{} + err := json.Unmarshal(scanner.Bytes(), &msg) + if err != 
nil { + myLogger.Log(fmt.Sprintf("error decoding JSON: %s", err)) + continue + } + myLogger.Log(fmt.Sprintf("Received message: %s", msg)) + + // Example: handle only "init" event + if evt, ok := msg["event"]; ok && evt == "init" { + // Log for debugging + myLogger.Log(fmt.Sprintf("Handling init: %s", msg)) + + // setup indexd client + cfg, err := client.LoadConfig() + if err != nil { + myLogger.Log(fmt.Sprintf("Error loading config: %s", err)) + } + + baseURL := cfg.QueryServer.BaseURL + drsClient, err = client.NewIndexDClient(baseURL) + if err != nil { + myLogger.Log(fmt.Sprintf("Error creating indexd client: %s", err)) + continue + } + + // Respond with an empty json object via stdout + encoder.Encode(struct{}{}) + myLogger.Log("Responding to init with empty object") + } else if evt, ok := msg["event"]; ok && evt == "download" { + // Handle download event + myLogger.Log(fmt.Sprintf("Handling download event: %s", msg)) + + // FIXME: Here you would implement the logic to handle the download + } else if evt, ok := msg["event"]; ok && evt == "upload" { + // Handle upload event + myLogger.Log(fmt.Sprintf("Handling upload event: %s", msg)) + + // create UploadMessage from the received message + var uploadMsg UploadMessage + if err := json.Unmarshal(scanner.Bytes(), &uploadMsg); err != nil { + myLogger.Log(fmt.Sprintf("Error parsing UploadMessage: %v\n", err)) + continue + } + myLogger.Log(fmt.Sprintf("Got UploadMessage: %+v\n", uploadMsg)) + + // FIXME: Here you would implement the logic to handle the upload + drsObj, err := drsClient.RegisterFile(uploadMsg.Oid) + if err != nil { + myLogger.Log(fmt.Sprintf("Error, DRS Object: %+v\n", drsObj)) + continue + } + + myLogger.Log("creating response message with oid %s", uploadMsg.Oid) + + // send success message back + completeMsg := CompleteMessage{ + Event: "complete", + Oid: uploadMsg.Oid, + } + myLogger.Log(fmt.Sprintf("Complete message: %+v", completeMsg)) + encoder.Encode(completeMsg) + + myLogger.Log("Upload for oid 
%s complete", uploadMsg.Oid) + } else if evt, ok := msg["event"]; ok && evt == "terminate" { + // Handle terminate event + myLogger.Log(fmt.Sprintf("terminate event received: %s", msg)) + } + } + + if err := scanner.Err(); err != nil { + myLogger.Log(fmt.Sprintf("stdin error: %s", err)) + } + + myLogger.Log("~~~~~~~~~~~~~ COMPLETED: custom transfer ~~~~~~~~~~~~~") +} From 6b512a8ad7a78c04e251ebd4e4ce51e00bfb8f51 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 16 Jun 2025 00:18:07 -0700 Subject: [PATCH 12/51] parse gen3 project info from config, drafted upload-single refactor still 404ing --- client/config.go | 2 + client/drs-map.go | 18 +++++++-- client/indexd.go | 93 +++++++++++++++++++++++++++++++++----------- cmd/download/main.go | 3 ++ cmd/register/main.go | 15 ++++--- 5 files changed, 99 insertions(+), 32 deletions(-) diff --git a/client/config.go b/client/config.go index 1abe24e..b4a2c65 100644 --- a/client/config.go +++ b/client/config.go @@ -18,6 +18,8 @@ type Config struct { QueryServer Server `json:"queryServer"` WriteServer Server `json:"writeServer"` Gen3Profile string `json:"gen3Profile"` + Gen3Project string `json:"gen3Project"` + Gen3Bucket string `json:"gen3Bucket"` } const ( diff --git a/client/drs-map.go b/client/drs-map.go index 19bdf73..2a65d63 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -99,10 +99,21 @@ func UpdateDrsMap() error { // return fmt.Errorf("error getting file info: %v", err) // } + // get url using bucket name, drsId, and file name + cfg, err := LoadConfig() // should this be handled only via indexd client? 
+ if err != nil { + return fmt.Errorf("error loading config: %v", err) + } + bucketName := cfg.Gen3Bucket + if bucketName == "" { + return fmt.Errorf("error: bucket name is empty in config file") + } + fileURL := fmt.Sprintf("s3://" + filepath.Join(bucketName, drsId, file.Name)) + // If the oid exists in drsMap, check if it matches the calculated uuid // FIXME: naive method, where only the first file with the same oid is stored - // need to handle multiple files with the same oid - if existing, ok := drsMap[file.Name]; ok { + // in the future, will need to handle multiple files with the same oid + if existing, ok := drsMap[drsId]; ok { if existing.Did != drsId { return fmt.Errorf("Error: OID %s for file %s has mismatched UUID (existing: %s, calculated: %s). Aborting.", file.Oid, file.Name, existing.Did, drsId) } @@ -111,7 +122,7 @@ func UpdateDrsMap() error { drsMap[file.Oid] = IndexdRecord{ Did: drsId, FileName: file.Name, - URLs: []string{file.Name}, // FIXME: This should be the URL to the file in the bucket + URLs: []string{fileURL}, Hashes: HashInfo{SHA256: file.Oid}, Size: file.Size, Authz: []string{repoName}, @@ -136,7 +147,6 @@ func UpdateDrsMap() error { logger.Log("Updated %s with %d entries", DRS_MAP_FILE_NAME, len(drsMap)) // stage the drsMap file - // FIXME: should this be in the pre-commit hook (.git/hooks/pre-commit) as opposed to the Go code? 
cmd = exec.Command("git", "add", drsMapFilePath) _, err = cmd.Output() if err != nil { diff --git a/client/indexd.go b/client/indexd.go index 58d09b3..3664d0b 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -12,9 +12,11 @@ import ( "os" "path/filepath" "strconv" + "strings" "github.com/bmeg/git-drs/drs" "github.com/uc-cdis/gen3-client/gen3-client/commonUtils" + "github.com/uc-cdis/gen3-client/gen3-client/g3cmd" "github.com/uc-cdis/gen3-client/gen3-client/jwt" "github.com/uc-cdis/gen3-client/gen3-client/logs" ) @@ -23,8 +25,10 @@ var conf jwt.Configure var profileConfig jwt.Credential type IndexDClient struct { - base *url.URL - profile string + base *url.URL + profile string + projectId string + bucketName string } // subset of the OpenAPI spec for the InputInfo object in indexd @@ -93,16 +97,26 @@ func NewIndexDClient(base string) (ObjectStoreClient, error) { return nil, err } - // get the gen3Profile from the config + // get the gen3Profile, gen3Project, and gen3Bucket from the config profile := cfg.Gen3Profile if profile == "" { return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") } + projectId := cfg.Gen3Project + if projectId == "" { + return nil, fmt.Errorf("No gen3 project specified. Please provide a gen3Project key in your .drsconfig") + } + + bucketName := cfg.Gen3Bucket + if bucketName == "" { + return nil, fmt.Errorf("No gen3 bucket specified. Please provide a gen3Bucket key in your .drsconfig") + } + // fmt.Printf("Base URL: %s\n", baseURL.String()) // fmt.Printf("Profile: %s\n", profile) - return &IndexDClient{baseURL, profile}, err + return &IndexDClient{baseURL, profile, projectId, bucketName}, err } // DownloadFile implements ObjectStoreClient @@ -198,25 +212,32 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string // and returns the successful DRS object. // This is done atomically, so a failed upload will not leave a record in indexd. 
func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { + // setup logging myLogger, err := NewLogger("") if err != nil { // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } defer myLogger.Close() // Ensures cleanup - myLogger.Log("register file started for oid:%s", oid) + myLogger.Log("register file started for oid: %s", oid) + // create indexd record drsObj, err := cl.registerIndexdRecord(*myLogger, oid) if err != nil { myLogger.Log("error registering indexd record: %s", err) return nil, fmt.Errorf("error registering indexd record: %v", err) } - // // TODO: upload file to bucket using gen3-client code - // // pulled from gen3-client/g3cmd/upload.go - // // https://github.com/uc-cdis/cdis-data-client/blob/df9c0820ab30e25ba8399c2cc6cccbecc2f0407a/gen3-client/g3cmd/upload.go/#L106-L150 + // TODO: upload file to bucket using gen3-client code + // pulled from gen3-client/g3cmd/upload.go + // https://github.com/uc-cdis/cdis-data-client/blob/df9c0820ab30e25ba8399c2cc6cccbecc2f0407a/gen3-client/g3cmd/upload.go/#L106-L150 + + filePath := GetObjectPath(oid) + g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) + // filePath := GetObjectPath(oid) + // // get file // file, _ := os.Open(filePath) // if fi, _ := file.Stat(); !fi.IsDir() { // fmt.Println("\t" + filePath) @@ -225,29 +246,55 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // myLogger.Log("file path: %s", filePath) + // // get file info // uploadPath := filePath // includeSubDirName := true // hasMetadata := false // fileInfo, err := g3cmd.ProcessFilename(uploadPath, filePath, includeSubDirName, hasMetadata) // if err != nil { + // myLogger.Log("error processing filename: %s", err) // logs.AddToFailedLog(filePath, filepath.Base(filePath), commonUtils.FileMetadata{}, "", 0, false, true) // log.Println("Process filename error for file: " + err.Error()) // } - // // The following flow is for singlepart upload 
flow + + // // connect up gen3 profile for auth // gen3Interface := g3cmd.NewGen3Interface() - // bucketName := "cbds" + // myLogger.Log("parsing profile: %s", cl.profile) + // profileConfig = conf.ParseConfig(cl.profile) + + // // if hasMetadata { + // // hasShepherd, err := gen3Interface.CheckForShepherdAPI(&profileConfig) + // // if err != nil { + // // myLogger.Log("WARNING: Error when checking for Shepherd API: %v", err) + // // } else { + // // if !hasShepherd { + // // myLogger.Log("ERROR: Metadata upload (`--metadata`) is not supported in the environment you are uploading to. Double check that you are uploading to the right profile.") + // // } + // // } + // // } + + // a, b, err := gen3Interface.CheckPrivileges(&profileConfig) + + // myLogger.Log("Privileges: %s ---- %s ----- %s", a, b, err) + + // // get presigned URL for upload + // bucketName := "cbds" // TODO: match bucket to program or project (as determined by fence config?) + // fileInfo.FileMetadata.Authz = []string{"/programs/cbds/projects/qw"} // TODO: determine how to define gen3 project name // respURL, guid, err := g3cmd.GeneratePresignedURL(gen3Interface, fileInfo.Filename, fileInfo.FileMetadata, bucketName) // if err != nil { + // myLogger.Log("error generating presigned URL: %s", err) // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) // log.Println(err.Error()) // } // // update failed log with new guid // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) + // // upload actual file // furObject := commonUtils.FileUploadRequestObject{FilePath: drsObj.Name, Filename: drsObj.Name, GUID: drsObj.Id, PresignedURL: respURL} // furObject, err = g3cmd.GenerateUploadRequest(gen3Interface, furObject, file) // if err != nil { + // myLogger.Log("Error occurred during request generation: %s", err) // log.Printf("Error occurred during request generation: %s\n", err.Error()) // } // err = 
uploadFile(furObject, 0) @@ -258,6 +305,8 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // } // TODO: if upload unsuccessful, delete record from indexd + + // return return drsObj, nil } @@ -317,7 +366,11 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. var tempIndexdObj, _ = json.Marshal(indexdObj) json.Unmarshal(tempIndexdObj, &data) data["form"] = "object" - data["authz"] = []string{"/programs/cbds/projects/qw"} + + // parse project ID to form authz string + projectId := strings.Split(cl.projectId, "-") + authz := fmt.Sprintf("/programs/%s/projects/%s", projectId[0], projectId[1]) + data["authz"] = []string{authz} jsonBytes, _ := json.Marshal(data) myLogger.Log("retrieved IndexdObj: %s", string(jsonBytes)) @@ -336,20 +389,14 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. req.Header.Set("Content-Type", "application/json") // add auth token - // err = addGen3AuthHeader(req, cl.profile) - // if err != nil { - // return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) - // } - profileConfig = conf.ParseConfig(cl.profile) - if profileConfig.AccessToken == "" { - myLogger.Log("access token not found in profile config") + // FIXME: token expires earlier than expected, error looks like + // [401] - request to arborist failed: error decoding token: expired at time: 1749844905 + addGen3AuthHeader(req, cl.profile) + if err != nil { + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } - // Add headers to the request - authStr := "Bearer " + profileConfig.AccessToken - req.Header.Set("Authorization", authStr) - - myLogger.Log("POST request created for Indexd:%s", a.String()) + myLogger.Log("POST request created for indexd: %s", a.String()) client := &http.Client{} response, err := client.Do(req) diff --git a/cmd/download/main.go b/cmd/download/main.go index 8b30bc1..bbe1899 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -36,6 +36,7 
@@ var Cmd = &cobra.Command{ client, err := client.NewIndexDClient(baseURL) if err != nil { + fmt.Printf("\nerror creating indexd client: %s", err) return err } @@ -45,6 +46,7 @@ var Cmd = &cobra.Command{ drsObj, err = client.QueryID(drsId) if err != nil { + fmt.Printf("\nerror querying DRS ID %s: %s", drsId, err) return err } dstPath = drsObj.Name @@ -54,6 +56,7 @@ var Cmd = &cobra.Command{ _, err = client.DownloadFile(drsId, accessId, dstPath) if err != nil { + fmt.Printf("\nerror downloading file %s: %s", drsId, err) return err } diff --git a/cmd/register/main.go b/cmd/register/main.go index 77e36a9..3d8cbdb 100644 --- a/cmd/register/main.go +++ b/cmd/register/main.go @@ -1,23 +1,28 @@ package register import ( + "fmt" "log" "github.com/bmeg/git-drs/client" "github.com/spf13/cobra" ) -var server string = "https://calypr.ohsu.edu/ga4gh" - // Cmd line declaration var Cmd = &cobra.Command{ Use: "register", - Short: "", - Long: ``, + Short: "", + Long: `accepts one parameter: `, Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { log.Printf("Registering file %s", args[0]) - client, err := client.NewIndexDClient(server) + + cfg, err := client.LoadConfig() + if err != nil { + fmt.Println("error loading config:", err) + return err + } + client, err := client.NewIndexDClient(cfg.QueryServer.BaseURL) if err != nil { return err } From 46749c2a29071d70c50e52917743699a6ec72a17 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 16 Jun 2025 12:04:42 -0700 Subject: [PATCH 13/51] successful push using oid (sha) as filename --- client/drs-map.go | 2 +- client/indexd.go | 114 +++---------------------------------------- transfer/transfer.go | 13 +---- 3 files changed, 9 insertions(+), 120 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index 2a65d63..4d12ade 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -108,7 +108,7 @@ func UpdateDrsMap() error { if bucketName == "" { return fmt.Errorf("error: bucket name is empty in 
config file") } - fileURL := fmt.Sprintf("s3://" + filepath.Join(bucketName, drsId, file.Name)) + fileURL := fmt.Sprintf("s3://" + filepath.Join(bucketName, drsId, file.Oid)) // If the oid exists in drsMap, check if it matches the calculated uuid // FIXME: naive method, where only the first file with the same oid is stored diff --git a/client/indexd.go b/client/indexd.go index 3664d0b..2551d9b 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -3,7 +3,6 @@ package client import ( "bytes" "encoding/json" - "errors" "fmt" "io" "log" @@ -11,14 +10,11 @@ import ( "net/url" "os" "path/filepath" - "strconv" "strings" "github.com/bmeg/git-drs/drs" - "github.com/uc-cdis/gen3-client/gen3-client/commonUtils" "github.com/uc-cdis/gen3-client/gen3-client/g3cmd" "github.com/uc-cdis/gen3-client/gen3-client/jwt" - "github.com/uc-cdis/gen3-client/gen3-client/logs" ) var conf jwt.Configure @@ -124,9 +120,8 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string // get file from indexd a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id, "access", access_id) - // a.Path = filepath.Join("https://calypr.ohsu.edu/user/data/download/", id) - fmt.Printf("using API: %s\n", a.String()) + fmt.Printf("\nusing API: %s\n", a.String()) // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) @@ -139,7 +134,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } - fmt.Printf("added auth header") + fmt.Printf("\nadded auth header") client := &http.Client{} response, err := client.Do(req) @@ -148,7 +143,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string } defer response.Body.Close() - fmt.Printf("got a response") + fmt.Printf("\ngot a response") body, err := io.ReadAll(response.Body) if err != nil { @@ -161,12 +156,12 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string 
return nil, fmt.Errorf("unable to unmarshal response into drs.AccessURL, response looks like: %s", body) } - fmt.Printf("unmarshaled response into AccessURL struct") + fmt.Printf("\nunmarshaled response into AccessURL struct") // Extract the signed URL from the response signedURL := out.URL if signedURL == "" { - return nil, fmt.Errorf("signed URL not found in response.") + return nil, fmt.Errorf("\nsigned URL not found in response.") } // Download the file using the signed URL @@ -176,7 +171,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string } defer fileResponse.Body.Close() - fmt.Printf("file download response status: %s\n", fileResponse.Status) + fmt.Printf("\nfile download response status: %s\n", fileResponse.Status) // Check if the response status is OK if fileResponse.StatusCode != http.StatusOK { @@ -202,7 +197,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, err } - // fmt.Printf("File written to %s\n", dstFile.Name()) + fmt.Printf("\nFile written to %s\n", dstFile.Name()) return &out, nil } @@ -234,76 +229,6 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { filePath := GetObjectPath(oid) g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) - - // filePath := GetObjectPath(oid) - - // // get file - // file, _ := os.Open(filePath) - // if fi, _ := file.Stat(); !fi.IsDir() { - // fmt.Println("\t" + filePath) - // } - // defer file.Close() - - // myLogger.Log("file path: %s", filePath) - - // // get file info - // uploadPath := filePath - // includeSubDirName := true - // hasMetadata := false - - // fileInfo, err := g3cmd.ProcessFilename(uploadPath, filePath, includeSubDirName, hasMetadata) - // if err != nil { - // myLogger.Log("error processing filename: %s", err) - // logs.AddToFailedLog(filePath, filepath.Base(filePath), commonUtils.FileMetadata{}, "", 0, false, true) - // log.Println("Process filename error for file: " + 
err.Error()) - // } - - // // connect up gen3 profile for auth - // gen3Interface := g3cmd.NewGen3Interface() - // myLogger.Log("parsing profile: %s", cl.profile) - // profileConfig = conf.ParseConfig(cl.profile) - - // // if hasMetadata { - // // hasShepherd, err := gen3Interface.CheckForShepherdAPI(&profileConfig) - // // if err != nil { - // // myLogger.Log("WARNING: Error when checking for Shepherd API: %v", err) - // // } else { - // // if !hasShepherd { - // // myLogger.Log("ERROR: Metadata upload (`--metadata`) is not supported in the environment you are uploading to. Double check that you are uploading to the right profile.") - // // } - // // } - // // } - - // a, b, err := gen3Interface.CheckPrivileges(&profileConfig) - - // myLogger.Log("Privileges: %s ---- %s ----- %s", a, b, err) - - // // get presigned URL for upload - // bucketName := "cbds" // TODO: match bucket to program or project (as determined by fence config?) - // fileInfo.FileMetadata.Authz = []string{"/programs/cbds/projects/qw"} // TODO: determine how to define gen3 project name - // respURL, guid, err := g3cmd.GeneratePresignedURL(gen3Interface, fileInfo.Filename, fileInfo.FileMetadata, bucketName) - // if err != nil { - // myLogger.Log("error generating presigned URL: %s", err) - // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) - // log.Println(err.Error()) - // } - // // update failed log with new guid - // logs.AddToFailedLog(fileInfo.FilePath, fileInfo.Filename, fileInfo.FileMetadata, guid, 0, false, true) - - // // upload actual file - // furObject := commonUtils.FileUploadRequestObject{FilePath: drsObj.Name, Filename: drsObj.Name, GUID: drsObj.Id, PresignedURL: respURL} - // furObject, err = g3cmd.GenerateUploadRequest(gen3Interface, furObject, file) - // if err != nil { - // myLogger.Log("Error occurred during request generation: %s", err) - // log.Printf("Error occurred during request generation: %s\n", err.Error()) - // } - 
// err = uploadFile(furObject, 0) - // if err != nil { - // log.Println(err.Error()) - // } else { - // logs.IncrementScore(0) - // } - // TODO: if upload unsuccessful, delete record from indexd // return @@ -421,28 +346,3 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. myLogger.Log("GET for DRS ID successful: %s", drsObj.Id) return drsObj, nil } - -// copied from -// https://github.com/uc-cdis/cdis-data-client/blob/master/gen3-client/g3cmd/utils.go#L540 -func uploadFile(furObject commonUtils.FileUploadRequestObject, retryCount int) error { - log.Println("Uploading data ...") - furObject.Bar.Start() - - client := &http.Client{} - resp, err := client.Do(furObject.Request) - if err != nil { - logs.AddToFailedLog(furObject.FilePath, furObject.Filename, furObject.FileMetadata, furObject.GUID, retryCount, false, true) - furObject.Bar.Finish() - return errors.New("Error occurred during upload: " + err.Error()) - } - if resp.StatusCode != 200 { - logs.AddToFailedLog(furObject.FilePath, furObject.Filename, furObject.FileMetadata, furObject.GUID, retryCount, false, true) - furObject.Bar.Finish() - return errors.New("Upload request got a non-200 response with status code " + strconv.Itoa(resp.StatusCode)) - } - furObject.Bar.Finish() - log.Printf("Successfully uploaded file \"%s\" to GUID %s.\n", furObject.FilePath, furObject.GUID) - logs.DeleteFromFailedLog(furObject.FilePath, true) - logs.WriteToSucceededLog(furObject.FilePath, furObject.GUID, false) - return nil -} diff --git a/transfer/transfer.go b/transfer/transfer.go index 7cfe5f9..23830d0 100644 --- a/transfer/transfer.go +++ b/transfer/transfer.go @@ -84,23 +84,12 @@ var ( ) func main() { - // f, err := os.OpenFile("transfer.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - // if err != nil { - // // fallback to stderr - // log.SetOutput(os.Stderr) - // } else { - // log.SetOutput(f) - // defer f.Close() - // } - // log.Println("~~~~~~~~~~~~~ custom transfer started 
~~~~~~~~~~~~~") - //setup logging to file for debugging myLogger, err := client.NewLogger("") if err != nil { - // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } - defer myLogger.Close() // Ensures cleanup + defer myLogger.Close() myLogger.Log("~~~~~~~~~~~~~ START: custom transfer ~~~~~~~~~~~~~") From d14717821fa215be8771ede255492de521cd7a39 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 16 Jun 2025 16:47:39 -0700 Subject: [PATCH 14/51] improve download to localize to .git/lfs/objects --- client/indexd.go | 30 +++++++++------ transfer/transfer.go | 92 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 19 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index 2551d9b..443a415 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -117,11 +117,20 @@ func NewIndexDClient(base string) (ObjectStoreClient, error) { // DownloadFile implements ObjectStoreClient func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) { + // setup logging + myLogger, err := NewLogger("") + if err != nil { + // Handle error (e.g., print to stderr and exit) + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() // Ensures cleanup + myLogger.Log("download file started for id: %s", id) + // get file from indexd a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id, "access", access_id) - fmt.Printf("\nusing API: %s\n", a.String()) + myLogger.Log("using API: %s\n", a.String()) // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) @@ -134,7 +143,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } - fmt.Printf("\nadded auth header") + myLogger.Log("added auth header") client := &http.Client{} response, err := client.Do(req) @@ -143,7 +152,7 @@ func (cl *IndexDClient) DownloadFile(id string, 
access_id string, dstPath string } defer response.Body.Close() - fmt.Printf("\ngot a response") + myLogger.Log("got a response") body, err := io.ReadAll(response.Body) if err != nil { @@ -156,12 +165,12 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, fmt.Errorf("unable to unmarshal response into drs.AccessURL, response looks like: %s", body) } - fmt.Printf("\nunmarshaled response into AccessURL struct") + myLogger.Log("unmarshaled response into AccessURL struct") // Extract the signed URL from the response signedURL := out.URL if signedURL == "" { - return nil, fmt.Errorf("\nsigned URL not found in response.") + return nil, fmt.Errorf("signed URL not found in response.") } // Download the file using the signed URL @@ -171,7 +180,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string } defer fileResponse.Body.Close() - fmt.Printf("\nfile download response status: %s\n", fileResponse.Status) + myLogger.Log("file download response status: %s\n", fileResponse.Status) // Check if the response status is OK if fileResponse.StatusCode != http.StatusOK { @@ -197,7 +206,7 @@ func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string return nil, err } - fmt.Printf("\nFile written to %s\n", dstFile.Name()) + myLogger.Log("File written to %s\n", dstFile.Name()) return &out, nil } @@ -223,12 +232,11 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { return nil, fmt.Errorf("error registering indexd record: %v", err) } - // TODO: upload file to bucket using gen3-client code - // pulled from gen3-client/g3cmd/upload.go - // https://github.com/uc-cdis/cdis-data-client/blob/df9c0820ab30e25ba8399c2cc6cccbecc2f0407a/gen3-client/g3cmd/upload.go/#L106-L150 - + // upload file to bucket using gen3-client code + // modified from gen3-client/g3cmd/upload-single.go filePath := GetObjectPath(oid) g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) + // 
TODO: if upload unsuccessful, delete record from indexd // return diff --git a/transfer/transfer.go b/transfer/transfer.go index 23830d0..c15fb34 100644 --- a/transfer/transfer.go +++ b/transfer/transfer.go @@ -6,6 +6,7 @@ import ( "fmt" "log" "os" + "strings" "github.com/bmeg/git-drs/client" ) @@ -22,8 +23,8 @@ type InitMessage struct { // CompleteMessage is a minimal response to signal transfer is "complete" type CompleteMessage struct { Event string `json:"event"` - Oid string `json:"oid,omitempty"` - Path string `json:"path,omitempty"` + Oid string `json:"oid"` + Path string `json:"path"` } // UploadMessage represents a request to upload an object. @@ -49,10 +50,14 @@ type TerminateMessage struct { Event string `json:"event"` // "terminate" } -// ErrorResponse is sent when an error occurs during a transfer. -type ErrorResponse struct { - Event string `json:"event"` // "error" - Oid string `json:"oid"` // Object ID involved in the error +// ErrorMessage is sent when an error occurs during a transfer. 
+type ErrorMessage struct { + Event string `json:"event"` // "error" + Oid string `json:"oid"` // Object ID involved in the error + Error Error `json:"error"` // Error details +} + +type Error struct { Code int `json:"code"` // Error code (standard or custom) Message string `json:"message"` // Human-readable error message } @@ -130,7 +135,66 @@ func main() { // Handle download event myLogger.Log(fmt.Sprintf("Handling download event: %s", msg)) - // FIXME: Here you would implement the logic to handle the download + // get download message + var downloadMsg DownloadMessage + if err := json.Unmarshal(scanner.Bytes(), &downloadMsg); err != nil { + myLogger.Log(fmt.Sprintf("Error parsing downloadMessage: %v\n", err)) + continue + } + + // get the DRS object using the OID + indexdObj, err := client.DrsInfoFromOid(downloadMsg.Oid) + if err != nil { + myLogger.Log(fmt.Sprintf("Error getting DRS info for OID %s: %v", downloadMsg.Oid, err)) + // create failure message and send it back + errorResponse := ErrorMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error retrieving DRS info: " + err.Error(), + }, + } + encoder.Encode(errorResponse) + continue + } + + // download file using the DRS object + myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", downloadMsg.Oid, indexdObj)) + + // FIXME: generalize access ID method, + // naively get access ID from splitting first path into : + accessId := strings.Split(indexdObj.URLs[0], ":")[0] + myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) + + // download the file using the indexd client + dstPath := client.GetObjectPath(downloadMsg.Oid) + _, err = drsClient.DownloadFile(indexdObj.Did, accessId, dstPath) + if err != nil { + myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) + + // create failure message and send it back + errorResponse := 
ErrorMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error downloading file: " + err.Error(), + }, + } + encoder.Encode(errorResponse) + continue + } + myLogger.Log(fmt.Sprintf("Download for OID %s complete", downloadMsg.Oid)) + + // send success message back + completeMsg := CompleteMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Path: dstPath, + } + encoder.Encode(completeMsg) + } else if evt, ok := msg["event"]; ok && evt == "upload" { // Handle upload event myLogger.Log(fmt.Sprintf("Handling upload event: %s", msg)) @@ -143,10 +207,21 @@ func main() { } myLogger.Log(fmt.Sprintf("Got UploadMessage: %+v\n", uploadMsg)) - // FIXME: Here you would implement the logic to handle the upload + // handle the upload via drs client (indexd client) drsObj, err := drsClient.RegisterFile(uploadMsg.Oid) if err != nil { myLogger.Log(fmt.Sprintf("Error, DRS Object: %+v\n", drsObj)) + + // create failure message and send it to back + errorResponse := ErrorMessage{ + Event: "complete", + Oid: uploadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error registering file: " + err.Error(), + }, + } + encoder.Encode(errorResponse) continue } @@ -156,6 +231,7 @@ func main() { completeMsg := CompleteMessage{ Event: "complete", Oid: uploadMsg.Oid, + Path: drsObj.Name, } myLogger.Log(fmt.Sprintf("Complete message: %+v", completeMsg)) encoder.Encode(completeMsg) From 50cdbdd3af61fa344e518151ce5e82eddd4e903f Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 17 Jun 2025 15:49:03 -0700 Subject: [PATCH 15/51] creation date stub --- client/drs-map.go | 11 +++++++---- client/indexd.go | 14 +++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index 4d12ade..a4af0d1 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -86,10 +86,11 @@ func UpdateDrsMap() error { continue } + // FIXME: do we want to hash this with the project ID instead? 
drsId := DrsUUID(repoName, file.Oid) logger.Log("Working with file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) - // stat the file to use modification time later + // get file info needed to create indexd record path := GetObjectPath(file.Oid) if _, err := os.Stat(path); os.IsNotExist(err) { return fmt.Errorf("Error: File %s does not exist in LFS objects path %s. Aborting.", file.Name, path) @@ -98,6 +99,7 @@ func UpdateDrsMap() error { // if err != nil { // return fmt.Errorf("error getting file info: %v", err) // } + // modDate := fileInfo.ModTime().Format("2025-05-07T21:29:09.585275") // created date per RFC3339 // get url using bucket name, drsId, and file name cfg, err := LoadConfig() // should this be handled only via indexd client? @@ -111,7 +113,7 @@ func UpdateDrsMap() error { fileURL := fmt.Sprintf("s3://" + filepath.Join(bucketName, drsId, file.Oid)) // If the oid exists in drsMap, check if it matches the calculated uuid - // FIXME: naive method, where only the first file with the same oid is stored + // TODO: naive method, where only the first file with the same oid is stored // in the future, will need to handle multiple files with the same oid if existing, ok := drsMap[drsId]; ok { if existing.Did != drsId { @@ -126,7 +128,8 @@ func UpdateDrsMap() error { Hashes: HashInfo{SHA256: file.Oid}, Size: file.Size, Authz: []string{repoName}, - // CreatedDate: fileInfo.ModTime().Format("2025-05-07T21:29:09.585275"), // created date per RFC3339? + // ContentCreatedDate: modDate, + // ContentUpdatedDate: modDate, } logger.Log("Adding to drsMap: %s -> %s", file.Name, drsMap[file.Name].Did) } @@ -157,7 +160,7 @@ func UpdateDrsMap() error { } func GetRepoNameFromGit() (string, error) { - // FIXME: change to retrieve from git config directly? Or use go-git? + // TODO: change to retrieve from git config directly? Or use go-git? 
cmd := exec.Command("git", "config", "--get", "remote.origin.url") out, err := cmd.Output() if err != nil { diff --git a/client/indexd.go b/client/indexd.go index 443a415..c528d1a 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -29,7 +29,7 @@ type IndexDClient struct { // subset of the OpenAPI spec for the InputInfo object in indexd // https://github.com/uc-cdis/indexd/blob/master/openapis/swagger.yaml -// TODO: use VersionInputInfo and indexd/ instead to allow writes to content_created_date +// TODO: make another object based on VersionInputInfo that has content_created_date and so can handle a POST of dates via indexd/ type IndexdRecord struct { // Unique identifier for the record (UUID) Did string `json:"did"` @@ -58,10 +58,10 @@ type IndexdRecord struct { Version string `json:"version,omitempty"` // // Created timestamp (RFC3339 format) - // CreatedDate string `json:"created_date,omitempty"` + // ContentCreatedDate string `json:"content_created_date,omitempty"` // // Updated timestamp (RFC3339 format) - // UpdatedDate string `json:"updated_date,omitempty"` + // ContentUpdatedDate string `json:"content_updated_date,omitempty"` } // HashInfo represents file hash information as per OpenAPI spec @@ -310,10 +310,10 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. // register DRS object via /index POST // (setup post request to indexd) - a := *cl.base - a.Path = filepath.Join(a.Path, "index", "index") + endpt := *cl.base + endpt.Path = filepath.Join(endpt.Path, "index", "index") - req, err := http.NewRequest("POST", a.String(), bytes.NewBuffer(jsonBytes)) + req, err := http.NewRequest("POST", endpt.String(), bytes.NewBuffer(jsonBytes)) if err != nil { return nil, err } @@ -329,7 +329,7 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. 
return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } - myLogger.Log("POST request created for indexd: %s", a.String()) + myLogger.Log("POST request created for indexd: %s", endpt.String()) client := &http.Client{} response, err := client.Do(req) From bfc5713ab5337bd932d6b8722c58e35f418d62bc Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 17 Jun 2025 15:54:41 -0700 Subject: [PATCH 16/51] update gen3-client location to be within directory --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c6772bc..c9c0243 100644 --- a/go.mod +++ b/go.mod @@ -34,4 +34,4 @@ require ( gopkg.in/ini.v1 v1.66.3 // indirect ) -replace github.com/uc-cdis/gen3-client => ../cdis-data-client +replace github.com/uc-cdis/gen3-client => ./cdis-data-client From c185674d297829e297ccf43b96eab85c5ad633db Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 17 Jun 2025 15:56:19 -0700 Subject: [PATCH 17/51] logs --- client/indexd.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/client/indexd.go b/client/indexd.go index c528d1a..29be62d 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -235,9 +235,15 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // upload file to bucket using gen3-client code // modified from gen3-client/g3cmd/upload-single.go filePath := GetObjectPath(oid) - g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) + err = g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) // TODO: if upload unsuccessful, delete record from indexd + if err != nil { + myLogger.Log("error uploading file to bucket: %s", err) + myLogger.Log("please delete the indexd record manually if needed for DRS ID: %s", drsObj.Id) + myLogger.Log("see https://uc-cdis.github.io/gen3sdk-python/_build/html/indexing.html") + return nil, fmt.Errorf("error uploading file to bucket: %v", err) + } // return return drsObj, nil From be629da02fdc0bb03179cbaa0b023c44ad7ef30c Mon 
Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 18 Jun 2025 11:02:53 -0700 Subject: [PATCH 18/51] fix build warning --- client/drs-map.go | 23 +++++++++-------------- client/indexd.go | 6 +++++- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index a4af0d1..8b24a3e 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -2,6 +2,7 @@ package client import ( "encoding/json" + "errors" "fmt" "log" "os" @@ -38,15 +39,6 @@ var ( ) func UpdateDrsMap() error { - // f, err := os.OpenFile("transfer.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - // if err != nil { - // // fallback to stderr - // log.SetOutput(os.Stderr) - // } else { - // log.SetOutput(f) - // defer f.Close() - // } - logger, err := NewLogger("") if err != nil { log.Fatalf("Failed to open log file: %v", err) @@ -91,7 +83,10 @@ func UpdateDrsMap() error { logger.Log("Working with file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) // get file info needed to create indexd record - path := GetObjectPath(file.Oid) + path, err := GetObjectPath(file.Oid) + if err != nil { + return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) + } if _, err := os.Stat(path); os.IsNotExist(err) { return fmt.Errorf("Error: File %s does not exist in LFS objects path %s. 
Aborting.", file.Name, path) } @@ -110,7 +105,7 @@ func UpdateDrsMap() error { if bucketName == "" { return fmt.Errorf("error: bucket name is empty in config file") } - fileURL := fmt.Sprintf("s3://" + filepath.Join(bucketName, drsId, file.Oid)) + fileURL := fmt.Sprintf("s3://%s", filepath.Join(bucketName, drsId, file.Oid)) // If the oid exists in drsMap, check if it matches the calculated uuid // TODO: naive method, where only the first file with the same oid is stored @@ -210,11 +205,11 @@ func DrsInfoFromOid(oid string) (IndexdRecord, error) { return IndexdRecord{}, fmt.Errorf("DRS object not found for oid %s in %s", oid, DRS_MAP_FILE_NAME) } -func GetObjectPath(oid string) string { +func GetObjectPath(oid string) (string, error) { // check that oid is a valid sha256 hash if len(oid) != 64 { - return fmt.Sprintf("Error: %s is not a valid sha256 hash", oid) + return "", errors.New(fmt.Sprintf("Error: %s is not a valid sha256 hash", oid)) } - return filepath.Join(LFS_OBJS_PATH, oid[:2], oid[2:4], oid) + return filepath.Join(LFS_OBJS_PATH, oid[:2], oid[2:4], oid), nil } diff --git a/client/indexd.go b/client/indexd.go index 29be62d..64812c4 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -234,7 +234,11 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // upload file to bucket using gen3-client code // modified from gen3-client/g3cmd/upload-single.go - filePath := GetObjectPath(oid) + filePath, err := GetObjectPath(oid) + if err != nil { + myLogger.Log("error getting object path for oid %s: %s", oid, err) + return nil, fmt.Errorf("error getting object path for oid %s: %v", oid, err) + } err = g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) // TODO: if upload unsuccessful, delete record from indexd From a1755fc8b7bc6a31dc8c5791a34de16b42690bea Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 18 Jun 2025 15:57:23 -0700 Subject: [PATCH 19/51] add cdis-data-client submodule --- .gitmodules | 4 ++++ 
cdis-data-client | 1 + 2 files changed, 5 insertions(+) create mode 100644 .gitmodules create mode 160000 cdis-data-client diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e8b495c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "cdis-data-client"] + path = cdis-data-client + url = https://github.com/ACED-IDP/cdis-data-client.git + branch = lfs-usage diff --git a/cdis-data-client b/cdis-data-client new file mode 160000 index 0000000..6c9fab3 --- /dev/null +++ b/cdis-data-client @@ -0,0 +1 @@ +Subproject commit 6c9fab350913eb7fb17360bd47b4633fc7bb90dc From 7b89b86a4300b4ab52c51c01988cc0a0d197ff7a Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 18 Jun 2025 16:47:35 -0700 Subject: [PATCH 20/51] refactor precommit and transfer into cli --- client/drs-map.go | 11 +- client/pre-commit/main.go | 31 ----- client/transfer.log | 24 ++++ cmd/precommit/main.go | 50 ++++++++ cmd/root.go | 14 ++- cmd/transfer/main.go | 259 ++++++++++++++++++++++++++++++++++++++ transfer/transfer.go | 251 ------------------------------------ 7 files changed, 352 insertions(+), 288 deletions(-) delete mode 100644 client/pre-commit/main.go create mode 100644 client/transfer.log create mode 100644 cmd/precommit/main.go create mode 100644 cmd/transfer/main.go delete mode 100644 transfer/transfer.go diff --git a/client/drs-map.go b/client/drs-map.go index 8b24a3e..c6f222e 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -107,6 +107,15 @@ func UpdateDrsMap() error { } fileURL := fmt.Sprintf("s3://%s", filepath.Join(bucketName, drsId, file.Oid)) + // create authz string from profile + fmt.Println("cfg.Gen3Project:", cfg.Gen3Project) + // check if project ID is valid + if !strings.Contains(cfg.Gen3Project, "-") { + return fmt.Errorf("error: invalid project ID %s in config file, ID should look like -", cfg.Gen3Project) + } + projectIdArr := strings.SplitN(cfg.Gen3Project, "-", 2) + authzStr := "/programs/" + projectIdArr[0] + "/projects/" + projectIdArr[1] 
+ // If the oid exists in drsMap, check if it matches the calculated uuid // TODO: naive method, where only the first file with the same oid is stored // in the future, will need to handle multiple files with the same oid @@ -122,7 +131,7 @@ func UpdateDrsMap() error { URLs: []string{fileURL}, Hashes: HashInfo{SHA256: file.Oid}, Size: file.Size, - Authz: []string{repoName}, + Authz: []string{authzStr}, // ContentCreatedDate: modDate, // ContentUpdatedDate: modDate, } diff --git a/client/pre-commit/main.go b/client/pre-commit/main.go deleted file mode 100644 index 8ebfcc0..0000000 --- a/client/pre-commit/main.go +++ /dev/null @@ -1,31 +0,0 @@ -package main - -import ( - "fmt" - "log" - - "github.com/bmeg/git-drs/client" -) - -// should this be a main method or a separate command? -// TODO: might need to split this up into command and indexd-specific client code -func main() { - myLogger, err := client.NewLogger("") - if err != nil { - // Handle error (e.g., print to stderr and exit) - log.Fatalf("Failed to open log file: %v", err) - } - defer myLogger.Close() // Ensures cleanup - - myLogger.Log("~~~~~~~~~~~~~ START: pre-commit ~~~~~~~~~~~~~") - myLogger.Log(" started") - - err = client.UpdateDrsMap() - - // reopen log file - if err != nil { - fmt.Println("updateDrsMap failed:", err) - log.Fatalf("updateDrsMap failed: %v", err) - } - myLogger.Log("~~~~~~~~~~~~~ COMPLETED: pre-commit ~~~~~~~~~~~~~") -} diff --git a/client/transfer.log b/client/transfer.log new file mode 100644 index 0000000..acb4a37 --- /dev/null +++ b/client/transfer.log @@ -0,0 +1,24 @@ +2025/06/17 17:29:06 updateDrsMap started +2025/06/17 17:29:06 git lfs ls-files output: { + "files": null +} + +2025/06/17 17:29:06 Repo Name: git-gen3 +2025/06/17 17:29:06 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-4230269377.json +2025/06/17 17:29:06 Updated drs-map.json with 0 entries +2025/06/17 17:30:13 updateDrsMap started +2025/06/17 17:30:13 git lfs ls-files output: { + 
"files": null +} + +2025/06/17 17:30:13 Repo Name: git-gen3 +2025/06/17 17:30:13 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-3449762568.json +2025/06/17 17:30:13 Updated drs-map.json with 0 entries +2025/06/17 17:32:20 updateDrsMap started +2025/06/17 17:32:20 git lfs ls-files output: { + "files": null +} + +2025/06/17 17:32:20 Repo Name: git-gen3 +2025/06/17 17:32:20 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-1558809153.json +2025/06/17 17:32:20 Updated drs-map.json with 0 entries diff --git a/cmd/precommit/main.go b/cmd/precommit/main.go new file mode 100644 index 0000000..41a88d7 --- /dev/null +++ b/cmd/precommit/main.go @@ -0,0 +1,50 @@ +package precommit + +import ( + "fmt" + "log" + "os" + + "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var ( + server string + dstPath string + drsObj *drs.DRSObject +) + +// Cmd line declaration +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "precommit", + Short: "pre-commit hook to create DRS objects", + Long: "Pre-commit hook that creates DRS objects based on LFS files in the repo. 
Stores it to a drs-map.json", + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 0 { + fmt.Fprintln(os.Stderr, "This command does not take any arguments.") + os.Exit(1) + } + + myLogger, err := client.NewLogger("") + if err != nil { + // Handle error (e.g., print to stderr and exit) + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() // Ensures cleanup + + myLogger.Log("~~~~~~~~~~~~~ START: pre-commit ~~~~~~~~~~~~~") + + err = client.UpdateDrsMap() + if err != nil { + fmt.Println("updateDrsMap failed:", err) + log.Fatalf("updateDrsMap failed: %v", err) + return err + } + + myLogger.Log("~~~~~~~~~~~~~ COMPLETED: pre-commit ~~~~~~~~~~~~~") + return nil + }, +} diff --git a/cmd/root.go b/cmd/root.go index cb463eb..2afa390 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,10 +8,12 @@ import ( "github.com/bmeg/git-drs/cmd/filterprocess" "github.com/bmeg/git-drs/cmd/initialize" "github.com/bmeg/git-drs/cmd/list" + "github.com/bmeg/git-drs/cmd/precommit" "github.com/bmeg/git-drs/cmd/pull" "github.com/bmeg/git-drs/cmd/push" "github.com/bmeg/git-drs/cmd/query" "github.com/bmeg/git-drs/cmd/register" + "github.com/bmeg/git-drs/cmd/transfer" "github.com/spf13/cobra" ) @@ -26,16 +28,18 @@ var RootCmd = &cobra.Command{ } func init() { + RootCmd.AddCommand(add.Cmd) + RootCmd.AddCommand(download.Cmd) + RootCmd.AddCommand(filterprocess.Cmd) + RootCmd.AddCommand(genBashCompletionCmd) RootCmd.AddCommand(initialize.Cmd) + RootCmd.AddCommand(list.Cmd) + RootCmd.AddCommand(precommit.Cmd) RootCmd.AddCommand(push.Cmd) RootCmd.AddCommand(pull.Cmd) - RootCmd.AddCommand(list.Cmd) - RootCmd.AddCommand(add.Cmd) - RootCmd.AddCommand(filterprocess.Cmd) RootCmd.AddCommand(query.Cmd) RootCmd.AddCommand(register.Cmd) - RootCmd.AddCommand(download.Cmd) - RootCmd.AddCommand(genBashCompletionCmd) + RootCmd.AddCommand(transfer.Cmd) } var genBashCompletionCmd = &cobra.Command{ diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go new file mode 100644 
index 0000000..2c70c49 --- /dev/null +++ b/cmd/transfer/main.go @@ -0,0 +1,259 @@ +package transfer + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "os" + "strings" + + "github.com/bmeg/git-drs/client" + "github.com/spf13/cobra" +) + +// InitMessage represents the structure of the initiation data +type InitMessage struct { + Event string `json:"event"` // Always "init" to identify this message + Operation string `json:"operation"` // "upload" or "download" depending on transfer direction + Remote string `json:"remote"` // Git remote name or URL + Concurrent bool `json:"concurrent"` // Reflects lfs.customtransfer..concurrent + ConcurrentTransfers int `json:"concurrenttransfers"` // Reflects lfs.concurrenttransfers value +} + +// CompleteMessage is a minimal response to signal transfer is "complete" +type CompleteMessage struct { + Event string `json:"event"` + Oid string `json:"oid"` + Path string `json:"path"` +} + +// UploadMessage represents a request to upload an object. +type UploadMessage struct { + Event string `json:"event"` // "upload" + Oid string `json:"oid"` // Object ID (SHA-256 hash) + Size int64 `json:"size"` // Size in bytes + Path string `json:"path"` // Local path to file + Action *Action `json:"action"` // Transfer action details (optional, may be omitted) +} + +// DownloadMessage represents a request to download an object. +type DownloadMessage struct { + Event string `json:"event"` // "download" + Oid string `json:"oid"` // Object ID (SHA-256 hash) + Size int64 `json:"size"` // Size in bytes + Action *Action `json:"action"` // Transfer action details (optional, may be omitted) + Path string `json:"path"` // Where to store the downloaded file +} + +// TerminateMessage is sent when the agent should terminate. +type TerminateMessage struct { + Event string `json:"event"` // "terminate" +} + +// ErrorMessage is sent when an error occurs during a transfer. 
+type ErrorMessage struct { + Event string `json:"event"` // "error" + Oid string `json:"oid"` // Object ID involved in the error + Error Error `json:"error"` // Error details +} + +type Error struct { + Code int `json:"code"` // Error code (standard or custom) + Message string `json:"message"` // Human-readable error message +} + +// ProgressResponse provides progress updates for an object transfer. +type ProgressResponse struct { + Event string `json:"event"` // "progress" + Oid string `json:"oid"` // Object ID being transferred + BytesSoFar int64 `json:"bytesSoFar"` // Bytes transferred so far + BytesSinceLast int64 `json:"bytesSinceLast"` // Bytes transferred since last progress message +} + +// TerminateResponse signals the agent has completed termination. +type TerminateResponse struct { + Event string `json:"event"` // "terminate" +} + +// Action is an optional struct representing transfer actions (upload/download URLs, etc.) +type Action struct { + Href string `json:"href"` + Header map[string]string `json:"header,omitempty"` + ExpiresIn int `json:"expires_in,omitempty"` +} + +var ( + req InitMessage + drsClient client.ObjectStoreClient + operation string // "upload" or "download", set by the init message +) + +var Cmd = &cobra.Command{ + Use: "transfer", + Short: "register LFS files into gen3 during git push", + Long: "custom transfer mechanism to register LFS files up to gen3 during git push. 
For new files, creates an indexd record and uploads to the bucket", + RunE: func(cmd *cobra.Command, args []string) error { + //setup logging to file for debugging + myLogger, err := client.NewLogger("") + if err != nil { + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() + + myLogger.Log("~~~~~~~~~~~~~ START: custom transfer ~~~~~~~~~~~~~") + + scanner := bufio.NewScanner(os.Stdin) + encoder := json.NewEncoder(os.Stdout) + + for scanner.Scan() { + var msg map[string]interface{} + err := json.Unmarshal(scanner.Bytes(), &msg) + if err != nil { + myLogger.Log(fmt.Sprintf("error decoding JSON: %s", err)) + continue + } + myLogger.Log(fmt.Sprintf("Received message: %s", msg)) + + // Example: handle only "init" event + if evt, ok := msg["event"]; ok && evt == "init" { + // Log for debugging + myLogger.Log(fmt.Sprintf("Handling init: %s", msg)) + + // setup indexd client + cfg, err := client.LoadConfig() + if err != nil { + myLogger.Log(fmt.Sprintf("Error loading config: %s", err)) + } + + baseURL := cfg.QueryServer.BaseURL + drsClient, err = client.NewIndexDClient(baseURL) + if err != nil { + myLogger.Log(fmt.Sprintf("Error creating indexd client: %s", err)) + continue + } + + // Respond with an empty json object via stdout + encoder.Encode(struct{}{}) + myLogger.Log("Responding to init with empty object") + } else if evt, ok := msg["event"]; ok && evt == "download" { + // Handle download event + myLogger.Log(fmt.Sprintf("Handling download event: %s", msg)) + + // get download message + var downloadMsg DownloadMessage + if err := json.Unmarshal(scanner.Bytes(), &downloadMsg); err != nil { + myLogger.Log(fmt.Sprintf("Error parsing downloadMessage: %v\n", err)) + continue + } + + // get the DRS object using the OID + indexdObj, err := client.DrsInfoFromOid(downloadMsg.Oid) + if err != nil { + myLogger.Log(fmt.Sprintf("Error getting DRS info for OID %s: %v", downloadMsg.Oid, err)) + // create failure message and send it back + errorResponse := 
ErrorMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error retrieving DRS info: " + err.Error(), + }, + } + encoder.Encode(errorResponse) + continue + } + + // download file using the DRS object + myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", downloadMsg.Oid, indexdObj)) + + // FIXME: generalize access ID method, + // naively get access ID from splitting first path into : + accessId := strings.Split(indexdObj.URLs[0], ":")[0] + myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) + + // download the file using the indexd client + dstPath, err := client.GetObjectPath(downloadMsg.Oid) + _, err = drsClient.DownloadFile(indexdObj.Did, accessId, dstPath) + if err != nil { + myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) + + // create failure message and send it back + errorResponse := ErrorMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error downloading file: " + err.Error(), + }, + } + encoder.Encode(errorResponse) + continue + } + myLogger.Log(fmt.Sprintf("Download for OID %s complete", downloadMsg.Oid)) + + // send success message back + completeMsg := CompleteMessage{ + Event: "complete", + Oid: downloadMsg.Oid, + Path: dstPath, + } + encoder.Encode(completeMsg) + + } else if evt, ok := msg["event"]; ok && evt == "upload" { + // Handle upload event + myLogger.Log(fmt.Sprintf("Handling upload event: %s", msg)) + + // create UploadMessage from the received message + var uploadMsg UploadMessage + if err := json.Unmarshal(scanner.Bytes(), &uploadMsg); err != nil { + myLogger.Log(fmt.Sprintf("Error parsing UploadMessage: %v\n", err)) + continue + } + myLogger.Log(fmt.Sprintf("Got UploadMessage: %+v\n", uploadMsg)) + + // handle the upload via drs client (indexd client) + drsObj, err := drsClient.RegisterFile(uploadMsg.Oid) + 
if err != nil { + myLogger.Log(fmt.Sprintf("Error, DRS Object: %+v\n", drsObj)) + + // create failure message and send it to back + errorResponse := ErrorMessage{ + Event: "complete", + Oid: uploadMsg.Oid, + Error: Error{ + Code: 500, + Message: "Error registering file: " + err.Error(), + }, + } + encoder.Encode(errorResponse) + continue + } + + myLogger.Log("creating response message with oid %s", uploadMsg.Oid) + + // send success message back + completeMsg := CompleteMessage{ + Event: "complete", + Oid: uploadMsg.Oid, + Path: drsObj.Name, + } + myLogger.Log(fmt.Sprintf("Complete message: %+v", completeMsg)) + encoder.Encode(completeMsg) + + myLogger.Log("Upload for oid %s complete", uploadMsg.Oid) + } else if evt, ok := msg["event"]; ok && evt == "terminate" { + // Handle terminate event + myLogger.Log(fmt.Sprintf("terminate event received: %s", msg)) + } + } + + if err := scanner.Err(); err != nil { + myLogger.Log(fmt.Sprintf("stdin error: %s", err)) + } + + myLogger.Log("~~~~~~~~~~~~~ COMPLETED: custom transfer ~~~~~~~~~~~~~") + + return nil + }, +} diff --git a/transfer/transfer.go b/transfer/transfer.go deleted file mode 100644 index c15fb34..0000000 --- a/transfer/transfer.go +++ /dev/null @@ -1,251 +0,0 @@ -package main - -import ( - "bufio" - "encoding/json" - "fmt" - "log" - "os" - "strings" - - "github.com/bmeg/git-drs/client" -) - -// InitMessage represents the structure of the initiation data -type InitMessage struct { - Event string `json:"event"` // Always "init" to identify this message - Operation string `json:"operation"` // "upload" or "download" depending on transfer direction - Remote string `json:"remote"` // Git remote name or URL - Concurrent bool `json:"concurrent"` // Reflects lfs.customtransfer..concurrent - ConcurrentTransfers int `json:"concurrenttransfers"` // Reflects lfs.concurrenttransfers value -} - -// CompleteMessage is a minimal response to signal transfer is "complete" -type CompleteMessage struct { - Event string 
`json:"event"` - Oid string `json:"oid"` - Path string `json:"path"` -} - -// UploadMessage represents a request to upload an object. -type UploadMessage struct { - Event string `json:"event"` // "upload" - Oid string `json:"oid"` // Object ID (SHA-256 hash) - Size int64 `json:"size"` // Size in bytes - Path string `json:"path"` // Local path to file - Action *Action `json:"action"` // Transfer action details (optional, may be omitted) -} - -// DownloadMessage represents a request to download an object. -type DownloadMessage struct { - Event string `json:"event"` // "download" - Oid string `json:"oid"` // Object ID (SHA-256 hash) - Size int64 `json:"size"` // Size in bytes - Action *Action `json:"action"` // Transfer action details (optional, may be omitted) - Path string `json:"path"` // Where to store the downloaded file -} - -// TerminateMessage is sent when the agent should terminate. -type TerminateMessage struct { - Event string `json:"event"` // "terminate" -} - -// ErrorMessage is sent when an error occurs during a transfer. -type ErrorMessage struct { - Event string `json:"event"` // "error" - Oid string `json:"oid"` // Object ID involved in the error - Error Error `json:"error"` // Error details -} - -type Error struct { - Code int `json:"code"` // Error code (standard or custom) - Message string `json:"message"` // Human-readable error message -} - -// ProgressResponse provides progress updates for an object transfer. -type ProgressResponse struct { - Event string `json:"event"` // "progress" - Oid string `json:"oid"` // Object ID being transferred - BytesSoFar int64 `json:"bytesSoFar"` // Bytes transferred so far - BytesSinceLast int64 `json:"bytesSinceLast"` // Bytes transferred since last progress message -} - -// TerminateResponse signals the agent has completed termination. -type TerminateResponse struct { - Event string `json:"event"` // "terminate" -} - -// Action is an optional struct representing transfer actions (upload/download URLs, etc.) 
-type Action struct { - Href string `json:"href"` - Header map[string]string `json:"header,omitempty"` - ExpiresIn int `json:"expires_in,omitempty"` -} - -var ( - req InitMessage - drsClient client.ObjectStoreClient - operation string // "upload" or "download", set by the init message -) - -func main() { - //setup logging to file for debugging - myLogger, err := client.NewLogger("") - if err != nil { - log.Fatalf("Failed to open log file: %v", err) - } - defer myLogger.Close() - - myLogger.Log("~~~~~~~~~~~~~ START: custom transfer ~~~~~~~~~~~~~") - - scanner := bufio.NewScanner(os.Stdin) - encoder := json.NewEncoder(os.Stdout) - - for scanner.Scan() { - var msg map[string]interface{} - err := json.Unmarshal(scanner.Bytes(), &msg) - if err != nil { - myLogger.Log(fmt.Sprintf("error decoding JSON: %s", err)) - continue - } - myLogger.Log(fmt.Sprintf("Received message: %s", msg)) - - // Example: handle only "init" event - if evt, ok := msg["event"]; ok && evt == "init" { - // Log for debugging - myLogger.Log(fmt.Sprintf("Handling init: %s", msg)) - - // setup indexd client - cfg, err := client.LoadConfig() - if err != nil { - myLogger.Log(fmt.Sprintf("Error loading config: %s", err)) - } - - baseURL := cfg.QueryServer.BaseURL - drsClient, err = client.NewIndexDClient(baseURL) - if err != nil { - myLogger.Log(fmt.Sprintf("Error creating indexd client: %s", err)) - continue - } - - // Respond with an empty json object via stdout - encoder.Encode(struct{}{}) - myLogger.Log("Responding to init with empty object") - } else if evt, ok := msg["event"]; ok && evt == "download" { - // Handle download event - myLogger.Log(fmt.Sprintf("Handling download event: %s", msg)) - - // get download message - var downloadMsg DownloadMessage - if err := json.Unmarshal(scanner.Bytes(), &downloadMsg); err != nil { - myLogger.Log(fmt.Sprintf("Error parsing downloadMessage: %v\n", err)) - continue - } - - // get the DRS object using the OID - indexdObj, err := 
client.DrsInfoFromOid(downloadMsg.Oid) - if err != nil { - myLogger.Log(fmt.Sprintf("Error getting DRS info for OID %s: %v", downloadMsg.Oid, err)) - // create failure message and send it back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: downloadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error retrieving DRS info: " + err.Error(), - }, - } - encoder.Encode(errorResponse) - continue - } - - // download file using the DRS object - myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", downloadMsg.Oid, indexdObj)) - - // FIXME: generalize access ID method, - // naively get access ID from splitting first path into : - accessId := strings.Split(indexdObj.URLs[0], ":")[0] - myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) - - // download the file using the indexd client - dstPath := client.GetObjectPath(downloadMsg.Oid) - _, err = drsClient.DownloadFile(indexdObj.Did, accessId, dstPath) - if err != nil { - myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) - - // create failure message and send it back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: downloadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error downloading file: " + err.Error(), - }, - } - encoder.Encode(errorResponse) - continue - } - myLogger.Log(fmt.Sprintf("Download for OID %s complete", downloadMsg.Oid)) - - // send success message back - completeMsg := CompleteMessage{ - Event: "complete", - Oid: downloadMsg.Oid, - Path: dstPath, - } - encoder.Encode(completeMsg) - - } else if evt, ok := msg["event"]; ok && evt == "upload" { - // Handle upload event - myLogger.Log(fmt.Sprintf("Handling upload event: %s", msg)) - - // create UploadMessage from the received message - var uploadMsg UploadMessage - if err := json.Unmarshal(scanner.Bytes(), &uploadMsg); err != nil { - myLogger.Log(fmt.Sprintf("Error parsing UploadMessage: 
%v\n", err)) - continue - } - myLogger.Log(fmt.Sprintf("Got UploadMessage: %+v\n", uploadMsg)) - - // handle the upload via drs client (indexd client) - drsObj, err := drsClient.RegisterFile(uploadMsg.Oid) - if err != nil { - myLogger.Log(fmt.Sprintf("Error, DRS Object: %+v\n", drsObj)) - - // create failure message and send it to back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: uploadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error registering file: " + err.Error(), - }, - } - encoder.Encode(errorResponse) - continue - } - - myLogger.Log("creating response message with oid %s", uploadMsg.Oid) - - // send success message back - completeMsg := CompleteMessage{ - Event: "complete", - Oid: uploadMsg.Oid, - Path: drsObj.Name, - } - myLogger.Log(fmt.Sprintf("Complete message: %+v", completeMsg)) - encoder.Encode(completeMsg) - - myLogger.Log("Upload for oid %s complete", uploadMsg.Oid) - } else if evt, ok := msg["event"]; ok && evt == "terminate" { - // Handle terminate event - myLogger.Log(fmt.Sprintf("terminate event received: %s", msg)) - } - } - - if err := scanner.Err(); err != nil { - myLogger.Log(fmt.Sprintf("stdin error: %s", err)) - } - - myLogger.Log("~~~~~~~~~~~~~ COMPLETED: custom transfer ~~~~~~~~~~~~~") -} From 7b841ddc2be362f7c181bb733b2a4d3ed3361985 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 18 Jun 2025 17:07:02 -0700 Subject: [PATCH 21/51] implement init to create pre-commit hook and git config for custom transfer --- cmd/initialize/main.go | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/cmd/initialize/main.go b/cmd/initialize/main.go index 74a6fba..df221ad 100644 --- a/cmd/initialize/main.go +++ b/cmd/initialize/main.go @@ -1,7 +1,9 @@ package initialize import ( - "fmt" + "os" + "os/exec" + "path/filepath" "github.com/spf13/cobra" ) @@ -9,11 +11,35 @@ import ( // Cmd line declaration var Cmd = &cobra.Command{ Use: "init", - Short: "Initialize a repo", - Long: ``, + 
Short: "initialize required setup for git-drs", + Long: "initialize hooks and config required for git-drs", Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { - fmt.Printf("Running init\n") + // Create .git/hooks/pre-commit file + hooksDir := filepath.Join(".git", "hooks") + preCommitPath := filepath.Join(hooksDir, "pre-commit") + if err := os.MkdirAll(hooksDir, 0755); err != nil { + return err + } + hookContent := "#!/bin/sh\ngit drs precommit\n" + if err := os.WriteFile(preCommitPath, []byte(hookContent), 0755); err != nil { + return err + } + + // set git config so git lfs uses gen3 custom transfer agent + configs := [][]string{ + {"lfs.standalonetransferagent", "gen3"}, + {"lfs.customtransfer.gen3.path", "git-drs"}, + {"lfs.customtransfer.gen3.args", "transfer"}, + {"lfs.customtransfer.gen3.concurrent", "false"}, + } + for _, cfg := range configs { + cmd := exec.Command("git", "config", cfg[0], cfg[1]) + if err := cmd.Run(); err != nil { + return err + } + } + return nil }, } From a49db2bbf91eab2eef376bef0acf9aa44b4a7ee1 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Thu, 19 Jun 2025 13:23:30 -0700 Subject: [PATCH 22/51] update branch for submodules --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index e8b495c..ee33556 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "cdis-data-client"] path = cdis-data-client url = https://github.com/ACED-IDP/cdis-data-client.git - branch = lfs-usage + branch = fixes/code-cleanup From fa2249abab0231b91f9dac4647e921ebca398e9e Mon Sep 17 00:00:00 2001 From: quinnwai Date: Thu, 19 Jun 2025 14:19:39 -0700 Subject: [PATCH 23/51] enable init to pass in the right keys --- .gitmodules | 2 +- cdis-data-client | 2 +- cmd/initialize/main.go | 29 ++++++++++++++++++++++++++++- cmd/root.go | 6 +++--- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/.gitmodules b/.gitmodules index ee33556..5593b2f 100644 --- 
a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "cdis-data-client"] path = cdis-data-client url = https://github.com/ACED-IDP/cdis-data-client.git - branch = fixes/code-cleanup + branch = feature/lfs-usage \ No newline at end of file diff --git a/cdis-data-client b/cdis-data-client index 6c9fab3..e5ea947 160000 --- a/cdis-data-client +++ b/cdis-data-client @@ -1 +1 @@ -Subproject commit 6c9fab350913eb7fb17360bd47b4633fc7bb90dc +Subproject commit e5ea9478f173059bf6a75b42392198efb97c2d3c diff --git a/cmd/initialize/main.go b/cmd/initialize/main.go index df221ad..7dcd55c 100644 --- a/cmd/initialize/main.go +++ b/cmd/initialize/main.go @@ -1,28 +1,38 @@ package initialize import ( + "fmt" "os" "os/exec" "path/filepath" "github.com/spf13/cobra" + "github.com/uc-cdis/gen3-client/gen3-client/jwt" +) + +var ( + profile string + credFile string + apiEndpoint string ) // Cmd line declaration var Cmd = &cobra.Command{ Use: "init", Short: "initialize required setup for git-drs", - Long: "initialize hooks and config required for git-drs", + Long: "initialize hooks, config required for git-drs", Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { // Create .git/hooks/pre-commit file hooksDir := filepath.Join(".git", "hooks") preCommitPath := filepath.Join(hooksDir, "pre-commit") if err := os.MkdirAll(hooksDir, 0755); err != nil { + fmt.Println("[ERROR] unable to create pre-commit hook file:", err) return err } hookContent := "#!/bin/sh\ngit drs precommit\n" if err := os.WriteFile(preCommitPath, []byte(hookContent), 0755); err != nil { + fmt.Println("[ERROR] unable to write to pre-commit hook:", err) return err } @@ -36,10 +46,27 @@ var Cmd = &cobra.Command{ for _, cfg := range configs { cmd := exec.Command("git", "config", cfg[0], cfg[1]) if err := cmd.Run(); err != nil { + fmt.Printf("Error: unable to set git config %s: %v\n", cfg[0], err) return err } } + // Call jwt.UpdateConfig with CLI parameters + err := jwt.UpdateConfig(profile, 
apiEndpoint, credFile, "false", "") + if err != nil { + fmt.Printf("[ERROR] unable to configure your gen3 profile: %v\n", err) + return err + } + return nil }, } + +func init() { + Cmd.Flags().StringVar(&profile, "profile", "", "Specify the profile to use") + Cmd.MarkFlagRequired("profile") + Cmd.Flags().StringVar(&credFile, "cred", "", "Specify the credential file that you want to use") + Cmd.MarkFlagRequired("cred") + Cmd.Flags().StringVar(&apiEndpoint, "apiendpoint", "", "Specify the API endpoint of the data commons") + Cmd.MarkFlagRequired("apiendpoint") +} diff --git a/cmd/root.go b/cmd/root.go index 2afa390..ff0731b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -19,9 +19,9 @@ import ( // RootCmd represents the root command var RootCmd = &cobra.Command{ - Use: "git-drs", - SilenceErrors: true, - SilenceUsage: true, + Use: "git-drs", + Short: "Git DRS - Git-LFS file management for DRS servers", + Long: "Git DRS provides the benefits of Git-LFS file management using DRS for seamless integration with Gen3 servers", PersistentPreRun: func(cmd *cobra.Command, args []string) { //pre-run code can go here }, From 02195578b53e5d9b5401a0428ed35a9f73b0bc83 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 20 Jun 2025 12:47:20 -0700 Subject: [PATCH 24/51] delete indexd record if drs object registration (ie file upload) fails --- .gitignore | 3 +- cdis-data-client | 2 +- client/config.go | 3 -- client/drs-map.go | 2 +- client/indexd.go | 107 ++++++++++++++++++++++++++++++++++++++++-- cmd/precommit/main.go | 11 ++--- 6 files changed, 109 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 0088ced..06f5380 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .idea/ .DS_Store -/tmp \ No newline at end of file +/tmp +/build \ No newline at end of file diff --git a/cdis-data-client b/cdis-data-client index e5ea947..3d771f5 160000 --- a/cdis-data-client +++ b/cdis-data-client @@ -1 +1 @@ -Subproject commit e5ea9478f173059bf6a75b42392198efb97c2d3c 
+Subproject commit 3d771f5ff6e5c5942c0ee5ca0c13de75356544b8 diff --git a/client/config.go b/client/config.go index b4a2c65..b3d1ddc 100644 --- a/client/config.go +++ b/client/config.go @@ -30,14 +30,12 @@ func LoadConfig() (*Config, error) { //look in Git base dir and find .drsconfig file topLevel, err := utils.GitTopLevel() - if err != nil { return nil, err } configPath := filepath.Join(topLevel, DRS_CONFIG) - // log.Printf("Looking for %s", configPath) //check if config exists reader, err := os.Open(configPath) if err != nil { @@ -55,6 +53,5 @@ func LoadConfig() (*Config, error) { return nil, err } - // log.Printf("Config: %s %#v", string(b), conf) return &conf, nil } diff --git a/client/drs-map.go b/client/drs-map.go index c6f222e..477b5cb 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -108,7 +108,6 @@ func UpdateDrsMap() error { fileURL := fmt.Sprintf("s3://%s", filepath.Join(bucketName, drsId, file.Oid)) // create authz string from profile - fmt.Println("cfg.Gen3Project:", cfg.Gen3Project) // check if project ID is valid if !strings.Contains(cfg.Gen3Project, "-") { return fmt.Errorf("error: invalid project ID %s in config file, ID should look like -", cfg.Gen3Project) @@ -202,6 +201,7 @@ func loadDrsMap() (map[string]IndexdRecord, error) { } func DrsInfoFromOid(oid string) (IndexdRecord, error) { + drsMap, err := loadDrsMap() if err != nil { return IndexdRecord{}, fmt.Errorf("error loading %s: %v", DRS_MAP_FILE_NAME, err) diff --git a/client/indexd.go b/client/indexd.go index 64812c4..3939323 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -64,6 +64,25 @@ type IndexdRecord struct { // ContentUpdatedDate string `json:"content_updated_date,omitempty"` } +type OutputInfo struct { + Did string `json:"did"` + BaseID string `json:"baseid"` + Rev string `json:"rev"` + Form string `json:"form"` + Size int64 `json:"size"` + FileName string `json:"file_name"` + Version string `json:"version"` + Uploader string `json:"uploader"` + URLs []string 
`json:"urls"` + ACL []string `json:"acl"` + Authz []string `json:"authz"` + Hashes HashInfo `json:"hashes"` + UpdatedDate string `json:"updated_date"` + CreatedDate string `json:"created_date"` + Metadata map[string]interface{} `json:"metadata"` + URLsMetadata map[string]interface{} `json:"urls_metadata"` +} + // HashInfo represents file hash information as per OpenAPI spec // Patterns are documented for reference, but not enforced at struct level // md5: ^[0-9a-f]{32}$ @@ -232,6 +251,34 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { return nil, fmt.Errorf("error registering indexd record: %v", err) } + // if upload unsuccessful (panic or error), delete record from indexd + defer func() { + myLogger.Log("registration incomplete, cleaning up indexd record for oid %s", oid) + if r := recover(); r != nil { + // Handle panic + cl.deleteIndexdRecord(drsObj.Id) + if err != nil { + myLogger.Log("error cleaning up indexd record on failed registration for oid %s: %s", oid, err) + myLogger.Log("please delete the indexd record manually if needed for DRS ID: %s", drsObj.Id) + myLogger.Log("see https://uc-cdis.github.io/gen3sdk-python/_build/html/indexing.html") + panic(r) + } + myLogger.Log("cleaned up indexd record for oid %s", oid) + myLogger.Log("exiting: %v", r) + panic(r) // re-throw if you want the CLI to still terminate + } + if err != nil { + err = cl.deleteIndexdRecord(drsObj.Id) + if err != nil { + myLogger.Log("error cleaning up indexd record on failed registration for oid %s: %s", oid, err) + myLogger.Log("please delete the indexd record manually if needed for DRS ID: %s", drsObj.Id) + myLogger.Log("see https://uc-cdis.github.io/gen3sdk-python/_build/html/indexing.html") + return + } + myLogger.Log("cleaned up indexd record for oid %s", oid) + } + }() + // upload file to bucket using gen3-client code // modified from gen3-client/g3cmd/upload-single.go filePath, err := GetObjectPath(oid) @@ -240,12 +287,8 @@ func (cl *IndexDClient) 
RegisterFile(oid string) (*drs.DRSObject, error) { return nil, fmt.Errorf("error getting object path for oid %s: %v", oid, err) } err = g3cmd.UploadSingle(cl.profile, drsObj.Id, filePath, cl.bucketName) - - // TODO: if upload unsuccessful, delete record from indexd if err != nil { myLogger.Log("error uploading file to bucket: %s", err) - myLogger.Log("please delete the indexd record manually if needed for DRS ID: %s", drsObj.Id) - myLogger.Log("see https://uc-cdis.github.io/gen3sdk-python/_build/html/indexing.html") return nil, fmt.Errorf("error uploading file to bucket: %v", err) } @@ -334,7 +377,7 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. // add auth token // FIXME: token expires earlier than expected, error looks like // [401] - request to arborist failed: error decoding token: expired at time: 1749844905 - addGen3AuthHeader(req, cl.profile) + err = addGen3AuthHeader(req, cl.profile) if err != nil { return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) } @@ -364,3 +407,57 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. 
myLogger.Log("GET for DRS ID successful: %s", drsObj.Id) return drsObj, nil } + +func (cl *IndexDClient) deleteIndexdRecord(did string) error { + // get the indexd record, can't use queryId cause the DRS object doesn't contain the rev + a := *cl.base + a.Path = filepath.Join(a.Path, "index", did) + + getReq, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + return err + } + + client := &http.Client{} + getResp, err := client.Do(getReq) + if err != nil { + return err + } + defer getResp.Body.Close() + + body, err := io.ReadAll(getResp.Body) + if err != nil { + return err + } + + record := OutputInfo{} + err = json.Unmarshal(body, &record) + if err != nil { + return fmt.Errorf("could not query index record for did %s: %v", did, err) + } + + // delete indexd record using did and rev + url := fmt.Sprintf("%s/index/index/%s?rev=%s", cl.base.String(), did, record.Rev) + delReq, err := http.NewRequest("DELETE", url, nil) + if err != nil { + return err + } + + err = addGen3AuthHeader(delReq, cl.profile) + if err != nil { + return fmt.Errorf("error adding Gen3 auth header to delete record: %v", err) + } + // set Content-Type header for JSON + delReq.Header.Set("accept", "application/json") + + delResp, err := client.Do(delReq) + if err != nil { + return err + } + defer delResp.Body.Close() + + if delResp.StatusCode >= 400 { + return fmt.Errorf("delete failed: %s", delResp.Status) + } + return nil +} diff --git a/cmd/precommit/main.go b/cmd/precommit/main.go index 41a88d7..4c48df8 100644 --- a/cmd/precommit/main.go +++ b/cmd/precommit/main.go @@ -3,7 +3,6 @@ package precommit import ( "fmt" "log" - "os" "github.com/bmeg/git-drs/client" "github.com/bmeg/git-drs/drs" @@ -22,18 +21,14 @@ var Cmd = &cobra.Command{ Use: "precommit", Short: "pre-commit hook to create DRS objects", Long: "Pre-commit hook that creates DRS objects based on LFS files in the repo. 
Stores it to a drs-map.json", + Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { - if len(args) != 0 { - fmt.Fprintln(os.Stderr, "This command does not take any arguments.") - os.Exit(1) - } - + // set up logger myLogger, err := client.NewLogger("") if err != nil { - // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } - defer myLogger.Close() // Ensures cleanup + defer myLogger.Close() myLogger.Log("~~~~~~~~~~~~~ START: pre-commit ~~~~~~~~~~~~~") From 5a39eb691475bda52cfd520868074f9c648091f8 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 20 Jun 2025 14:01:37 -0700 Subject: [PATCH 25/51] first pass refactor drs map into separate objects in to .drs directory --- client/drs-map.go | 127 ++++++++++++++++++++++++++++-------------- client/indexd.go | 2 +- client/transfer.log | 24 -------- cmd/precommit/main.go | 2 +- cmd/transfer/main.go | 2 +- go.mod | 29 ++++++++-- go.sum | 91 +++++++++++++++++++++++++----- 7 files changed, 191 insertions(+), 86 deletions(-) delete mode 100644 client/transfer.log diff --git a/client/drs-map.go b/client/drs-map.go index 477b5cb..38697e0 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" + "github.com/go-git/go-git/v6" "github.com/google/uuid" ) @@ -28,6 +29,8 @@ type LfsLsOutput struct { const ( LFS_OBJS_PATH = ".git/lfs/objects" + DRS_DIR = ".drs" + DRS_OBJS_PATH = DRS_DIR + "/lfs/objects" DRS_MAP_FILE_NAME = "drs-map.json" ) @@ -38,7 +41,7 @@ var ( drsMapFilePath = DRS_MAP_FILE_NAME ) -func UpdateDrsMap() error { +func UpdateDrsObjects() error { logger, err := NewLogger("") if err != nil { log.Fatalf("Failed to open log file: %v", err) @@ -46,11 +49,8 @@ func UpdateDrsMap() error { defer logger.Close() // Ensures cleanup logger.Log("updateDrsMap started") - // [naive method] Get all LFS file and info using json - // and replace the drsMap file with the new data + // [naive method] Get all LFS files' 
info using json and overwrite file with new drsMap // FIXME: use git-lfs internally instead of exec? (eg git.GetTrackedFiles) - // https://github.com/git-lfs/git-lfs/blob/main/git/git.go/#L1515 - // or get diff directly in the commit ie git cat-files (if pointer info is stored there)? cmd := exec.Command("git", "lfs", "ls-files", "--json") out, err := cmd.Output() if err != nil { @@ -78,12 +78,13 @@ func UpdateDrsMap() error { continue } - // FIXME: do we want to hash this with the project ID instead? + // FIXME: do we want to hash this with the project ID instead of the repoName? + // TODO: determine git to gen3 project hierarchy mapping drsId := DrsUUID(repoName, file.Oid) logger.Log("Working with file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) // get file info needed to create indexd record - path, err := GetObjectPath(file.Oid) + path, err := GetObjectPath(LFS_OBJS_PATH, file.Oid) if err != nil { return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) } @@ -138,41 +139,42 @@ func UpdateDrsMap() error { } } - // write drsMap to json at drsMapPath - drsMapBytes, err := json.Marshal(drsMap) - if err != nil { - logger.Log("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) - return fmt.Errorf("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) - } - logger.Log("Writing drsMap to %s", drsMapFilePath) - - err = os.WriteFile(drsMapFilePath, drsMapBytes, 0644) - if err != nil { - return fmt.Errorf("error writing %s: %v", DRS_MAP_FILE_NAME, err) - } - logger.Log("Updated %s with %d entries", DRS_MAP_FILE_NAME, len(drsMap)) + // write drs objects to DRS_OBJS_PATH + for oid, indexdObj := range drsMap { + // get object bytes + indexdObjBytes, err := json.Marshal(indexdObj) + if err != nil { + logger.Log("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) + return fmt.Errorf("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) + } - // stage the drsMap file - cmd = exec.Command("git", "add", drsMapFilePath) - _, err = cmd.Output() - if err 
!= nil { - return fmt.Errorf("error adding %s to git: %v", DRS_MAP_FILE_NAME, err) - } + // get and create obj file path + objFilePath, err := GetObjectPath(DRS_OBJS_PATH, oid) + if err != nil { + logger.Log("error getting object path for oid %s: %v", oid, err) + return fmt.Errorf("error getting object path for oid %s: %v", oid, err) + } + if err := os.MkdirAll(filepath.Dir(objFilePath), 0755); err != nil { + return fmt.Errorf("error creating directory for %s: %v", objFilePath, err) + } - return nil -} + // write indexd obj to file as json + logger.Log("Writing drsMap to %s", objFilePath) + err = os.WriteFile(objFilePath, indexdObjBytes, 0644) + if err != nil { + return fmt.Errorf("error writing %s: %v", DRS_MAP_FILE_NAME, err) + } + logger.Log("Created %s for file %s", objFilePath, indexdObjBytes) -func GetRepoNameFromGit() (string, error) { - // TODO: change to retrieve from git config directly? Or use go-git? - cmd := exec.Command("git", "config", "--get", "remote.origin.url") - out, err := cmd.Output() - if err != nil { - return "", err + // stage the object file + cmd = exec.Command("git", "add", objFilePath) + _, err = cmd.Output() + if err != nil { + return fmt.Errorf("error adding %s to git: %v", objFilePath, err) + } } - remoteURL := strings.TrimSpace(string(out)) - repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") - return repoName, nil + return nil } func DrsUUID(repoName string, hash string) string { @@ -201,24 +203,67 @@ func loadDrsMap() (map[string]IndexdRecord, error) { } func DrsInfoFromOid(oid string) (IndexdRecord, error) { - drsMap, err := loadDrsMap() if err != nil { return IndexdRecord{}, fmt.Errorf("error loading %s: %v", DRS_MAP_FILE_NAME, err) } - // Check if the oid exists in the drsMap if indexdObj, ok := drsMap[oid]; ok { return indexdObj, nil } return IndexdRecord{}, fmt.Errorf("DRS object not found for oid %s in %s", oid, DRS_MAP_FILE_NAME) } -func GetObjectPath(oid string) (string, error) { +func GetObjectPath(basePath 
string, oid string) (string, error) { // check that oid is a valid sha256 hash if len(oid) != 64 { return "", errors.New(fmt.Sprintf("Error: %s is not a valid sha256 hash", oid)) } - return filepath.Join(LFS_OBJS_PATH, oid[:2], oid[2:4], oid), nil + return filepath.Join(basePath, oid[:2], oid[2:4], oid), nil +} + +//////////////// +// git helpers / +//////////////// + +func getStagedFiles() (git.Status, error) { + repo, err := git.PlainOpen(".") + if err != nil { + return nil, errors.New(fmt.Sprintln("Could not open repo:", err)) + } + + wt, err := repo.Worktree() + if err != nil { + return nil, errors.New(fmt.Sprintln("Could not get worktree:", err)) + } + + status, err := wt.Status() + if err != nil { + return nil, errors.New(fmt.Sprintln("Could not get status:", err)) + } + return status, nil +} + +func GetRepoNameFromGit() (string, error) { + // Open the Git repository in the current directory + repo, err := git.PlainOpen(".") + if err != nil { + log.Fatalf("Failed to open repo: %v", err) + } + + // Get the config object + config, err := repo.Config() + if err != nil { + log.Fatalf("Failed to get config: %v", err) + } + + // Get the remote origin URL + if remote, ok := config.Remotes["origin"]; ok && len(remote.URLs) > 0 { + remoteURL := strings.TrimSpace(string(remote.URLs[0])) + repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") + return repoName, nil + } else { + return "", errors.New("Origin remote not found") + } } diff --git a/client/indexd.go b/client/indexd.go index 3939323..576c3f8 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -281,7 +281,7 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // upload file to bucket using gen3-client code // modified from gen3-client/g3cmd/upload-single.go - filePath, err := GetObjectPath(oid) + filePath, err := GetObjectPath(LFS_OBJS_PATH, oid) if err != nil { myLogger.Log("error getting object path for oid %s: %s", oid, err) return nil, fmt.Errorf("error getting 
object path for oid %s: %v", oid, err) diff --git a/client/transfer.log b/client/transfer.log deleted file mode 100644 index acb4a37..0000000 --- a/client/transfer.log +++ /dev/null @@ -1,24 +0,0 @@ -2025/06/17 17:29:06 updateDrsMap started -2025/06/17 17:29:06 git lfs ls-files output: { - "files": null -} - -2025/06/17 17:29:06 Repo Name: git-gen3 -2025/06/17 17:29:06 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-4230269377.json -2025/06/17 17:29:06 Updated drs-map.json with 0 entries -2025/06/17 17:30:13 updateDrsMap started -2025/06/17 17:30:13 git lfs ls-files output: { - "files": null -} - -2025/06/17 17:30:13 Repo Name: git-gen3 -2025/06/17 17:30:13 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-3449762568.json -2025/06/17 17:30:13 Updated drs-map.json with 0 entries -2025/06/17 17:32:20 updateDrsMap started -2025/06/17 17:32:20 git lfs ls-files output: { - "files": null -} - -2025/06/17 17:32:20 Repo Name: git-gen3 -2025/06/17 17:32:20 Writing drsMap to /var/folders/nq/88_4pk_s25z4g3g52gvm5b88px118k/T/drs-map-1558809153.json -2025/06/17 17:32:20 Updated drs-map.json with 0 entries diff --git a/cmd/precommit/main.go b/cmd/precommit/main.go index 4c48df8..15685f0 100644 --- a/cmd/precommit/main.go +++ b/cmd/precommit/main.go @@ -32,7 +32,7 @@ var Cmd = &cobra.Command{ myLogger.Log("~~~~~~~~~~~~~ START: pre-commit ~~~~~~~~~~~~~") - err = client.UpdateDrsMap() + err = client.UpdateDrsObjects() if err != nil { fmt.Println("updateDrsMap failed:", err) log.Fatalf("updateDrsMap failed: %v", err) diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 2c70c49..2b804eb 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -173,7 +173,7 @@ var Cmd = &cobra.Command{ myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) // download the file using the indexd client - dstPath, err := client.GetObjectPath(downloadMsg.Oid) 
+ dstPath, err := client.GetObjectPath(client.LFS_OBJS_PATH, downloadMsg.Oid) _, err = drsClient.DownloadFile(indexdObj.Did, accessId, dstPath) if err != nil { myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) diff --git a/go.mod b/go.mod index c9c0243..805fda3 100644 --- a/go.mod +++ b/go.mod @@ -10,11 +10,32 @@ require ( sigs.k8s.io/yaml v1.4.0 ) +require ( + dario.cat/mergo v1.0.1 // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/ProtonMail/go-crypto v1.3.0 // indirect + github.com/cloudflare/circl v1.6.1 // indirect + github.com/cyphar/filepath-securejoin v0.4.1 // indirect + github.com/emirpasic/gods v1.18.1 // indirect + github.com/fatih/color v1.18.0 // indirect + github.com/go-git/gcfg/v2 v2.0.1 // indirect + github.com/go-git/go-billy/v5 v5.6.2 // indirect + github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/kevinburke/ssh_config v1.2.0 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/pjbgf/sha1cd v0.3.2 // indirect + github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect + golang.org/x/crypto v0.39.0 // indirect + golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect +) + require ( github.com/avast/retry-go v2.4.2+incompatible // indirect github.com/git-lfs/gitobj/v2 v2.1.1 // indirect github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect + github.com/go-git/go-git/v6 v6.0.0-20250618100032-7bc22667c9e1 github.com/google/go-github v17.0.0+incompatible // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/hashicorp/go-version v1.4.0 // indirect @@ -22,14 +43,14 @@ require ( github.com/leonelquinteros/gotext v1.5.0 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect - github.com/pkg/errors 
v0.0.0-20170505043639-c605e284fe17 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect github.com/spf13/pflag v1.0.6 // indirect github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.29.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/text v0.26.0 // indirect gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect gopkg.in/ini.v1 v1.66.3 // indirect ) diff --git a/go.sum b/go.sum index e45c977..93ce033 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,33 @@ +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= +dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw= +github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/avast/retry-go v2.4.2+incompatible 
h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= +github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= +github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= +github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= +github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= +github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= +github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/git-lfs/git-lfs/v3 v3.6.1 h1:0RA2HzkMVl69KE5zCGY1PxqkDSbd/f/O7Du6CNkTYtY= github.com/git-lfs/git-lfs/v3 v3.6.1/go.mod h1:1YO3nafGw2wKBR5LTZ7/LXJ7U7ELdvIGvcCBrLt6mfM= github.com/git-lfs/gitobj/v2 v2.1.1 h1:tf/VU6zL1kxa3he+nf6FO/syX+LGkm6WGDsMpfuXV7Q= @@ -17,9 +38,22 @@ github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d 
github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= +github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= +github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= +github.com/go-git/gcfg/v2 v2.0.1 h1:vIDPEdcmkwmbMCHs/0Fv/HFA9SH9ZVVI/gglNeLztF0= +github.com/go-git/gcfg/v2 v2.0.1/go.mod h1:/lv2NsxvhepuMrldsFilrgct6pxzpGdSRC13ydTLSLs= +github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= +github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= +github.com/go-git/go-git-fixtures/v5 v5.0.0-20241203230421-0753e18f8f03 h1:LumE+tQdnYW24a9RoO08w64LHTzkNkdUqBD/0QPtlEY= +github.com/go-git/go-git-fixtures/v5 v5.0.0-20241203230421-0753e18f8f03/go.mod h1:hMKrMnUE4W0SJ7bFyM00dyz/HoknZoptGWzrj6M+dEM= +github.com/go-git/go-git/v6 v6.0.0-20250618100032-7bc22667c9e1 h1:/IFaZq5TSTYdMZoTJzPnRk8mU0KZ/NZVu4E5Q+prlY8= +github.com/go-git/go-git/v6 v6.0.0-20250618100032-7bc22667c9e1/go.mod h1:/Uu/Qt4LfnBg/ajxrKxBQ8zTCZyk8SKM2aHTMFMe48U= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-github 
v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= @@ -46,8 +80,19 @@ github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZ github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= +github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= +github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leonelquinteros/gotext v1.5.0 h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914FOauSkBM= github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= @@ 
-56,52 +101,70 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= -github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 h1:chPfVn+gpAM5CTpTyVU9j8J+xgRGwmoDlNDLjKnJiYo= -github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= +github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 
+github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= +github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e h1:IWllFTiDjjLIf2oeKxpIUmtiDV5sn71VgeQgg6vcE7k= github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e/go.mod h1:d7u6HkTYKSv5m6MCKkOQlHwaShTMl3HjqSGW3XtVhXM= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.39.0 
h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= +golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 
+golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/ini.v1 v1.66.3 h1:jRskFVxYaMGAMUbN0UZ7niA9gzL9B49DOqE78vg0k3w= gopkg.in/ini.v1 v1.66.3/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= From 7c1231d14124be1e565f77e796e728c8dff29b3d Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 20 Jun 2025 14:23:52 -0700 Subject: [PATCH 26/51] fully deprecate drs-map usage in favor of .drs/objects --- client/drs-map.go | 50 ++++++----------- client/indexd.go | 5 +- 
cmd/precommit/main.go | 2 +- mvp/pre-commit-map.go | 126 ------------------------------------------ 4 files changed, 23 insertions(+), 160 deletions(-) delete mode 100644 mvp/pre-commit-map.go diff --git a/client/drs-map.go b/client/drs-map.go index 38697e0..5a6e7f4 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -28,17 +28,14 @@ type LfsLsOutput struct { } const ( - LFS_OBJS_PATH = ".git/lfs/objects" - DRS_DIR = ".drs" - DRS_OBJS_PATH = DRS_DIR + "/lfs/objects" - DRS_MAP_FILE_NAME = "drs-map.json" + LFS_OBJS_PATH = ".git/lfs/objects" + DRS_DIR = ".drs" + DRS_OBJS_PATH = DRS_DIR + "/lfs/objects" ) var ( lfsFiles LfsLsOutput drsMap = make(map[string]IndexdRecord) - // drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) - drsMapFilePath = DRS_MAP_FILE_NAME ) func UpdateDrsObjects() error { @@ -117,8 +114,7 @@ func UpdateDrsObjects() error { authzStr := "/programs/" + projectIdArr[0] + "/projects/" + projectIdArr[1] // If the oid exists in drsMap, check if it matches the calculated uuid - // TODO: naive method, where only the first file with the same oid is stored - // in the future, will need to handle multiple files with the same oid + // TODO: currently only the first filename for a given oid is used if existing, ok := drsMap[drsId]; ok { if existing.Did != drsId { return fmt.Errorf("Error: OID %s for file %s has mismatched UUID (existing: %s, calculated: %s). 
Aborting.", file.Oid, file.Name, existing.Did, drsId) @@ -144,8 +140,8 @@ func UpdateDrsObjects() error { // get object bytes indexdObjBytes, err := json.Marshal(indexdObj) if err != nil { - logger.Log("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) - return fmt.Errorf("error marshalling %s: %v", DRS_MAP_FILE_NAME, err) + logger.Log("error marshalling indexd object for oid %s: %v", oid, err) + return fmt.Errorf("error marshalling indexd object for oid %s: %v", oid, err) } // get and create obj file path @@ -162,7 +158,7 @@ func UpdateDrsObjects() error { logger.Log("Writing drsMap to %s", objFilePath) err = os.WriteFile(objFilePath, indexdObjBytes, 0644) if err != nil { - return fmt.Errorf("error writing %s: %v", DRS_MAP_FILE_NAME, err) + return fmt.Errorf("error writing %s: %v", objFilePath, err) } logger.Log("Created %s for file %s", objFilePath, indexdObjBytes) @@ -183,35 +179,25 @@ func DrsUUID(repoName string, hash string) string { return uuid.NewSHA1(uuid.NameSpaceURL, []byte(hashStr)).String() } -func loadDrsMap() (map[string]IndexdRecord, error) { - // Load the DRSMap json file - // FIXME: need to load the committed version as opposed to the working directory version - // see https://github.com/copilot/c/c56f0baa-66d0-4d33-924f-27ca701591e5 - if _, err := os.Stat(drsMapFilePath); os.IsNotExist(err) { - return nil, fmt.Errorf("%s does not exist at %s", DRS_MAP_FILE_NAME, drsMapFilePath) - } - data, err := os.ReadFile(drsMapFilePath) +func DrsInfoFromOid(oid string) (IndexdRecord, error) { + // unmarshal the DRS object + path, err := GetObjectPath(DRS_OBJS_PATH, oid) if err != nil { - return nil, fmt.Errorf("error reading %s: %v", DRS_MAP_FILE_NAME, err) + return IndexdRecord{}, fmt.Errorf("error getting object path for oid %s: %v", oid, err) } - var drsMap map[string]IndexdRecord - err = json.Unmarshal(data, &drsMap) + + indexdObjBytes, err := os.ReadFile(path) if err != nil { - return nil, fmt.Errorf("error unmarshaling %s: %v", DRS_MAP_FILE_NAME, err) 
+ return IndexdRecord{}, fmt.Errorf("error reading DRS object for oid %s: %v", oid, err) } - return drsMap, nil -} -func DrsInfoFromOid(oid string) (IndexdRecord, error) { - drsMap, err := loadDrsMap() + var indexdObj IndexdRecord + err = json.Unmarshal(indexdObjBytes, &indexdObj) if err != nil { - return IndexdRecord{}, fmt.Errorf("error loading %s: %v", DRS_MAP_FILE_NAME, err) + return IndexdRecord{}, fmt.Errorf("error unmarshaling DRS object for oid %s: %v", oid, err) } - if indexdObj, ok := drsMap[oid]; ok { - return indexdObj, nil - } - return IndexdRecord{}, fmt.Errorf("DRS object not found for oid %s in %s", oid, DRS_MAP_FILE_NAME) + return indexdObj, nil } func GetObjectPath(basePath string, oid string) (string, error) { diff --git a/client/indexd.go b/client/indexd.go index 576c3f8..7ce382d 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -253,8 +253,10 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // if upload unsuccessful (panic or error), delete record from indexd defer func() { - myLogger.Log("registration incomplete, cleaning up indexd record for oid %s", oid) + if r := recover(); r != nil { + // TODO: this panic isn't getting triggered + myLogger.Log("panic occurred, cleaning up indexd record for oid %s", oid) // Handle panic cl.deleteIndexdRecord(drsObj.Id) if err != nil { @@ -268,6 +270,7 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { panic(r) // re-throw if you want the CLI to still terminate } if err != nil { + myLogger.Log("registration incomplete, cleaning up indexd record for oid %s", oid) err = cl.deleteIndexdRecord(drsObj.Id) if err != nil { myLogger.Log("error cleaning up indexd record on failed registration for oid %s: %s", oid, err) diff --git a/cmd/precommit/main.go b/cmd/precommit/main.go index 15685f0..dc5aa0a 100644 --- a/cmd/precommit/main.go +++ b/cmd/precommit/main.go @@ -20,7 +20,7 @@ var ( var Cmd = &cobra.Command{ Use: "precommit", Short: "pre-commit hook to 
create DRS objects", - Long: "Pre-commit hook that creates DRS objects based on LFS files in the repo. Stores it to a drs-map.json", + Long: "Pre-commit hook that creates and commits a DRS object to the repo for every LFS file committed", Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { // set up logger diff --git a/mvp/pre-commit-map.go b/mvp/pre-commit-map.go deleted file mode 100644 index c34c91e..0000000 --- a/mvp/pre-commit-map.go +++ /dev/null @@ -1,126 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - "os" - "os/exec" - "path/filepath" - "strings" - - "github.com/google/uuid" -) - -// output of git lfs ls-files -type LfsLsOutput struct { - Files []struct { - Name string `json:"name"` - Size int64 `json:"size"` - Checkout bool `json:"checkout"` - Downloaded bool `json:"downloaded"` - OidType string `json:"oid_type"` - Oid string `json:"oid"` - Version string `json:"version"` - } `json:"files"` -} - -const ( - LFS_OBJS_PATH = ".git/lfs/objects" - DRS_MAP_FILE_NAME = "drs-map.json" -) - -var ( - lfsFiles LfsLsOutput - drsMap = make(map[string]string) - // drsMapFilePath = filepath.Join(LFS_OBJS_PATH, DRS_MAP_FILE_NAME) - drsMapFilePath = DRS_MAP_FILE_NAME -) - -func main() { - // Check if path exists and is a directory - info, err := os.Stat(LFS_OBJS_PATH) - if err != nil || !info.IsDir() { - fmt.Println("No LFS objects tracked in this repository.") - os.Exit(0) - } - - // Get all LFS file and info using json - // FIXME: use git-lfs internally instead of exec? 
- // eg use git-lfs git.GetTrackedFiles - // https://github.com/git-lfs/git-lfs/blob/main/git/git.go/#L1515 - cmd := exec.Command("git", "lfs", "ls-files", "--long", "--json") - out, err := cmd.Output() - if err != nil { - log.Fatalf("error running git lfs ls-files: %v", err) - } - - err = json.Unmarshal(out, &lfsFiles) - if err != nil { - log.Fatalf("error unmarshalling git lfs ls-files output: %v", err) - } - - // get the name of repository - repoName, err := getRepoNameFromGit() - if err != nil { - log.Fatalf("error: %v", err) - } - fmt.Println("Repo Name:", repoName) - - // for each LFS file, calculate the DRS ID using repoName and the oid - for _, file := range lfsFiles.Files { - // Example: DRS ID = sha1(repoName + ":" + oid) - hashStr := fmt.Sprintf("%s:%s", repoName, file.Oid) - drsId := V5UUID(hashStr).String() - - // If the oid exists in drsMap, check if it matches the calculated uuid - if existing, ok := drsMap[file.Oid]; ok { - if existing != drsId { - fmt.Printf("Warning: OID %s has mismatched UUID. Updating.\n", file.Oid) - drsMap[file.Oid] = drsId - } - } else { - // Add new mapping - drsMap[file.Oid] = drsId - } - } - - // write drsMap to json at drsMapPath - drsMapBytes, err := json.Marshal(drsMap) - if err != nil { - log.Fatalf("error marshalling drs-map.json: %v", err) - } - - err = os.WriteFile(drsMapFilePath, drsMapBytes, 0644) - if err != nil { - log.Fatalf("error writing drs-map.json: %v", err) - } - - fmt.Println("Updated drs-map.json with", len(drsMap), "entries.") - - // stage the drsMap file - // FIXME: should this be in th pre-commit hook as opposed to the Go code? - cmd = exec.Command("git", "add", drsMapFilePath) - _, err = cmd.Output() - if err != nil { - log.Fatalf("error adding drs-map.json to git: %v", err) - } -} - -func getRepoNameFromGit() (string, error) { - // FIXME: change to call git config directly? 
- cmd := exec.Command("git", "config", "--get", "remote.origin.url") - out, err := cmd.Output() - if err != nil { - return "", err - } - - remoteURL := strings.TrimSpace(string(out)) - repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") - return repoName, nil -} - -func V5UUID(data string) uuid.UUID { - // FIXME: use different UUID method? Used same method as g3t - return uuid.NewSHA1(uuid.NameSpaceURL, []byte(data)) -} From 95147c751cbd83581742086849531290c57aff28 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 20 Jun 2025 18:05:09 -0700 Subject: [PATCH 27/51] update only staged files // delete final drsmap usages // revert use of go-git bc slow --- client/drs-map.go | 177 ++++++++++++++++++++++-------------------- cmd/precommit/main.go | 4 +- go.mod | 19 +---- go.sum | 53 +------------ 4 files changed, 101 insertions(+), 152 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index 5a6e7f4..c8ba437 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -1,6 +1,7 @@ package client import ( + "bytes" "encoding/json" "errors" "fmt" @@ -10,7 +11,6 @@ import ( "path/filepath" "strings" - "github.com/go-git/go-git/v6" "github.com/google/uuid" ) @@ -35,25 +35,25 @@ const ( var ( lfsFiles LfsLsOutput - drsMap = make(map[string]IndexdRecord) ) func UpdateDrsObjects() error { + // TODO: only change staged files with new oids instead of writing all objects each time logger, err := NewLogger("") if err != nil { log.Fatalf("Failed to open log file: %v", err) } defer logger.Close() // Ensures cleanup - logger.Log("updateDrsMap started") + logger.Log("Update to DRS objects started") - // [naive method] Get all LFS files' info using json and overwrite file with new drsMap - // FIXME: use git-lfs internally instead of exec? (eg git.GetTrackedFiles) + // get all LFS files' info using json + // TODO: use git-lfs internally instead of exec? 
(eg git.GetTrackedFiles) cmd := exec.Command("git", "lfs", "ls-files", "--json") out, err := cmd.Output() if err != nil { return fmt.Errorf("error running git lfs ls-files: %v", err) } - logger.Log("git lfs ls-files output: %s", string(out)) + logger.Log("git lfs ls-files output") err = json.Unmarshal(out, &lfsFiles) if err != nil { @@ -67,18 +67,46 @@ func UpdateDrsObjects() error { } logger.Log("Repo Name: %s", repoName) + // get list of staged files as a set + stagedFiles, err := getStagedFiles() + if err != nil { + return fmt.Errorf("error getting staged files: %v", err) + } + stagedFilesSet := make(map[string]struct{}) + for _, file := range stagedFiles { + stagedFilesSet[file] = struct{}{} + } + logger.Log("Creating DRS objects for staged files: %v", stagedFiles) + // for each LFS file, calculate the DRS ID using repoName and the oid for _, file := range lfsFiles.Files { - // make sure file is both checked out and downloaded - if !file.Checkout || !file.Downloaded { - logger.Log("Skipping file: %s (checked out: %v, downloaded: %v)", file.Name, file.Checkout, file.Downloaded) + // check if the file is staged + if _, ok := stagedFilesSet[file.Name]; !ok { + continue + } + + // check if oid already exists + // TODO: need to determine how to manage indexd file name + // right now, chooses the path of the first committed copy or + // if there's multiple copies in one commit, the first occurrence from ls-files + drsObjPath, err := GetObjectPath(DRS_OBJS_PATH, file.Oid) + if err != nil { + return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) + } + if _, err := os.Stat(drsObjPath); err == nil { + logger.Log("Skipping staged file %s with OID %s, already exists in DRS objects path %s", file.Name, file.Oid, drsObjPath) continue } + // check file exists in the local cache + if !file.Downloaded { + return fmt.Errorf("Staged file %s is not cached. 
Please unstage the file, then git add the file again", file.Name) + } + // FIXME: do we want to hash this with the project ID instead of the repoName? // TODO: determine git to gen3 project hierarchy mapping drsId := DrsUUID(repoName, file.Oid) - logger.Log("Working with file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) + logger.Log("Processing staged file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) // get file info needed to create indexd record path, err := GetObjectPath(LFS_OBJS_PATH, file.Oid) @@ -88,6 +116,7 @@ func UpdateDrsObjects() error { if _, err := os.Stat(path); os.IsNotExist(err) { return fmt.Errorf("Error: File %s does not exist in LFS objects path %s. Aborting.", file.Name, path) } + // fileInfo, err := os.Stat(path) // if err != nil { // return fmt.Errorf("error getting file info: %v", err) @@ -106,73 +135,61 @@ func UpdateDrsObjects() error { fileURL := fmt.Sprintf("s3://%s", filepath.Join(bucketName, drsId, file.Oid)) // create authz string from profile - // check if project ID is valid if !strings.Contains(cfg.Gen3Project, "-") { return fmt.Errorf("error: invalid project ID %s in config file, ID should look like -", cfg.Gen3Project) } projectIdArr := strings.SplitN(cfg.Gen3Project, "-", 2) authzStr := "/programs/" + projectIdArr[0] + "/projects/" + projectIdArr[1] - // If the oid exists in drsMap, check if it matches the calculated uuid - // TODO: currently only the first filename for a given oid is used - if existing, ok := drsMap[drsId]; ok { - if existing.Did != drsId { - return fmt.Errorf("Error: OID %s for file %s has mismatched UUID (existing: %s, calculated: %s). 
Aborting.", file.Oid, file.Name, existing.Did, drsId) - } - } else { - // Add new mapping from the file name to the IndexdRecord with the correct DRS ID and OID - drsMap[file.Oid] = IndexdRecord{ - Did: drsId, - FileName: file.Name, - URLs: []string{fileURL}, - Hashes: HashInfo{SHA256: file.Oid}, - Size: file.Size, - Authz: []string{authzStr}, - // ContentCreatedDate: modDate, - // ContentUpdatedDate: modDate, - } - logger.Log("Adding to drsMap: %s -> %s", file.Name, drsMap[file.Name].Did) - } - } - - // write drs objects to DRS_OBJS_PATH - for oid, indexdObj := range drsMap { - // get object bytes - indexdObjBytes, err := json.Marshal(indexdObj) - if err != nil { - logger.Log("error marshalling indexd object for oid %s: %v", oid, err) - return fmt.Errorf("error marshalling indexd object for oid %s: %v", oid, err) + // create IndexdRecord + indexdObj := IndexdRecord{ + Did: drsId, + FileName: file.Name, + URLs: []string{fileURL}, + Hashes: HashInfo{SHA256: file.Oid}, + Size: file.Size, + Authz: []string{authzStr}, + // ContentCreatedDate: modDate, + // ContentUpdatedDate: modDate, } + logger.Log("Adding to DRS Objects: %s -> %s", file.Name, indexdObj.Did) - // get and create obj file path - objFilePath, err := GetObjectPath(DRS_OBJS_PATH, oid) + // write drs objects to DRS_OBJS_PATH + err = writeDrsObj(indexdObj, file.Oid, drsObjPath) if err != nil { - logger.Log("error getting object path for oid %s: %v", oid, err) - return fmt.Errorf("error getting object path for oid %s: %v", oid, err) - } - if err := os.MkdirAll(filepath.Dir(objFilePath), 0755); err != nil { - return fmt.Errorf("error creating directory for %s: %v", objFilePath, err) + return fmt.Errorf("error writing DRS object for oid %s: %v", file.Oid, err) } - - // write indexd obj to file as json - logger.Log("Writing drsMap to %s", objFilePath) - err = os.WriteFile(objFilePath, indexdObjBytes, 0644) - if err != nil { - return fmt.Errorf("error writing %s: %v", objFilePath, err) - } - logger.Log("Created 
%s for file %s", objFilePath, indexdObjBytes) + logger.Log("Created %s for file %s", drsObjPath, file.Name) // stage the object file - cmd = exec.Command("git", "add", objFilePath) + cmd = exec.Command("git", "add", drsObjPath) _, err = cmd.Output() if err != nil { - return fmt.Errorf("error adding %s to git: %v", objFilePath, err) + return fmt.Errorf("error adding %s to git: %v", drsObjPath, err) } } return nil } +func writeDrsObj(indexdObj IndexdRecord, oid string, drsObjPath string) error { + // get object bytes + indexdObjBytes, err := json.Marshal(indexdObj) + if err != nil { + return fmt.Errorf("error marshalling indexd object for oid %s: %v", oid, err) + } + if err := os.MkdirAll(filepath.Dir(drsObjPath), 0755); err != nil { + return fmt.Errorf("error creating directory for %s: %v", drsObjPath, err) + } + + // write indexd obj to file as json + err = os.WriteFile(drsObjPath, indexdObjBytes, 0644) + if err != nil { + return fmt.Errorf("error writing %s: %v", drsObjPath, err) + } + return nil +} + func DrsUUID(repoName string, hash string) string { // FIXME: use different UUID method? 
Used same method as g3t hashStr := fmt.Sprintf("%s:%s", repoName, hash) @@ -213,43 +230,31 @@ func GetObjectPath(basePath string, oid string) (string, error) { // git helpers / //////////////// -func getStagedFiles() (git.Status, error) { - repo, err := git.PlainOpen(".") +func getStagedFiles() ([]string, error) { + // chose exec here for performance over using go-git + // tradeoff is very rare concurrency problems which currently aren't relevant to the pre-commit + cmd := exec.Command("git", "diff", "--name-only", "--cached") + var out bytes.Buffer + cmd.Stdout = &out + err := cmd.Run() if err != nil { - return nil, errors.New(fmt.Sprintln("Could not open repo:", err)) + return nil, fmt.Errorf("Error running git command: %s", err) } - wt, err := repo.Worktree() - if err != nil { - return nil, errors.New(fmt.Sprintln("Could not get worktree:", err)) - } + stagedFiles := strings.Split(strings.TrimSpace(out.String()), "\n") - status, err := wt.Status() - if err != nil { - return nil, errors.New(fmt.Sprintln("Could not get status:", err)) - } - return status, nil + return stagedFiles, nil } func GetRepoNameFromGit() (string, error) { - // Open the Git repository in the current directory - repo, err := git.PlainOpen(".") - if err != nil { - log.Fatalf("Failed to open repo: %v", err) - } - - // Get the config object - config, err := repo.Config() + // prefer simple os.Exec over using go-git + cmd := exec.Command("git", "config", "--get", "remote.origin.url") + out, err := cmd.Output() if err != nil { - log.Fatalf("Failed to get config: %v", err) + return "", err } - // Get the remote origin URL - if remote, ok := config.Remotes["origin"]; ok && len(remote.URLs) > 0 { - remoteURL := strings.TrimSpace(string(remote.URLs[0])) - repoName := strings.TrimSuffix(filepath.Base(remoteURL), ".git") - return repoName, nil - } else { - return "", errors.New("Origin remote not found") - } + remoteURL := strings.TrimSpace(string(out)) + repoName := 
strings.TrimSuffix(filepath.Base(remoteURL), ".git") + return repoName, nil } diff --git a/cmd/precommit/main.go b/cmd/precommit/main.go index dc5aa0a..0020d44 100644 --- a/cmd/precommit/main.go +++ b/cmd/precommit/main.go @@ -34,8 +34,8 @@ var Cmd = &cobra.Command{ err = client.UpdateDrsObjects() if err != nil { - fmt.Println("updateDrsMap failed:", err) - log.Fatalf("updateDrsMap failed: %v", err) + fmt.Println("UpdateDrsObjects failed:", err) + log.Fatalf("UpdateDrsObjects failed: %v", err) return err } diff --git a/go.mod b/go.mod index 805fda3..7b82f43 100644 --- a/go.mod +++ b/go.mod @@ -11,23 +11,13 @@ require ( ) require ( - dario.cat/mergo v1.0.1 // indirect - github.com/Microsoft/go-winio v0.6.2 // indirect - github.com/ProtonMail/go-crypto v1.3.0 // indirect - github.com/cloudflare/circl v1.6.1 // indirect - github.com/cyphar/filepath-securejoin v0.4.1 // indirect - github.com/emirpasic/gods v1.18.1 // indirect github.com/fatih/color v1.18.0 // indirect - github.com/go-git/gcfg/v2 v2.0.1 // indirect - github.com/go-git/go-billy/v5 v5.6.2 // indirect - github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/kevinburke/ssh_config v1.2.0 // indirect + github.com/kr/pretty v0.3.1 // indirect github.com/mattn/go-colorable v0.1.14 // indirect - github.com/pjbgf/sha1cd v0.3.2 // indirect - github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect - golang.org/x/crypto v0.39.0 // indirect - golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect + github.com/rogpeppe/go-internal v1.14.1 // indirect + github.com/stretchr/testify v1.10.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) require ( @@ -35,7 +25,6 @@ require ( github.com/git-lfs/gitobj/v2 v2.1.1 // indirect github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect - github.com/go-git/go-git/v6 
v6.0.0-20250618100032-7bc22667c9e1 github.com/google/go-github v17.0.0+incompatible // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/hashicorp/go-version v1.4.0 // indirect diff --git a/go.sum b/go.sum index 93ce033..416b00a 100644 --- a/go.sum +++ b/go.sum @@ -1,31 +1,13 @@ -dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= -dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= -github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw= -github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= -github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= -github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= -github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= -github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/avast/retry-go v2.4.2+incompatible h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= -github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 
-github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= -github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= -github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= -github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= -github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= -github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/git-lfs/git-lfs/v3 v3.6.1 h1:0RA2HzkMVl69KE5zCGY1PxqkDSbd/f/O7Du6CNkTYtY= @@ -38,18 +20,6 @@ github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= -github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= -github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= -github.com/go-git/gcfg/v2 v2.0.1 
h1:vIDPEdcmkwmbMCHs/0Fv/HFA9SH9ZVVI/gglNeLztF0= -github.com/go-git/gcfg/v2 v2.0.1/go.mod h1:/lv2NsxvhepuMrldsFilrgct6pxzpGdSRC13ydTLSLs= -github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= -github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= -github.com/go-git/go-git-fixtures/v5 v5.0.0-20241203230421-0753e18f8f03 h1:LumE+tQdnYW24a9RoO08w64LHTzkNkdUqBD/0QPtlEY= -github.com/go-git/go-git-fixtures/v5 v5.0.0-20241203230421-0753e18f8f03/go.mod h1:hMKrMnUE4W0SJ7bFyM00dyz/HoknZoptGWzrj6M+dEM= -github.com/go-git/go-git/v6 v6.0.0-20250618100032-7bc22667c9e1 h1:/IFaZq5TSTYdMZoTJzPnRk8mU0KZ/NZVu4E5Q+prlY8= -github.com/go-git/go-git/v6 v6.0.0-20250618100032-7bc22667c9e1/go.mod h1:/Uu/Qt4LfnBg/ajxrKxBQ8zTCZyk8SKM2aHTMFMe48U= -github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= -github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -80,9 +50,7 @@ github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZ github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= -github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= -github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -101,32 +69,26 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= -github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= -github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= -github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= -github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= 
github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= -github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e h1:IWllFTiDjjLIf2oeKxpIUmtiDV5sn71VgeQgg6vcE7k= @@ -135,8 +97,6 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= -golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= -golang.org/x/exp 
v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -147,8 +107,6 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= @@ -156,15 +114,12 @@ golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapK golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= 
gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/ini.v1 v1.66.3 h1:jRskFVxYaMGAMUbN0UZ7niA9gzL9B49DOqE78vg0k3w= gopkg.in/ini.v1 v1.66.3/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= From 1faab96b881e053ffc7c4d49a5e90cd93efd001e Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 20 Jun 2025 18:30:21 -0700 Subject: [PATCH 28/51] move .drsconfig to .drs/config --- client/config.go | 6 +++--- client/drs-map.go | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/client/config.go b/client/config.go index b3d1ddc..38c91c1 100644 --- a/client/config.go +++ b/client/config.go @@ -23,18 +23,18 @@ type Config struct { } const ( - DRS_CONFIG = ".drsconfig" + DRS_CONFIG = "config" ) func LoadConfig() (*Config, error) { - //look in Git base dir and find .drsconfig file + //look in Git base dir and find .drs/config file topLevel, err := utils.GitTopLevel() if err != nil { return nil, err } - configPath := filepath.Join(topLevel, DRS_CONFIG) + configPath := filepath.Join(topLevel, DRS_DIR, DRS_CONFIG) //check if config exists reader, err := os.Open(configPath) diff --git a/client/drs-map.go b/client/drs-map.go index c8ba437..3b8990c 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -30,6 +30,7 @@ type LfsLsOutput struct { const ( LFS_OBJS_PATH = ".git/lfs/objects" DRS_DIR = ".drs" + // FIXME: should this be /lfs/objects or just /objects? 
DRS_OBJS_PATH = DRS_DIR + "/lfs/objects" ) @@ -233,6 +234,7 @@ func GetObjectPath(basePath string, oid string) (string, error) { func getStagedFiles() ([]string, error) { // chose exec here for performance over using go-git // tradeoff is very rare concurrency problems which currently aren't relevant to the pre-commit + // FIXME: filter out files that have been deleted? Bug: if git rm, the DRS object still created cmd := exec.Command("git", "diff", "--name-only", "--cached") var out bytes.Buffer cmd.Stdout = &out From 183de90ee3f02a8bb86ab0f6d74045dfe8f0831b Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 23 Jun 2025 15:08:49 -0700 Subject: [PATCH 29/51] clean up commands --- .drsconfig | 9 ------ cmd/add/main.go | 27 ------------------ cmd/filterprocess/main.go | 59 --------------------------------------- cmd/list/main.go | 42 ---------------------------- cmd/pull/main.go | 21 -------------- cmd/push/main.go | 21 -------------- cmd/register/main.go | 38 ------------------------- cmd/root.go | 24 +--------------- cmd/track/main.go | 21 -------------- cmd/transfer/main.go | 2 +- 10 files changed, 2 insertions(+), 262 deletions(-) delete mode 100644 .drsconfig delete mode 100644 cmd/add/main.go delete mode 100644 cmd/filterprocess/main.go delete mode 100644 cmd/list/main.go delete mode 100644 cmd/pull/main.go delete mode 100644 cmd/push/main.go delete mode 100644 cmd/register/main.go delete mode 100644 cmd/track/main.go diff --git a/.drsconfig b/.drsconfig deleted file mode 100644 index 8f4aa8e..0000000 --- a/.drsconfig +++ /dev/null @@ -1,9 +0,0 @@ -{ - "queryServer": { - "baseURL": "https://caliper-training.ohsu.edu" - }, - "writeServer": { - "baseURL": "https://caliper-training.ohsu.edu" - }, - "gen3Profile": "" -} diff --git a/cmd/add/main.go b/cmd/add/main.go deleted file mode 100644 index 0a63c26..0000000 --- a/cmd/add/main.go +++ /dev/null @@ -1,27 +0,0 @@ -package add - -import ( - "fmt" - "path/filepath" - - "github.com/spf13/cobra" -) - -// Cmd line 
declaration -var Cmd = &cobra.Command{ - Use: "add", - Short: "Add a file", - Long: ``, - Args: cobra.MinimumNArgs(0), - RunE: func(cmd *cobra.Command, args []string) error { - for _, fileArg := range args { - matches, err := filepath.Glob(fileArg) - if err == nil { - for _, f := range matches { - fmt.Printf("Adding %s\n", f) - } - } - } - return nil - }, -} diff --git a/cmd/filterprocess/main.go b/cmd/filterprocess/main.go deleted file mode 100644 index 688d4f9..0000000 --- a/cmd/filterprocess/main.go +++ /dev/null @@ -1,59 +0,0 @@ -package filterprocess - -import ( - "fmt" - "io" - "log" - "os" - - "github.com/git-lfs/git-lfs/v3/git" - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "filter-process", - Short: "filter process", - Long: ``, - Args: cobra.MinimumNArgs(0), - RunE: func(cmd *cobra.Command, args []string) error { - s := git.NewFilterProcessScanner(os.Stdin, os.Stdout) - err := s.Init() - if err != nil { - return err - } - - caps, err := s.NegotiateCapabilities() - if err != nil { - return err - } - log.Printf("Caps: %#v\n", caps) - log.Printf("Running filter-process: %s\n", args) - - for s.Scan() { - req := s.Request() - switch req.Header["command"] { - case "clean": - log.Printf("Request to clean %#v %s\n", req.Payload, req.Header["pathname"]) - - clean(os.Stdout, req.Payload, req.Header["pathname"], -1) - - case "smudge": - log.Printf("Request to smudge %s %s\n", req.Payload, req.Header["pathname"]) - case "list_available_blobs": - log.Printf("Request for list_available_blobs\n") - - default: - return fmt.Errorf("don't know what to do: %s", req.Header["command"]) - } - log.Printf("Request: %#v\n", req) - } - - return nil - }, -} - -func clean(to io.Writer, from io.Reader, fileName string, fileSize int64) error { - - return nil -} diff --git a/cmd/list/main.go b/cmd/list/main.go deleted file mode 100644 index 27f36ae..0000000 --- a/cmd/list/main.go +++ /dev/null @@ -1,42 +0,0 @@ -package list - -import ( - 
"fmt" - "os" - "path/filepath" - - "github.com/bmeg/git-drs/utils" - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "list", - Aliases: []string{"ls"}, - Short: "list files", - Long: ``, - Args: cobra.MinimumNArgs(0), - RunE: func(cmd *cobra.Command, args []string) error { - gitTop, err := utils.GitTopLevel() - if err != nil { - fmt.Printf("Error: %s\n", err) - return err - } - manifestDir := filepath.Join(gitTop, "MANIFEST") - fmt.Printf("Manifest: %s\n", manifestDir) - s, err := os.Stat(manifestDir) - if err != nil { - return err - } - if s.IsDir() { - files, err := filepath.Glob(filepath.Join(manifestDir, "*")) - if err != nil { - return err - } - for _, i := range files { - fmt.Printf("%s\n", i) - } - } - return nil - }, -} diff --git a/cmd/pull/main.go b/cmd/pull/main.go deleted file mode 100644 index 4d8d460..0000000 --- a/cmd/pull/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package pull - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "pull", - Short: "Pull a file", - Long: ``, - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - for i := range args { - fmt.Printf("Pulling file %s\n", args[i]) - } - return nil - }, -} diff --git a/cmd/push/main.go b/cmd/push/main.go deleted file mode 100644 index 0afeef7..0000000 --- a/cmd/push/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package push - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "push", - Short: "Push a repo", - Long: ``, - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - for i := range args { - fmt.Printf("Pushing %s\n", args[i]) - } - return nil - }, -} diff --git a/cmd/register/main.go b/cmd/register/main.go deleted file mode 100644 index 3d8cbdb..0000000 --- a/cmd/register/main.go +++ /dev/null @@ -1,38 +0,0 @@ -package register - -import ( - "fmt" - "log" - - 
"github.com/bmeg/git-drs/client" - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "register", - Short: "", - Long: `accepts one parameter: `, - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - log.Printf("Registering file %s", args[0]) - - cfg, err := client.LoadConfig() - if err != nil { - fmt.Println("error loading config:", err) - return err - } - client, err := client.NewIndexDClient(cfg.QueryServer.BaseURL) - if err != nil { - return err - } - - //upload the file, name would probably be relative to the base of the git repo - client.RegisterFile(args[0]) - - //remove later - _ = client - - return nil - }, -} diff --git a/cmd/root.go b/cmd/root.go index ff0731b..62fe02c 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -1,18 +1,10 @@ package cmd import ( - "os" - - "github.com/bmeg/git-drs/cmd/add" "github.com/bmeg/git-drs/cmd/download" - "github.com/bmeg/git-drs/cmd/filterprocess" "github.com/bmeg/git-drs/cmd/initialize" - "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/precommit" - "github.com/bmeg/git-drs/cmd/pull" - "github.com/bmeg/git-drs/cmd/push" "github.com/bmeg/git-drs/cmd/query" - "github.com/bmeg/git-drs/cmd/register" "github.com/bmeg/git-drs/cmd/transfer" "github.com/spf13/cobra" ) @@ -28,24 +20,10 @@ var RootCmd = &cobra.Command{ } func init() { - RootCmd.AddCommand(add.Cmd) RootCmd.AddCommand(download.Cmd) - RootCmd.AddCommand(filterprocess.Cmd) - RootCmd.AddCommand(genBashCompletionCmd) RootCmd.AddCommand(initialize.Cmd) - RootCmd.AddCommand(list.Cmd) RootCmd.AddCommand(precommit.Cmd) - RootCmd.AddCommand(push.Cmd) - RootCmd.AddCommand(pull.Cmd) RootCmd.AddCommand(query.Cmd) - RootCmd.AddCommand(register.Cmd) RootCmd.AddCommand(transfer.Cmd) -} - -var genBashCompletionCmd = &cobra.Command{ - Use: "bash", - Short: "Generate bash completions file", - Run: func(cmd *cobra.Command, args []string) { - RootCmd.GenBashCompletion(os.Stdout) - }, + 
RootCmd.CompletionOptions.HiddenDefaultCmd = true } diff --git a/cmd/track/main.go b/cmd/track/main.go deleted file mode 100644 index 31d1964..0000000 --- a/cmd/track/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package track - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// Cmd line declaration -var Cmd = &cobra.Command{ - Use: "track", - Short: "Set a file track filter", - Long: ``, - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - for i := range args { - fmt.Printf("Track %s\n", args[i]) - } - return nil - }, -} diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 2b804eb..b1f4ef0 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -167,7 +167,7 @@ var Cmd = &cobra.Command{ // download file using the DRS object myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", downloadMsg.Oid, indexdObj)) - // FIXME: generalize access ID method, + // FIXME: generalize access ID method // naively get access ID from splitting first path into : accessId := strings.Split(indexdObj.URLs[0], ":")[0] myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) From 0bd25ae714370abd978259229ba409b4a4977769 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 23 Jun 2025 16:28:29 -0700 Subject: [PATCH 30/51] remove urls from config, pull from gen3Profile --- client/config.go | 2 -- client/indexd.go | 19 ++++++++++--------- cmd/download/main.go | 13 ++----------- cmd/query/main.go | 11 +---------- cmd/transfer/main.go | 9 +-------- 5 files changed, 14 insertions(+), 40 deletions(-) diff --git a/client/config.go b/client/config.go index 38c91c1..d57209f 100644 --- a/client/config.go +++ b/client/config.go @@ -15,8 +15,6 @@ type Server struct { } type Config struct { - QueryServer Server `json:"queryServer"` - WriteServer Server `json:"writeServer"` Gen3Profile string `json:"gen3Profile"` Gen3Project string `json:"gen3Project"` 
Gen3Bucket string `json:"gen3Bucket"` diff --git a/client/indexd.go b/client/indexd.go index 7ce382d..6c1121a 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -100,24 +100,25 @@ type HashInfo struct { ETag string `json:"etag,omitempty"` } -func NewIndexDClient(base string) (ObjectStoreClient, error) { - baseURL, err := url.Parse(base) - // print baseURL - if err != nil { - return nil, err - } - +func NewIndexDClient() (ObjectStoreClient, error) { cfg, err := LoadConfig() if err != nil { return nil, err } - // get the gen3Profile, gen3Project, and gen3Bucket from the config + // get the gen3Profile and baseURL profile := cfg.Gen3Profile if profile == "" { return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") } + profileConfig = conf.ParseConfig(profile) + baseUrl, err := url.Parse(profileConfig.APIEndpoint) + if err != nil { + return nil, fmt.Errorf("error parsing base URL from profile %s: %v", profile, err) + } + + // get the gen3Project and gen3Bucket from the config projectId := cfg.Gen3Project if projectId == "" { return nil, fmt.Errorf("No gen3 project specified. 
Please provide a gen3Project key in your .drsconfig") @@ -131,7 +132,7 @@ func NewIndexDClient(base string) (ObjectStoreClient, error) { // fmt.Printf("Base URL: %s\n", baseURL.String()) // fmt.Printf("Profile: %s\n", profile) - return &IndexDClient{baseURL, profile, projectId, bucketName}, err + return &IndexDClient{baseUrl, profile, projectId, bucketName}, err } // DownloadFile implements ObjectStoreClient diff --git a/cmd/download/main.go b/cmd/download/main.go index bbe1899..2550dd5 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -24,23 +24,14 @@ var Cmd = &cobra.Command{ RunE: func(cmd *cobra.Command, args []string) error { drsId := args[0] accessId := args[1] - cfg, err := client.LoadConfig() - if err != nil { - return err - } - - baseURL := cfg.QueryServer.BaseURL - - // print random string to stdout - fmt.Println("Using server:", cfg.QueryServer.BaseURL) - client, err := client.NewIndexDClient(baseURL) + client, err := client.NewIndexDClient() if err != nil { fmt.Printf("\nerror creating indexd client: %s", err) return err } - fmt.Println("created indexd client:", cfg.QueryServer.BaseURL) + fmt.Println("created indexd client") if dstPath == "" { diff --git a/cmd/query/main.go b/cmd/query/main.go index b9463d7..0c6714b 100644 --- a/cmd/query/main.go +++ b/cmd/query/main.go @@ -15,16 +15,7 @@ var Cmd = &cobra.Command{ Long: "Query DRS server by DRS ID", Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - - cfg, err := client.LoadConfig() - if err != nil { - return err - } - - //fix this later - baseURL := cfg.QueryServer.BaseURL - - client, err := client.NewIndexDClient(baseURL) + client, err := client.NewIndexDClient() if err != nil { return err } diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index b1f4ef0..7b732e2 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -120,14 +120,7 @@ var Cmd = &cobra.Command{ // Log for debugging myLogger.Log(fmt.Sprintf("Handling init: %s", msg)) - // 
setup indexd client - cfg, err := client.LoadConfig() - if err != nil { - myLogger.Log(fmt.Sprintf("Error loading config: %s", err)) - } - - baseURL := cfg.QueryServer.BaseURL - drsClient, err = client.NewIndexDClient(baseURL) + drsClient, err = client.NewIndexDClient() if err != nil { myLogger.Log(fmt.Sprintf("Error creating indexd client: %s", err)) continue From c7aab2cf4aebff37e8d79bf59ee2a4aba872efaa Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 23 Jun 2025 18:01:55 -0700 Subject: [PATCH 31/51] refactor clone to use indexd records from server // split out indexd structs --- client/indexd-objects.go | 89 +++++++++++++++++++++++ client/indexd.go | 148 ++++++++++++++++++--------------------- client/interface.go | 2 +- cmd/download/main.go | 31 ++------ cmd/transfer/main.go | 32 +-------- 5 files changed, 166 insertions(+), 136 deletions(-) create mode 100644 client/indexd-objects.go diff --git a/client/indexd-objects.go b/client/indexd-objects.go new file mode 100644 index 0000000..68e80cf --- /dev/null +++ b/client/indexd-objects.go @@ -0,0 +1,89 @@ +package client + +// HashInfo represents file hash information as per OpenAPI spec +// Patterns are documented for reference, but not enforced at struct level +// md5: ^[0-9a-f]{32}$ +// sha: ^[0-9a-f]{40}$ +// sha256: ^[0-9a-f]{64}$ +// sha512: ^[0-9a-f]{128}$ +// crc: ^[0-9a-f]{8}$ +// etag: ^[0-9a-f]{32}(-\d+)?$ +type HashInfo struct { + MD5 string `json:"md5,omitempty"` + SHA string `json:"sha,omitempty"` + SHA256 string `json:"sha256,omitempty"` + SHA512 string `json:"sha512,omitempty"` + CRC string `json:"crc,omitempty"` + ETag string `json:"etag,omitempty"` +} + +// subset of the OpenAPI spec for the InputInfo object in indexd +// https://github.com/uc-cdis/indexd/blob/master/openapis/swagger.yaml +// TODO: make another object based on VersionInputInfo that has content_created_date and so can handle a POST of dates via indexd/ +type IndexdRecord struct { + // Unique identifier for the record (UUID) + Did 
string `json:"did"` + + // Human-readable file name + FileName string `json:"file_name,omitempty"` + + // List of URLs where the file can be accessed + URLs []string `json:"urls"` + + // Hashes of the file (e.g., md5, sha256) + Size int64 `json:"size"` + + // List of access control lists (ACLs) + ACL []string `json:"acl,omitempty"` + + // List of authorization policies + Authz []string `json:"authz,omitempty"` + + Hashes HashInfo `json:"hashes,omitempty"` + + // Additional metadata as key-value pairs + Metadata map[string]string `json:"metadata,omitempty"` + + // Version of the record (optional) + Version string `json:"version,omitempty"` + + // // Created timestamp (RFC3339 format) + // ContentCreatedDate string `json:"content_created_date,omitempty"` + + // // Updated timestamp (RFC3339 format) + // ContentUpdatedDate string `json:"content_updated_date,omitempty"` +} + +type ListRecords struct { + IDs []string `json:"ids"` + Records []OutputInfo `json:"records"` + Size int64 `json:"size"` + Start int64 `json:"start"` + Limit int64 `json:"limit"` + FileName string `json:"file_name"` + URLs []string `json:"urls"` + ACL []string `json:"acl"` + Authz []string `json:"authz"` + Hashes HashInfo `json:"hashes"` + Metadata map[string]interface{} `json:"metadata"` + Version string `json:"version"` +} + +type OutputInfo struct { + Did string `json:"did"` + BaseID string `json:"baseid"` + Rev string `json:"rev"` + Form string `json:"form"` + Size int64 `json:"size"` + FileName string `json:"file_name"` + Version string `json:"version"` + Uploader string `json:"uploader"` + URLs []string `json:"urls"` + ACL []string `json:"acl"` + Authz []string `json:"authz"` + Hashes HashInfo `json:"hashes"` + UpdatedDate string `json:"updated_date"` + CreatedDate string `json:"created_date"` + Metadata map[string]interface{} `json:"metadata"` + URLsMetadata map[string]interface{} `json:"urls_metadata"` +} diff --git a/client/indexd.go b/client/indexd.go index 6c1121a..ccb8c36 100644 --- 
a/client/indexd.go +++ b/client/indexd.go @@ -27,78 +27,9 @@ type IndexDClient struct { bucketName string } -// subset of the OpenAPI spec for the InputInfo object in indexd -// https://github.com/uc-cdis/indexd/blob/master/openapis/swagger.yaml -// TODO: make another object based on VersionInputInfo that has content_created_date and so can handle a POST of dates via indexd/ -type IndexdRecord struct { - // Unique identifier for the record (UUID) - Did string `json:"did"` - - // Human-readable file name - FileName string `json:"file_name,omitempty"` - - // List of URLs where the file can be accessed - URLs []string `json:"urls"` - - // Hashes of the file (e.g., md5, sha256) - Size int64 `json:"size"` - - // List of access control lists (ACLs) - ACL []string `json:"acl,omitempty"` - - // List of authorization policies - Authz []string `json:"authz,omitempty"` - - Hashes HashInfo `json:"hashes,omitempty"` - - // Additional metadata as key-value pairs - Metadata map[string]string `json:"metadata,omitempty"` - - // Version of the record (optional) - Version string `json:"version,omitempty"` - - // // Created timestamp (RFC3339 format) - // ContentCreatedDate string `json:"content_created_date,omitempty"` - - // // Updated timestamp (RFC3339 format) - // ContentUpdatedDate string `json:"content_updated_date,omitempty"` -} - -type OutputInfo struct { - Did string `json:"did"` - BaseID string `json:"baseid"` - Rev string `json:"rev"` - Form string `json:"form"` - Size int64 `json:"size"` - FileName string `json:"file_name"` - Version string `json:"version"` - Uploader string `json:"uploader"` - URLs []string `json:"urls"` - ACL []string `json:"acl"` - Authz []string `json:"authz"` - Hashes HashInfo `json:"hashes"` - UpdatedDate string `json:"updated_date"` - CreatedDate string `json:"created_date"` - Metadata map[string]interface{} `json:"metadata"` - URLsMetadata map[string]interface{} `json:"urls_metadata"` -} - -// HashInfo represents file hash information as per 
OpenAPI spec -// Patterns are documented for reference, but not enforced at struct level -// md5: ^[0-9a-f]{32}$ -// sha: ^[0-9a-f]{40}$ -// sha256: ^[0-9a-f]{64}$ -// sha512: ^[0-9a-f]{128}$ -// crc: ^[0-9a-f]{8}$ -// etag: ^[0-9a-f]{32}(-\d+)?$ -type HashInfo struct { - MD5 string `json:"md5,omitempty"` - SHA string `json:"sha,omitempty"` - SHA256 string `json:"sha256,omitempty"` - SHA512 string `json:"sha512,omitempty"` - CRC string `json:"crc,omitempty"` - ETag string `json:"etag,omitempty"` -} +//////////////////// +// CLIENT METHODS // +//////////////////// func NewIndexDClient() (ObjectStoreClient, error) { cfg, err := LoadConfig() @@ -129,28 +60,52 @@ func NewIndexDClient() (ObjectStoreClient, error) { return nil, fmt.Errorf("No gen3 bucket specified. Please provide a gen3Bucket key in your .drsconfig") } - // fmt.Printf("Base URL: %s\n", baseURL.String()) - // fmt.Printf("Profile: %s\n", profile) - return &IndexDClient{baseUrl, profile, projectId, bucketName}, err } // DownloadFile implements ObjectStoreClient -func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) { +func (cl *IndexDClient) DownloadFile(oid string) (*drs.AccessURL, error) { // setup logging myLogger, err := NewLogger("") if err != nil { // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } - defer myLogger.Close() // Ensures cleanup - myLogger.Log("download file started for id: %s", id) + defer myLogger.Close() + myLogger.Log("requested download of file oid %s", oid) + + // get the DRS object using the OID + // FIXME: how do we not hardcode sha256 here? 
+ records, err := cl.queryIndexdByHash("sha256", oid) + if err != nil { + myLogger.Log(fmt.Sprintf("Error getting DRS info for OID %s: %v", oid, err)) + // create failure message and send it back + return &drs.AccessURL{}, fmt.Errorf("Error retrieving DRS info: " + err.Error()) + } + + if len(records.Records) != 1 { + myLogger.Log(fmt.Sprintf("Error: expected 1 record for OID %s, got %d records", oid, len(records.Records))) + myLogger.Log(fmt.Sprintf("Records: %v", records.Records)) + return nil, fmt.Errorf("expected 1 record for OID %s, got %d records", oid, len(records.Records)) + } + indexdObj := records.Records[0] + + // get LFS objects path to write to + dstPath, err := GetObjectPath(LFS_OBJS_PATH, oid) + + // download file using the DRS object + myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, indexdObj)) + + // FIXME: generalize access ID method + // naively get access ID from splitting first path into : + accessId := strings.Split(indexdObj.URLs[0], ":")[0] + myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, indexdObj.FileName)) // get file from indexd a := *cl.base - a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id, "access", access_id) + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", indexdObj.Did, "access", accessId) - myLogger.Log("using API: %s\n", a.String()) + myLogger.Log("using endpoint: %s\n", a.String()) // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) @@ -254,7 +209,7 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { // if upload unsuccessful (panic or error), delete record from indexd defer func() { - + // delete indexd record if panic if r := recover(); r != nil { // TODO: this panic isn't getting triggered myLogger.Log("panic occurred, cleaning up indexd record for oid %s", oid) @@ -270,6 +225,8 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { 
myLogger.Log("exiting: %v", r) panic(r) // re-throw if you want the CLI to still terminate } + + // delete indexd record if error thrown if err != nil { myLogger.Log("registration incomplete, cleaning up indexd record for oid %s", oid) err = cl.deleteIndexdRecord(drsObj.Id) @@ -330,6 +287,10 @@ func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { return &out, nil } +///////////// +// HELPERS // +///////////// + func addGen3AuthHeader(req *http.Request, profile string) error { // extract accessToken from gen3 profile and insert into header of request profileConfig = conf.ParseConfig(profile) @@ -465,3 +426,28 @@ func (cl *IndexDClient) deleteIndexdRecord(did string) error { } return nil } + +func (cl *IndexDClient) queryIndexdByHash(hashType string, hash string) (ListRecords, error) { + // search via hash https://calypr-dev.ohsu.edu/index/index?hash=sha256:52d9baed146de4895a5c9c829e7765ad349c4124ba43ae93855dbfe20a7dd3f0 + + // get + url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base, hashType, hash) + resp, err := http.Get(url) + if err != nil { + return ListRecords{}, fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return ListRecords{}, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) + } + + records := ListRecords{} + err = json.Unmarshal(body, &records) + if err != nil { + return ListRecords{}, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) + } + + return records, nil +} diff --git a/client/interface.go b/client/interface.go index 652acdc..2e28b00 100644 --- a/client/interface.go +++ b/client/interface.go @@ -10,5 +10,5 @@ type ObjectStoreClient interface { RegisterFile(oid string) (*drs.DRSObject, error) //Download file given a DRS ID - DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) + DownloadFile(oid string) (*drs.AccessURL, error) } diff 
--git a/cmd/download/main.go b/cmd/download/main.go index 2550dd5..ef00dd6 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -17,13 +17,12 @@ var ( // Cmd line declaration // Cmd line declaration var Cmd = &cobra.Command{ - Use: "download ", - Short: "Download file using DRS ID and access ID", - Long: "Download file using DRS ID and access ID. The access ID is the access method used to download the file.", - Args: cobra.ExactArgs(2), + Use: "download ", + Short: "Download file using file object ID", + Long: "Download file using file object ID (sha256 hash). Use lfs ls-files to get oid", + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - drsId := args[0] - accessId := args[1] + oid := args[0] client, err := client.NewIndexDClient() if err != nil { @@ -33,21 +32,9 @@ var Cmd = &cobra.Command{ fmt.Println("created indexd client") - if dstPath == "" { - - drsObj, err = client.QueryID(drsId) - if err != nil { - fmt.Printf("\nerror querying DRS ID %s: %s", drsId, err) - return err - } - dstPath = drsObj.Name - } - - fmt.Println("downloading file:", drsObj.Name) - - _, err = client.DownloadFile(drsId, accessId, dstPath) + _, err = client.DownloadFile(oid) if err != nil { - fmt.Printf("\nerror downloading file %s: %s", drsId, err) + fmt.Printf("\nerror downloading file object ID %s: %s", oid, err) return err } @@ -56,7 +43,3 @@ var Cmd = &cobra.Command{ return nil }, } - -func init() { - Cmd.Flags().StringVarP(&dstPath, "dstPath", "d", "", "Optional destination file path") -} diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 7b732e2..f8b69ff 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -6,7 +6,6 @@ import ( "fmt" "log" "os" - "strings" "github.com/bmeg/git-drs/client" "github.com/spf13/cobra" @@ -140,34 +139,7 @@ var Cmd = &cobra.Command{ continue } - // get the DRS object using the OID - indexdObj, err := client.DrsInfoFromOid(downloadMsg.Oid) - if err != nil { - myLogger.Log(fmt.Sprintf("Error 
getting DRS info for OID %s: %v", downloadMsg.Oid, err)) - // create failure message and send it back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: downloadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error retrieving DRS info: " + err.Error(), - }, - } - encoder.Encode(errorResponse) - continue - } - - // download file using the DRS object - myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", downloadMsg.Oid, indexdObj)) - - // FIXME: generalize access ID method - // naively get access ID from splitting first path into : - accessId := strings.Split(indexdObj.URLs[0], ":")[0] - myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", downloadMsg.Oid, accessId, indexdObj.FileName)) - - // download the file using the indexd client - dstPath, err := client.GetObjectPath(client.LFS_OBJS_PATH, downloadMsg.Oid) - _, err = drsClient.DownloadFile(indexdObj.Did, accessId, dstPath) + accessUrl, err := drsClient.DownloadFile(downloadMsg.Oid) if err != nil { myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) @@ -189,7 +161,7 @@ var Cmd = &cobra.Command{ completeMsg := CompleteMessage{ Event: "complete", Oid: downloadMsg.Oid, - Path: dstPath, + Path: accessUrl.URL, } encoder.Encode(completeMsg) From b8ae6ef4b7a163b63eaaca3d9323f1559ac65a33 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 24 Jun 2025 12:06:49 -0700 Subject: [PATCH 32/51] patch indexd use for download // update ObjectStoreClient --- client/indexd.go | 43 ++++++++++-------------- client/interface.go | 13 +++++--- cmd/download/main.go | 2 +- cmd/query/main.go | 2 +- cmd/transfer/main.go | 79 +++++++++++++++++++++++++++----------------- 5 files changed, 75 insertions(+), 64 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index ccb8c36..ba74ab0 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -63,8 +63,8 @@ func NewIndexDClient() (ObjectStoreClient, error) { return 
&IndexDClient{baseUrl, profile, projectId, bucketName}, err } -// DownloadFile implements ObjectStoreClient -func (cl *IndexDClient) DownloadFile(oid string) (*drs.AccessURL, error) { +// GetDownloadURL implements ObjectStoreClient +func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // setup logging myLogger, err := NewLogger("") if err != nil { @@ -90,9 +90,6 @@ func (cl *IndexDClient) DownloadFile(oid string) (*drs.AccessURL, error) { } indexdObj := records.Records[0] - // get LFS objects path to write to - dstPath, err := GetObjectPath(LFS_OBJS_PATH, oid) - // download file using the DRS object myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, indexdObj)) @@ -134,56 +131,50 @@ func (cl *IndexDClient) DownloadFile(oid string) (*drs.AccessURL, error) { return nil, err } - out := drs.AccessURL{} - err = json.Unmarshal(body, &out) + accessUrl := drs.AccessURL{} + err = json.Unmarshal(body, &accessUrl) if err != nil { return nil, fmt.Errorf("unable to unmarshal response into drs.AccessURL, response looks like: %s", body) } - myLogger.Log("unmarshaled response into AccessURL struct") + myLogger.Log("unmarshaled response into DRS AccessURL") - // Extract the signed URL from the response - signedURL := out.URL - if signedURL == "" { - return nil, fmt.Errorf("signed URL not found in response.") - } + return &accessUrl, nil +} +func DownloadSignedUrl(signedURL string, dstPath string) error { // Download the file using the signed URL fileResponse, err := http.Get(signedURL) if err != nil { - return nil, err + return err } defer fileResponse.Body.Close() - myLogger.Log("file download response status: %s\n", fileResponse.Status) - // Check if the response status is OK if fileResponse.StatusCode != http.StatusOK { - return nil, fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) + return fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) } // Create the 
destination directory if it doesn't exist err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) if err != nil { - return nil, err + return err } // Create the destination file dstFile, err := os.Create(dstPath) if err != nil { - return nil, err + return err } defer dstFile.Close() // Write the file content to the destination file _, err = io.Copy(dstFile, fileResponse.Body) if err != nil { - return nil, err + return err } - myLogger.Log("File written to %s\n", dstFile.Name()) - - return &out, nil + return nil } // RegisterFile implements ObjectStoreClient. @@ -257,7 +248,7 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { return drsObj, nil } -func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { +func (cl *IndexDClient) GetDRSObject(id string) (*drs.DRSObject, error) { a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id) @@ -365,7 +356,7 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. myLogger.Log("POST successful: %s", response.Status) // query and return DRS object - drsObj, err := cl.QueryID(indexdObj.Did) + drsObj, err := cl.GetDRSObject(indexdObj.Did) if err != nil { return nil, fmt.Errorf("error querying DRS ID %s: %v", drsId, err) } @@ -374,7 +365,7 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. 
} func (cl *IndexDClient) deleteIndexdRecord(did string) error { - // get the indexd record, can't use queryId cause the DRS object doesn't contain the rev + // get the indexd record, can't use GetDRSObject cause the DRS object doesn't contain the rev a := *cl.base a.Path = filepath.Join(a.Path, "index", did) diff --git a/client/interface.go b/client/interface.go index 2e28b00..d92ffbc 100644 --- a/client/interface.go +++ b/client/interface.go @@ -3,12 +3,15 @@ package client import "github.com/bmeg/git-drs/drs" type ObjectStoreClient interface { - //Given a DRS string ID, retrieve the object describing it - QueryID(id string) (*drs.DRSObject, error) + // Given a DRS string ID, retrieve the object describing it + // corresponds to /ga4gh/drs/v1/objects + GetDRSObject(id string) (*drs.DRSObject, error) - //Put file into object storage and obtain a DRS record pointing to it + // Put file into object storage and obtain a DRS record pointing to it + // no DRS write endpoint exists, so this is custom code RegisterFile(oid string) (*drs.DRSObject, error) - //Download file given a DRS ID - DownloadFile(oid string) (*drs.AccessURL, error) + // Get a signed url given a DRS ID + // corresponds to /ga4gh/drs/v1/objects/{object_id}/access/{access_id} + GetDownloadURL(oid string) (*drs.AccessURL, error) } diff --git a/cmd/download/main.go b/cmd/download/main.go index ef00dd6..663c320 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -32,7 +32,7 @@ var Cmd = &cobra.Command{ fmt.Println("created indexd client") - _, err = client.DownloadFile(oid) + _, err = client.GetDownloadURL(oid) if err != nil { fmt.Printf("\nerror downloading file object ID %s: %s", oid, err) return err diff --git a/cmd/query/main.go b/cmd/query/main.go index 0c6714b..1b691c3 100644 --- a/cmd/query/main.go +++ b/cmd/query/main.go @@ -20,7 +20,7 @@ var Cmd = &cobra.Command{ return err } - obj, err := client.QueryID(args[0]) + obj, err := client.GetDRSObject(args[0]) if err != nil { return err } 
diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index f8b69ff..1866960 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -135,33 +135,46 @@ var Cmd = &cobra.Command{ // get download message var downloadMsg DownloadMessage if err := json.Unmarshal(scanner.Bytes(), &downloadMsg); err != nil { - myLogger.Log(fmt.Sprintf("Error parsing downloadMessage: %v\n", err)) + errMsg := fmt.Sprintf("Error parsing downloadMessage: %v\n", err) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) continue } - accessUrl, err := drsClient.DownloadFile(downloadMsg.Oid) + // get signed url + accessUrl, err := drsClient.GetDownloadURL(downloadMsg.Oid) if err != nil { - myLogger.Log(fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)) - - // create failure message and send it back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: downloadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error downloading file: " + err.Error(), - }, - } - encoder.Encode(errorResponse) + errMsg := fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) + } + if accessUrl.URL == "" { + errMsg := fmt.Sprintf("Unable to get access URL %s", downloadMsg.Oid) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) + } + + // download signed url + dstPath, err := client.GetObjectPath(client.LFS_OBJS_PATH, downloadMsg.Oid) + if err != nil { + errMsg := fmt.Sprintf("Error getting destination path for OID %s: %v", downloadMsg.Oid, err) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) continue } - myLogger.Log(fmt.Sprintf("Download for OID %s complete", downloadMsg.Oid)) + err = client.DownloadSignedUrl(accessUrl.URL, dstPath) + if err != nil { + errMsg := fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, 
downloadMsg.Oid, errMsg) + } // send success message back + myLogger.Log(fmt.Sprintf("Download for OID %s complete", downloadMsg.Oid)) completeMsg := CompleteMessage{ Event: "complete", Oid: downloadMsg.Oid, - Path: accessUrl.URL, + Path: dstPath, } encoder.Encode(completeMsg) @@ -172,27 +185,18 @@ var Cmd = &cobra.Command{ // create UploadMessage from the received message var uploadMsg UploadMessage if err := json.Unmarshal(scanner.Bytes(), &uploadMsg); err != nil { - myLogger.Log(fmt.Sprintf("Error parsing UploadMessage: %v\n", err)) - continue + errMsg := fmt.Sprintf("Error parsing UploadMessage: %v\n", err) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, uploadMsg.Oid, errMsg) } myLogger.Log(fmt.Sprintf("Got UploadMessage: %+v\n", uploadMsg)) // handle the upload via drs client (indexd client) drsObj, err := drsClient.RegisterFile(uploadMsg.Oid) if err != nil { - myLogger.Log(fmt.Sprintf("Error, DRS Object: %+v\n", drsObj)) - - // create failure message and send it to back - errorResponse := ErrorMessage{ - Event: "complete", - Oid: uploadMsg.Oid, - Error: Error{ - Code: 500, - Message: "Error registering file: " + err.Error(), - }, - } - encoder.Encode(errorResponse) - continue + errMsg := fmt.Sprintf("Error registering file: " + err.Error()) + myLogger.Log(errMsg) + WriteErrorMessage(encoder, uploadMsg.Oid, errMsg) } myLogger.Log("creating response message with oid %s", uploadMsg.Oid) @@ -222,3 +226,16 @@ var Cmd = &cobra.Command{ return nil }, } + +func WriteErrorMessage(encoder *json.Encoder, oid string, errMsg string) { + // create failure message and send it back + errorResponse := ErrorMessage{ + Event: "complete", + Oid: oid, + Error: Error{ + Code: 500, + Message: errMsg, + }, + } + encoder.Encode(errorResponse) +} From 0c0313ceaa98fa1912f9f79f089134dfb140d243 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 24 Jun 2025 17:30:36 -0700 Subject: [PATCH 33/51] enable indexd as source of truth // add query by hash to client interface --- 
client/drs-map.go | 38 ++++++++---- client/indexd.go | 137 +++++++++++++++++++++++------------------ client/interface.go | 6 +- cmd/download/main.go | 31 ++++++++-- cmd/initialize/main.go | 75 ++++++++++++++++++++++ cmd/query/main.go | 2 +- 6 files changed, 208 insertions(+), 81 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index 3b8990c..37991dc 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -39,14 +39,17 @@ var ( ) func UpdateDrsObjects() error { - // TODO: only change staged files with new oids instead of writing all objects each time + // init logger logger, err := NewLogger("") if err != nil { log.Fatalf("Failed to open log file: %v", err) } - defer logger.Close() // Ensures cleanup + defer logger.Close() logger.Log("Update to DRS objects started") + // init indexd client + indexdClient, err := NewIndexDClient() + // get all LFS files' info using json // TODO: use git-lfs internally instead of exec? (eg git.GetTrackedFiles) cmd := exec.Command("git", "lfs", "ls-files", "--json") @@ -80,13 +83,25 @@ func UpdateDrsObjects() error { logger.Log("Creating DRS objects for staged files: %v", stagedFiles) // for each LFS file, calculate the DRS ID using repoName and the oid + // assumes that the DRS_OBJS_PATH only contains + // ie that DRS objects is not manually edited, only edited via CLI for _, file := range lfsFiles.Files { // check if the file is staged if _, ok := stagedFilesSet[file.Name]; !ok { continue } - // check if oid already exists + // check hash to see if record already exists in indexd (source of truth) + obj, err := indexdClient.GetObjectByHash(file.OidType, file.Oid) + if err != nil { + return fmt.Errorf("error getting object by hash %s: %v", file.Oid, err) + } + if obj != nil { + logger.Log("Skipping staged file %s: OID %s already exists in indexd", file.Name, file.Oid) + continue + } + + // check if oid already committed to git // TODO: need to determine how to manage indexd file name // right now, chooses the path of 
the first committed copy or // if there's multiple copies in one commit, the first occurrence from ls-files @@ -104,9 +119,10 @@ func UpdateDrsObjects() error { return fmt.Errorf("Staged file %s is not cached. Please unstage the file, then git add the file again", file.Name) } - // FIXME: do we want to hash this with the project ID instead of the repoName? - // TODO: determine git to gen3 project hierarchy mapping - drsId := DrsUUID(repoName, file.Oid) + // if file is in cache, hasn't been committted to git or pushed to indexd, + // create a local DRS object for it + // TODO: determine git to gen3 project hierarchy mapping (eg repo name to project ID) + drsId := DrsUUID(repoName, file.Oid) // FIXME: do we want to hash this with the project ID instead of the repoName? logger.Log("Processing staged file: %s, OID: %s, DRS ID: %s\n", file.Name, file.Oid, drsId) // get file info needed to create indexd record @@ -156,18 +172,14 @@ func UpdateDrsObjects() error { logger.Log("Adding to DRS Objects: %s -> %s", file.Name, indexdObj.Did) // write drs objects to DRS_OBJS_PATH + if err != nil { + return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) + } err = writeDrsObj(indexdObj, file.Oid, drsObjPath) if err != nil { return fmt.Errorf("error writing DRS object for oid %s: %v", file.Oid, err) } logger.Log("Created %s for file %s", drsObjPath, file.Name) - - // stage the object file - cmd = exec.Command("git", "add", drsObjPath) - _, err = cmd.Output() - if err != nil { - return fmt.Errorf("error adding %s to git: %v", drsObjPath, err) - } } return nil diff --git a/client/indexd.go b/client/indexd.go index ba74ab0..0f7d86c 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -31,13 +31,14 @@ type IndexDClient struct { // CLIENT METHODS // //////////////////// +// load repo-level config and return a new IndexDClient func NewIndexDClient() (ObjectStoreClient, error) { cfg, err := LoadConfig() if err != nil { return nil, err } - // get the gen3Profile 
and baseURL + // get the gen3Profile and endpoint profile := cfg.Gen3Profile if profile == "" { return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") @@ -76,31 +77,19 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // get the DRS object using the OID // FIXME: how do we not hardcode sha256 here? - records, err := cl.queryIndexdByHash("sha256", oid) - if err != nil { - myLogger.Log(fmt.Sprintf("Error getting DRS info for OID %s: %v", oid, err)) - // create failure message and send it back - return &drs.AccessURL{}, fmt.Errorf("Error retrieving DRS info: " + err.Error()) - } - - if len(records.Records) != 1 { - myLogger.Log(fmt.Sprintf("Error: expected 1 record for OID %s, got %d records", oid, len(records.Records))) - myLogger.Log(fmt.Sprintf("Records: %v", records.Records)) - return nil, fmt.Errorf("expected 1 record for OID %s, got %d records", oid, len(records.Records)) - } - indexdObj := records.Records[0] + drsObj, err := cl.GetObjectByHash("sha256", oid) // download file using the DRS object - myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, indexdObj)) + myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, drsObj)) // FIXME: generalize access ID method // naively get access ID from splitting first path into : - accessId := strings.Split(indexdObj.URLs[0], ":")[0] - myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, indexdObj.FileName)) + accessId := drsObj.AccessMethods[0].AccessID + myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, drsObj.Name)) // get file from indexd a := *cl.base - a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", indexdObj.Did, "access", accessId) + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", drsObj.Id, "access", accessId) myLogger.Log("using endpoint: %s\n", a.String()) @@ 
-142,41 +131,6 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { return &accessUrl, nil } -func DownloadSignedUrl(signedURL string, dstPath string) error { - // Download the file using the signed URL - fileResponse, err := http.Get(signedURL) - if err != nil { - return err - } - defer fileResponse.Body.Close() - - // Check if the response status is OK - if fileResponse.StatusCode != http.StatusOK { - return fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) - } - - // Create the destination directory if it doesn't exist - err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) - if err != nil { - return err - } - - // Create the destination file - dstFile, err := os.Create(dstPath) - if err != nil { - return err - } - defer dstFile.Close() - - // Write the file content to the destination file - _, err = io.Copy(dstFile, fileResponse.Body) - if err != nil { - return err - } - - return nil -} - // RegisterFile implements ObjectStoreClient. // This function registers a file with gen3 indexd, writes the file to the bucket, // and returns the successful DRS object. 
@@ -244,11 +198,17 @@ func (cl *IndexDClient) RegisterFile(oid string) (*drs.DRSObject, error) { return nil, fmt.Errorf("error uploading file to bucket: %v", err) } + // if all successful, remove temp DRS object + drsPath, err := GetObjectPath(DRS_OBJS_PATH, oid) + if err == nil { + _ = os.Remove(drsPath) + } + // return return drsObj, nil } -func (cl *IndexDClient) GetDRSObject(id string) (*drs.DRSObject, error) { +func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", id) @@ -296,6 +256,8 @@ func addGen3AuthHeader(req *http.Request, profile string) error { return nil } +// given oid, uses saved indexd object +// and implements /index/index POST func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs.DRSObject, error) { // (get indexd object using drs map) indexdObj, err := DrsInfoFromOid(oid) @@ -356,7 +318,7 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. myLogger.Log("POST successful: %s", response.Status) // query and return DRS object - drsObj, err := cl.GetDRSObject(indexdObj.Did) + drsObj, err := cl.GetObject(indexdObj.Did) if err != nil { return nil, fmt.Errorf("error querying DRS ID %s: %v", drsId, err) } @@ -364,8 +326,9 @@ func (cl *IndexDClient) registerIndexdRecord(myLogger Logger, oid string) (*drs. 
return drsObj, nil } +// implements /index{did}?rev={rev} DELETE func (cl *IndexDClient) deleteIndexdRecord(did string) error { - // get the indexd record, can't use GetDRSObject cause the DRS object doesn't contain the rev + // get the indexd record, can't use GetObject cause the DRS object doesn't contain the rev a := *cl.base a.Path = filepath.Join(a.Path, "index", did) @@ -418,27 +381,79 @@ func (cl *IndexDClient) deleteIndexdRecord(did string) error { return nil } -func (cl *IndexDClient) queryIndexdByHash(hashType string, hash string) (ListRecords, error) { +// downloads a file to a specified path using a signed URL +func DownloadSignedUrl(signedURL string, dstPath string) error { + // Download the file using the signed URL + fileResponse, err := http.Get(signedURL) + if err != nil { + return err + } + defer fileResponse.Body.Close() + + // Check if the response status is OK + if fileResponse.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) + } + + // Create the destination directory if it doesn't exist + err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) + if err != nil { + return err + } + + // Create the destination file + dstFile, err := os.Create(dstPath) + if err != nil { + return err + } + defer dstFile.Close() + + // Write the file content to the destination file + _, err = io.Copy(dstFile, fileResponse.Body) + if err != nil { + return err + } + + return nil +} + +// implements /index/index?hash={hashType}:{hash} GET +func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) { // search via hash https://calypr-dev.ohsu.edu/index/index?hash=sha256:52d9baed146de4895a5c9c829e7765ad349c4124ba43ae93855dbfe20a7dd3f0 // get url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base, hashType, hash) resp, err := http.Get(url) if err != nil { - return ListRecords{}, fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err) + return nil, 
fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - return ListRecords{}, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) + return nil, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) } records := ListRecords{} err = json.Unmarshal(body, &records) if err != nil { - return ListRecords{}, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) + return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) + } + + if err != nil { + return nil, fmt.Errorf("Error getting DRS info for OID (%s:%s): %v", hashType, hash, err) } - return records, nil + if len(records.Records) > 1 { + return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records)) + } + // if no records found, return nil to handle in caller + if len(records.Records) == 0 { + return nil, nil + } + drsId := records.Records[0].Did + + drsObj, err := cl.GetObject(drsId) + + return drsObj, nil } diff --git a/client/interface.go b/client/interface.go index d92ffbc..21d61e8 100644 --- a/client/interface.go +++ b/client/interface.go @@ -5,7 +5,11 @@ import "github.com/bmeg/git-drs/drs" type ObjectStoreClient interface { // Given a DRS string ID, retrieve the object describing it // corresponds to /ga4gh/drs/v1/objects - GetDRSObject(id string) (*drs.DRSObject, error) + GetObject(id string) (*drs.DRSObject, error) + + // given a hash, get the object describing it + // no corresponding DRS endpoint exists, so this is custom code + GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) // Put file into object storage and obtain a DRS record pointing to it // no DRS write endpoint exists, so this is custom code diff --git a/cmd/download/main.go b/cmd/download/main.go index 663c320..b4684aa 100644 --- a/cmd/download/main.go +++ b/cmd/download/main.go @@ -24,18 
+24,35 @@ var Cmd = &cobra.Command{ RunE: func(cmd *cobra.Command, args []string) error { oid := args[0] - client, err := client.NewIndexDClient() + indexdClient, err := client.NewIndexDClient() if err != nil { fmt.Printf("\nerror creating indexd client: %s", err) return err } - fmt.Println("created indexd client") + // get signed url + accessUrl, err := indexdClient.GetDownloadURL(oid) + if err != nil { + return fmt.Errorf("Error downloading file for OID %s: %v", oid, err) + } + if accessUrl.URL == "" { + return fmt.Errorf("Unable to get access URL %s", oid) + } - _, err = client.GetDownloadURL(oid) + // download url to destination path or LFS objects if not specified + if dstPath == "" { + dstPath, err = client.GetObjectPath(client.LFS_OBJS_PATH, oid) + } if err != nil { - fmt.Printf("\nerror downloading file object ID %s: %s", oid, err) - return err + return fmt.Errorf("Error getting destination path for OID %s: %v", oid, err) + } + err = client.DownloadSignedUrl(accessUrl.URL, dstPath) + if err != nil { + return fmt.Errorf("Error downloading file for OID %s: %v", oid, err) + } + + if err != nil { + return fmt.Errorf("\nerror downloading file object ID %s: %s", oid, err) } fmt.Println("file downloaded") @@ -43,3 +60,7 @@ var Cmd = &cobra.Command{ return nil }, } + +func init() { + Cmd.Flags().StringVarP(&dstPath, "dst", "d", "", "Destination path to save the downloaded file") +} diff --git a/cmd/initialize/main.go b/cmd/initialize/main.go index 7dcd55c..8095e1f 100644 --- a/cmd/initialize/main.go +++ b/cmd/initialize/main.go @@ -1,11 +1,14 @@ package initialize import ( + "bufio" "fmt" "os" "os/exec" "path/filepath" + "strings" + "github.com/bmeg/git-drs/client" "github.com/spf13/cobra" "github.com/uc-cdis/gen3-client/gen3-client/jwt" ) @@ -23,6 +26,11 @@ var Cmd = &cobra.Command{ Long: "initialize hooks, config required for git-drs", Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { + // add .drs/objects to .gitignore if not 
already present + if err := ensureDrsObjectsIgnore(client.DRS_OBJS_PATH); err != nil { + return fmt.Errorf("Error: %v\n", err) + } + // Create .git/hooks/pre-commit file hooksDir := filepath.Join(".git", "hooks") preCommitPath := filepath.Join(hooksDir, "pre-commit") @@ -57,6 +65,7 @@ var Cmd = &cobra.Command{ fmt.Printf("[ERROR] unable to configure your gen3 profile: %v\n", err) return err } + fmt.Println("Git DRS initialized successfully!") return nil }, @@ -70,3 +79,69 @@ func init() { Cmd.Flags().StringVar(&apiEndpoint, "apiendpoint", "", "Specify the API endpoint of the data commons") Cmd.MarkFlagRequired("apiendpoint") } + +// ensureDrsObjectsIgnore ensures that ".drs/objects" is ignored in .gitignore. +// It creates the file if it doesn't exist, and adds the line if not present. +func ensureDrsObjectsIgnore(ignorePattern string) error { + const ( + gitignorePath = ".gitignore" + ) + + var found bool + + // Check if .gitignore exists + var lines []string + file, err := os.Open(gitignorePath) + if err == nil { + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + // Normalize slashes for comparison, trim spaces + if strings.TrimSpace(line) == ignorePattern { + found = true + } + lines = append(lines, line) + } + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading %s: %w", gitignorePath, err) + } + } else if os.IsNotExist(err) { + // .gitignore doesn't exist, will create it + lines = []string{} + } else { + return fmt.Errorf("could not open %s: %w", gitignorePath, err) + } + + if found { + fmt.Println(client.DRS_OBJS_PATH, "already present in .gitignore") + return nil + } + + // Add the ignore pattern (ensure a blank line before if file is not empty) + if len(lines) > 0 && strings.TrimSpace(lines[len(lines)-1]) != "" { + lines = append(lines, "") + } + lines = append(lines, ignorePattern) + + // Write back the file + f, err := os.OpenFile(gitignorePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 
0666) + if err != nil { + return fmt.Errorf("could not write to %s: %w", gitignorePath, err) + } + defer f.Close() + + w := bufio.NewWriter(f) + for i, l := range lines { + if i > 0 { + _, _ = w.WriteString("\n") + } + _, _ = w.WriteString(l) + } + if err := w.Flush(); err != nil { + return fmt.Errorf("error writing %s: %w", gitignorePath, err) + } + + fmt.Println("Added", client.DRS_OBJS_PATH, "to .gitignore") + return nil +} diff --git a/cmd/query/main.go b/cmd/query/main.go index 1b691c3..5696e5e 100644 --- a/cmd/query/main.go +++ b/cmd/query/main.go @@ -20,7 +20,7 @@ var Cmd = &cobra.Command{ return err } - obj, err := client.GetDRSObject(args[0]) + obj, err := client.GetObject(args[0]) if err != nil { return err } From fa6428c9a32cc9c9385be658b2574218661a7230 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Thu, 26 Jun 2025 16:11:55 -0700 Subject: [PATCH 34/51] transfer logs in .drs // update .gitignore w trailing "\n" on write --- client/logger.go | 3 ++- cmd/initialize/main.go | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/client/logger.go b/client/logger.go index 3e86385..03d81ac 100644 --- a/client/logger.go +++ b/client/logger.go @@ -4,6 +4,7 @@ import ( "fmt" "log" "os" + "path/filepath" ) // Logger wraps a log.Logger and the file it writes to. @@ -15,7 +16,7 @@ type Logger struct { // NewLogger opens the log file and returns a Logger. 
func NewLogger(filename string) (*Logger, error) { if filename == "" { - filename = "transfer.log" + filename = filepath.Join(DRS_DIR, "transfer.log") } file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) diff --git a/cmd/initialize/main.go b/cmd/initialize/main.go index 8095e1f..1c578a4 100644 --- a/cmd/initialize/main.go +++ b/cmd/initialize/main.go @@ -138,6 +138,8 @@ func ensureDrsObjectsIgnore(ignorePattern string) error { } _, _ = w.WriteString(l) } + // Always end with a trailing newline + _, _ = w.WriteString("\n") if err := w.Flush(); err != nil { return fmt.Errorf("error writing %s: %w", gitignorePath, err) } From 6ce57eebcd1e5dacc3e820f3a3010f0857a653b1 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 27 Jun 2025 14:23:36 -0700 Subject: [PATCH 35/51] starter readme --- README.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b9250a3 --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ +# Git DRS + +## About + +Built off [Git LFS](https://git-lfs.com/), Git DRS allows you to store file contents outside of the Git repo such as in a gen3 bucket, while keeping a pointer to the file inside the repo. With Git DRS, data files that are traditionally too large to store in Git can be tracked along with your code in a single Git repo! And the best part: you can still use the same Git commands you know (and possibly love)! Using just a few extra command line tools, Git DRS helps consolidate your data and code into a single location. + +## Basics + +Git DRS functions within Git, so you will only need a few extra commands (`git-lfs pull`, `git-drs init`, etc) that aren't the usual Git commands to do this. Git DRS primarily plugs in the following ways: +- `git add`: during each add, Git LFS processes your file and checks in a pointer to git. 
+- `git commit`: before each commit, Git DRS creates a DRS object that stores the details of your file needed to push. +- `git push` / `git pull`: before each push, Git DRS handles the transfer of each committed file +- `git pull`: Git DRS pulls from the DRS server to your working directory if it doesn't already exists locally + +## Getting Started: Gen3 DRS Server + +### Dependencies + +1. Download [Git LFS](https://git-lfs.com/) (`brew install git-lfs` for Mac users) +2. Configure LFS on your machine + ``` + git lfs install --skip-smudge + ``` +3. Download credentials from your data commons + 1. Login to your data commons + 2. Click your email in the top right to go to your profile + 3. Click Create API Key -> Download JSON + 4. Make note of the path that it downloaded to +4. Download Git DRS + ``` + # build git-drs from source w/ custom gen3-client dependency + git clone --recurse-submodule https://github.com/bmeg/git-drs.git + cd git-drs + go build + + # make the executable accessible + export PATH=$PATH:$(pwd) + ``` +5. Clone an existing DRS repo. If you don't already have one set up see "Setting up your repo" + ``` + cd .. + + # clone test repo + git clone git@source.ohsu.edu:CBDS/git-drs-test-repo.git + cd git-drs-test-repo + ``` +6. Configure general acccess to your data commons + ``` + git drs init --profile --apiendpoint https://data-commons-name.com/ --cred /path/to/downloaded/credentials.json + ``` + +### Project Setup + +When you do `git drs init`, there are a couple things already set up for you... +- `.drs` directory to automatically store any background files and logs needed during execution +- Git settings to sync up the git with gen3 services +- a gen3 profile is created for you so that you can access gen3 + +In your own repo, all you need to setup is a .drsconfig file. 
Once you have created a Git repo, create a `.drs/config` with the following structure + +``` +{ + "gen3Profile": "", + "gen3Project": "", + "gen3Bucket": "" +} +``` +- `gen3Profile` stores the name of the profile you specified in `git drs init` (eg the `` above) +- `gen3Project` is the project ID uniquely describing the data from your project. This will be provided to you by a data commons administrator +- `gen3Bucket` is the name of the bucket that you will be using to store all your files. This will also be provided by a data commons administrator + + +### Quick Start Commands + +**Track Specific File Types** +Store all bam files as a pointer in the Git repo and store actual contents in the DRS server. This is handled by a configuration line in `.gitattributes` +``` +git lfs track "*.bam" +git add .gitattributes +``` + +**Pull Files** +Pull a single file +``` +git lfs pull -I /path/to/file +``` +Pull all non-localized files +``` +git lfs pull +``` \ No newline at end of file From e89f6523da11e607efeea76f25e4632d8f1ab590 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Tue, 1 Jul 2025 16:41:48 -0700 Subject: [PATCH 36/51] Adding a git-drs list command to list files found on DRS server --- client/indexd.go | 45 ++++++++++++++++++++++++++++ client/interface.go | 2 ++ cmd/list/main.go | 71 +++++++++++++++++++++++++++++++++++++++++++++ cmd/root.go | 2 ++ drs/object.go | 6 +++- 5 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 cmd/list/main.go diff --git a/client/indexd.go b/client/indexd.go index 0f7d86c..6c09ce1 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -238,6 +238,51 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { return &out, nil } +func (cl *IndexDClient) ListObjects() (chan *drs.DRSObject, error) { + myLogger, err := NewLogger("") + if err != nil { + return nil, err + } + + a := *cl.base + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects") + + req, err := http.NewRequest("GET", a.String(), nil) + if 
err != nil { + return nil, err + } + + out := make(chan *drs.DRSObject, 10) + + go func() { + defer close(out) + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + myLogger.Log("error: %s", err) + return + } + defer response.Body.Close() + + body, err := io.ReadAll(response.Body) + if err != nil { + myLogger.Log("error: %s", err) + return + } + + page := &drs.DRSPage{} + err = json.Unmarshal(body, &page) + if err != nil { + myLogger.Log("error: %s", err) + return + } + for _, elem := range page.DRSObjects { + out <- &elem + } + }() + return out, nil +} + ///////////// // HELPERS // ///////////// diff --git a/client/interface.go b/client/interface.go index 21d61e8..888e8b9 100644 --- a/client/interface.go +++ b/client/interface.go @@ -7,6 +7,8 @@ type ObjectStoreClient interface { // corresponds to /ga4gh/drs/v1/objects GetObject(id string) (*drs.DRSObject, error) + ListObjects() (chan *drs.DRSObject, error) + // given a hash, get the object describing it // no corresponding DRS endpoint exists, so this is custom code GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) diff --git a/cmd/list/main.go b/cmd/list/main.go new file mode 100644 index 0000000..b4f769f --- /dev/null +++ b/cmd/list/main.go @@ -0,0 +1,71 @@ +package list + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var outJson = false + +var checksumPref = []string{"sha256", "md5", "etag"} + +func getStringPos(q string, a []string) int { + for i, s := range a { + if q == s { + return i + } + } + return -1 +} + +// Pick out the most preferred checksum to display +func getCheckSumStr(obj drs.DRSObject) string { + curPos := len(checksumPref) + 1 + curVal := "" + for _, e := range obj.Checksums { + c := getStringPos(e.Type, checksumPref) + if c != -1 && c < curPos { + curPos = c + curVal = e.Type + ":" + e.Checksum + } + } + return curVal +} + +// Cmd line declaration +var 
Cmd = &cobra.Command{ + Use: "list", + Short: "List DRS entities from server", + Args: cobra.MinimumNArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + client, err := client.NewIndexDClient() + if err != nil { + return err + } + objChan, err := client.ListObjects() + if err != nil { + return err + } + for obj := range objChan { + if outJson { + out, err := json.Marshal(*obj) + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + } else { + fmt.Printf("%s\t%15d\t%-45s\t%s\n", obj.SelfURL, obj.Size, getCheckSumStr(*obj), obj.Name) + } + + } + return nil + }, +} + +func init() { + Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Specify the profile to use") +} diff --git a/cmd/root.go b/cmd/root.go index 62fe02c..15185ad 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,6 +3,7 @@ package cmd import ( "github.com/bmeg/git-drs/cmd/download" "github.com/bmeg/git-drs/cmd/initialize" + "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/precommit" "github.com/bmeg/git-drs/cmd/query" "github.com/bmeg/git-drs/cmd/transfer" @@ -25,5 +26,6 @@ func init() { RootCmd.AddCommand(precommit.Cmd) RootCmd.AddCommand(query.Cmd) RootCmd.AddCommand(transfer.Cmd) + RootCmd.AddCommand(list.Cmd) RootCmd.CompletionOptions.HiddenDefaultCmd = true } diff --git a/drs/object.go b/drs/object.go index 2199e3c..147493f 100644 --- a/drs/object.go +++ b/drs/object.go @@ -27,10 +27,14 @@ type AccessMethod struct { type Contents struct { } +type DRSPage struct { + DRSObjects []DRSObject `json:"drs_objects"` +} + type DRSObject struct { Id string `json:"id"` Name string `json:"name"` - SelfURL string `json:"self_url,omitempty"` + SelfURL string `json:"self_uri,omitempty"` Size int64 `json:"size"` CreatedTime string `json:"created_time,omitempty"` UpdatedTime string `json:"updated_time,omitempty"` From 7063aace2f00d2c80defabfe1c36bfa657c98a59 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Wed, 2 Jul 2025 12:24:52 -0700 Subject: [PATCH 37/51] Adding 
paging and auth token to drs list command --- client/indexd.go | 73 +++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index 6c09ce1..c97686d 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -247,37 +247,60 @@ func (cl *IndexDClient) ListObjects() (chan *drs.DRSObject, error) { a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects") - req, err := http.NewRequest("GET", a.String(), nil) - if err != nil { - return nil, err - } - out := make(chan *drs.DRSObject, 10) + LIMIT := 50 + pageNum := 0 + go func() { defer close(out) - client := &http.Client{} - response, err := client.Do(req) - if err != nil { - myLogger.Log("error: %s", err) - return - } - defer response.Body.Close() + active := true + for active { + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + myLogger.Log("error: %s", err) + return + } + q := req.URL.Query() + q.Add("limit", fmt.Sprintf("%d", LIMIT)) + q.Add("page", fmt.Sprintf("%d", pageNum)) + req.URL.RawQuery = q.Encode() + //fmt.Printf("query: %s\n", req.URL) + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + myLogger.Log("error: %s", err) + return + } - body, err := io.ReadAll(response.Body) - if err != nil { - myLogger.Log("error: %s", err) - return - } + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + myLogger.Log("error: %s", err) + return + } + defer response.Body.Close() - page := &drs.DRSPage{} - err = json.Unmarshal(body, &page) - if err != nil { - myLogger.Log("error: %s", err) - return - } - for _, elem := range page.DRSObjects { - out <- &elem + body, err := io.ReadAll(response.Body) + if err != nil { + myLogger.Log("error: %s", err) + return + } + + //fmt.Printf("%s\n", body) + + page := &drs.DRSPage{} + err = json.Unmarshal(body, &page) + if err != nil { + myLogger.Log("error: %s", err) + return + } + for _, elem := range page.DRSObjects { + out <- &elem + 
} + if len(page.DRSObjects) == 0 { + active = false + } + pageNum++ } }() return out, nil From 74ade9647982b56028949b54a28e536881683674 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Thu, 10 Jul 2025 13:19:26 -0700 Subject: [PATCH 38/51] first draft fixed with rbac --- client/drs-map.go | 3 --- client/indexd.go | 53 +++++++++++++++++++++++++++++++++----------- cmd/transfer/main.go | 3 ++- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/client/drs-map.go b/client/drs-map.go index 37991dc..2bafa31 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -172,9 +172,6 @@ func UpdateDrsObjects() error { logger.Log("Adding to DRS Objects: %s -> %s", file.Name, indexdObj.Did) // write drs objects to DRS_OBJS_PATH - if err != nil { - return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) - } err = writeDrsObj(indexdObj, file.Oid, drsObjPath) if err != nil { return fmt.Errorf("error writing DRS object for oid %s: %v", file.Oid, err) diff --git a/client/indexd.go b/client/indexd.go index 0f7d86c..20b72a4 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -69,7 +69,6 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // setup logging myLogger, err := NewLogger("") if err != nil { - // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } defer myLogger.Close() @@ -78,22 +77,28 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // get the DRS object using the OID // FIXME: how do we not hardcode sha256 here? 
drsObj, err := cl.GetObjectByHash("sha256", oid) + if err != nil { + myLogger.Log("error getting DRS object for oid %s: %s", oid, err) + return nil, fmt.Errorf("error getting DRS object for oid %s: %v", oid, err) + } + if drsObj == nil { + myLogger.Log("no DRS object found for oid %s", oid) + return nil, fmt.Errorf("no DRS object found for oid %s", oid) + } // download file using the DRS object - myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, drsObj)) + myLogger.Log("Downloading file for OID %s from DRS object: %+v", oid, drsObj) // FIXME: generalize access ID method // naively get access ID from splitting first path into : accessId := drsObj.AccessMethods[0].AccessID myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, drsObj.Name)) - // get file from indexd + // get signed url a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", drsObj.Id, "access", accessId) myLogger.Log("using endpoint: %s\n", a.String()) - - // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) if err != nil { return nil, err @@ -218,6 +223,11 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { return nil, err } + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) + } + client := &http.Client{} response, err := client.Do(req) if err != nil { @@ -419,13 +429,34 @@ func DownloadSignedUrl(signedURL string, dstPath string) error { // implements /index/index?hash={hashType}:{hash} GET func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) { + + // TODO: remove setup logging + myLogger, err := NewLogger("") + if err != nil { + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() + // search via hash https://calypr-dev.ohsu.edu/index/index?hash=sha256:52d9baed146de4895a5c9c829e7765ad349c4124ba43ae93855dbfe20a7dd3f0 - // get - 
url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base, hashType, hash) - resp, err := http.Get(url) + // setup get request to indexd + url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base.String(), hashType, hash) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + myLogger.Log("GET request created for indexd: %s", url) + + err = addGen3AuthHeader(req, cl.profile) if err != nil { - return nil, fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err) + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) + } + req.Header.Set("accept", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("error querying index for hash (%s:%s): %v, %s", hashType, hash, err, url) } defer resp.Body.Close() @@ -440,10 +471,6 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) } - if err != nil { - return nil, fmt.Errorf("Error getting DRS info for OID (%s:%s): %v", hashType, hash, err) - } - if len(records.Records) > 1 { return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records)) } diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 1866960..3437f31 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -144,10 +144,11 @@ var Cmd = &cobra.Command{ // get signed url accessUrl, err := drsClient.GetDownloadURL(downloadMsg.Oid) if err != nil { - errMsg := fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err) + errMsg := fmt.Sprintf("Error getting signed url for OID %s: %v", downloadMsg.Oid, err) myLogger.Log(errMsg) WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) } + myLogger.Log(fmt.Sprintf("Got signed URL for OID %s: %+v", downloadMsg.Oid, accessUrl)) if accessUrl.URL == "" { errMsg := fmt.Sprintf("Unable to get access URL %s", 
downloadMsg.Oid) myLogger.Log(errMsg) From a6c6122084a28151db9d162cec9823e79f083eae Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 15 Jul 2025 15:37:31 -0700 Subject: [PATCH 39/51] show 403 error when expired creds --- client/indexd.go | 10 +++++++++- cmd/transfer/main.go | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index 20b72a4..fcc71f1 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -453,6 +453,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } req.Header.Set("accept", "application/json") + // run request and do checks client := &http.Client{} resp, err := client.Do(req) if err != nil { @@ -460,23 +461,30 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } defer resp.Body.Close() + // unmarshal response body body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to query indexd for %s:%s. 
Error: %s, %s", hashType, hash, resp.Status, string(body)) + } + records := ListRecords{} err = json.Unmarshal(body, &records) if err != nil { return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) } + myLogger.Log("records: %+v", records) + // if one record found, return it if len(records.Records) > 1 { return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records)) } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, nil + return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) } drsId := records.Records[0].Did diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 3437f31..594e30b 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -234,7 +234,7 @@ func WriteErrorMessage(encoder *json.Encoder, oid string, errMsg string) { Event: "complete", Oid: oid, Error: Error{ - Code: 500, + Code: 1, Message: errMsg, }, } From f104938241bf2c801a20a4e99768fe0b0d6f3fcd Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 15 Jul 2025 16:44:12 -0700 Subject: [PATCH 40/51] tidy go mod --- go.mod | 9 --------- go.sum | 58 ---------------------------------------------------------- 2 files changed, 67 deletions(-) diff --git a/go.mod b/go.mod index 7b82f43..f9b1833 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/bmeg/git-drs go 1.24.0 require ( - github.com/git-lfs/git-lfs/v3 v3.6.1 github.com/google/uuid v1.6.0 github.com/spf13/cobra v1.9.1 github.com/uc-cdis/gen3-client v0.0.23 @@ -21,25 +20,17 @@ require ( ) require ( - github.com/avast/retry-go v2.4.2+incompatible // indirect - github.com/git-lfs/gitobj/v2 v2.1.1 // indirect - github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect - github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect github.com/google/go-github v17.0.0+incompatible // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/hashicorp/go-version v1.4.0 
// indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/leonelquinteros/gotext v1.5.0 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect - github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect github.com/spf13/pflag v1.0.6 // indirect github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e // indirect golang.org/x/net v0.41.0 // indirect golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.26.0 // indirect gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect gopkg.in/ini.v1 v1.66.3 // indirect ) diff --git a/go.sum b/go.sum index 416b00a..79e7573 100644 --- a/go.sum +++ b/go.sum @@ -1,25 +1,9 @@ -github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= -github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= -github.com/avast/retry-go v2.4.2+incompatible h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= -github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= -github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= 
-github.com/git-lfs/git-lfs/v3 v3.6.1 h1:0RA2HzkMVl69KE5zCGY1PxqkDSbd/f/O7Du6CNkTYtY= -github.com/git-lfs/git-lfs/v3 v3.6.1/go.mod h1:1YO3nafGw2wKBR5LTZ7/LXJ7U7ELdvIGvcCBrLt6mfM= -github.com/git-lfs/gitobj/v2 v2.1.1 h1:tf/VU6zL1kxa3he+nf6FO/syX+LGkm6WGDsMpfuXV7Q= -github.com/git-lfs/gitobj/v2 v2.1.1/go.mod h1:q6aqxl6Uu3gWsip5GEKpw+7459F97er8COmU45ncAxw= -github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a h1:6pskVZacdMUL93pCpMAYnMDLjH1yDFhssPYGe32sjdk= -github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a/go.mod h1:70O4NAtvWn1jW8V8V+OKrJJYcxDLTmIozfi2fmSz5SI= -github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d5raz9t3+IzoR1i1wqxE1kZC6dY+U= -github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= -github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= -github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -30,26 +14,10 @@ github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= -github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.4.0 h1:aAQzgqIrRKRa7w75CKpbBxYsmUoPjzVm1W59ca1L0J4= github.com/hashicorp/go-version v1.4.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= 
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= -github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= -github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= -github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= -github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= -github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= -github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= -github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= -github.com/jcmturner/gokrb5/v8 v8.4.2 h1:6ZIM6b/JJN0X8UM43ZOM6Z4SJzla+a/u7scXFJzodkA= -github.com/jcmturner/gokrb5/v8 v8.4.2/go.mod h1:sb+Xq/fTY5yktf/VxLsE3wlfPqQjp0aWNYyvBVK62bc= -github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= -github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= -github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= -github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -57,8 +25,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod 
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leonelquinteros/gotext v1.5.0 h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914FOauSkBM= -github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= @@ -67,11 +33,7 @@ github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4 github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= -github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= @@ -79,39 +41,19 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod 
h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= -github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= -github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= -github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e h1:IWllFTiDjjLIf2oeKxpIUmtiDV5sn71VgeQgg6vcE7k= github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e/go.mod h1:d7u6HkTYKSv5m6MCKkOQlHwaShTMl3HjqSGW3XtVhXM= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= -golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= From c9aff0fcd0bdda67f28aa5bec27b58fc6b1599f9 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 15 Jul 2025 16:49:04 -0700 Subject: [PATCH 41/51] update commit to log on failure to gen3 configure during init --- cdis-data-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdis-data-client b/cdis-data-client index 3d771f5..80a54c1 160000 --- a/cdis-data-client +++ b/cdis-data-client @@ -1 +1 @@ -Subproject commit 3d771f5ff6e5c5942c0ee5ca0c13de75356544b8 
+Subproject commit 80a54c1430d2fd30a2779f301b62426213418629 From bb203d88b4538bb8039538c71fc67f895bb887c7 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Tue, 15 Jul 2025 17:02:14 -0700 Subject: [PATCH 42/51] Fixing issues related to PR --- client/indexd.go | 5 +++++ cmd/list/main.go | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index bfa52f0..003aa64 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -288,6 +288,11 @@ func (cl *IndexDClient) ListObjects() (chan *drs.DRSObject, error) { myLogger.Log("error: %s", err) return } + if response.StatusCode != http.StatusOK { + body, _ := io.ReadAll(response.Body) + myLogger.Log("network error: %s", body) + return + } defer response.Body.Close() body, err := io.ReadAll(response.Body) diff --git a/cmd/list/main.go b/cmd/list/main.go index b4f769f..d14f0b6 100644 --- a/cmd/list/main.go +++ b/cmd/list/main.go @@ -40,7 +40,7 @@ func getCheckSumStr(obj drs.DRSObject) string { var Cmd = &cobra.Command{ Use: "list", Short: "List DRS entities from server", - Args: cobra.MinimumNArgs(0), + Args: cobra.ExactArgs(0), RunE: func(cmd *cobra.Command, args []string) error { client, err := client.NewIndexDClient() if err != nil { @@ -50,6 +50,9 @@ var Cmd = &cobra.Command{ if err != nil { return err } + if !outJson { + fmt.Printf("%-55s\t%15s\t%-45s\t%s\n", "URL", "Size", "Checksum", "Name") + } for obj := range objChan { if outJson { out, err := json.Marshal(*obj) @@ -67,5 +70,5 @@ var Cmd = &cobra.Command{ } func init() { - Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Specify the profile to use") + Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Output formatted as JSON") } From bec366a63f8b7b8cb93b34c59ec860825b11672f Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 15 Jul 2025 15:37:31 -0700 Subject: [PATCH 43/51] show 403 error when expired creds --- client/indexd.go | 10 +++++++++- cmd/transfer/main.go | 2 +- 2 files changed, 10 insertions(+), 
2 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index 003aa64..3e07194 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -526,6 +526,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } req.Header.Set("accept", "application/json") + // run request and do checks client := &http.Client{} resp, err := client.Do(req) if err != nil { @@ -533,23 +534,30 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } defer resp.Body.Close() + // unmarshal response body body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to query indexd for %s:%s. Error: %s, %s", hashType, hash, resp.Status, string(body)) + } + records := ListRecords{} err = json.Unmarshal(body, &records) if err != nil { return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) } + myLogger.Log("records: %+v", records) + // if one record found, return it if len(records.Records) > 1 { return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records)) } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, nil + return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) } drsId := records.Records[0].Did diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 3437f31..594e30b 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -234,7 +234,7 @@ func WriteErrorMessage(encoder *json.Encoder, oid string, errMsg string) { Event: "complete", Oid: oid, Error: Error{ - Code: 500, + Code: 1, Message: errMsg, }, } From 416395c54bc3ae272ae4bcacce63891eff681822 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 16 Jul 2025 12:14:23 -0700 Subject: [PATCH 44/51] update gen3-client module dependency --- cdis-data-client | 2 +- 
cmd/list/main.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cdis-data-client b/cdis-data-client index 3d771f5..80a54c1 160000 --- a/cdis-data-client +++ b/cdis-data-client @@ -1 +1 @@ -Subproject commit 3d771f5ff6e5c5942c0ee5ca0c13de75356544b8 +Subproject commit 80a54c1430d2fd30a2779f301b62426213418629 diff --git a/cmd/list/main.go b/cmd/list/main.go index d14f0b6..acce5e1 100644 --- a/cmd/list/main.go +++ b/cmd/list/main.go @@ -51,7 +51,7 @@ var Cmd = &cobra.Command{ return err } if !outJson { - fmt.Printf("%-55s\t%15s\t%-45s\t%s\n", "URL", "Size", "Checksum", "Name") + fmt.Printf("%-55s\t%-15s\t%-75s\t%s\n", "URL", "Size", "Checksum", "Name") } for obj := range objChan { if outJson { @@ -61,7 +61,7 @@ var Cmd = &cobra.Command{ } fmt.Printf("%s\n", string(out)) } else { - fmt.Printf("%s\t%15d\t%-45s\t%s\n", obj.SelfURL, obj.Size, getCheckSumStr(*obj), obj.Name) + fmt.Printf("%s\t%-15d\t%-75s\t%s\n", obj.SelfURL, obj.Size, getCheckSumStr(*obj), obj.Name) } } From 7baafa9fefec01e1cf8fef4b2cf3599c8152088a Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 16 Jul 2025 16:41:45 -0700 Subject: [PATCH 45/51] caller handles no records found --- client/indexd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/indexd.go b/client/indexd.go index fcc71f1..8dd58e2 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -484,7 +484,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) + return nil, nil } drsId := records.Records[0].Did From 8634a3255915f8970802d0ec8194f7636fb1bd02 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Wed, 16 Jul 2025 16:41:45 -0700 Subject: [PATCH 46/51] caller handles no records found --- client/indexd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/indexd.go b/client/indexd.go 
index 3e07194..1b76a6a 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -557,7 +557,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) + return nil, nil } drsId := records.Records[0].Did From 33086e2c2c288735ba23de8dd17a09fa898fa344 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 18 Jul 2025 12:32:40 -0700 Subject: [PATCH 47/51] ensure indexd passes back errors in updateDrsObjects --- client/drs-map.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/drs-map.go b/client/drs-map.go index 2bafa31..e3fb92b 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -49,6 +49,9 @@ func UpdateDrsObjects() error { // init indexd client indexdClient, err := NewIndexDClient() + if err != nil { + return fmt.Errorf("error initializing indexd with credentials: %v", err) + } // get all LFS files' info using json // TODO: use git-lfs internally instead of exec? 
(eg git.GetTrackedFiles) From 1d0689442303e4c55643df4b8052bde49323ff61 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Fri, 18 Jul 2025 17:04:57 -0700 Subject: [PATCH 48/51] Adding a git-drs list command to list files found on DRS server (#19) * Adding a git-drs list command to list files found on DRS server * Adding paging and auth token to drs list command * Fixing issues related to PR * show 403 error when expired creds * update gen3-client module dependency * create structure to return errors within channel --------- Co-authored-by: quinnwai Co-authored-by: Liam Beckman --- .github/dependabot.yml | 12 +++++ .github/workflows/build.yaml | 29 ++++++++++++ .github/workflows/release.yaml | 34 ++++++++++++++ .goreleaser.yaml | 22 +++++++++ Makefile | 69 +++++++++++++++++++++++++++ client/indexd.go | 85 +++++++++++++++++++++++++++++++++- client/interface.go | 2 + cmd/list/main.go | 81 ++++++++++++++++++++++++++++++++ cmd/root.go | 5 ++ cmd/version/main.go | 18 +++++++ drs/object.go | 11 ++++- version/version.go | 35 ++++++++++++++ 12 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/release.yaml create mode 100644 .goreleaser.yaml create mode 100644 Makefile create mode 100644 cmd/list/main.go create mode 100644 cmd/version/main.go create mode 100644 version/version.go diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..62adb5c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "daily" + target-branch: "main" + groups: + dependabot: + patterns: + - "*" + diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..0c65b0f --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,29 @@ +name: Build + +on: + push: + workflow_dispatch: + +jobs: + 
build: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go 1.x + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Build Git-DRS + run: make build + + - name: Upload Git-DRS binary as artifact + uses: actions/upload-artifact@v4 + with: + name: git-drs + path: git-drs diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..c23cf06 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,34 @@ +name: Release + +on: + push: + tags: + - '*' + workflow_dispatch: + +permissions: + contents: write + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + - + name: Set up Go + uses: actions/setup-go@v5 + - + name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 + with: + # either 'goreleaser' (default) or 'goreleaser-pro' + distribution: goreleaser + # 'latest', 'nightly', or a semver + version: 'latest' + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..20f6900 --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,22 @@ +builds: + - binary: git-drs + goos: + - darwin + - linux + goarch: + - amd64 + - arm64 + env: + - CGO_ENABLED=0 + +dist: build/release + +release: + draft: false + +env_files: + github_token: .github_token + +archives: + - format: tar.gz + name_template: "{{.ProjectName}}-{{.Os}}-{{.Arch}}-{{.Version}}" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f07e54f --- /dev/null +++ b/Makefile @@ -0,0 +1,69 @@ +# The commands used in this Makefile expect to be interpreted by bash. +# Adapted from Funnel's Makefile: +# https://github.com/ohsu-comp-bio/funnel/blob/master/Makefile + +SHELL := /bin/bash + +TESTS=$(shell go list ./... 
| grep -v /vendor/) + +git_commit := $(shell git rev-parse --short HEAD) +git_branch := $(shell git symbolic-ref -q --short HEAD) +git_upstream := $(shell git config --get remote.origin.url) +export GIT_BRANCH = $(git_branch) +export GIT_UPSTREAM = $(git_upstream) + +# Determine if the current commit has a tag +git_tag := $(shell git describe --tags --exact-match --abbrev=0 2>/dev/null) + +ifeq ($(git_tag),) + version := unknown +else + version := $(git_tag) +endif + +VERSION_LDFLAGS=\ + -X "github.com/bmeg/git-drs/version.BuildDate=$(shell date)" \ + -X "github.com/bmeg/git-drs/version.GitCommit=$(git_commit)" \ + -X "github.com/bmeg/git-drs/version.GitBranch=$(git_branch)" \ + -X "github.com/bmeg/git-drs/version.GitUpstream=$(git_upstream)" \ + -X "github.com/bmeg/git-drs/version.Version=$(version)" + +export CGO_ENABLED=0 + +# Build the code +install: + @go install -ldflags '$(VERSION_LDFLAGS)' . + +# Build the code +build: + @go build -ldflags '$(VERSION_LDFLAGS)' -buildvcs=false . + +lint-depends: + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.50.1 + go install golang.org/x/tools/cmd/goimports + +# Run code style and other checks +lint: + @golangci-lint run --timeout 3m --disable-all \ + --enable=vet \ + --enable=golint \ + --enable=gofmt \ + --enable=goimports \ + --enable=misspell \ + ./... + +# Run all tests +test: + @go test $(TESTS) + +test-verbose: + @go test -v $(TESTS) + +# Make everything usually needed to prepare for a pull request +full: proto install tidy lint test website webdash + +# Remove build/development files. 
+clean: + @rm -rf ./bin ./pkg ./test_tmp ./build ./buildtools + +.PHONY: proto proto-lint website docker webdash build debug diff --git a/client/indexd.go b/client/indexd.go index 8dd58e2..6a7bca1 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -248,6 +248,89 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { return &out, nil } +func (cl *IndexDClient) ListObjects() (chan drs.DRSObjectResult, error) { + myLogger, err := NewLogger("") + if err != nil { + return nil, err + } + myLogger.Log("Getting DRS objects from indexd") + + a := *cl.base + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects") + + out := make(chan drs.DRSObjectResult, 10) + + LIMIT := 50 + pageNum := 0 + + go func() { + defer close(out) + active := true + for active { + // setup request + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + q := req.URL.Query() + q.Add("limit", fmt.Sprintf("%d", LIMIT)) + q.Add("page", fmt.Sprintf("%d", pageNum)) + req.URL.RawQuery = q.Encode() + + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + // execute request with error checking + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + if response.StatusCode != http.StatusOK { + myLogger.Log("%d: check that your credentials are valid \nfull message: %s", response.StatusCode, body) + out <- drs.DRSObjectResult{Error: fmt.Errorf("%d: check your credentials are valid, \nfull message: %s", response.StatusCode, body)} + return + } + + // return page of DRS objects + page := &drs.DRSPage{} + err 
= json.Unmarshal(body, &page) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + for _, elem := range page.DRSObjects { + out <- drs.DRSObjectResult{Object: &elem} + } + if len(page.DRSObjects) == 0 { + active = false + } + pageNum++ + } + + myLogger.Log("total pages retrieved: %d", pageNum) + }() + return out, nil +} + ///////////// // HELPERS // ///////////// @@ -484,7 +567,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, nil + return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) } drsId := records.Records[0].Did diff --git a/client/interface.go b/client/interface.go index 21d61e8..26780ef 100644 --- a/client/interface.go +++ b/client/interface.go @@ -7,6 +7,8 @@ type ObjectStoreClient interface { // corresponds to /ga4gh/drs/v1/objects GetObject(id string) (*drs.DRSObject, error) + ListObjects() (chan drs.DRSObjectResult, error) + // given a hash, get the object describing it // no corresponding DRS endpoint exists, so this is custom code GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) diff --git a/cmd/list/main.go b/cmd/list/main.go new file mode 100644 index 0000000..1804b0e --- /dev/null +++ b/cmd/list/main.go @@ -0,0 +1,81 @@ +package list + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var outJson = false + +var checksumPref = []string{"sha256", "md5", "etag"} + +func getStringPos(q string, a []string) int { + for i, s := range a { + if q == s { + return i + } + } + return -1 +} + +// Pick out the most preferred checksum to display +func getCheckSumStr(obj drs.DRSObject) string { + curPos := len(checksumPref) + 1 + curVal := "" + for _, e := range obj.Checksums { + c := getStringPos(e.Type, checksumPref) + if c != -1 && c < curPos { + 
curPos = c + curVal = e.Type + ":" + e.Checksum + } + } + return curVal +} + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "list", + Short: "List DRS entities from server", + Args: cobra.ExactArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + + // setup + client, err := client.NewIndexDClient() + if err != nil { + return err + } + objChan, err := client.ListObjects() + if err != nil { + return err + } + if !outJson { + fmt.Printf("%-55s\t%-15s\t%-75s\t%s\n", "URI", "Size", "Checksum", "Name") + } + + // for each result, check for error and print + for objResult := range objChan { + if objResult.Error != nil { + return objResult.Error + } + obj := objResult.Object + if outJson { + out, err := json.Marshal(*obj) + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + } else { + fmt.Printf("%s\t%-15d\t%-75s\t%s\n", obj.SelfURI, obj.Size, getCheckSumStr(*obj), obj.Name) + } + } + return nil + }, +} + +func init() { + Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Output formatted as JSON") +} diff --git a/cmd/root.go b/cmd/root.go index 62fe02c..9efdfbe 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,9 +3,11 @@ package cmd import ( "github.com/bmeg/git-drs/cmd/download" "github.com/bmeg/git-drs/cmd/initialize" + "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/precommit" "github.com/bmeg/git-drs/cmd/query" "github.com/bmeg/git-drs/cmd/transfer" + "github.com/bmeg/git-drs/cmd/version" "github.com/spf13/cobra" ) @@ -25,5 +27,8 @@ func init() { RootCmd.AddCommand(precommit.Cmd) RootCmd.AddCommand(query.Cmd) RootCmd.AddCommand(transfer.Cmd) + RootCmd.AddCommand(list.Cmd) + RootCmd.AddCommand(version.Cmd) RootCmd.CompletionOptions.HiddenDefaultCmd = true + RootCmd.SilenceUsage = true } diff --git a/cmd/version/main.go b/cmd/version/main.go new file mode 100644 index 0000000..3dde4dd --- /dev/null +++ b/cmd/version/main.go @@ -0,0 +1,18 @@ +package version + +import ( + "fmt" + + 
"github.com/bmeg/git-drs/version" + "github.com/spf13/cobra" +) + +// Cmd represents the "version" command +var Cmd = &cobra.Command{ + Use: "version", + Short: "Get version", + Long: ``, + Run: func(cmd *cobra.Command, args []string) { + fmt.Println(version.String()) + }, +} diff --git a/drs/object.go b/drs/object.go index 2199e3c..43b27b9 100644 --- a/drs/object.go +++ b/drs/object.go @@ -27,10 +27,19 @@ type AccessMethod struct { type Contents struct { } +type DRSPage struct { + DRSObjects []DRSObject `json:"drs_objects"` +} + +type DRSObjectResult struct { + Object *DRSObject + Error error +} + type DRSObject struct { Id string `json:"id"` Name string `json:"name"` - SelfURL string `json:"self_url,omitempty"` + SelfURI string `json:"self_uri,omitempty"` Size int64 `json:"size"` CreatedTime string `json:"created_time,omitempty"` UpdatedTime string `json:"updated_time,omitempty"` diff --git a/version/version.go b/version/version.go new file mode 100644 index 0000000..ae0dc1f --- /dev/null +++ b/version/version.go @@ -0,0 +1,35 @@ +// Package version reports the Git-DRS version. +package version + +import "fmt" + +// Build and version details +var ( + GitCommit = "" + GitBranch = "" + GitUpstream = "" + BuildDate = "" + Version = "" +) + +var tpl = `git commit: %s +git branch: %s +git upstream: %s +build date: %s +version: %s` + +// String formats a string with version details. +func String() string { + return fmt.Sprintf(tpl, GitCommit, GitBranch, GitUpstream, BuildDate, Version) +} + +// LogFields logs build and version information to the given logger. 
+func LogFields() []interface{} { + return []interface{}{ + "GitCommit", GitCommit, + "GitBranch", GitBranch, + "GitUpstream", GitUpstream, + "BuildDate", BuildDate, + "Version", Version, + } +} From fc7c33e0b02fdf546ceb78830845df25e200449f Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 18 Jul 2025 17:16:03 -0700 Subject: [PATCH 49/51] fix comment --- client/indexd.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/client/indexd.go b/client/indexd.go index 6a7bca1..330817a 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -513,7 +513,7 @@ func DownloadSignedUrl(signedURL string, dstPath string) error { // implements /index/index?hash={hashType}:{hash} GET func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) { - // TODO: remove setup logging + // setup logging myLogger, err := NewLogger("") if err != nil { log.Fatalf("Failed to open log file: %v", err) @@ -559,7 +559,6 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO if err != nil { return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) } - myLogger.Log("records: %+v", records) // if one record found, return it if len(records.Records) > 1 { From 18f2331257746f78247bc8d2569deccf1aa4fc0d Mon Sep 17 00:00:00 2001 From: quinnwai Date: Fri, 18 Jul 2025 17:22:05 -0700 Subject: [PATCH 50/51] pass in caller handling no records found --- client/indexd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/indexd.go b/client/indexd.go index 330817a..408c1b7 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -566,7 +566,7 @@ func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSO } // if no records found, return nil to handle in caller if len(records.Records) == 0 { - return nil, fmt.Errorf("no records found for OID %s:%s", hashType, hash) + return nil, nil } drsId := records.Records[0].Did From 9ed0533ebd32c75a6d8f58608d69a62ef043acfd Mon Sep 17 
00:00:00 2001 From: quinnwai Date: Tue, 22 Jul 2025 16:18:58 -0700 Subject: [PATCH 51/51] readme changes --- README.md | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b9250a3..1e91e13 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ In your own repo, all you need to setup is a .drsconfig file. Once you have crea ### Quick Start Commands **Track Specific File Types** -Store all bam files as a pointer in the Git repo and store actual contents in the DRS server. This is handled by a configuration line in `.gitattributes` +Store all bam files as a pointer in the Git repo and store actual contents in the DRS server. This is handled by a configuration line in `.gitattributes`. ``` git lfs track "*.bam" git add .gitattributes @@ -87,4 +87,28 @@ git lfs pull -I /path/to/file Pull all non-localized files ``` git lfs pull -``` \ No newline at end of file +``` + + +### When to Use Git vs Git LFS vs Git DRS +The goal of Git DRS is to maximize integration with the Git workflow using a minimal amount of extra tooling. That being said, sometimes `git lfs` commands or `git drs` commands will have to be run outside of the Git workflow. Here's some advice on when to use each of the three... +- **Git DRS**: Only used for initialization of your local repo! The rest of Git DRS is triggered automatically. +- **Git LFS**: Used to interact with files that are tracked by LFS. Examples include + - `git lfs track` to track files whose contents are stored outside of the Git repo + - `git lfs ls-files` to get a list of LFS files that LFS tracks + - `git lfs pull` to pull a file whose contents exist on a server outside of the Git repo. +- **Git**: Everything else! (eg adding/committing files, pushing files, cloning repos, and checking out different commits) + +### Troubleshooting + +To see more logs and errors, see the log files in the `.drs` directory. 
+ +## Implementation Details + +### Adding new files +When new files are added, a [pre-commit hook](https://git-scm.com/book/ms/v2/Customizing-Git-Git-Hooks#:~:text=The%20pre%2Dcommit,on%20new%20methods.) is run which triggers `git drs precommit`. This takes all of the LFS files that have been staged (ie `git add`ed) and creates DRS records for them. Those get used later during a push to register these new files in the DRS server. DRS objects are only created during this pre-commit if they have been staged +and don't already exist on the DRS server. + +### File transfers + +In order to push file contents to a different system, Git DRS makes use of [custom transfers](https://github.com/git-lfs/git-lfs/blob/main/docs/custom-transfers.md). These custom transfers are how Git LFS sends information to Git DRS to automatically update the server, passing in the files that have been changed for each commit that needs to be pushed. For instance, in the gen3 custom transfer client, we add an indexd record to the DRS server and upload the file to a gen3-registered bucket. \ No newline at end of file