diff --git a/README.md b/README.md index 2b8c87c..5423973 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,10 @@ When creating a repo from scratch, make sure to create a configuration file at ### Quick Start When in doubt, use the `--help` flag to get more info about the commands +======= #### Track a Specific File Type Store all bam files as a pointer in the Git repo and store actual contents in the DRS server. This is handled by a configuration line in `.gitattributes` + ``` git lfs track "*.bam" git add .gitattributes @@ -111,7 +113,25 @@ git lfs pull -I "*.bam" git lfs pull ``` +### When to Use Git vs Git LFS vs Git DRS +The goal of Git DRS is to maximize integration with the Git workflow using a minimal amount of extra tooling. That being said, sometimes `git lfs` commands or `git drs` commands will have to be run outside of the Git workflow. Here's some advice on when to use each of the three... +- **Git DRS**: Only used for initialization of your local repo! The rest of Git DRS is triggered automatically. +- **Git LFS**: Used to interact with files that are tracked by LFS. Examples include + - `git lfs track` to track files whose contents are stored outside of the Git repo + - `git lfs ls-files` to get a list of LFS files that LFS tracks + - `git lfs pull` to pull a file whose contents exist on a server outside of the Git repo. +- **Git**: Everything else! (eg adding/committing files, pushing files, cloning repos, and checking out different commits) ### Troubleshooting -- To see the logs for a given file transfer, navigate to `.drs` and view the log files +To see more logs and errors, see the log files in the `.drs` directory. + +## Implementation Details + +### Adding new files +When new files are added, a [precommit hook](https://git-scm.com/book/ms/v2/Customizing-Git-Git-Hooks#:~:text=The%20pre%2Dcommit,on%20new%20methods.) is run which triggers `git drs precommit`. This takes all of the LFS files that have been staged (ie `git add`ed) and creates DRS records for them. Those get used later during a push to register these new files in the DRS server. DRS objects are only created during this pre-commit if they have been staged +and don't already exist on the DRS server. + +### File transfers + +In order to push file contents to a different system, Git DRS makes use of [custom transfers](https://github.com/git-lfs/git-lfs/blob/main/docs/custom-transfers.md). These custom transfer are how Git LFS sends information to Git DRS to automatically update the server, passing in the files that have been changed for every each commit that needs to be pushed.. For instance,in the gen3 custom transfer client, we add a indexd record to the DRS server and upload the file to a gen3-registered bucket. diff --git a/cdis-data-client b/cdis-data-client index 3d771f5..80a54c1 160000 --- a/cdis-data-client +++ b/cdis-data-client @@ -1 +1 @@ -Subproject commit 3d771f5ff6e5c5942c0ee5ca0c13de75356544b8 +Subproject commit 80a54c1430d2fd30a2779f301b62426213418629 diff --git a/client/drs-map.go b/client/drs-map.go index 37991dc..e3fb92b 100644 --- a/client/drs-map.go +++ b/client/drs-map.go @@ -49,6 +49,9 @@ func UpdateDrsObjects() error { // init indexd client indexdClient, err := NewIndexDClient() + if err != nil { + return fmt.Errorf("error initializing indexd with credentials: %v", err) + } // get all LFS files' info using json // TODO: use git-lfs internally instead of exec? (eg git.GetTrackedFiles) @@ -172,9 +175,6 @@ func UpdateDrsObjects() error { logger.Log("Adding to DRS Objects: %s -> %s", file.Name, indexdObj.Did) // write drs objects to DRS_OBJS_PATH - if err != nil { - return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err) - } err = writeDrsObj(indexdObj, file.Oid, drsObjPath) if err != nil { return fmt.Errorf("error writing DRS object for oid %s: %v", file.Oid, err) diff --git a/client/indexd.go b/client/indexd.go index 0f7d86c..408c1b7 100644 --- a/client/indexd.go +++ b/client/indexd.go @@ -69,7 +69,6 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // setup logging myLogger, err := NewLogger("") if err != nil { - // Handle error (e.g., print to stderr and exit) log.Fatalf("Failed to open log file: %v", err) } defer myLogger.Close() @@ -78,22 +77,28 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) { // get the DRS object using the OID // FIXME: how do we not hardcode sha256 here? drsObj, err := cl.GetObjectByHash("sha256", oid) + if err != nil { + myLogger.Log("error getting DRS object for oid %s: %s", oid, err) + return nil, fmt.Errorf("error getting DRS object for oid %s: %v", oid, err) + } + if drsObj == nil { + myLogger.Log("no DRS object found for oid %s", oid) + return nil, fmt.Errorf("no DRS object found for oid %s", oid) + } // download file using the DRS object - myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, drsObj)) + myLogger.Log("Downloading file for OID %s from DRS object: %+v", oid, drsObj) // FIXME: generalize access ID method // naively get access ID from splitting first path into : accessId := drsObj.AccessMethods[0].AccessID myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, drsObj.Name)) - // get file from indexd + // get signed url a := *cl.base a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", drsObj.Id, "access", accessId) myLogger.Log("using endpoint: %s\n", a.String()) - - // unmarshal response req, err := http.NewRequest("GET", a.String(), nil) if err != nil { return nil, err @@ -218,6 +223,11 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { return nil, err } + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) + } + client := &http.Client{} response, err := client.Do(req) if err != nil { @@ -238,6 +248,89 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) { return &out, nil } +func (cl *IndexDClient) ListObjects() (chan drs.DRSObjectResult, error) { + myLogger, err := NewLogger("") + if err != nil { + return nil, err + } + myLogger.Log("Getting DRS objects from indexd") + + a := *cl.base + a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects") + + out := make(chan drs.DRSObjectResult, 10) + + LIMIT := 50 + pageNum := 0 + + go func() { + defer close(out) + active := true + for active { + // setup request + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + q := req.URL.Query() + q.Add("limit", fmt.Sprintf("%d", LIMIT)) + q.Add("page", fmt.Sprintf("%d", pageNum)) + req.URL.RawQuery = q.Encode() + + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + // execute request with error checking + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + if response.StatusCode != http.StatusOK { + myLogger.Log("%d: check that your credentials are valid \nfull message: %s", response.StatusCode, body) + out <- drs.DRSObjectResult{Error: fmt.Errorf("%d: check your credentials are valid, \nfull message: %s", response.StatusCode, body)} + return + } + + // return page of DRS objects + page := &drs.DRSPage{} + err = json.Unmarshal(body, &page) + if err != nil { + myLogger.Log("error: %s", err) + out <- drs.DRSObjectResult{Error: err} + return + } + for _, elem := range page.DRSObjects { + out <- drs.DRSObjectResult{Object: &elem} + } + if len(page.DRSObjects) == 0 { + active = false + } + pageNum++ + } + + myLogger.Log("total pages retrieved: %d", pageNum) + }() + return out, nil +} + ///////////// // HELPERS // ///////////// @@ -419,31 +512,55 @@ func DownloadSignedUrl(signedURL string, dstPath string) error { // implements /index/index?hash={hashType}:{hash} GET func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) { + + // setup logging + myLogger, err := NewLogger("") + if err != nil { + log.Fatalf("Failed to open log file: %v", err) + } + defer myLogger.Close() + // search via hash https://calypr-dev.ohsu.edu/index/index?hash=sha256:52d9baed146de4895a5c9c829e7765ad349c4124ba43ae93855dbfe20a7dd3f0 - // get - url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base, hashType, hash) - resp, err := http.Get(url) + // setup get request to indexd + url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base.String(), hashType, hash) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + myLogger.Log("GET request created for indexd: %s", url) + + err = addGen3AuthHeader(req, cl.profile) + if err != nil { + return nil, fmt.Errorf("error adding Gen3 auth header: %v", err) + } + req.Header.Set("accept", "application/json") + + // run request and do checks + client := &http.Client{} + resp, err := client.Do(req) if err != nil { - return nil, fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err) + return nil, fmt.Errorf("error querying index for hash (%s:%s): %v, %s", hashType, hash, err, url) } defer resp.Body.Close() + // unmarshal response body body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err) } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to query indexd for %s:%s. Error: %s, %s", hashType, hash, resp.Status, string(body)) + } + records := ListRecords{} err = json.Unmarshal(body, &records) if err != nil { return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err) } - if err != nil { - return nil, fmt.Errorf("Error getting DRS info for OID (%s:%s): %v", hashType, hash, err) - } - + // if one record found, return it if len(records.Records) > 1 { return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records)) } diff --git a/client/interface.go b/client/interface.go index 21d61e8..26780ef 100644 --- a/client/interface.go +++ b/client/interface.go @@ -7,6 +7,8 @@ type ObjectStoreClient interface { // corresponds to /ga4gh/drs/v1/objects GetObject(id string) (*drs.DRSObject, error) + ListObjects() (chan drs.DRSObjectResult, error) + // given a hash, get the object describing it // no corresponding DRS endpoint exists, so this is custom code GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) diff --git a/cmd/list/main.go b/cmd/list/main.go new file mode 100644 index 0000000..1804b0e --- /dev/null +++ b/cmd/list/main.go @@ -0,0 +1,81 @@ +package list + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var outJson = false + +var checksumPref = []string{"sha256", "md5", "etag"} + +func getStringPos(q string, a []string) int { + for i, s := range a { + if q == s { + return i + } + } + return -1 +} + +// Pick out the most preferred checksum to display +func getCheckSumStr(obj drs.DRSObject) string { + curPos := len(checksumPref) + 1 + curVal := "" + for _, e := range obj.Checksums { + c := getStringPos(e.Type, checksumPref) + if c != -1 && c < curPos { + curPos = c + curVal = e.Type + ":" + e.Checksum + } + } + return curVal +} + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "list", + Short: "List DRS entities from server", + Args: cobra.ExactArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + + // setup + client, err := client.NewIndexDClient() + if err != nil { + return err + } + objChan, err := client.ListObjects() + if err != nil { + return err + } + if !outJson { + fmt.Printf("%-55s\t%-15s\t%-75s\t%s\n", "URI", "Size", "Checksum", "Name") + } + + // for each result, check for error and print + for objResult := range objChan { + if objResult.Error != nil { + return objResult.Error + } + obj := objResult.Object + if outJson { + out, err := json.Marshal(*obj) + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + } else { + fmt.Printf("%s\t%-15d\t%-75s\t%s\n", obj.SelfURI, obj.Size, getCheckSumStr(*obj), obj.Name) + } + } + return nil + }, +} + +func init() { + Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Output formatted as JSON") +} diff --git a/cmd/root.go b/cmd/root.go index fd32fd9..9efdfbe 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,6 +3,7 @@ package cmd import ( "github.com/bmeg/git-drs/cmd/download" "github.com/bmeg/git-drs/cmd/initialize" + "github.com/bmeg/git-drs/cmd/list" "github.com/bmeg/git-drs/cmd/precommit" "github.com/bmeg/git-drs/cmd/query" "github.com/bmeg/git-drs/cmd/transfer" @@ -26,6 +27,8 @@ func init() { RootCmd.AddCommand(precommit.Cmd) RootCmd.AddCommand(query.Cmd) RootCmd.AddCommand(transfer.Cmd) + RootCmd.AddCommand(list.Cmd) RootCmd.AddCommand(version.Cmd) RootCmd.CompletionOptions.HiddenDefaultCmd = true + RootCmd.SilenceUsage = true } diff --git a/cmd/transfer/main.go b/cmd/transfer/main.go index 1866960..594e30b 100644 --- a/cmd/transfer/main.go +++ b/cmd/transfer/main.go @@ -144,10 +144,11 @@ var Cmd = &cobra.Command{ // get signed url accessUrl, err := drsClient.GetDownloadURL(downloadMsg.Oid) if err != nil { - errMsg := fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err) + errMsg := fmt.Sprintf("Error getting signed url for OID %s: %v", downloadMsg.Oid, err) myLogger.Log(errMsg) WriteErrorMessage(encoder, downloadMsg.Oid, errMsg) } + myLogger.Log(fmt.Sprintf("Got signed URL for OID %s: %+v", downloadMsg.Oid, accessUrl)) if accessUrl.URL == "" { errMsg := fmt.Sprintf("Unable to get access URL %s", downloadMsg.Oid) myLogger.Log(errMsg) @@ -233,7 +234,7 @@ func WriteErrorMessage(encoder *json.Encoder, oid string, errMsg string) { Event: "complete", Oid: oid, Error: Error{ - Code: 500, + Code: 1, Message: errMsg, }, } diff --git a/drs/object.go b/drs/object.go index 2199e3c..43b27b9 100644 --- a/drs/object.go +++ b/drs/object.go @@ -27,10 +27,19 @@ type AccessMethod struct { type Contents struct { } +type DRSPage struct { + DRSObjects []DRSObject `json:"drs_objects"` +} + +type DRSObjectResult struct { + Object *DRSObject + Error error +} + type DRSObject struct { Id string `json:"id"` Name string `json:"name"` - SelfURL string `json:"self_url,omitempty"` + SelfURI string `json:"self_uri,omitempty"` Size int64 `json:"size"` CreatedTime string `json:"created_time,omitempty"` UpdatedTime string `json:"updated_time,omitempty"`