Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
2960410
Experimenting with integrating patterns from git-lfs
kellrott May 7, 2025
6e251ce
Adding DRS query test to code
kellrott May 8, 2025
ab2437f
Starting to outline the DRS/indexd client support
kellrott May 12, 2025
28179ff
1st draft DRS query and download, make sure to setup .drsconfig
quinnwai May 14, 2025
e6fb636
make DownloadFile more loosely coupled
quinnwai May 15, 2025
08425c3
drafted README requirements
quinnwai May 19, 2025
9493117
typos
quinnwai May 19, 2025
5912b71
clarify requirements, provide git lfs vs git drs comparison
quinnwai May 20, 2025
66a370f
1st draft: create hash -> drs map w/ precommit
quinnwai Jun 9, 2025
5a50247
use uuid
quinnwai Jun 9, 2025
a537b3f
working draft for indexd writing
quinnwai Jun 12, 2025
6b512a8
parse gen3 project info from config, drafted upload-single refactor s…
quinnwai Jun 16, 2025
46749c2
successful push using oid (sha) as filename
quinnwai Jun 16, 2025
d147178
improve download to localize to .git/lfs/objects
quinnwai Jun 16, 2025
50cdbdd
creation date stub
quinnwai Jun 17, 2025
bfc5713
update gen3-client location to be within directory
quinnwai Jun 17, 2025
c185674
logs
quinnwai Jun 17, 2025
be629da
fix build warning
quinnwai Jun 18, 2025
a1755fc
add cdis-data-client submodule
quinnwai Jun 18, 2025
7b89b86
refactor precommit and transfer into cli
quinnwai Jun 18, 2025
7b841dd
implement init to create pre-commit hook and git config for custom tr…
quinnwai Jun 19, 2025
a49db2b
update branch for submodules
quinnwai Jun 19, 2025
fa2249a
enable init to pass in the right keys
quinnwai Jun 19, 2025
0219557
delete indexd record if drs object registration (ie file upload) fails
quinnwai Jun 20, 2025
5a39eb6
first pass refactor drs map into separate objects in to .drs directory
quinnwai Jun 20, 2025
7c1231d
fully deprecate drs-map usage in favor of .drs/objects
quinnwai Jun 20, 2025
95147c7
update only staged files // delete final drsmap usages // revert use …
quinnwai Jun 21, 2025
1faab96
move .drsconfig to .drs/config
quinnwai Jun 21, 2025
183de90
clean up commands
quinnwai Jun 23, 2025
0bd25ae
remove urls from config, pull from gen3Profile
quinnwai Jun 23, 2025
c7aab2c
refactor clone to use indexd records from server // split out indexd …
quinnwai Jun 24, 2025
b8ae6ef
patch indexd use for download // update ObjectStoreClient
quinnwai Jun 24, 2025
0c0313c
enable indexd as source of truth // add query by hash to client inter…
quinnwai Jun 25, 2025
fa6428c
transfer logs in .drs // update .gitignore w trailing "\n" on write
quinnwai Jun 26, 2025
6ce57ee
starter readme
quinnwai Jun 27, 2025
e89f652
Adding a git-drs list command to list files found on DRS server
kellrott Jul 1, 2025
7063aac
Adding paging and auth token to drs list command
kellrott Jul 2, 2025
c6360fe
Merge remote-tracking branch 'origin/main' into feature/drs-list
kellrott Jul 2, 2025
74ade96
first draft fixed with rbac
quinnwai Jul 10, 2025
d524556
Merge remote-tracking branch 'origin/bugfix/indexd-rbac' into feature…
kellrott Jul 11, 2025
fd8091f
Merge remote-tracking branch 'origin/main' into feature/drs-list
kellrott Jul 14, 2025
a6c6122
show 403 error when expired creds
quinnwai Jul 15, 2025
f104938
tidy go mod
quinnwai Jul 15, 2025
c9aff0f
update commit to log on failure to gen3 configure during init
quinnwai Jul 15, 2025
bb203d8
Fixing issues related to PR
kellrott Jul 16, 2025
bec366a
show 403 error when expired creds
quinnwai Jul 15, 2025
416395c
update gen3-client module dependency
quinnwai Jul 16, 2025
7baafa9
caller handles no records found
quinnwai Jul 16, 2025
8634a32
caller handles no records found
quinnwai Jul 16, 2025
33086e2
ensure indexd passes back errors in updateDrsObjects
quinnwai Jul 18, 2025
1d06894
Adding a git-drs list command to list files found on DRS server (#19)
kellrott Jul 19, 2025
70f2797
Merge branch 'bugfix/indexd-rbac' of https://github.com/bmeg/git-drs …
quinnwai Jul 19, 2025
fc7c33e
fix comment
quinnwai Jul 19, 2025
18f2331
pass in caller handling no records found
quinnwai Jul 19, 2025
9ed0533
readme changes
quinnwai Jul 22, 2025
bff9484
Merge branch 'main' into bugfix/indexd-rbac
quinnwai Jul 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ When creating a repo from scratch, make sure to create a configuration file at
### Quick Start
When in doubt, use the `--help` flag to get more info about the commands

=======
#### Track a Specific File Type
Store all bam files as a pointer in the Git repo and store actual contents in the DRS server. This is handled by a configuration line in `.gitattributes`

```
git lfs track "*.bam"
git add .gitattributes
Expand Down Expand Up @@ -111,7 +113,25 @@ git lfs pull -I "*.bam"
git lfs pull
```

### When to Use Git vs Git LFS vs Git DRS
The goal of Git DRS is to maximize integration with the Git workflow using a minimal amount of extra tooling. That being said, sometimes `git lfs` commands or `git drs` commands will have to be run outside of the Git workflow. Here's some advice on when to use each of the three...
- **Git DRS**: Only used for initialization of your local repo! The rest of Git DRS is triggered automatically.
- **Git LFS**: Used to interact with files that are tracked by LFS. Examples include
- `git lfs track` to track files whose contents are stored outside of the Git repo
- `git lfs ls-files` to get a list of LFS files that LFS tracks
- `git lfs pull` to pull a file whose contents exist on a server outside of the Git repo.
- **Git**: Everything else! (eg adding/committing files, pushing files, cloning repos, and checking out different commits)

### Troubleshooting

- To see the logs for a given file transfer, navigate to `.drs` and view the log files
To see more logs and errors, see the log files in the `.drs` directory.

## Implementation Details

### Adding new files
When new files are added, a [precommit hook](https://git-scm.com/book/ms/v2/Customizing-Git-Git-Hooks#:~:text=The%20pre%2Dcommit,on%20new%20methods.) is run which triggers `git drs precommit`. This takes all of the LFS files that have been staged (ie `git add`ed) and creates DRS records for them. Those get used later during a push to register these new files in the DRS server. DRS objects are only created during this pre-commit if they have been staged
and don't already exist on the DRS server.

### File transfers

In order to push file contents to a different system, Git DRS makes use of [custom transfers](https://github.com/git-lfs/git-lfs/blob/main/docs/custom-transfers.md). These custom transfer are how Git LFS sends information to Git DRS to automatically update the server, passing in the files that have been changed for every each commit that needs to be pushed.. For instance,in the gen3 custom transfer client, we add a indexd record to the DRS server and upload the file to a gen3-registered bucket.
2 changes: 1 addition & 1 deletion cdis-data-client
6 changes: 3 additions & 3 deletions client/drs-map.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ func UpdateDrsObjects() error {

// init indexd client
indexdClient, err := NewIndexDClient()
if err != nil {
return fmt.Errorf("error initializing indexd with credentials: %v", err)
}

// get all LFS files' info using json
// TODO: use git-lfs internally instead of exec? (eg git.GetTrackedFiles)
Expand Down Expand Up @@ -172,9 +175,6 @@ func UpdateDrsObjects() error {
logger.Log("Adding to DRS Objects: %s -> %s", file.Name, indexdObj.Did)

// write drs objects to DRS_OBJS_PATH
if err != nil {
return fmt.Errorf("error getting object path for oid %s: %v", file.Oid, err)
}
err = writeDrsObj(indexdObj, file.Oid, drsObjPath)
if err != nil {
return fmt.Errorf("error writing DRS object for oid %s: %v", file.Oid, err)
Expand Down
143 changes: 130 additions & 13 deletions client/indexd.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) {
// setup logging
myLogger, err := NewLogger("")
if err != nil {
// Handle error (e.g., print to stderr and exit)
log.Fatalf("Failed to open log file: %v", err)
}
defer myLogger.Close()
Expand All @@ -78,22 +77,28 @@ func (cl *IndexDClient) GetDownloadURL(oid string) (*drs.AccessURL, error) {
// get the DRS object using the OID
// FIXME: how do we not hardcode sha256 here?
drsObj, err := cl.GetObjectByHash("sha256", oid)
if err != nil {
myLogger.Log("error getting DRS object for oid %s: %s", oid, err)
return nil, fmt.Errorf("error getting DRS object for oid %s: %v", oid, err)
}
if drsObj == nil {
myLogger.Log("no DRS object found for oid %s", oid)
return nil, fmt.Errorf("no DRS object found for oid %s", oid)
}

// download file using the DRS object
myLogger.Log(fmt.Sprintf("Downloading file for OID %s from DRS object: %+v", oid, drsObj))
myLogger.Log("Downloading file for OID %s from DRS object: %+v", oid, drsObj)

// FIXME: generalize access ID method
// naively get access ID from splitting first path into :
accessId := drsObj.AccessMethods[0].AccessID
myLogger.Log(fmt.Sprintf("Downloading file with oid %s, access ID: %s, file name: %s", oid, accessId, drsObj.Name))

// get file from indexd
// get signed url
a := *cl.base
a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects", drsObj.Id, "access", accessId)

myLogger.Log("using endpoint: %s\n", a.String())

// unmarshal response
req, err := http.NewRequest("GET", a.String(), nil)
if err != nil {
return nil, err
Expand Down Expand Up @@ -218,6 +223,11 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) {
return nil, err
}

err = addGen3AuthHeader(req, cl.profile)
if err != nil {
return nil, fmt.Errorf("error adding Gen3 auth header: %v", err)
}

client := &http.Client{}
response, err := client.Do(req)
if err != nil {
Expand All @@ -238,6 +248,89 @@ func (cl *IndexDClient) GetObject(id string) (*drs.DRSObject, error) {
return &out, nil
}

func (cl *IndexDClient) ListObjects() (chan drs.DRSObjectResult, error) {
myLogger, err := NewLogger("")
if err != nil {
return nil, err
}
myLogger.Log("Getting DRS objects from indexd")

a := *cl.base
a.Path = filepath.Join(a.Path, "ga4gh/drs/v1/objects")

out := make(chan drs.DRSObjectResult, 10)

LIMIT := 50
pageNum := 0

go func() {
defer close(out)
active := true
for active {
// setup request
req, err := http.NewRequest("GET", a.String(), nil)
if err != nil {
myLogger.Log("error: %s", err)
out <- drs.DRSObjectResult{Error: err}
return
}

q := req.URL.Query()
q.Add("limit", fmt.Sprintf("%d", LIMIT))
q.Add("page", fmt.Sprintf("%d", pageNum))
req.URL.RawQuery = q.Encode()

err = addGen3AuthHeader(req, cl.profile)
if err != nil {
myLogger.Log("error: %s", err)
out <- drs.DRSObjectResult{Error: err}
return
}

// execute request with error checking
client := &http.Client{}
response, err := client.Do(req)
if err != nil {
myLogger.Log("error: %s", err)
out <- drs.DRSObjectResult{Error: err}
return
}

defer response.Body.Close()
body, err := io.ReadAll(response.Body)
if err != nil {
myLogger.Log("error: %s", err)
out <- drs.DRSObjectResult{Error: err}
return
}
if response.StatusCode != http.StatusOK {
myLogger.Log("%d: check that your credentials are valid \nfull message: %s", response.StatusCode, body)
out <- drs.DRSObjectResult{Error: fmt.Errorf("%d: check your credentials are valid, \nfull message: %s", response.StatusCode, body)}
return
}

// return page of DRS objects
page := &drs.DRSPage{}
err = json.Unmarshal(body, &page)
if err != nil {
myLogger.Log("error: %s", err)
out <- drs.DRSObjectResult{Error: err}
return
}
for _, elem := range page.DRSObjects {
out <- drs.DRSObjectResult{Object: &elem}
}
if len(page.DRSObjects) == 0 {
active = false
}
pageNum++
}

myLogger.Log("total pages retrieved: %d", pageNum)
}()
return out, nil
}

/////////////
// HELPERS //
/////////////
Expand Down Expand Up @@ -419,31 +512,55 @@ func DownloadSignedUrl(signedURL string, dstPath string) error {

// implements /index/index?hash={hashType}:{hash} GET
func (cl *IndexDClient) GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error) {

// setup logging
myLogger, err := NewLogger("")
if err != nil {
log.Fatalf("Failed to open log file: %v", err)
}
defer myLogger.Close()

// search via hash https://calypr-dev.ohsu.edu/index/index?hash=sha256:52d9baed146de4895a5c9c829e7765ad349c4124ba43ae93855dbfe20a7dd3f0

// get
url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base, hashType, hash)
resp, err := http.Get(url)
// setup get request to indexd
url := fmt.Sprintf("%s/index/index?hash=%s:%s", cl.base.String(), hashType, hash)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
myLogger.Log("GET request created for indexd: %s", url)

err = addGen3AuthHeader(req, cl.profile)
if err != nil {
return nil, fmt.Errorf("error adding Gen3 auth header: %v", err)
}
req.Header.Set("accept", "application/json")

// run request and do checks
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("error querying index for hash (%s:%s): %v", hashType, hash, err)
return nil, fmt.Errorf("error querying index for hash (%s:%s): %v, %s", hashType, hash, err, url)
}
defer resp.Body.Close()

// unmarshal response body
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading response body for (%s:%s): %v", hashType, hash, err)
}

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to query indexd for %s:%s. Error: %s, %s", hashType, hash, resp.Status, string(body))
}

records := ListRecords{}
err = json.Unmarshal(body, &records)
if err != nil {
return nil, fmt.Errorf("error unmarshaling (%s:%s): %v", hashType, hash, err)
}

if err != nil {
return nil, fmt.Errorf("Error getting DRS info for OID (%s:%s): %v", hashType, hash, err)
}

// if one record found, return it
if len(records.Records) > 1 {
return nil, fmt.Errorf("expected at most 1 record for OID %s:%s, got %d records", hashType, hash, len(records.Records))
}
Expand Down
2 changes: 2 additions & 0 deletions client/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ type ObjectStoreClient interface {
// corresponds to /ga4gh/drs/v1/objects
GetObject(id string) (*drs.DRSObject, error)

ListObjects() (chan drs.DRSObjectResult, error)

// given a hash, get the object describing it
// no corresponding DRS endpoint exists, so this is custom code
GetObjectByHash(hashType string, hash string) (*drs.DRSObject, error)
Expand Down
81 changes: 81 additions & 0 deletions cmd/list/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package list

import (
"encoding/json"
"fmt"

"github.com/bmeg/git-drs/client"
"github.com/bmeg/git-drs/drs"
"github.com/spf13/cobra"
)

var outJson = false

var checksumPref = []string{"sha256", "md5", "etag"}

func getStringPos(q string, a []string) int {
for i, s := range a {
if q == s {
return i
}
}
return -1
}

// Pick out the most preferred checksum to display
func getCheckSumStr(obj drs.DRSObject) string {
curPos := len(checksumPref) + 1
curVal := ""
for _, e := range obj.Checksums {
c := getStringPos(e.Type, checksumPref)
if c != -1 && c < curPos {
curPos = c
curVal = e.Type + ":" + e.Checksum
}
}
return curVal
}

// Cmd line declaration
var Cmd = &cobra.Command{
Use: "list",
Short: "List DRS entities from server",
Args: cobra.ExactArgs(0),
RunE: func(cmd *cobra.Command, args []string) error {

// setup
client, err := client.NewIndexDClient()
if err != nil {
return err
}
objChan, err := client.ListObjects()
if err != nil {
return err
}
if !outJson {
fmt.Printf("%-55s\t%-15s\t%-75s\t%s\n", "URI", "Size", "Checksum", "Name")
}

// for each result, check for error and print
for objResult := range objChan {
if objResult.Error != nil {
return objResult.Error
}
obj := objResult.Object
if outJson {
out, err := json.Marshal(*obj)
if err != nil {
return err
}
fmt.Printf("%s\n", string(out))
} else {
fmt.Printf("%s\t%-15d\t%-75s\t%s\n", obj.SelfURI, obj.Size, getCheckSumStr(*obj), obj.Name)
}
}
return nil
},
}

func init() {
Cmd.Flags().BoolVarP(&outJson, "json", "j", outJson, "Output formatted as JSON")
}
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"github.com/bmeg/git-drs/cmd/download"
"github.com/bmeg/git-drs/cmd/initialize"
"github.com/bmeg/git-drs/cmd/list"
"github.com/bmeg/git-drs/cmd/precommit"
"github.com/bmeg/git-drs/cmd/query"
"github.com/bmeg/git-drs/cmd/transfer"
Expand All @@ -26,6 +27,8 @@ func init() {
RootCmd.AddCommand(precommit.Cmd)
RootCmd.AddCommand(query.Cmd)
RootCmd.AddCommand(transfer.Cmd)
RootCmd.AddCommand(list.Cmd)
RootCmd.AddCommand(version.Cmd)
RootCmd.CompletionOptions.HiddenDefaultCmd = true
RootCmd.SilenceUsage = true
}
5 changes: 3 additions & 2 deletions cmd/transfer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,11 @@ var Cmd = &cobra.Command{
// get signed url
accessUrl, err := drsClient.GetDownloadURL(downloadMsg.Oid)
if err != nil {
errMsg := fmt.Sprintf("Error downloading file for OID %s: %v", downloadMsg.Oid, err)
errMsg := fmt.Sprintf("Error getting signed url for OID %s: %v", downloadMsg.Oid, err)
myLogger.Log(errMsg)
WriteErrorMessage(encoder, downloadMsg.Oid, errMsg)
}
myLogger.Log(fmt.Sprintf("Got signed URL for OID %s: %+v", downloadMsg.Oid, accessUrl))
if accessUrl.URL == "" {
errMsg := fmt.Sprintf("Unable to get access URL %s", downloadMsg.Oid)
myLogger.Log(errMsg)
Expand Down Expand Up @@ -233,7 +234,7 @@ func WriteErrorMessage(encoder *json.Encoder, oid string, errMsg string) {
Event: "complete",
Oid: oid,
Error: Error{
Code: 500,
Code: 1,
Message: errMsg,
},
}
Expand Down
Loading