Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
28bb3aa
create some helper functions to uploading/downloading gcs
jess-lowe Mar 5, 2026
3cd4243
parallelize + tests
jess-lowe Mar 5, 2026
3a32283
rename upload -> gcs-tools
jess-lowe Mar 6, 2026
45cb1f8
make things more go tst idiomatic
jess-lowe Mar 6, 2026
c44a7d6
Merge branch 'master' into feat/upload-to-gcs
jess-lowe Mar 17, 2026
6a596ec
fix test
jess-lowe Mar 17, 2026
d506f45
fix lint
jess-lowe Mar 17, 2026
263f87d
Merge branch 'feat/use-go-gcs' into refactor/nvd-use-gcs
jess-lowe Mar 19, 2026
933d315
initial changes for immediately uploading records to gcs bucket
jess-lowe Mar 19, 2026
06c4f4f
progress
jess-lowe Mar 19, 2026
886c2c1
fix some compiling issues and lint
jess-lowe Mar 20, 2026
15c4757
remove the upload part of the run-cve-to-osv generation script
jess-lowe Mar 20, 2026
b8ed421
renamed functions and moved them around to be more and less generic
jess-lowe Mar 20, 2026
3c64e5f
Merge branch 'master' into refactor/nvd-use-gcs
jess-lowe Mar 20, 2026
6c773e6
fix lint
jess-lowe Mar 20, 2026
31e132b
Merge branch 'refactor/nvd-use-gcs' of https://github.com/jess-lowe/o…
jess-lowe Mar 20, 2026
24369ea
remove unneeded test as output happens elsewhere
jess-lowe Mar 20, 2026
9468c20
Merge remote-tracking branch 'upstream/master' into refactor/nvd-use-gcs
jess-lowe Apr 29, 2026
d586a11
fix some linter issues
jess-lowe Apr 29, 2026
9e3fa62
fixed the test change
jess-lowe Apr 29, 2026
5b315de
update nits
jess-lowe May 1, 2026
997ae65
update comment
jess-lowe May 1, 2026
da92e68
ensure deterministic output
jess-lowe May 1, 2026
dc1ab51
add logs for upload and fixed upload still happening if vuln not changed
jess-lowe May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/combine-to-osv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import (

"cloud.google.com/go/storage"
"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/ossf/osv-schema/bindings/go/osvschema"
"google.golang.org/api/iterator"
Expand Down Expand Up @@ -92,7 +92,7 @@ func main() {
vulnerabilities = append(vulnerabilities, v)
}

upload.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions)
writer.UploadVulnsToGCS(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions)
}

// extractCVEName extracts the CVE name from a given filename and prefix.
Expand Down
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/converters/alpine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import (
"strings"
"time"

"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/google/osv/vulnfeeds/vulns"
"github.com/ossf/osv-schema/bindings/go/osvschema"
Expand Down Expand Up @@ -64,7 +64,7 @@ func main() {
}

ctx := context.Background()
upload.Upload(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions)
writer.UploadVulnsToGCS(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions)
logger.Info("Alpine CVE conversion succeeded.")
}

Expand Down
6 changes: 3 additions & 3 deletions vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ import (
"sync"
"time"

"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/cve5"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/utility/logger"
)
Expand Down Expand Up @@ -119,8 +119,8 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string
cveID := cve.Metadata.CVEID
logger.Info("Processing "+string(cveID), slog.String("cve", string(cveID)))

osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir)
osvFile, errCVE := writer.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir)
if errCVE != nil || errMetrics != nil {
logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID)))
}
Expand Down
6 changes: 3 additions & 3 deletions vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"log/slog"
"os"

"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/cve5"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/utility/logger"
)
Expand Down Expand Up @@ -46,8 +46,8 @@ func main() {
}
// create the files

osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir)
osvFile, errCVE := writer.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir)
if errCVE != nil || errMetrics != nil {
logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID)))
}
Expand Down
84 changes: 76 additions & 8 deletions vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package main

import (
"context"
"encoding/json"
"flag"
"fmt"
Expand All @@ -14,11 +15,14 @@ import (
"slices"
"sync"

"cloud.google.com/go/storage"
c "github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/nvd"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/git"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/google/osv/vulnfeeds/vulns"
)

var (
Expand All @@ -30,6 +34,9 @@ var (
rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.")
outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion")
cpuProfile = flag.String("cpuprofile", "", "Path to write cpu profile to file (default = no output)")
uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.")
outputBucket = flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.")
gcsPrefix = flag.String("gcs-prefix", "nvd-osv", "The prefix within the GCS bucket.")
)

func main() {
Expand Down Expand Up @@ -76,12 +83,24 @@ func main() {

repoTagsCache := &git.RepoTagsCache{}

var bkt *storage.BucketHandle
ctx := context.Background()
if *uploadToGCS {
client, err := storage.NewClient(ctx)
if err != nil {
logger.Fatal("Failed to create GCS client", slog.Any("err", err))
}
defer client.Close()
bkt = client.Bucket(*outputBucket)
logger.Info("GCS Client and Bucket initialized", slog.String("bucket", *outputBucket))
}

jobs := make(chan models.NVDCVE)
var wg sync.WaitGroup

for range *workers {
wg.Add(1)
go worker(&wg, jobs, *outDir, vpRepoCache, repoTagsCache)
go worker(ctx, &wg, jobs, bkt, *outDir, vpRepoCache, repoTagsCache)
}

for _, cve := range parsed.Vulnerabilities {
Expand All @@ -107,7 +126,7 @@ func main() {
}
}

func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) models.ConversionOutcome {
func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*vulns.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) {
metrics := &models.ConversionMetrics{
CVEID: cve.ID,
CNA: "nvd",
Expand All @@ -116,24 +135,73 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi
metrics.Repos = repos

var outcome models.ConversionOutcome
var vuln *vulns.Vulnerability
var finalMetrics *models.ConversionMetrics
switch *outFormat {
case "OSV":
outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics)
vuln, finalMetrics, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, metrics)
case "PackageInfo":
outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics)
finalMetrics = metrics
}

return outcome
return vuln, finalMetrics, outcome
}

func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, _ string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) {
func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, bkt *storage.BucketHandle, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) {
defer wg.Done()
for cve := range jobs {
outcome := processCVE(cve, vpRepoCache, repoTagsCache)
vuln, metrics, outcome := processCVE(cve, vpRepoCache, repoTagsCache)
cveID := string(cve.ID)
if outcome == models.Error {
logger.Error("Error generating OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String()))
continue // Don't attempt to output files if there was an error
}

if outcome != models.Successful {
logger.Info("Failed to generate an OSV record", slog.String("cve", string(cve.ID)), slog.String("outcome", outcome.String()))
logger.Info("Failed to generate a successful OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String()))
if *rejectFailed {
continue // Skip outputting OSV file
}
} else {
logger.Info("Generated OSV record for "+cveID, slog.String("cve", cveID))
}

if *uploadToGCS && bkt != nil {
if vuln != nil {
if err := writer.UploadVulnIfChanged(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil {
logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err))
}
}
if *outputMetrics && metrics != nil {
if err := writer.UploadMetricsToGCS(ctx, bkt, *gcsPrefix, models.CVEID(cveID), metrics); err != nil {
logger.Error("Failed to upload metrics", slog.String("cve", cveID), slog.Any("err", err))
}
}
} else {
logger.Info("Generated OSV record for "+string(cve.ID), slog.String("cve", string(cve.ID)))
// Local file output
if vuln != nil {
osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), outDir)
if err != nil {
logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err))
} else {
if err := vuln.ToJSON(osvFile); err != nil {
logger.Error("Failed to write OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err))
}
osvFile.Close()
}
}
if *outputMetrics && metrics != nil {
metricsFile, err := writer.CreateMetricsFile(models.CVEID(cveID), outDir)
if err != nil {
logger.Error("Failed to create metrics file locally", slog.String("cve", cveID), slog.Any("err", err))
} else {
if err := writer.WriteMetricsFile(metrics, metricsFile); err != nil {
logger.Error("Failed to write metrics file locally", slog.String("cve", cveID), slog.Any("err", err))
}
metricsFile.Close()
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,4 @@ for (( YEAR = $(date +%Y) ; YEAR >= ${FIRST_INSCOPE_YEAR} ; YEAR-- )); do
-exec cp '{}' "${WORK_DIR}/nvd2osv/gcs_stage/" \;
done

# Copy (and remove any missing) results to GCS bucket, with some sanity
# checking.
objs_present=$(gcloud storage ls "${OSV_OUTPUT_GCS_PATH}" | wc -l)
objs_deleted=$(gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" | wc -l)

threshold=$(echo "scale=2; ${objs_present} * (${SAFETY_THRESHOLD_PCT:-2} / 100)" | bc)

# Bash can't deal with floats
if (( $(echo "${objs_deleted} > ${threshold}" | bc -l) )); then
echo "Warning. Unexpectedly high (${objs_deleted}) number of CVE records would be deleted!" >> /dev/stderr
gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" >> /dev/stderr
# TODO: add back in once nvd-mirror issue fixed: exit 1
fi

echo "Copying NVD CVE records successfully converted to GCS bucket"
gcloud storage rsync --quiet --checksums-only "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}"

echo "Conversion run complete"
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/converters/debian/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import (
"strconv"
"strings"

"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/faulttolerant"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/google/osv/vulnfeeds/vulns"
"github.com/ossf/osv-schema/bindings/go/osvschema"
Expand Down Expand Up @@ -70,7 +70,7 @@ func main() {
}

ctx := context.Background()
upload.Upload(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions)
writer.UploadVulnsToGCS(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions)
logger.Info("Debian CVE conversion succeeded.")
}

Expand Down
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"time"

htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/ossf/osv-schema/bindings/go/osvschema"
"golang.org/x/text/encoding/charmap"
Expand Down Expand Up @@ -572,7 +572,7 @@ func run(webwmlRepo, securityTrackerRepo, outputDir, outputBucket string, upload
if uploadToGCS {
logger.Info("Uploading to GCS", "bucket", outputBucket)
ctx := context.Background()
upload.Upload(ctx, "debian-osv", uploadToGCS, outputBucket, "", numWorkers, outputDir, allVulnerabilities, doDeletions)
writer.UploadVulnsToGCS(ctx, "debian-osv", uploadToGCS, outputBucket, "", numWorkers, outputDir, allVulnerabilities, doDeletions)
} else {
logger.Info("Skipping GCS upload")
}
Expand Down
43 changes: 0 additions & 43 deletions vulnfeeds/conversion/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,49 +133,6 @@ func ConductAnalysis(year string, dir string) {
}
}

// CreateMetricsFile creates the metrics file for the given CVE inside vulnDir
// and returns the open handle. The file is named "<id>.metrics" followed by
// models.Extension; os.Create truncates any existing file at that path.
//
// On failure the error is logged and returned together with a nil file.
func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) {
	cve := string(id)
	path := filepath.Join(vulnDir, cve+".metrics"+models.Extension)

	file, createErr := os.Create(path)
	if createErr == nil {
		return file, nil
	}

	logger.Info("Failed to open for writing "+path, slog.String("cve", cve), slog.String("path", path), slog.Any("err", createErr))

	return nil, createErr
}

// CreateOSVFile creates the file that will hold the OSV record for id under
// vulnDir. The file is named "<id>" followed by models.Extension; os.Create
// truncates any existing file at that path.
//
// On failure the error is logged and returned together with a nil file.
func CreateOSVFile(id models.CVEID, vulnDir string) (*os.File, error) {
	name := string(id) + models.Extension
	target := filepath.Join(vulnDir, name)

	handle, err := os.Create(target)
	if err == nil {
		return handle, nil
	}

	logger.Info("Failed to open for writing "+target, slog.String("cve", string(id)), slog.String("path", target), slog.Any("err", err))

	return nil, err
}

// WriteMetricsFile serializes metrics as indented JSON, writes the result to
// metricsFile, and closes metricsFile before returning.
//
// The function takes ownership of metricsFile: it is closed on every path,
// including marshal and write failures, so the descriptor is never leaked.
// Callers must not use the handle afterwards (a second Close is harmless but
// returns an error).
//
// It returns the marshal error, a wrapped write error, or, when both of those
// succeeded, a wrapped error from closing the file.
func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) (err error) {
	// Close unconditionally; only surface the close failure when nothing
	// earlier went wrong, so the first (root-cause) error is preserved.
	defer func() {
		if closeErr := metricsFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close %s: %w", metricsFile.Name(), closeErr)
		}
	}()

	// metrics is already a pointer; the encoder dereferences it, so there is
	// no need to pass its address.
	marshalledMetrics, err := json.MarshalIndent(metrics, "", " ")
	if err != nil {
		logger.Info("Failed to marshal", slog.Any("err", err))
		return err
	}

	if _, err = metricsFile.Write(marshalledMetrics); err != nil {
		logger.Warn("Failed to write", slog.String("path", metricsFile.Name()), slog.Any("err", err))
		return fmt.Errorf("failed to write %s: %w", metricsFile.Name(), err)
	}

	return nil
}

// GitVersionsToCommits examines repos and tries to convert versions to commits by treating them as Git tags.
// Returns the resolved ranges, unresolved ranges, and successful repos involved.
func GitVersionsToCommits(versionRanges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) {
Expand Down
Loading
Loading