diff --git a/go.mod b/go.mod index 1560382..d042b83 100644 --- a/go.mod +++ b/go.mod @@ -5,10 +5,14 @@ go 1.18 require ( github.com/PuerkitoBio/goquery v1.5.1 github.com/cheggaaa/pb/v3 v3.1.0 + github.com/google/licenseclassifier/v2 v2.0.0 github.com/hashicorp/go-retryablehttp v0.7.2 + github.com/orcaman/concurrent-map/v2 v2.0.1 + github.com/samber/lo v1.38.1 github.com/spf13/cobra v1.6.1 github.com/stretchr/testify v1.8.1 - golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 + golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f + golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 k8s.io/utils v0.0.0-20230115233650-391b47cb4029 modernc.org/sqlite v1.20.3 @@ -30,11 +34,13 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect github.com/rivo/uniseg v0.2.0 // indirect + github.com/sergi/go-diff v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/mod v0.3.0 // indirect - golang.org/x/net v0.0.0-20201021035429-f5854403a974 // indirect + golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect + golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 // indirect golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect - golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 // indirect + golang.org/x/text v0.3.7 // indirect + golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/uint128 v1.2.0 // indirect modernc.org/cc/v3 v3.40.0 // indirect diff --git a/go.sum b/go.sum index 91ffddd..219db3a 100644 --- a/go.sum +++ b/go.sum @@ -15,7 +15,10 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/licenseclassifier/v2 v2.0.0 h1:1Y57HHILNf4m0ABuMVb6xk4vAJYEUO0gDxNpog0pyeA= +github.com/google/licenseclassifier/v2 v2.0.0/go.mod h1:cOjbdH0kyC9R22sdQbYsFkto4NGCAc+ZSwbeThazEtM= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -29,6 +32,11 @@ github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7P github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -37,6 +45,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-runewidth v0.0.12 h1:Y41i/hVW3Pgwr8gV+J23B9YEY0zxjptBuCWEaxmAOow= github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= +github.com/orcaman/concurrent-map/v2 v2.0.1 h1:jOJ5Pg2w1oeB6PeDurIYf6k9PQ+aTITr/6lP/L/zp6c= +github.com/orcaman/concurrent-map/v2 v2.0.1/go.mod h1:9Eq3TG2oBe5FirmYWQfYO5iH1q0Jv47PLaNK++uCdOM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= @@ -45,6 +55,10 @@ github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= +github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -53,46 +67,41 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 h1:LQmS1nU0twXLA96Kt7U9qtHJEbBk3z6Q0V4UXjZkpr4= +golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f h1:OfiFi4JbukWwe3lzw+xunroH1mnC1e2Gy5cxNJApiSY= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab h1:2QkjZIsXupsJbJIdSjjUOgWK3aEtzyuh2mPt3l/CkeU= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 h1:M8tBwCtWD/cZV9DZpFYRUgaymAYAr+aIUTWzDaM3uPs= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023 h1:0c3L82FDQ5rt1bjTBlchS8t6RQ6299/+5bWMnRLh+uI= +golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/builder/builder.go b/pkg/builder/builder.go index 8758638..bf49ffb 100644 --- a/pkg/builder/builder.go +++ b/pkg/builder/builder.go @@ -4,7 +4,9 @@ import ( "encoding/json" "io" "log" + "os" "path/filepath" + "strings" "time" "github.com/cheggaaa/pb/v3" @@ -18,6 +20,7 @@ import ( ) const updateInterval = time.Hour * 72 // 3 days +const licenseStringLimit = 150 type Builder struct { db db.DB @@ -34,7 +37,20 @@ func NewBuilder(db db.DB, meta db.Client) Builder { } func (b *Builder) Build(cacheDir string) error { - indexDir := filepath.Join(cacheDir, "indexes") + indexDir := filepath.Join(cacheDir, types.IndexesDir) + licenseDir := filepath.Join(cacheDir, types.LicenseDir) + + licenseFile, err := os.Open(licenseDir + types.NormalizedlicenseFileName) + if err != nil { + xerrors.Errorf("failed to open normalized license file: %w", err) + } + + licenseMap := make(map[string]string) + + if err := json.NewDecoder(licenseFile).Decode(&licenseMap); err != nil { + return xerrors.Errorf("failed to decode license file: %w", err) + } + count, err := fileutil.Count(indexDir) if err != nil { return xerrors.Errorf("count error: %w", err) @@ -56,6 +72,7 @@ func (b *Builder) Build(cacheDir string) error { Version: ver.Version, SHA1: ver.SHA1, ArchiveType: index.ArchiveType, + License: b.processLicenseInformationFromCache(ver.License, licenseDir, licenseMap), }) } bar.Increment() @@ -92,3 +109,30 @@ func (b *Builder) Build(cacheDir string) error { return nil } + +// processLicenseInformationFromCache : gets cached license information by license key and updates the records to be inserted +func (b *Builder) processLicenseInformationFromCache(license, licenseDir string, licenseMap map[string]string) string { + var updatedLicenseList []string + // process license information + for _, l := range strings.Split(license, "|") { + if val, ok := licenseMap[l]; ok { + val = strings.TrimSpace(val) + updatedLicenseList = append(updatedLicenseList, val) + } + } + + // precautionary check + // return first characters if license string is too long + result := strings.Join(updatedLicenseList, "|") + if len(result) > licenseStringLimit { + r := []rune(result) + if len(r) > licenseStringLimit { + log.Printf("untrimmed license string: %s", result) + return string(r[:licenseStringLimit]) + } + + } + + return result + +} diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go index 06e9527..1889d30 100644 --- a/pkg/crawler/crawler.go +++ b/pkg/crawler/crawler.go @@ -5,17 +5,25 @@ import ( "encoding/hex" "encoding/xml" "fmt" - "github.com/aquasecurity/trivy-java-db/pkg/fileutil" - "github.com/aquasecurity/trivy-java-db/pkg/types" "io" "log" "net/http" + "os" "path/filepath" + "sort" "strings" "sync" + "time" + + "github.com/aquasecurity/trivy-java-db/pkg/fileutil" + "github.com/aquasecurity/trivy-java-db/pkg/types" + "github.com/google/licenseclassifier/v2/tools/identify_license/backend" + "github.com/samber/lo" "github.com/PuerkitoBio/goquery" "github.com/hashicorp/go-retryablehttp" + cmap "github.com/orcaman/concurrent-map/v2" + "golang.org/x/net/html/charset" "golang.org/x/sync/semaphore" "golang.org/x/xerrors" ) @@ -23,13 +31,21 @@ import ( const mavenRepoURL = "https://repo.maven.apache.org/maven2/" type Crawler struct { - dir string - http *retryablehttp.Client + dir string + licensedir string + http *retryablehttp.Client rootUrl string wg sync.WaitGroup urlCh chan string limit *semaphore.Weighted + opt Option + + // license classifier + classifier *backend.ClassifierBackend + + // uniqueLicenseKeys : key is hash of license url or name in POM, whichever available + uniqueLicenseKeys cmap.ConcurrentMap[string, License] } type Option struct { @@ -38,6 +54,11 @@ type Option struct { CacheDir string } +type licenseFilesMeta struct { + FileName string + License +} + func NewCrawler(opt Option) Crawler { client := retryablehttp.NewClient() client.Logger = nil @@ -46,21 +67,39 @@ func NewCrawler(opt Option) Crawler { opt.RootUrl = mavenRepoURL } - indexDir := filepath.Join(opt.CacheDir, "indexes") + indexDir := filepath.Join(opt.CacheDir, types.IndexesDir) log.Printf("Index dir %s", indexDir) - return Crawler{ - dir: indexDir, - http: client, + licensedir := filepath.Join(opt.CacheDir, types.LicenseDir) - rootUrl: opt.RootUrl, - urlCh: make(chan string, opt.Limit*10), - limit: semaphore.NewWeighted(opt.Limit), + err := os.MkdirAll(licensedir, os.ModePerm) + if err != nil { + log.Panicf("panic while creating license cache directory %s .Error:%s", licensedir, err) + } + log.Printf("License dir %s", licensedir) + + classifier, err := backend.New() + if err != nil { + log.Panicf("panic while creating license classifier backend %s", err) + } + + return Crawler{ + dir: indexDir, + licensedir: licensedir, + http: client, + + rootUrl: opt.RootUrl, + urlCh: make(chan string, opt.Limit*10), + limit: semaphore.NewWeighted(opt.Limit), + classifier: classifier, + opt: opt, + uniqueLicenseKeys: cmap.New[License](), } } func (c *Crawler) Crawl(ctx context.Context) error { log.Println("Crawl maven repository and save indexes") + errCh := make(chan error) defer close(errCh) @@ -112,9 +151,12 @@ loop: } } log.Println("Crawl completed") - return nil + + // fetch license information + return c.classifyLicense(ctx) } +// Visit : visits the maven urls. func (c *Crawler) Visit(url string) error { resp, err := c.http.Get(url) if err != nil { @@ -148,11 +190,8 @@ func (c *Crawler) Visit(url string) error { return xerrors.Errorf("metadata parse error: %w", err) } if meta != nil { - if err = c.crawlSHA1(url, meta); err != nil { - return err - } - // Return here since there is no need to crawl dirs anymore. - return nil + // analyze GAV information + return c.crawlSHA1(url, meta) } } @@ -176,10 +215,22 @@ func (c *Crawler) crawlSHA1(baseURL string, meta *Metadata) error { return err } if len(sha1) != 0 { + + // fetch license information on the basis of pom url + pomURL := getPomURL(baseURL, meta.ArtifactID, version) + licenseKeys, err := c.fetchAndSavePOMLicenseKeys(pomURL) + if err != nil { + log.Println(err) + } + licenseKeys = lo.Uniq(licenseKeys) + sort.Strings(licenseKeys) + v := Version{ Version: version, SHA1: sha1, + License: strings.Join(licenseKeys, "|"), } + versions = append(versions, v) } } @@ -262,3 +313,260 @@ func (c *Crawler) fetchSHA1(url string) ([]byte, error) { } return sha1b, nil } + +func (c *Crawler) fetchAndSavePOMLicenseKeys(url string) ([]string, error) { + var keys []string + resp, err := c.http.Get(url) + if resp.StatusCode == http.StatusNotFound { + return keys, nil + } + if err != nil { + return keys, xerrors.Errorf("can't get pom xml from %s: %w", url, err) + } + defer resp.Body.Close() + + var pomProject PomProject + + decoder := xml.NewDecoder(resp.Body) + decoder.CharsetReader = charset.NewReaderLabel + err = decoder.Decode(&pomProject) + + if err != nil { + return keys, xerrors.Errorf("can't parse pom xml from %s: %w", url, err) + } + + if len(pomProject.Licenses) == 0 { + return keys, nil + } + + for _, l := range pomProject.Licenses { + l.LicenseKey = getLicenseKey(l) + + // update uniqueLicenseKeys map + c.uniqueLicenseKeys.Set(l.LicenseKey, l) + + keys = append(keys, l.LicenseKey) + } + + return keys, nil + +} + +func (c *Crawler) classifyLicense(ctx context.Context) error { + normalizedLicenseMap := make(map[string]string) + + // prepare classifier data i.e create temporary files with license text to be used for classification + licenseFiles, err := c.prepareClassifierData(ctx) + if err != nil { + return err + } + + files := make([]string, 0) + filesLicenseMap := make(map[string]License) + + // change license file list to map + for _, data := range licenseFiles { + if _, ok := filesLicenseMap[data.FileName]; !ok { + filesLicenseMap[data.FileName] = data.License + files = append(files, data.FileName) + } + } + + if len(filesLicenseMap) == 0 { + return nil + } + + // classify licenses + + // 1 minute is the timeout for license classification of a file + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + + // c.opt.Limit is the number of concurrent tasks spawned to process license files + errs := c.classifier.ClassifyLicensesWithContext(ctx, int(c.opt.Limit), files, true) + if len(errs) > 0 { + log.Println("errors in license classification ", errs) + } + + // extract results + results := c.classifier.GetResults() + sort.Sort(results) + + // process results to update the normalizedLicenseMap + if results.Len() > 0 { + for _, r := range results { + if licenseVal, ok := filesLicenseMap[r.Filename]; ok { + // since results are sorted, we can skip processing of data with confidence <90% + if r.Confidence < 0.9 { + break + } + + // skip processing since a higher confidence result is already processed + if licenseVal.ClassificationConfidence > r.Confidence { + continue + } + + licenseVal.ClassificationConfidence = r.Confidence + filesLicenseMap[r.Filename] = licenseVal + + // update normalized license map + normalizedLicenseMap[licenseVal.LicenseKey] = r.Name + } + } + } + + defer func() { + // update normalized license map for license keys which couldn't be classified or had no url in pom for classification + uniqLicenseKeys := c.uniqueLicenseKeys.Items() + for key, license := range uniqLicenseKeys { + if _, ok := normalizedLicenseMap[key]; !ok { + if len(license.Name) > 0 { + normalizedLicenseMap[key] = license.Name + } + } + } + + err := fileutil.WriteJSON(c.licensedir+types.NormalizedlicenseFileName, normalizedLicenseMap) + if err != nil { + log.Println(err) + } + }() + + return nil +} + +func (c *Crawler) prepareClassifierData(ctx context.Context) ([]licenseFilesMeta, error) { + log.Println("Preparing license classifier data") + + var licenseFiles []licenseFilesMeta + + // switch from concurrent to normal map + uniqLicenseKeyMap := c.uniqueLicenseKeys.Items() + uniqueLicenseKeyList := c.uniqueLicenseKeys.Keys() + + client := http.Client{ + Timeout: 10 * time.Second, + } + + licenseKeyChannel := make(chan string, len(uniqueLicenseKeyList)) + + log.Printf("Total license keys to be processed %d", len(uniqueLicenseKeyList)) + + // dump license keys to the channel so that they can be processed + for _, key := range uniqueLicenseKeyList { + licenseKeyChannel <- key + } + + limit := semaphore.NewWeighted(c.opt.Limit) + + // error channel + errCh := make(chan error) + defer close(errCh) + + // status channel to track processing of license keys + type status struct { + Meta licenseFilesMeta + Done bool + } + prepStatus := make(chan status, len(uniqueLicenseKeyList)) + defer close(prepStatus) + + // process license keys channel + go func() { + for licenseKey := range licenseKeyChannel { + + if err := limit.Acquire(ctx, 1); err != nil { + errCh <- xerrors.Errorf("semaphore acquire error: %w", err) + } + + // process license key to generate license file + go func(licenseKey string) { + defer limit.Release(1) + + licenseFileName := getLicenseFileName(c.licensedir, licenseKey) + licenseMeta := uniqLicenseKeyMap[licenseKey] + ok, err := c.generateLicenseFile(client, licenseFileName, licenseMeta) + if err != nil { + errCh <- xerrors.Errorf("generateLicenseFile error: %w", err) + } + + // update status post processing of license key + prepStatus <- status{ + Done: ok, + Meta: licenseFilesMeta{ + License: licenseMeta, + FileName: licenseFileName, + }, + } + }(licenseKey) + } + }() + + count := 0 +loop: + for { + select { + case status := <-prepStatus: + count++ + if status.Done { + licenseFiles = append(licenseFiles, status.Meta) + } + + if count%1000 == 0 { + log.Printf("Processed %d license keys", count) + } + + if count == len(uniqueLicenseKeyList) { + close(licenseKeyChannel) + break loop + } + case err := <-errCh: + close(licenseKeyChannel) + return licenseFiles, err + + } + } + + log.Println("Preparation of license classifier data completed") + + return licenseFiles, nil + +} + +func (c *Crawler) generateLicenseFile(client http.Client, licenseFileName string, licenseMeta License) (bool, error) { + + // if url not available then no point using the license classifier + // Names can be analyzed but in most cases license classifier does not result in any matches + if !strings.HasPrefix(licenseMeta.URL, "http") { + return false, nil + } + + // create file + f, err := os.Create(licenseFileName) + if err != nil { + return false, err + } + + defer f.Close() + + // download license url contents + resp, err := client.Get(licenseMeta.URL) + if resp == nil { + return false, nil + } + + if resp.StatusCode == http.StatusNotFound { + return false, nil + } + if err != nil { + return false, nil + } + defer resp.Body.Close() + + _, err = io.Copy(f, resp.Body) + if err != nil { + return false, nil + } + + return true, nil +} diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go index 46a5d40..a200e05 100644 --- a/pkg/crawler/crawler_test.go +++ b/pkg/crawler/crawler_test.go @@ -2,22 +2,25 @@ package crawler_test import ( "context" - "github.com/stretchr/testify/assert" "net/http" "net/http/httptest" "os" "path/filepath" "testing" + "github.com/stretchr/testify/assert" + "github.com/aquasecurity/trivy-java-db/pkg/crawler" ) func TestCrawl(t *testing.T) { tests := []struct { - name string - fileNames map[string]string - goldenPath string - filePath string + name string + fileNames map[string]string + goldenPath string + goldenNormalizedlicensePath string + filePath string + normalizedLicensePath string }{ { name: "happy path", @@ -27,11 +30,17 @@ func TestCrawl(t *testing.T) { "/maven2/abbot/abbot/": "testdata/abbot_abbot.html", "/maven2/abbot/abbot/maven-metadata.xml": "testdata/maven-metadata.xml", "/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/abbot-0.12.3.jar.sha1", + "/maven2/abbot/abbot/0.12.3/abbot-0.12.3.pom": "testdata/abbot-0.12.3.pom", "/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/abbot-0.13.0.jar.sha1", + "/maven2/abbot/abbot/0.13.0/abbot-0.13.0.pom": "testdata/abbot-0.13.0.pom", "/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/abbot-1.4.0.jar.sha1", + "/maven2/abbot/abbot/1.4.0/abbot-1.4.0.pom": "testdata/abbot-1.4.0.pom", }, - goldenPath: "testdata/golden/abbot.json", - filePath: "indexes/abbot/abbot.json", + goldenPath: "testdata/golden/abbot.json", + goldenNormalizedlicensePath: "testdata/golden/normalized_license.json", + + filePath: "indexes/abbot/abbot.json", + normalizedLicensePath: "licenses/normalized_license.json", }, } for _, tt := range tests { @@ -63,6 +72,16 @@ func TestCrawl(t *testing.T) { assert.NoError(t, err) assert.JSONEq(t, string(want), string(got)) + + // normalized license json file check + got, err = os.ReadFile(filepath.Join(tmpDir, tt.normalizedLicensePath)) + assert.NoError(t, err) + + want, err = os.ReadFile(tt.goldenNormalizedlicensePath) + assert.NoError(t, err) + + assert.JSONEq(t, string(want), string(got)) + }) } diff --git a/pkg/crawler/testdata/abbot-0.12.3.pom b/pkg/crawler/testdata/abbot-0.12.3.pom new file mode 100644 index 0000000..be429a9 --- /dev/null +++ b/pkg/crawler/testdata/abbot-0.12.3.pom @@ -0,0 +1 @@ +invalid pom \ No newline at end of file diff --git a/pkg/crawler/testdata/abbot-0.13.0.pom b/pkg/crawler/testdata/abbot-0.13.0.pom new file mode 100644 index 0000000..be429a9 --- /dev/null +++ b/pkg/crawler/testdata/abbot-0.13.0.pom @@ -0,0 +1 @@ +invalid pom \ No newline at end of file diff --git a/pkg/crawler/testdata/abbot-1.4.0.pom b/pkg/crawler/testdata/abbot-1.4.0.pom new file mode 100644 index 0000000..c4d4132 --- /dev/null +++ b/pkg/crawler/testdata/abbot-1.4.0.pom @@ -0,0 +1,62 @@ + + + 4.0.0 + abbot + abbot + 1.4.0 + Abbot Java GUI Test Library + Abbot provides a wrapper around java.awt.Robot to make testing AWT and Swing Applications easier + http://abbot.sf.net/ + + + + + EPL + https://www.eclipse.org/legal/epl-v10.html + + + EPL + https://www.eclipse.org/legal/epl-v10.html + + + Random License with invalid URL in POM + Invalid url + + + Random License without URL in POM + + + + + Invalid license url with missing license name + + + + + + + Gerard Davisonr + gerard.davison@oracle.com + Oralce + http://www.oracle.com + + + + + + scm:svn://svn.code.sf.net/p/abbot/svn/trunkabbot/trunk/ + scm:svn://svn.code.sf.net/p/abbot/svn/trunkabbot/trunk/ + http://sourceforge.net/p/abbot/svn/HEAD/tree/abbot/trunk/ + + + + + junit + junit + 4.8.2 + + + + + + diff --git a/pkg/crawler/testdata/golden/abbot.json b/pkg/crawler/testdata/golden/abbot.json index 16130ad..12d896f 100644 --- a/pkg/crawler/testdata/golden/abbot.json +++ b/pkg/crawler/testdata/golden/abbot.json @@ -4,15 +4,18 @@ "Versions": [ { "Version": "0.12.3", - "SHA1": "UdKKJ9kZzoaQpA9PM1udWRzrFuk=" + "SHA1": "UdKKJ9kZzoaQpA9PM1udWRzrFuk=", + "License": "" }, { "Version": "0.13.0", - "SHA1": "WW2R5nYxsN6wX7aF2NG2c18+T2A=" + "SHA1": "WW2R5nYxsN6wX7aF2NG2c18+T2A=", + "License": "" }, { "Version": "1.4.0", - "SHA1": "ojY2RqndBZVWM7RQAQtZohr4pCM=" + "SHA1": "ojY2RqndBZVWM7RQAQtZohr4pCM=", + "License": "1028932157|2166136261|2425343180|3188410119" } ], "ArchiveType": "jar" diff --git a/pkg/crawler/testdata/golden/normalized_license.json b/pkg/crawler/testdata/golden/normalized_license.json new file mode 100644 index 0000000..c7a397f --- /dev/null +++ b/pkg/crawler/testdata/golden/normalized_license.json @@ -0,0 +1,5 @@ +{ + "1028932157": "EPL-1.0", + "2425343180": "Random License with invalid URL in POM", + "3188410119": "Random License without URL in POM" +} \ No newline at end of file diff --git a/pkg/crawler/types.go b/pkg/crawler/types.go index c0d5b7c..a280ddf 100644 --- a/pkg/crawler/types.go +++ b/pkg/crawler/types.go @@ -24,4 +24,22 @@ type Index struct { type Version struct { Version string SHA1 []byte + License string +} + +type PomProject struct { + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` + Name string `xml:"name"` + Description string `xml:"description"` + URL string `xml:"url"` + Licenses []License `xml:"licenses>license"` +} + +type License struct { + Name string `xml:"name"` + URL string `xml:"url"` + LicenseKey string + ClassificationConfidence float64 } diff --git a/pkg/crawler/utils.go b/pkg/crawler/utils.go new file mode 100644 index 0000000..2c00428 --- /dev/null +++ b/pkg/crawler/utils.go @@ -0,0 +1,36 @@ +package crawler + +import ( + "fmt" + "hash/fnv" + "strings" +) + +func getLicenseKey(l License) string { + if len(l.URL) > 0 && strings.HasPrefix(l.URL, "http") { + return hash(l.URL) + } + return hash(l.Name) +} + +func getPomURL(baseURL, artifactID, version string) string { + pomFileName := fmt.Sprintf("/%s-%s.pom", artifactID, version) + return baseURL + version + pomFileName +} + +func hash(s string) string { + h := fnv.New32a() + h.Write([]byte(s)) + return fmt.Sprint(h.Sum32()) +} + +func min(a int, b int) int { + if a > b { + return b + } + return a +} + +func getLicenseFileName(dir, key string) string { + return dir + "/trivy_license_" + key + ".txt" +} diff --git a/pkg/db/db.go b/pkg/db/db.go index e740730..0cdb0d6 100644 --- a/pkg/db/db.go +++ b/pkg/db/db.go @@ -59,7 +59,7 @@ func (db *DB) Init() error { if _, err := db.client.Exec("CREATE TABLE artifacts(id INTEGER PRIMARY KEY, group_id TEXT, artifact_id TEXT)"); err != nil { return xerrors.Errorf("unable to create 'artifacts' table: %w", err) } - if _, err := db.client.Exec("CREATE TABLE indices(artifact_id INTEGER, version TEXT, sha1 BLOB, archive_type TEXT, foreign key (artifact_id) references artifacts(id))"); err != nil { + if _, err := db.client.Exec("CREATE TABLE indices(artifact_id INTEGER, version TEXT, sha1 BLOB, archive_type TEXT, license TEXT, foreign key (artifact_id) references artifacts(id))"); err != nil { return xerrors.Errorf("unable to create 'indices' table: %w", err) } @@ -110,13 +110,13 @@ func (db *DB) InsertIndexes(indexes []types.Index) error { for _, index := range indexes { _, err = tx.Exec(` - INSERT INTO indices(artifact_id, version, sha1, archive_type) + INSERT INTO indices(artifact_id, version, sha1, archive_type, license) VALUES ( (SELECT id FROM artifacts WHERE group_id=? AND artifact_id=?), - ?, ?, ? + ?, ?, ?, ? ) ON CONFLICT(sha1) DO NOTHING`, - index.GroupID, index.ArtifactID, index.Version, index.SHA1, index.ArchiveType) + index.GroupID, index.ArtifactID, index.Version, index.SHA1, index.ArchiveType, index.License) if err != nil { return xerrors.Errorf("unable to insert to 'indices' table: %w", err) } @@ -147,12 +147,12 @@ func (db *DB) SelectIndexBySha1(sha1 string) (types.Index, error) { return index, xerrors.Errorf("sha1 decode error: %w", err) } row := db.client.QueryRow(` - SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type + SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license FROM indices i JOIN artifacts a ON a.id = i.artifact_id WHERE i.sha1 = ?`, sha1b) - err = row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType) + err = row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License) if err != nil && !errors.Is(err, sql.ErrNoRows) { return index, xerrors.Errorf("select index error: %w", err) } @@ -162,12 +162,12 @@ func (db *DB) SelectIndexBySha1(sha1 string) (types.Index, error) { func (db *DB) SelectIndexByArtifactIDAndGroupID(artifactID, groupID string) (types.Index, error) { var index types.Index row := db.client.QueryRow(` - SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type + SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license FROM indices i JOIN artifacts a ON a.id = i.artifact_id WHERE a.group_id = ? AND a.artifact_id = ?`, groupID, artifactID) - err := row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType) + err := row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License) if err != nil && !errors.Is(err, sql.ErrNoRows) { return index, xerrors.Errorf("select index error: %w", err) } @@ -178,7 +178,7 @@ func (db *DB) SelectIndexesByArtifactIDAndFileType(artifactID string, fileType t error) { var indexes []types.Index rows, err := db.client.Query(` - SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type + SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license FROM indices i JOIN artifacts a ON a.id = i.artifact_id WHERE a.artifact_id = ? AND i.archive_type = ?`, @@ -188,7 +188,7 @@ func (db *DB) SelectIndexesByArtifactIDAndFileType(artifactID string, fileType t } for rows.Next() { var index types.Index - if err = rows.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType); err != nil { + if err = rows.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License); err != nil { return nil, xerrors.Errorf("scan row error: %w", err) } indexes = append(indexes, index) diff --git a/pkg/db/db_test.go b/pkg/db/db_test.go index 6f7dd7a..08728da 100644 --- a/pkg/db/db_test.go +++ b/pkg/db/db_test.go @@ -17,6 +17,7 @@ import ( var ( jstlSha1b, _ = hex.DecodeString("9c581de633e94be1e7a955bd4e8292f16e554387") javaxServletSha1b, _ = hex.DecodeString("bca201e52333629c59e459e874e5ecd8f9899e15") + junitSHA, _ = hex.DecodeString("1013627e3993319870863a020034004717505815") indexJstl = types.Index{ GroupID: "jstl", ArtifactID: "jstl", @@ -31,6 +32,14 @@ var ( SHA1: javaxServletSha1b, ArchiveType: types.JarType, } + indexJunit = types.Index{ + GroupID: "junit", + ArtifactID: "junit", + Version: "4.9", + SHA1: junitSHA, + ArchiveType: types.JarType, + License: "Common Public License Version 1.0", + } ) func TestSelectIndexBySha1(t *testing.T) { @@ -52,12 +61,19 @@ func TestSelectIndexBySha1(t *testing.T) { want: types.Index{}, assertErr: assert.NoError, }, + { + name: "index with license using sha", + sha1: "1013627e3993319870863a020034004717505815", + want: indexJunit, + assertErr: assert.NoError, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { dbc, err := dbtest.InitDB(t, []types.Index{ indexJstl, indexJavaxServlet, + indexJunit, }) require.NoError(t, err) @@ -97,12 +113,20 @@ func TestSelectIndexByArtifactIDAndGroupID(t *testing.T) { want: types.Index{}, assertErr: assert.NoError, }, + { + name: "index with license using groupid and artifactid", + groupID: "junit", + artifactID: "junit", + want: indexJunit, + assertErr: assert.NoError, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { dbc, err := dbtest.InitDB(t, []types.Index{ indexJstl, indexJavaxServlet, + indexJunit, }) require.NoError(t, err) @@ -139,12 +163,21 @@ func TestSelectIndexesByArtifactIDAndFileType(t *testing.T) { artifactID: "jstl", archiveType: "wrong", }, + { + name: "index with license using artifactid and archivetype", + artifactID: "junit", + archiveType: types.JarType, + want: []types.Index{ + indexJunit, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { dbc, err := dbtest.InitDB(t, []types.Index{ indexJstl, indexJavaxServlet, + indexJunit, }) require.NoError(t, err) diff --git a/pkg/fileutil/file.go b/pkg/fileutil/file.go index d9cad76..59e0d13 100644 --- a/pkg/fileutil/file.go +++ b/pkg/fileutil/file.go @@ -2,12 +2,13 @@ package fileutil import ( "encoding/json" - "golang.org/x/xerrors" "io" "io/fs" "log" "os" "path/filepath" + + "golang.org/x/xerrors" ) func Walk(root string, walkFn func(r io.Reader, path string) error) error { diff --git a/pkg/types/types.go b/pkg/types/types.go index 6306cc4..d4c2f49 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -7,7 +7,9 @@ const ( JarType = "jar" AarType = "aar" - IndexesDir = "indexes" + IndexesDir = "indexes" + LicenseDir = "licenses" + NormalizedlicenseFileName = "/normalized_license.json" ) type Index struct { @@ -16,4 +18,5 @@ type Index struct { Version string SHA1 []byte ArchiveType ArchiveType + License string }