Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

embed licenses and use them instead of database, if no database provided #63

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,22 @@ import (
"archive/tar"
"bytes"
"compress/gzip"
"errors"
"fmt"
"html"
"io"
"io/fs"
"log"
"math"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"unicode"

"github.com/google/licenseclassifier/licenses"
"github.com/google/licenseclassifier/stringclassifier"
"github.com/google/licenseclassifier/stringclassifier/searchset"
)
Expand Down Expand Up @@ -126,7 +132,7 @@ func New(threshold float64, options ...OptionFunc) (*License, error) {
}

if err := classifier.registerLicenses(); err != nil {
return nil, fmt.Errorf("cannot register licenses from archive: %v", err)
return nil, fmt.Errorf("cannot register licenses: %v", err)
}
return classifier, nil
}
Expand Down Expand Up @@ -219,6 +225,10 @@ func (c *License) registerLicenses() error {
var contents []byte
var err error
if c.archive == nil {
if _, statErr := os.Stat(LicenseArchive); errors.Is(statErr, fs.ErrNotExist) {
// if no LicenseArchive, default to load from embedded licenses
return c.registerLicensesFromEmbedded()
}
contents, err = ReadLicenseFile(LicenseArchive)
} else {
contents, err = c.archive()
Expand All @@ -227,6 +237,10 @@ func (c *License) registerLicenses() error {
return err
}

return c.registerLicensesFromArchive(contents)
}

func (c *License) registerLicensesFromArchive(contents []byte) error {
reader := bytes.NewReader(contents)
gr, err := gzip.NewReader(reader)
if err != nil {
Expand Down Expand Up @@ -283,6 +297,41 @@ func (c *License) registerLicenses() error {
return nil
}

// registerLicensesFromEmbedded registers the license texts listed by
// licenses.ReadLicenseDir with the classifier. registerLicenses falls back to
// it when no archive function is configured and LicenseArchive does not exist.
//
// Entries whose names do not end in ".txt" are skipped. For every other entry
// the file name without its extension becomes the license name; the text is
// trimmed with TrimExtraneousTrailingText and run through each function in
// Normalizers before it is added together with its search set.
//
// It returns the first error encountered while listing, reading, or
// registering a license.
func (c *License) registerLicensesFromEmbedded() error {
	lics, err := licenses.ReadLicenseDir()
	if err != nil {
		// Hand the failure back to the caller; New already wraps and returns
		// registration errors, so the process must not be terminated here.
		return fmt.Errorf("cannot read licenses directory: %w", err)
	}

	for _, l := range lics {
		// All license files have a ".txt" extension.
		ext := filepath.Ext(l.Name())
		if ext != ".txt" {
			continue
		}
		name := strings.TrimSuffix(filepath.Base(l.Name()), ext)

		// Read license text
		contents, err := ReadLicenseFile(l.Name())
		if err != nil {
			return fmt.Errorf("cannot read license %q: %w", l.Name(), err)
		}
		str := TrimExtraneousTrailingText(string(contents))
		for _, n := range Normalizers {
			str = n(str)
		}

		// Calculate the substrings' checksums
		set := searchset.New(str, searchset.DefaultGranularity)

		if err := c.c.AddPrecomputedValue(name, str, set); err != nil {
			return fmt.Errorf("cannot register license %q: %w", name, err)
		}
	}

	return nil
}

// endOfLicenseText is text commonly associated with the end of a license. We
// can remove text that occurs after it.
var endOfLicenseText = []string{
Expand Down
2 changes: 1 addition & 1 deletion licenses/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"io/fs"
)

// licenseFS is the embedded file system filled in by the go:embed directive
// directly above its declaration, which selects the files matching *.txt.
//
// go:embed *.db *.txt
//go:embed *.txt
var licenseFS embed.FS

// ReadLicenseFile locates and reads the license archive file. Absolute paths are used unmodified. Relative paths are expected to be in the licenses directory of the licenseclassifier package.
Expand Down