Skip to content

Commit

Permalink
Switch to a pure Go implementation of Git
Browse files Browse the repository at this point in the history
  • Loading branch information
dunglas committed Jan 17, 2017
1 parent bc6e746 commit ec17cf4
Show file tree
Hide file tree
Showing 6 changed files with 403 additions and 126 deletions.
99 changes: 48 additions & 51 deletions app.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,24 @@ import (
"os"
"path/filepath"
"strings"
"sync"

"github.com/dunglas/calavera/extractor"
"github.com/dunglas/calavera/schema"
)

// filePerms is the permission mode passed to ioutil.WriteFile for generated files.
const filePerms = 0644
// dirPerms is the permission mode passed to os.MkdirAll for output directories.
const dirPerms = 0755
// FILE_PERMS is the permission mode passed to ioutil.WriteFile for generated files.
const FILE_PERMS = 0644
// DIR_PERMS is the permission mode passed to os.MkdirAll for output directories.
const DIR_PERMS = 0755

func main() {
flag.Usage = func() {
fmt.Println("calavera input_directory output_directory")
}

prettifyBool := flag.Bool("prettify", false, "Prettify json output")

flag.Parse()

if len(flag.Args()) != 2 {
log.Fatalln("Input and output directories are mandatory arguments.")
}
inputPath, outputPath, prettify := parseFlags()

var files []string
var extractors = []extractor.Extractor{extractor.Markdown{}, extractor.Git{}}

inputPath, err := filepath.Abs(flag.Arg(0))
check(err)
var extractors = []extractor.Extractor{extractor.NewMarkdown(inputPath)}

outputPath, err := filepath.Abs(flag.Arg(1))
check(err)

wd, err := os.Getwd()
if nil != err {
check(err)
}

if err := os.Chdir(inputPath); err != nil {
check(err)
if ge, err := extractor.NewGit(inputPath); nil == err {
extractors = append(extractors, ge)
} else {
log.Println(`"` + inputPath + `" is not a Git repository. Authors and date metadata will NOT be extracted.`)
}

walkFunc := func(path string, _ os.FileInfo, err error) error {
Expand All @@ -68,24 +48,13 @@ func main() {
}

entrypoint := schema.NewItemList()
var wg sync.WaitGroup
for _, file := range files {
wg.Add(1)
go func(file string) {
convert(file, outputPath, extractors, *prettifyBool)
defer wg.Done()
}(file)

entrypoint.Element = append(entrypoint.Element, strings.Replace(file, ".md", ".jsonld", 1))
}

wg.Wait()

if err := os.Chdir(wd); err != nil {
check(err)
// Cannot use a goroutine because src-d/go-git isn't thread-safe
convert(file, outputPath, extractors, prettify)
entrypoint.Element = append(entrypoint.Element, getOutputPath(file))
}

check(ioutil.WriteFile(outputPath+"/_index.jsonld", marshal(entrypoint, *prettifyBool), filePerms))
check(ioutil.WriteFile(outputPath+"/_index.jsonld", marshal(entrypoint, prettify), FILE_PERMS))
}

func marshal(v interface{}, prettify bool) []byte {
Expand All @@ -101,6 +70,37 @@ func marshal(v interface{}, prettify bool) []byte {
return jsonContent
}

// check aborts the program when err is non-nil.
//
// A nil error is a no-op. Any other error is written to the standard logger
// with log.Fatalln, which then terminates the process with exit status 1;
// deferred functions of the callers do not run.
func check(err error) {
	if err == nil {
		return
	}

	// log.Fatalln never returns, so no statement may usefully follow it.
	log.Fatalln(err)
}

// parseFlags reads the command line and returns, in order, the absolute input
// directory, the absolute output directory and the value of the -prettify flag.
//
// Exactly two positional arguments are required; otherwise the program is
// terminated with an explanatory message. A failure to resolve either path
// to an absolute one is handed to check.
func parseFlags() (string, string, bool) {
	var prettify bool
	flag.BoolVar(&prettify, "prettify", false, "Prettify json output")

	flag.Usage = func() {
		fmt.Println("calavera input_directory output_directory")
	}

	flag.Parse()

	if flag.NArg() != 2 {
		log.Fatalln("Input and output directories are mandatory arguments.")
	}

	inputPath, inputErr := filepath.Abs(flag.Arg(0))
	check(inputErr)

	outputPath, outputErr := filepath.Abs(flag.Arg(1))
	check(outputErr)

	return inputPath, outputPath, prettify
}

func convert(path string, outputDirectory string, extractors []extractor.Extractor, prettify bool) {
creativeWork := schema.NewCreativeWork()

Expand All @@ -111,19 +111,16 @@ func convert(path string, outputDirectory string, extractors []extractor.Extract

jsonContent := marshal(creativeWork, prettify)

outputPath := fmt.Sprint(outputDirectory, "/", path[:len(path)-3], ".jsonld")
outputPath := outputDirectory + "/" + getOutputPath(path)
outputSubdirectory := filepath.Dir(outputPath)

err := os.MkdirAll(outputSubdirectory, dirPerms)
err := os.MkdirAll(outputSubdirectory, DIR_PERMS)
check(err)

err = ioutil.WriteFile(outputPath, jsonContent, filePerms)
err = ioutil.WriteFile(outputPath, jsonContent, FILE_PERMS)
check(err)
}

func check(err error) {
if err != nil {
log.Fatalln(err)
panic(err)
}
// getOutputPath maps the path of a source document to the path of the JSON-LD
// file generated for it, by swapping the file extension for ".jsonld"
// (for example "docs/intro.md" becomes "docs/intro.jsonld").
//
// The extension is located with filepath.Ext rather than assumed to be three
// bytes long, so a path without an extension, or shorter than three bytes,
// simply gets ".jsonld" appended instead of being truncated or causing a
// slice-bounds panic.
func getOutputPath(originalPath string) string {
	ext := filepath.Ext(originalPath)
	return originalPath[:len(originalPath)-len(ext)] + ".jsonld"
}
21 changes: 12 additions & 9 deletions extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,55 @@ package extractor

import (
"github.com/dunglas/calavera/schema"
"path/filepath"
"strings"
"testing"
)

func TestMarkdown_Extract(t *testing.T) {
creativeWork := schema.NewCreativeWork()

extractor := Markdown{}
err := extractor.Extract(creativeWork, "../fixtures/foo.md")
dir, _ := filepath.Abs("../fixtures")
extractor := NewMarkdown(dir)
err := extractor.Extract(creativeWork, "foo.md")

if err != nil {
t.Error(err)
}

if creativeWork.Name != "Foo" {
t.Errorf("Title should be \"Foo\", but is \"%s\"." + creativeWork.Name)
t.Errorf(`Title should be "Foo", but is "%s"."`, creativeWork.Name)
}

if strings.Contains(creativeWork.Text, ".md") {
t.Error("References to Markdown file must be changed to references to JSON-LD files.")
}

if strings.Contains(creativeWork.Text, "rel=\"nofollow\"") {
if strings.Contains(creativeWork.Text, `rel="nofollow"`) {
t.Error("Links must be followed by spiders.")
}

if !strings.Contains(creativeWork.Text, "class=\"language-php\"") {
if !strings.Contains(creativeWork.Text, `class="language-php"`) {
t.Error("Classes must be preserved.")
}
}

func TestGit_Extract(t *testing.T) {
creativeWork := schema.NewCreativeWork()

extractor := Git{}
err := extractor.Extract(creativeWork, "../fixtures/foo.md")
dir, _ := filepath.Abs("../fixtures")
extractor, _ := NewGit(dir)
err := extractor.Extract(creativeWork, "foo.md")

if err != nil {
t.Error(err)
}

if "" == creativeWork.DateModified {
if nil == creativeWork.DateModified {
t.Error("The creation date must be extracted.")
}

if "" == creativeWork.DateModified {
if nil == creativeWork.DateModified {
t.Error("The modifiation date must be extracted.")
}

Expand Down
97 changes: 63 additions & 34 deletions extractor/git.go
Original file line number Diff line number Diff line change
@@ -1,66 +1,95 @@
package extractor

import (
"bufio"
"log"
"os/exec"
"strings"

"errors"
"github.com/dunglas/calavera/schema"
gogit "gopkg.in/src-d/go-git.v4"
"os"
"path/filepath"
"strings"
)

var gitPath string
// Git is the extractor that reads metadata from the Git repository containing
// the Markdown files.
type Git struct {
	// inputDirectory is the directory given to NewGit; gitDirectory is the
	// ".git" directory that findGitDir located for it.
	inputDirectory, gitDirectory string
	// repository is the go-git handle opened on gitDirectory.
	repository *gogit.Repository
}

func init() {
var err error
func findGitDir(path string) (string, error) {
if _, err := os.Stat(path + "/.git/config"); err == nil {
return path + "/.git", nil
}

gitPath, err = exec.LookPath("git")
if nil != err {
log.Fatalln("git is not available in the PATH. Install it to extract git metadata.")
parentDir := filepath.Dir(path)
if strings.HasSuffix(parentDir, "/") {
return "", errors.New("No Git repository found")
}

return findGitDir(parentDir)
}

// Git extracts metadata from the Git repository containing Markdown files.
type Git struct {
// NewGit returns a Git extractor configured for inputDirectory.
//
// The ".git" directory is looked up with findGitDir, then opened with go-git.
// An error from either step is returned together with a nil extractor, so a
// nil error always comes with a usable *Git.
func NewGit(inputDirectory string) (*Git, error) {
	gitDirectory, err := findGitDir(inputDirectory)
	if err != nil {
		return nil, err
	}

	// Declare the repository in the function scope: an `if r, err := ...`
	// initializer would shadow err and make the failure path return a nil error.
	repository, err := gogit.NewFilesystemRepository(gitDirectory)
	if err != nil {
		return nil, err
	}

	return &Git{
		inputDirectory: inputDirectory,
		gitDirectory:   gitDirectory,
		repository:     repository,
	}, nil
}

// Extract extracts the list of contributors to the file, and date of modifications.
func (git Git) Extract(creativeWork *schema.CreativeWork, path string) error {
if "" == gitPath {
return nil
}
path, _ = filepath.Rel(filepath.Dir(git.gitDirectory), git.inputDirectory+"/"+path)

cmd := exec.Command(gitPath, "log", "--format=%an;%ae;%aI", path)
stdout, err := cmd.StdoutPipe()
ref, err := git.repository.Head()
if nil != err {
return err
}

if err := cmd.Start(); err != nil {
c, err := git.repository.Commit(ref.Hash())
if nil != err {
return err
}

scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
parts := strings.Split(strings.TrimSpace(scanner.Text()), ";")
revs, err := gogit.References(c, path)
if err != nil {
return err
}

author := schema.NewPerson(parts[0], parts[1])
creativeWork.Author = append([]schema.Person{*author}, creativeWork.Author...)
for _, v := range revs {
if !authorExists(creativeWork.Author, v.Author.Email) {
author := schema.NewPerson(v.Author.Name, v.Author.Email)
creativeWork.Author = append([]schema.Person{*author}, creativeWork.Author...)
}

creativeWork.DateCreated = parts[2]
if "" == creativeWork.DateModified {
creativeWork.DateModified = parts[2]
creativeWork.DateModified = &v.Author.When
if nil == creativeWork.DateCreated {
creativeWork.DateCreated = &v.Author.When
}
}

if err := scanner.Err(); err != nil {
return err
}
return nil
}

if err := cmd.Wait(); err != nil {
log.Fatalln("You are not in a git repository.")
return err
// authorExists tests if an author is already in the list
func authorExists(authors []schema.Person, email string) bool {
for _, p := range authors {
if email == p.Email {
return true
}
}

return nil
return false
}
8 changes: 7 additions & 1 deletion extractor/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@ import (

// Markdown is an extractor converting the content of .md files to HTML.
type Markdown struct {
// inputDirectory is the directory that Extract prepends to the path it is
// given before reading the file.
inputDirectory string
}

// NewMarkdown returns a Markdown extractor bound to inputDirectory.
func NewMarkdown(inputDirectory string) *Markdown {
	m := new(Markdown)
	m.inputDirectory = inputDirectory

	return m
}

// Extract converts the Markdown syntax to HTML in a secure way.
// The generated file is sanitized: all special tags and characters stripped (JavaScript, CSS...) or escaped.
// A "class" attribute indicating the language, used for syntax highlighting, is added to all code snippets.
// All references to ".md" files are converted to links pointing to ".jsonld" files.
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error {
markdownContent, err := ioutil.ReadFile(path)
markdownContent, err := ioutil.ReadFile(markdown.inputDirectory + "/" + path)
if nil != err {
return err
}
Expand Down
20 changes: 11 additions & 9 deletions schema/creativework.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
package schema

import "time"

// CreativeWork contains all data and metadata of a given page.
type CreativeWork struct {
JsonLd
Name string `json:"name"`
Text string `json:"text"`
About string `json:"about,omitempty"`
Author []Person `json:"author,omitempty"`
DateCreated string `json:"dateCreated,omitempty"`
DateModified string `json:"dateModified,omitempty"`
InLanguage string `json:"inLanguage,omitempty"`
License string `json:"license,omitempty"`
Publisher string `json:"publisher,omitempty"`
Name string `json:"name"`
Text string `json:"text"`
About string `json:"about,omitempty"`
Author []Person `json:"author,omitempty"`
DateCreated *time.Time `json:"dateCreated,omitempty"`
DateModified *time.Time `json:"dateModified,omitempty"`
InLanguage string `json:"inLanguage,omitempty"`
License string `json:"license,omitempty"`
Publisher string `json:"publisher,omitempty"`
}

// NewCreativeWork initializes a new CreativeWork instance with some sensible default values.
Expand Down

0 comments on commit ec17cf4

Please sign in to comment.