-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat(document): add a way to convert pdf to docx using libreoffice * feat(documents): add the ability to convert docx to pdf using libreoffice * test(document): test both new features * ci(github-actions): install libreoffice on github actions * refactor(files): add a new method to the file interface to support mime types as they can differ from the known extensions * refactor(documents): add a new constant to the document package for docx * refactor(files): add the new method to every struct that implements the file interface * build(docker): install libreoffice at the release stage * docs(readme): add a new table on what's new in terms of file conversion * build(dockerfile): replace debian bookworm with debian trixie just to have more up to date image * refactor(pdf): add two buffers as stdout and stderr to see what's going on at the time to convert pdf to docx * fix(pdf): use %q to make the filename a double-quoted string safely escaped with Go syntax * refactor(pdf): print the stderr when it is not empty * refactor(docx): replicate the changes added to the pdf file
- Loading branch information
1 parent
7ba6971
commit 24d6ac1
Showing
16 changed files
with
572 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,200 @@ | ||
package documents | ||
|
||
import "errors" | ||
import ( | ||
"archive/zip" | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"io" | ||
"log" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"slices" | ||
"strings" | ||
) | ||
|
||
type Docx struct{} | ||
// Docx describes a DOCX document and the targets it can be converted to.
// It implements the File and Document interfaces from the file package.
type Docx struct {
	// filename is the name of the DOCX file; ConvertTo derives the names of
	// its working files and of the resulting archive entry from it.
	filename string

	// compatibleFormats maps a file type to the formats a DOCX can be
	// converted to. It is what SupportedFormats returns.
	compatibleFormats map[string][]string

	// compatibleMIMETypes maps a file type to the MIME types a DOCX can be
	// converted to. It is what SupportedMIMETypes returns.
	compatibleMIMETypes map[string][]string

	// OutDir is not read by any method in this file.
	// NOTE(review): presumably the intended output directory — confirm.
	OutDir string
}
|
||
// NewDocx returns a pointer to Docx. | ||
func NewDocx(filename string) *Docx { | ||
d := Docx{ | ||
filename: filename, | ||
compatibleFormats: map[string][]string{ | ||
"Document": { | ||
PDF, | ||
}, | ||
}, | ||
compatibleMIMETypes: map[string][]string{ | ||
"Document": { | ||
PDF, | ||
}, | ||
}, | ||
} | ||
|
||
return &d | ||
} | ||
|
||
func (p *Docx) SupportedFormats() map[string][]string { | ||
return make(map[string][]string) | ||
// SupportedFormats returns a map witht the compatible formats that Docx is | ||
// compatible to be converted to. | ||
func (d *Docx) SupportedFormats() map[string][]string { | ||
return d.compatibleFormats | ||
} | ||
|
||
func (p *Docx) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, error) { | ||
// SupportedMIMETypes returns a map witht the compatible MIME types that Docx is | ||
// compatible to be converted to. | ||
func (d *Docx) SupportedMIMETypes() map[string][]string { | ||
return d.compatibleMIMETypes | ||
} | ||
|
||
func (d *Docx) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, error) { | ||
compatibleFormats, ok := d.SupportedFormats()[fileType] | ||
if !ok { | ||
return nil, fmt.Errorf("file type not supported: %s", fileType) | ||
} | ||
|
||
if !slices.Contains(compatibleFormats, subType) { | ||
return nil, fmt.Errorf("sub-type not supported: %s", subType) | ||
} | ||
|
||
switch strings.ToLower(fileType) { | ||
case documentType: | ||
switch subType { | ||
case PDF: | ||
var ( | ||
stdout bytes.Buffer | ||
stderr bytes.Buffer | ||
) | ||
|
||
docxFilename := filepath.Join("/tmp", d.filename) | ||
pdfFileName := fmt.Sprintf( | ||
"%s.pdf", | ||
strings.TrimSuffix(d.filename, filepath.Ext(d.filename)), | ||
) | ||
tmpPdfFileName := filepath.Join("/tmp", fmt.Sprintf( | ||
"%s.pdf", | ||
strings.TrimSuffix(d.filename, filepath.Ext(d.filename)), | ||
)) | ||
|
||
// Parses the file name of the Zip file. | ||
zipFileName := filepath.Join("/tmp", fmt.Sprintf( | ||
"%s.zip", | ||
strings.TrimSuffix(d.filename, filepath.Ext(d.filename)), | ||
)) | ||
|
||
docxFile, err := os.Create(docxFilename) | ||
if err != nil { | ||
return nil, fmt.Errorf( | ||
"error creating file to store the incoming docx locally %s: %w", | ||
d.filename, | ||
err, | ||
) | ||
} | ||
defer docxFile.Close() | ||
|
||
if _, err := docxFile.Write(fileBytes); err != nil { | ||
return nil, fmt.Errorf( | ||
"error storing the incoming pdf file %s: %w", | ||
d.filename, | ||
err, | ||
) | ||
} | ||
|
||
tmpPdfFile, err := os.Create(tmpPdfFileName) | ||
if err != nil { | ||
return nil, fmt.Errorf( | ||
"error at creating the pdf file to store the pdf content: %w", | ||
err, | ||
) | ||
} | ||
|
||
cmdStr := "libreoffice --headless --convert-to pdf:writer_pdf_Export --outdir %s %q" | ||
cmd := exec.Command( | ||
"bash", | ||
"-c", | ||
fmt.Sprintf(cmdStr, "/tmp", docxFilename), | ||
) | ||
|
||
cmd.Stdout = &stdout | ||
cmd.Stderr = &stderr | ||
|
||
if err := cmd.Run(); err != nil { | ||
return nil, fmt.Errorf( | ||
"error converting docx to pdf using libreoffice: %s", | ||
err, | ||
) | ||
} | ||
|
||
if stderr.String() != "" { | ||
return nil, fmt.Errorf( | ||
"error converting docx to pdf calling libreoffice: %s", | ||
stderr.String(), | ||
) | ||
} | ||
|
||
log.Println(stdout.String()) | ||
|
||
tmpPdfFile.Close() | ||
|
||
tmpPdfFile, err = os.Open(tmpPdfFileName) | ||
if err != nil { | ||
return nil, fmt.Errorf( | ||
"error at opening the pdf file: %w", | ||
err, | ||
) | ||
} | ||
defer tmpPdfFile.Close() | ||
|
||
// Creates the zip file that will be returned. | ||
archive, err := os.Create(zipFileName) | ||
if err != nil { | ||
return nil, fmt.Errorf( | ||
"error at creating the zip file to store the pdf file: %w", | ||
err, | ||
) | ||
} | ||
|
||
// Creates a Zip Writer to add files later on. | ||
zipWriter := zip.NewWriter(archive) | ||
|
||
w1, err := zipWriter.Create(pdfFileName) | ||
if err != nil { | ||
return nil, fmt.Errorf( | ||
"eror at creating a zip file: %w", | ||
err, | ||
) | ||
} | ||
|
||
if _, err := io.Copy(w1, tmpPdfFile); err != nil { | ||
return nil, fmt.Errorf( | ||
"error at writing the pdf file content to the zip writer: %w", | ||
err, | ||
) | ||
} | ||
|
||
// Closes both zip writer and the zip file after its done with the writing. | ||
zipWriter.Close() | ||
archive.Close() | ||
|
||
// Reads the zip file as an slice of bytes. | ||
zipFile, err := os.ReadFile(zipFileName) | ||
if err != nil { | ||
return nil, fmt.Errorf("error reading zip file: %v", err) | ||
} | ||
|
||
return zipFile, nil | ||
} | ||
} | ||
|
||
return nil, errors.New("not implemented") | ||
} | ||
|
||
func (p *Docx) DocumentType() string { | ||
func (d *Docx) DocumentType() string { | ||
return DOCX | ||
} |
Oops, something went wrong.