-
Notifications
You must be signed in to change notification settings - Fork 686
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
129 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import os | ||
import re | ||
import shutil | ||
import tempfile | ||
|
||
from ..converter import KnowledgePostConverter | ||
|
||
|
||
class DocxConverter(KnowledgePostConverter):
    """Convert a Word ``.docx`` document into a knowledge post using pandoc."""

    _registry_keys = ['docx']

    @property
    def dependencies(self):
        # Dependencies required for this converter on top of core knowledge-repo dependencies
        return ['pypandoc']

    def from_file(self, filename, **opts):
        """Convert the docx file at `filename` to markdown and write it to the post.

        The document is converted by pandoc into a temporary working
        directory (media is extracted there as well), the resulting markdown
        is cleaned up and passed to ``self.kp_write``. The working directory
        is always removed afterwards, even if the conversion fails.

        Args:
            filename: Path of the ``.docx`` file to convert.
            **opts: Accepted for interface compatibility; not used here.
        """
        # `io.open` accepts `encoding` on both Python 2 and Python 3.
        import io

        wd = tempfile.mkdtemp()
        target_file = os.path.join(wd, 'post.md')
        try:
            # Imported lazily: pypandoc is an optional dependency (see
            # `dependencies`) and is only needed when a conversion runs.
            import pypandoc

            pypandoc.convert_file(
                filename,
                format='docx',
                to='markdown',
                outputfile=target_file,
                extra_args=[
                    '--standalone',
                    '--wrap=none',
                    '--extract-media={}'.format(wd)
                ]
            )

            # pandoc writes UTF-8 output; decode it explicitly rather than
            # relying on the platform's default encoding.
            with io.open(target_file, encoding='utf-8') as f:
                md = f.read()

            # Image embeddings exported from docx files have fixed sizes in inches
            # which browsers do not understand. We remove these annotations,
            # keeping only the `![](path)` part of `![](path){...}`.
            md = re.sub(r'(!\[\]\([^)]+\))\{[^}]+\}', r'\1', md)

            # Write markdown content to knowledge post (images will be extracted later)
            self.kp_write(md)
        finally:
            shutil.rmtree(wd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import os | ||
import re | ||
import sys | ||
import time | ||
|
||
import webbrowser | ||
|
||
import knowledge_repo | ||
import subprocess | ||
|
||
from .docx import DocxConverter | ||
|
||
|
||
class GDocConverter(DocxConverter):
    """Convert a Google Doc into a knowledge post.

    The document is exported through the user's web browser, picked up from
    the download directory, and then converted by `DocxConverter`.
    """

    _registry_keys = ['gdoc']

    def _find_doc(self, path, after=None, attempts=60, interval=1):
        """Poll `path` until a ``.docx`` file modified after `after` appears.

        Args:
            path: Directory to scan for downloaded documents.
            after: Timestamp (seconds since the epoch); only files whose
                modification time is strictly greater are accepted. ``None``
                accepts any ``.docx`` file.
            attempts: Maximum number of directory scans before giving up.
            interval: Seconds to sleep after each unsuccessful scan.

        Returns:
            The full path of the first matching file found.

        Raises:
            RuntimeError: If no matching file appears within `attempts` scans.
        """
        for _ in range(attempts):
            for filename in os.listdir(path):
                if not filename.endswith('.docx'):
                    continue
                fpath = os.path.join(path, filename)
                # Comparing a float against None raises TypeError on
                # Python 3, so treat None as "no lower bound".
                if after is None or os.path.getmtime(fpath) > after:
                    return fpath
            time.sleep(interval)
        raise RuntimeError("Cannot find document.")

    def from_file(self, url, download_path=None, **opts):
        """Download the Google Doc at `url` and convert it into the post.

        Args:
            url: A ``https://docs.google.com/document/d/<id>/...`` URL.
            download_path: Directory the browser saves downloads to;
                defaults to ``~/Downloads``.
            **opts: Forwarded to `DocxConverter.from_file`.

        Raises:
            ValueError: If `url` is not a recognised Google Docs URL.
            RuntimeError: If the exported document never shows up in
                `download_path`.
        """
        m = re.match(r'https://docs\.google\.com/document/d/(?P<doc_id>[^/]+)/', url)

        if not m:
            raise ValueError("Invalid Google Docs url.")

        doc_id = m.group('doc_id')
        download_url = "https://docs.google.com/document/d/{doc_id}/export?format=doc".format(doc_id=doc_id)

        # Record the time before triggering the download so that only files
        # written afterwards are considered to be the exported document.
        time_start = time.time()
        webbrowser.open(download_url)

        # Brief pause before polling starts (presumably to let the browser
        # begin the download).
        time.sleep(2)

        download_path = download_path or os.path.expanduser('~/Downloads')
        filename = self._find_doc(download_path, after=time_start)

        DocxConverter.from_file(self, filename, **opts)

        # Strip leading `[]{...}` span markup from the title and subtitle
        # (presumably anchors emitted by the docx conversion -- confirm).
        # A document may lack either header, so missing or empty values are
        # skipped instead of raising.
        headers = self.kp.headers
        for key in ('title', 'subtitle'):
            try:
                value = headers[key]
            except KeyError:
                continue
            if value and value.startswith('[]'):
                headers[key] = re.sub(r'\[\]\{[^}]+\}', '', value)
        self.kp.update_headers(**headers)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import re | ||
|
||
from ..converter import KnowledgePostConverter | ||
|
||
|
||
class ProxyConverter(KnowledgePostConverter):
    """Create a post with an empty body whose ``proxy`` header holds a URL."""

    _registry_keys = ['proxy']

    def from_file(self, url, **opts):
        """Write an empty post with `url` stored in its ``proxy`` header.

        Some URLs are rewritten before being stored: a Google presentation
        "edit" link is turned into the equivalent "embed" link, keeping
        whatever follows ``/edit`` in the original URL.

        Args:
            url: The URL the post should proxy.
            **opts: Accepted for interface compatibility; not used here.
        """
        # Special case: Google presentations should be embedded in "embed" mode.
        slides = re.match(
            '^https://docs.google.com/presentation/d/(?P<pres_id>[^/]+)/edit(?P<slide_query>.*)$',
            url
        )
        if slides is not None:
            pres_id, slide_query = slides.groups()
            url = "https://docs.google.com/presentation/d/{}/embed{}".format(pres_id, slide_query)

        self.kp_write("", headers={'proxy': url})