Skip to content

Commit

Permalink
Conversion: Add google drive support (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
EdwardMoyse committed Jun 13, 2024
1 parent abf00f8 commit 86a621c
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 24 deletions.
61 changes: 57 additions & 4 deletions conversion/indico_conversion/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
from datetime import timedelta
from urllib.parse import urlparse

import dateutil.parser
import requests
Expand Down Expand Up @@ -62,7 +63,7 @@ def retry_task(task, attachment, exception):

@celery.task(bind=True, max_retries=None)
def submit_attachment_doconverter(task, attachment):
"""Sends an attachment's file to the Doconvert conversion service"""
"""Send an attachment's file to the Doconverter conversion service."""
from indico_conversion.plugin import ConversionPlugin
if ConversionPlugin.settings.get('maintenance'):
task.retry(countdown=900)
Expand Down Expand Up @@ -92,8 +93,60 @@ def submit_attachment_doconverter(task, attachment):
ConversionPlugin.logger.info('Submitted %r to Doconverter', attachment)


@celery.task(bind=True, max_retries=None)
def request_pdf_from_googledrive(task, attachment):
    """Use the Google Drive API to convert a Google Drive file to a PDF.

    The file ID is extracted from the attachment's link URL, the file is
    exported as PDF through the Drive v3 ``export`` endpoint and the
    resulting PDF is stored with ``save_pdf``.

    Whenever the conversion is given up for good, the attachment's entry in
    ``pdf_state_cache`` is deleted.  Transport-level failures (connection
    errors, timeouts) are handed to ``retry_task`` instead.

    :param task: The bound celery task, passed on to ``retry_task``.
    :param attachment: The link attachment; its ``link_url`` is expected to
                       point to a document on ``docs.google.com``.
    """
    from urllib.parse import quote

    from indico_conversion.plugin import ConversionPlugin

    # Google drive URLs have this pattern: https://docs.google.com/<TYPE>/d/<FILEID>[/edit]
    try:
        parsed_url = urlparse(attachment.link_url)
        if parsed_url.netloc != 'docs.google.com':
            raise ValueError('Not a google docs URL')
        file_id = parsed_url.path.split('/')[3]
        if not file_id:
            # e.g. `/document/d/` - there is a path segment, but it is empty
            raise ValueError('Missing file ID')
    except (ValueError, IndexError) as exc:
        ConversionPlugin.logger.warning('Could not parse URL %s: %s', attachment.link_url, exc)
        # Like every other terminal failure below, do not leave a stale state behind
        pdf_state_cache.delete(str(attachment.id))
        return

    api_key = ConversionPlugin.settings.get('googledrive_api_key')
    # The ID comes from a user-supplied URL, so escape it before putting it into the path
    quoted_file_id = quote(file_id, safe='')
    export_url = f'https://www.googleapis.com/drive/v3/files/{quoted_file_id}/export'
    try:
        # `requests.get` does not raise for HTTP error statuses (that would need
        # `raise_for_status()`); only transport problems end up in the `except`.
        # Error responses from the API are recognized by their content type below.
        # The timeout keeps a stalled connection from blocking the worker forever.
        response = requests.get(export_url, params={'mimeType': 'application/pdf'},
                                headers={'x-goog-api-key': api_key}, timeout=60)
    except requests.RequestException as exc:
        retry_task(task, attachment, exc)
        return

    content_type = response.headers.get('Content-Type', '')
    # Compare only the media type, ignoring parameters such as `; charset=UTF-8`
    media_type = content_type.split(';', 1)[0].strip().lower()

    if media_type == 'application/json':
        # A JSON response is an error report instead of the exported document
        try:
            payload = response.json()
        except ValueError:
            payload = response.text
        try:
            error_code = payload['error']['code']
        except (TypeError, KeyError):
            error_code = 0
        if error_code == 404:
            ConversionPlugin.logger.info('Google Drive file %s not found (or not public)', attachment.link_url)
        else:
            ConversionPlugin.logger.warning('Google Drive file %s could not be converted: %s', attachment.link_url,
                                            payload)
        pdf_state_cache.delete(str(attachment.id))
        return

    if media_type != 'application/pdf':
        ConversionPlugin.logger.warning('Google Drive file %s conversion response is not a PDF: %s',
                                        attachment.link_url, content_type)
        pdf_state_cache.delete(str(attachment.id))
        return

    save_pdf(attachment, response.content)
    db.session.commit()


class RHDoconverterFinished(RH):
"""Callback to attach a converted file"""
"""Callback to attach a converted file."""

CSRF_ENABLED = False

Expand All @@ -118,7 +171,7 @@ def _process(self):

@celery.task(bind=True, max_retries=None)
def submit_attachment_cloudconvert(task, attachment):
"""Sends an attachment's file to the CloudConvert conversion service"""
"""Send an attachment's file to the CloudConvert conversion service."""
from indico_conversion.plugin import ConversionPlugin
if ConversionPlugin.settings.get('maintenance'):
task.retry(countdown=900)
Expand Down Expand Up @@ -243,7 +296,7 @@ def check_attachment_cloudconvert(task, attachment_id, export_task_id):


class RHCloudConvertFinished(RH):
"""Callback to attach a converted file"""
"""Callback to attach a converted file."""

CSRF_ENABLED = False

Expand Down
70 changes: 52 additions & 18 deletions conversion/indico_conversion/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
from datetime import timedelta
from urllib.parse import urlparse

from flask import flash, g
from flask_pluginengine import render_plugin_template, uses
Expand All @@ -15,7 +16,7 @@

from indico.core import signals
from indico.core.plugins import IndicoPlugin, plugin_engine, url_for_plugin
from indico.modules.attachments.forms import AddAttachmentFilesForm
from indico.modules.attachments.forms import AddAttachmentFilesForm, AddAttachmentLinkForm
from indico.modules.attachments.models.attachments import AttachmentType
from indico.modules.events.views import WPSimpleEventDisplay
from indico.util.date_time import now_utc
Expand All @@ -26,7 +27,8 @@

from indico_conversion import _, pdf_state_cache
from indico_conversion.blueprint import blueprint
from indico_conversion.conversion import submit_attachment_cloudconvert, submit_attachment_doconverter
from indico_conversion.conversion import (request_pdf_from_googledrive, submit_attachment_cloudconvert,
submit_attachment_doconverter)
from indico_conversion.util import get_pdf_title


Expand Down Expand Up @@ -59,6 +61,8 @@ class SettingsForm(IndicoForm):
filters=[lambda exts: sorted({ext.lower().lstrip('.').strip() for ext in exts})],
description=_('File extensions for which PDF conversion is supported. '
'One extension per line.'))
googledrive_api_key = IndicoPasswordField(_('GoogleDrive API key'), toggle=True,
description=_('API key used for converting files on Google Docs.'))


@uses('owncloud')
Expand All @@ -73,17 +77,19 @@ class ConversionPlugin(IndicoPlugin):
'maintenance': False,
'server_url': '',
'cloudconvert_api_key': '',
'googledrive_api_key': '',
'cloudconvert_sandbox': False,
'cloudconvert_notify_threshold': None,
'cloudconvert_notify_email': '',
'valid_extensions': ['ppt', 'doc', 'pptx', 'docx', 'odp', 'sxi']}

def init(self):
super().init()
self.connect(signals.core.add_form_fields, self._add_form_fields, sender=AddAttachmentFilesForm)
self.connect(signals.core.add_form_fields, self._add_file_form_fields, sender=AddAttachmentFilesForm)
self.connect(signals.core.add_form_fields, self._add_url_form_fields, sender=AddAttachmentLinkForm)
if plugin_engine.has_plugin('owncloud'):
from indico_owncloud.forms import AddAttachmentOwncloudForm
self.connect(signals.core.add_form_fields, self._add_form_fields, sender=AddAttachmentOwncloudForm)
self.connect(signals.core.add_form_fields, self._add_file_form_fields, sender=AddAttachmentOwncloudForm)
self.connect(signals.core.form_validated, self._form_validated)
self.connect(signals.attachments.attachment_created, self._attachment_created)
self.connect(signals.core.after_commit, self._after_commit)
Expand All @@ -97,16 +103,29 @@ def get_blueprints(self):
def get_vars_js(self):
return {'urls': {'check': url_for_plugin('conversion.check')}}

def _add_form_fields(self, form_cls, **kwargs):
def _add_file_form_fields(self, form_cls, **kwargs):
    """Add the "Convert to PDF" switch to a file upload form.

    Connected to the ``add_form_fields`` signal for the file-based
    attachment forms.

    :param form_cls: The form class the field is added to (unused).
    :return: A ``(field_name, field)`` tuple describing the extra field.
    """
    exts = ', '.join(self.settings.get('valid_extensions'))
    description = _('If enabled, your files will be converted to PDF if possible. '
                    'The following file types can be converted: {exts}').format(exts=exts)
    field = BooleanField(_('Convert to PDF'), widget=SwitchWidget(), description=description, default=True)
    return 'convert_to_pdf', field

def _add_url_form_fields(self, form_cls, **kwargs):
    """Add the "Convert to PDF" switch to the link attachment form.

    Connected to the ``add_form_fields`` signal for the link form.  The
    field is only offered when a Google Drive API key is configured.

    :param form_cls: The form class the field is added to (unused).
    :return: A ``(field_name, field)`` tuple describing the extra field, or
             ``None`` if no Google Drive API key is set.
    """
    if not self.settings.get('googledrive_api_key'):
        return None
    description = _('If enabled, Indico will attempt to convert files hosted on Google Drive '
                    'to PDF. Note that this will only work if the file on Google Drive is public '
                    'and that it will be converted only once, so any future changes made to it '
                    'will not be reflected in the PDF stored in Indico.')
    field = BooleanField(_('Convert to PDF'), widget=SwitchWidget(), description=description, default=True)
    return 'convert_to_pdf', field

def _form_validated(self, form, **kwargs):
classes = [AddAttachmentFilesForm]
if ConversionPlugin.settings.get('googledrive_api_key'):
classes.append(AddAttachmentLinkForm)
if plugin_engine.has_plugin('owncloud'):
from indico_owncloud.forms import AddAttachmentOwncloudForm
classes.append(AddAttachmentOwncloudForm)
Expand All @@ -115,11 +134,21 @@ def _form_validated(self, form, **kwargs):
g.convert_attachments_pdf = form.ext__convert_to_pdf.data

def _attachment_created(self, attachment, **kwargs):
if not g.get('convert_attachments_pdf') or attachment.type != AttachmentType.file:
return
ext = os.path.splitext(attachment.file.filename)[1].lstrip('.').lower()
if ext not in self.settings.get('valid_extensions'):
if not g.get('convert_attachments_pdf'):
return
if attachment.type == AttachmentType.file:
ext = os.path.splitext(attachment.file.filename)[1].lstrip('.').lower()
if ext not in self.settings.get('valid_extensions'):
return
else:
if not ConversionPlugin.settings.get('googledrive_api_key'):
return
parsed_url = urlparse(attachment.link_url)
split_path = parsed_url.path.split('/')
if parsed_url.netloc != 'docs.google.com' or len(split_path) < 5:
# We expect a URL matching this pattern:
# https://docs.google.com/<TYPE>/d/<FILEID>[/edit]
return
# Prepare for submission (after commit)
if 'convert_attachments' not in g:
g.convert_attachments = set()
Expand All @@ -128,20 +157,25 @@ def _attachment_created(self, attachment, **kwargs):
pdf_state_cache.set(str(attachment.id), 'pending', timeout=info_ttl)
if not g.get('attachment_conversion_msg_displayed'):
g.attachment_conversion_msg_displayed = True
flash(_('Your file(s) have been sent to the conversion system. The PDF file(s) will be attached '
'automatically once the conversion finished.').format(file=attachment.file.filename))
if attachment.type == AttachmentType.file:
flash(_('Your file(s) have been sent to the conversion system. The PDF file(s) will be attached '
'automatically once the conversion is finished.'))
elif attachment.type == AttachmentType.link:
flash(_('A PDF file has been requested for your Google drive link. The file will be attached '
'automatically once the conversion is finished.'))

def _after_commit(self, sender, **kwargs):
for attachment, is_protected in g.get('convert_attachments', ()):
if self.settings.get('use_cloudconvert') and not is_protected:
submit_attachment_cloudconvert.delay(attachment)
else:
submit_attachment_doconverter.delay(attachment)
if attachment.type == AttachmentType.file:
if self.settings.get('use_cloudconvert') and not is_protected:
submit_attachment_cloudconvert.delay(attachment)
else:
submit_attachment_doconverter.delay(attachment)
elif attachment.type == AttachmentType.link:
request_pdf_from_googledrive.delay(attachment)

def _event_display_after_attachment(self, attachment, top_level, has_label, **kwargs):
if attachment.type != AttachmentType.file:
return None
if now_utc() - attachment.file.created_dt > info_ttl:
if attachment.file and (now_utc() - attachment.file.created_dt > info_ttl):
return None
if pdf_state_cache.get(str(attachment.id)) != 'pending':
return None
Expand Down
14 changes: 12 additions & 2 deletions conversion/indico_conversion/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@


def get_pdf_title(attachment):
if attachment.type == AttachmentType.link:
return attachment.title
# Must be of type file
ext = os.path.splitext(attachment.file.filename)[1]
if attachment.title.endswith(ext):
return attachment.title[:-len(ext)] + '.pdf'
Expand All @@ -25,12 +28,19 @@ def get_pdf_title(attachment):

def save_pdf(attachment, pdf):
from indico_conversion.plugin import ConversionPlugin
name = os.path.splitext(attachment.file.filename)[0]
if attachment.type == AttachmentType.file:
name = os.path.splitext(attachment.file.filename)[0]
else:
name = attachment.title
title = get_pdf_title(attachment)
pdf_attachment = Attachment(folder=attachment.folder, user=attachment.user, title=title,
description=attachment.description, type=AttachmentType.file,
protection_mode=attachment.protection_mode, acl=attachment.acl)
pdf_attachment.file = AttachmentFile(user=attachment.file.user, filename=f'{name}.pdf',
if attachment.type == AttachmentType.file:
user = attachment.file.user
else:
user = attachment.user
pdf_attachment.file = AttachmentFile(user=user, filename=f'{name}.pdf',
content_type='application/pdf')
pdf_attachment.file.save(pdf)
db.session.add(pdf_attachment)
Expand Down

0 comments on commit 86a621c

Please sign in to comment.