In [3]:
import os
!pip install --upgrade -q gspread

import gspread
import pandas as pd
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user first, then build the gspread client from the
# application-default credentials.
auth.authenticate_user()
google_credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(google_credentials)

In [4]:
# PDF processing requirements
!pip install --upgrade -q reportlab pypdf4

from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from PyPDF4.pdf import PdfFileReader, PdfFileWriter

[K     |████████████████████████████████| 2.7 MB 7.6 MB/s 
[K     |████████████████████████████████| 63 kB 3.1 MB/s 
[?25h  Building wheel for pypdf4 (setup.py) ... [?25l[?25hdone


In [5]:
# Folder layout: camera-ready inputs and generated proceedings both live in
# the same workshop-materials folder under BASE_PATH.
BASE_PATH = '/content/drive/MyDrive/DCASE/'
_WORKSHOP_MATERIALS_PATH = os.path.join(BASE_PATH, 'DCASE 2021 Workshop Materials')
CAMERA_READYS_PATH = os.path.join(_WORKSHOP_MATERIALS_PATH, 'CameraReadys')
PROCEEDINGS_PATH = os.path.join(_WORKSHOP_MATERIALS_PATH, 'Proceedings')

# Spreadsheet holding the per-paper metadata (opened by URL further down).
MASTER_PLAN_FILE_URL = 'https://docs.google.com/spreadsheets/d/16qBvQB9lF5pKcnYIWwjQj4Q9xEnBkkwMNlK2WVJy9M4/edit#gid=2129398936'

In [6]:
# Utility functions (some from https://stackoverflow.com/questions/25164871/how-to-add-page-numbers-to-pdf-file-using-python-and-matplotlib)

def find_camera_ready_pdf_from_submission_id(submission_id):
  """Return the path of the camera-ready file for ``submission_id``, or None.

  A file in CAMERA_READYS_PATH matches when its name starts with the
  submission id, followed by a literal backslash, followed by ``Camera``.
  The first match in ``os.listdir`` order is returned; None when no file
  matches.
  """
  expected_prefix = f'{submission_id}\\Camera'
  matching_paths = (
      os.path.join(CAMERA_READYS_PATH, candidate)
      for candidate in os.listdir(CAMERA_READYS_PATH)
      if candidate.startswith(expected_prefix)
  )
  return next(matching_paths, None)


def createPagePdf(num, offset_number, tmp):
    """Write a PDF to ``tmp`` whose pages contain nothing but a page number.

    @param num: number of pages to generate
    @param offset_number: value added to the 1-based page index to get the
        printed number (page 1 is labelled ``offset_number + 1``)
    @param tmp: path of the PDF file to create
    """
    # Fixed position of the number on every page: 105 mm across, 7 mm up.
    x_pos = (210 // 2) * mm
    y_pos = (7) * mm

    numbers_canvas = canvas.Canvas(tmp)
    for page_index in range(1, num + 1):
        # The font is set again on every page before drawing.
        numbers_canvas.setFont('Times-Roman', 10)
        label = str(offset_number + page_index)
        numbers_canvas.drawString(x_pos, y_pos, label)
        numbers_canvas.showPage()
    numbers_canvas.save()


def add_page_numbers(pdf_path, out_pdf_path, offset_number=0):
    """
    Add page numbers to a pdf, save the result as a new pdf

    A temporary PDF containing only the page numbers is generated with
    createPagePdf and merged page by page onto the input document.

    @param pdf_path: path to the pdf to read
    @param out_pdf_path: path the numbered pdf is written to (only written
        when the result has at least one page)
    @param offset_number: value added to each 1-based page index, so the
        first page is labelled ``offset_number + 1``

    Returns number of pages
    """
    import tempfile

    # A unique temporary file instead of a fixed name in the working
    # directory: repeated or overlapping runs cannot clobber each other.
    tmp_fd, tmp = tempfile.mkstemp(suffix='.pdf')
    os.close(tmp_fd)

    output = PdfFileWriter()
    try:
        with open(pdf_path, 'rb') as source_file:
            pdf = PdfFileReader(source_file, strict=False)
            n = pdf.getNumPages()

            # create new PDF with page numbers
            createPagePdf(n, offset_number, tmp)

            with open(tmp, 'rb') as numbers_file:
                numberPdf = PdfFileReader(numbers_file)
                # iterate pages
                for p in range(n):
                    page = pdf.getPage(p)
                    numberLayer = numberPdf.getPage(p)
                    # merge number page with actual page
                    page.mergePage(numberLayer)
                    output.addPage(page)

                # write result while both input files are still open
                if output.getNumPages():
                    with open(out_pdf_path, 'wb') as out_file:
                        output.write(out_file)
    finally:
        # Always clean up, including when reading or merging fails.
        if os.path.exists(tmp):
            os.remove(tmp)
    return n

In [7]:
# Read data from spreadsheet
#
# Opens the spreadsheet at MASTER_PLAN_FILE_URL with the gspread client `gc`
# and fetches all cell values of its 'Poster sessions' worksheet.

wb = gc.open_by_url(MASTER_PLAN_FILE_URL)
sheet = wb.worksheet('Poster sessions')
# The pre-processing cell skips rows[0] and reads the remaining rows.
rows = sheet.get_all_values()
# Filled by the pre-processing cell: paper id -> dict of fields for that paper.
papers_data = {}

In [12]:
# Pre-process data from spreadsheet
#
# Builds one record per paper in `papers_data` (keyed by paper id) with the
# fields used by the .bib template plus the source/destination PDF paths.
#
# NOTE: 'pages' is only a placeholder ('X--Y') here. The PDF-processing cell
# replaces it with the real page range, so re-running this cell afterwards
# resets every paper back to the placeholder until that cell is run again.

paper_keys_used = []
for row in rows[1:]:
  paper_id = row[0]
  if int(paper_id) > 99:
    # Ignore these entries as correspond to "fake" poster IDs used for challenge task posters
    continue
  paper_pdf_path = find_camera_ready_pdf_from_submission_id(paper_id)
  if paper_pdf_path is None:
    raise Exception(f'No PDF for paper id {paper_id}?')
  papers_data[paper_id] = {
      'id': row[0],
      'author': row[2].replace('; ', ' and '),
      'title': row[1],
      'booktitle': 'Proceedings of the Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)',
      'address': 'Barcelona, Spain',
      'month': 'November',
      'year': '2021',
      'pages': 'X--Y',
      'abstract': row[8].replace('\n', ' ').replace('"', '\''),
      '_camera_ready_pdf_path': paper_pdf_path,
      '_authors': row[9],
      '_video': row[5],
  }

  # The first whitespace-separated token of the author string is taken as the
  # first author's surname. Strip only a trailing comma: slicing off the last
  # character would eat a real letter when the surname has several words
  # ("Lopez Meyer, Paulo" -> first token "Lopez", no comma to remove).
  first_author_surname = papers_data[paper_id]['author'].split(' ')[0].rstrip(',')

  # Citation key: <surname>2021, disambiguated with 'b', 'c', 'd' when the
  # same first author has several papers.
  base_key = first_author_surname + '2021'
  paper_key = base_key
  if paper_key in paper_keys_used:
    for suffix in 'bcd':
      if base_key + suffix not in paper_keys_used:
        paper_key = base_key + suffix
        break
    else:
      raise Exception('Too many papers for same author?')
  paper_keys_used.append(paper_key)
  papers_data[paper_id]['key'] = paper_key

  paper_dest_pdf_name = f'DCASE2021Workshop_{first_author_surname}_{paper_id}.pdf'
  papers_data[paper_id]['_pdf'] = f'../documents/workshop2021/proceedings/{paper_dest_pdf_name}'
  papers_data[paper_id]['_dest_pdf_path'] = os.path.join(PROCEEDINGS_PATH, paper_dest_pdf_name)

In [9]:
# Process proceedings PDF files and copy to destination
#
# Papers are handled in `papers_data` order. Page numbering continues from one
# paper to the next, and each paper's 'pages' entry is set to its
# first--last page range.
acc_n_pages = 0
total_papers = len(papers_data)
for position, paper_data in enumerate(papers_data.values(), start=1):
  print(f'Processing paper {position} of {total_papers}')
  n_pages = add_page_numbers(
      paper_data['_camera_ready_pdf_path'],
      paper_data['_dest_pdf_path'],
      acc_n_pages,
  )
  first_page = acc_n_pages + 1
  acc_n_pages += n_pages
  paper_data['pages'] = f'{first_page}--{acc_n_pages}'

Processing paper 1 of 47
Processing paper 2 of 47
Processing paper 3 of 47
Processing paper 4 of 47
Processing paper 5 of 47
Processing paper 6 of 47
Processing paper 7 of 47
Processing paper 8 of 47
Processing paper 9 of 47
Processing paper 10 of 47
Processing paper 11 of 47
Processing paper 12 of 47
Processing paper 13 of 47
Processing paper 14 of 47
Processing paper 15 of 47
Processing paper 16 of 47
Processing paper 17 of 47
Processing paper 18 of 47
Processing paper 19 of 47
Processing paper 20 of 47
Processing paper 21 of 47
Processing paper 22 of 47
Processing paper 23 of 47
Processing paper 24 of 47
Processing paper 25 of 47
Processing paper 26 of 47
Processing paper 27 of 47
Processing paper 28 of 47
Processing paper 29 of 47
Processing paper 30 of 47
Processing paper 31 of 47
Processing paper 32 of 47
Processing paper 33 of 47
Processing paper 34 of 47
Processing paper 35 of 47
Processing paper 36 of 47
Processing paper 37 of 47
Processing paper 38 of 47
Processing paper 39 o

In [13]:
# Print the contents of the proceedings .bib file
#
# Each paper record in `papers_data` is rendered through the template below.
# If an entry shows pages = "X--Y", the pre-processing cell was run after the
# PDF-processing cell; run the PDF-processing cell again to fill in the pages.

BIB_FILE_ENTRY_TEMPLATE = """@inproceedings{{{key},
    author = "{author}",
    title = "{title}",
    booktitle = "{booktitle}",
    address = "{address}",
    month = "{month}",
    year = "{year}",
    pages = "{pages}",
    abstract = "{abstract}",
    _pdf = {{{_pdf}}},
    _authors = "{_authors}",
    _video = {{{_video}}}
}}

"""

all_paper_ids = list(papers_data.keys())

# Paper ids are numeric strings (the pre-processing cell calls int() on each),
# so sort by numeric value; a plain string sort would put '10' before '2'.
bib_entries = []
for paper_id in sorted(all_paper_ids, key=int):
  paper_data = papers_data[paper_id]
  bib_entries.append(BIB_FILE_ENTRY_TEMPLATE.format(**paper_data))
bib_file_contents = "".join(bib_entries)

print(bib_file_contents)
  

@inproceedings{Lopez2021,
    author = "Lopez, Jose A and Stemmer, Georg and Lopez Meyer, Paulo and Singh, Pradyumna and Del Hoyo Ontiveros, Juan and Cordourier, Hector",
    title = "Ensemble Of Complementary Anomaly Detectors Under Domain Shifted Conditions",
    booktitle = "Proceedings of the Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)",
    address = "Barcelona, Spain",
    month = "November",
    year = "2021",
    pages = "X--Y",
    abstract = "We present our submission to the DCASE2021 Challenge Task 2, which aims to promote research in anomalous sound detection. We found that blending the predictions of various anomaly detectors, rather than relying on well-known domain adaptation techniques alone, gave us the best performance under domain shifted conditions. Our submission is composed of two self-supervised classifier models, a probabilistic model we call NF-CDEE, and an ensemble of the three -- the latter obtained the top rank in the