In [1]:
%load_ext autoreload
%autoreload 2

import genanki
import webscrape as ws
import anki_utils as au

In [2]:
import requests
from bs4 import BeautifulSoup

from tqdm.notebook import tqdm

In [3]:
WIKI_JOURNAL_TRIAL_LIST_URL = "https://www.wikijournalclub.org/wiki/WikiJournalClub:Usable_articles"

In [4]:
class Section:
    def __init__(self, section_header):
        self.header = section_header
        self.info = section_header.next_element.next_element.next_element
        self.title = section_header.next_element
    
    def __repr__(self):
        return self.title.text.strip() + "\n" + self.info.text


class Trial:
    def __init__(self, link):
        self.url = "https://www.wikijournalclub.org" + link.attrs["href"]
        self.soup_link = link
        self.title, self.short_desc = link.parent.text.split(":")
        self.short_desc = self.short_desc.strip()
        self.get_sections()
        self.get_categories()
        
    def __repr__(self):
        s = self.title + ":\n"
        s += "  -" + self.short_desc + "\n"
        s += "  Bottom Line:\n\t" + self.bottom_line.text
        return s
    
    def get_sections(self):
        response = requests.get(self.url)
        self.page_soup = BeautifulSoup(response.text, 'html.parser')
        self.sections = [Section(section) for section in self.page_soup.find_all("span", class_="mw-headline")]
        for section in self.sections:
            section_title = section.title.lower().strip().replace(" ", "_").replace("outcomes", "outcome")
            vars(self)[section_title] = section.info
    
    def get_categories(self):
        self.tags = []
        for page_link in self.page_soup.find_all("a", href=True):
            if "title" in page_link.attrs and "Category" in page_link.attrs["title"] and "Usable" not in page_link.text:
                self.tags.append(page_link.text.strip().lower().replace(" ", "_"))

In [5]:
def get_trials():
    response = requests.get(WIKI_JOURNAL_TRIAL_LIST_URL)
    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all("a", href=True)
    trials = []
    i = 0
    for link in tqdm(links, desc="Scraping clinical trials", unit=" Trials"):
        if "/wiki/" in link.attrs["href"] and list(link.attrs.keys()) == ["href", "title"]:
            if "WikiJournalClub" in link.attrs["href"] or "Usable_articles" in link.attrs["href"]:
                continue
            trials.append(Trial(link))
            i += 1
            if i > 10:
                break
            
    return trials

In [6]:
trials = get_trials()

Scraping clinical trials:   0%|          | 0/1138 [00:00<?, ? Trials/s]

In [7]:
trial = trials[0]

# Anki Cards

In [9]:
import genanki
from cached_property import cached_property
import re
import genanki as ga

import os

class ModelX(genanki.Model):
    def __init__(self, model_id=None, name=None, fields=None, templates=None, css='', type=0):
        super().__init__(model_id, name, fields, templates, css)
        self._type = type

    def to_json(self, now_ts, deck_id):
        j = super().to_json(now_ts, deck_id)
        j["type"] = self._type
        return j

class NoteX(genanki.Note):
    def _cloze_cards(self):
        """
        returns a Card with unique ord for each unique cloze reference
        """
        card_ords = set()
        # find cloze replacements in first template's qfmt, e.g "{{cloze::Text}}"
        cloze_replacements = set(re.findall("{{[^}]*?cloze:(?:[^}]?:)*(.+?)}}", self.model.templates[0]['qfmt']) +
                                 re.findall("<%cloze:(.+?)%>", self.model.templates[0]['qfmt']))
        for field_name in cloze_replacements:
          field_index = next((i for i, f in enumerate(self.model.fields) if f['name'] == field_name), -1)
          field_value = self.fields[field_index] if field_index >= 0 else ""
          # update card_ords with each cloze reference N, e.g. "{{cN::...}}"
          card_ords.update([int(m)-1 for m in re.findall("{{c(\d+)::.+?}}", field_value) if int(m) > 0])

        if card_ords == {}:
            card_ords = {0}

        return([genanki.Card(ord) for ord in card_ords])

    @cached_property
    def cards(self):
        if self.model._type == 1:
            return self._cloze_cards()
        else:
            return super().cards


In [49]:
import constants

In [57]:
def create_qfmt(content):
    return content + constants.ANKING_QFMT_APPEND

def create_afmt(content):
    return content + constants.ANKING_AFMT_APPEND

In [66]:
templates = [{'name': 'Card 1',
              'qfmt': create_qfmt("{{TRIAL_NAME}}<br>{{cloze::Question}}<br>"),
              'afmt': create_afmt('{{TRIAL_NAME}}<br>{{primary_outcome}}<br>{{bottom_line}}<br>'),
            }]

fields = [{"name": "TRIAL_NAME"}, {"name", "Question"}, {"name": "primary_outcome"},
          {"name": "bottom_line"}]

trial_cards_model = ModelX(1371313, "show_teach_trial_model", fields=fields, templates=templates, css=CSS)

In [71]:
trial_deck = ga.Deck(13713131, 'clinical_trials')

model = trial_cards_model
deck = trial_deck
media_files = []

names = {}

for i, trial in tqdm(enumerate(trials), desc="Creating Cards", unit=" Cards", total=len(trials)):
    if trial.title in names:
        names[trial.title] += 1
#         trial.title = trial.title + f"_{names[trial.title]}"
    else:
        names[trial.title] = 1
    trial.save_name = f"{i:0003}_" + trial.title.lower().replace(" ", "_").replace("/", "_")
    note = NoteX(model=trial_cards_model,
                   fields=[f"{trial.title} Trial:\n",
                           f"question {{c1::{trial.title}}}",
                           trial.primary_outcome.text,
                           trial.bottom_line.text],
                  tags=trial.tags)
    deck.add_note(note)

Creating Cards:   0%|          | 0/11 [00:00<?, ? Cards/s]

In [72]:
trial_package = genanki.Package(trial_deck)

trial_package.write_to_file('clinical_trials.apkg')

TypeError: 'set' object does not support item assignment