# `nbgallery` Bulk Uploader - TM351

Quick hack to automate the uploading of notebooks to `nbgallery`.

Uses `selenium` (I couldn't get `mechanize` / `mechanical soup` to work?).

In [56]:
from selenium import webdriver

#Selenium package includes several utilities
# for waiting until things are ready
#https://selenium-python.readthedocs.io/waits.html
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


In [57]:
#Start a Chrome session under selenium's control
driver = webdriver.Chrome()

#Seconds to keep trying before giving up on an element
timeout_s = 10

#Implicit wait: element lookups poll the DOM for up to
# timeout_s seconds before failing
driver.implicitly_wait(timeout_s)

#Explicit wait: lets us block on a condition defined
# for a particular element
wait = WebDriverWait(driver, timeout_s)

driver.get("http://localhost:3000/")

In [4]:
#Close the browser with:
#driver.close()

## Login

In [1]:
#Credentials for the nbgallery account used for the upload.
#Taken from the environment when set, so they need not be saved in the
# notebook; otherwise fill in the fallback strings by hand.
import os

NBGALLERY_EMAIL = os.environ.get('NBGALLERY_EMAIL', '')
NBGALLERY_PASSWORD = os.environ.get('NBGALLERY_PASSWORD', '')

In [59]:
def nbgallery_login(driver, wait, user, pwd):
    ''' Login to nbgallery.
        Return once the login dialogue has disappeared.

        driver: selenium webdriver with the nbgallery page already loaded.
        wait: WebDriverWait used to block until the login form has gone.
        user: email address typed into the login form.
        pwd: password typed into the login form.

        Returns None. Any selenium exception (element not found, wait
        timed out) is left to propagate to the caller.
    '''

    #find_element(By...) is used rather than the find_element_by_* helpers,
    # which recent selenium releases no longer provide

    #Presumably the gear menu is what reveals the login form - the email
    # field is only looked up after clicking it
    driver.find_element(By.ID, "gearDropdown").click()

    element = driver.find_element(By.ID, "user_email")
    element.click()
    element.clear()
    element.send_keys(user)

    element = driver.find_element(By.ID, "user_password")
    element.clear()
    element.send_keys(pwd)
    element.click()

    driver.find_element(By.XPATH, "//input[@value='Login']").click()

    #Wait until the login has been accepted
    #May be better to check for presence of a logged in indicator?
    #A crude way would be to test for presence of /Logout/ in page
    #Alternatively, is a new page loaded following a login?
    wait.until(EC.invisibility_of_element_located((By.ID, 'user_email')))
    

## Bulk Uploader


In [60]:
def nbgallery_upload(driver, wait,
                     path, title='', desc='', tags=None, private=False):
    ''' Upload a notebook.
        Return once the notebook page has loaded.

        driver: selenium webdriver, already logged in to nbgallery.
        wait: WebDriverWait used for the explicit waits.
        path: full path to the .ipynb file; anything else is reported
              and skipped.
        title: notebook title; defaults to the file name without .ipynb.
        desc: description; defaults to 'No description.' because the
              form needs a non-empty description.
        tags: comma separated string, or an iterable of tag strings.
        private: tick the "private" checkbox when True.

        Returns None. Selenium exceptions are left to propagate.
    '''
    #Imported here so the function does not rely on another cell
    # having imported os first
    import os

    #path is full path to file
    if not path.endswith('.ipynb'):
        print('Not a notebook (.ipynb) file? [{}]'.format(path))
        return

    #find_element(By...) is used rather than the find_element_by_* helpers,
    # which recent selenium releases no longer provide

    #Part 1 - pick the file in the upload dialogue
    element = wait.until(EC.element_to_be_clickable((By.ID, 'uploadModalButton')))
    element.click()

    driver.find_element(By.ID, "uploadFile").send_keys(path)
    #Checkbox-style input inside the upload form that is clicked before submitting
    driver.find_element(By.XPATH, '//*[@id="uploadFileForm"]/div[3]/div/div/label/input').click()
    driver.find_element(By.ID, "uploadFileSubmit").click()

    #Part 2 - fill in the staging form
    element = driver.find_element(By.ID, "stageTitle")
    element.click()

    #Is there notebook metadata we can search for title?
    if not title:
        #File name minus the (single, trailing) .ipynb extension
        title = os.path.splitext(os.path.basename(path))[0]
    element.clear()
    element.send_keys(title)

    element = driver.find_element(By.ID, "stageDescription")
    element.click()

    #Is there notebook metadata we can search for description?
    #Any other notebook metadata we could make use of here?
    element.clear()
    #Description needs to be not null
    if not desc:
        desc = 'No description.'
    element.send_keys(desc)

    element = driver.find_element(By.ID, "stageTags-tokenfield")
    element.click()

    #Handle various tagging styles
    #Is there notebook metadata we can search for tags?
    if not tags:
        tags = ''
    elif not isinstance(tags, str):
        #list, tuple, set... of individual tags
        tags = ','.join(tags)
    if not tags.endswith(','):
        tags += ','

    element.clear()
    element.send_keys(tags) #need the final comma to set it?

    if private:
        driver.find_element(By.ID, "stagePrivate").click()

    driver.find_element(By.XPATH, '//*[@id="stageForm"]/div[9]/div/div/label/input').click()

    #https://blog.codeship.com/get-selenium-to-wait-for-page-load/
    #Grab the root of the *current* page before submitting. Looking it up
    # after the click races with the page load: if the new page is already
    # there we would wait on its root, which never goes stale.
    old_page = driver.find_element(By.TAG_NAME, 'html')
    driver.find_element(By.ID, "stageSubmit").click()

    #Wait for new page to load
    wait.until(EC.staleness_of(old_page))


## Bulk Upload



In [3]:
import os

d = '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351'

#Collect the directories directly inside d whose name starts
# with "Part " and ends with "Notebooks"
nbdirs = []
for entry in os.listdir(d):
    candidate = os.path.join(d, entry)
    if not os.path.isdir(candidate):
        continue
    if entry.startswith('Part ') and entry.endswith('Notebooks'):
        nbdirs.append(candidate)

#Show the first few as a sanity check
nbdirs[:3]

['/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks',
 '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks',
 '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 01 Notebooks']

In [4]:
#Peek at files in a subdir
# (the first directory in nbdirs; os.listdir gives bare names, not full paths)
nbsubdir = nbdirs[0]
nbs=os.listdir(nbsubdir)
#Display the directory alongside its contents
nbsubdir, nbs

('/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks',
 ['20.1 The k-nearest neighbours classifier.ipynb',
  '20.2 The leave-one-out algorithm.ipynb',
  'data'])

In [5]:
#Get individual file paths
#Only keep regular files that end in .ipynb (with the dot, as in the
# other cells) and whose name starts with a digit, eg to ignore "old ..."
fps = [os.path.join(nbsubdir, nb) for nb in nbs
       if nb.endswith('.ipynb') and nb[0].isdigit()
          and os.path.isfile(os.path.join(nbsubdir, nb))]
fps

['/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks/20.1 The k-nearest neighbours classifier.ipynb',
 '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks/20.2 The leave-one-out algorithm.ipynb']

In [68]:
#Neater
#For the first two directories, print the directory, the tag derived from
# its name ("Part 20 Notebooks" -> "Part_20") and the notebook paths in it

for nbsubdir in nbdirs[:2]:
    nbs=os.listdir(nbsubdir)
    fps = [os.path.join(nbsubdir, nb) for nb in nbs
             if os.path.isfile(os.path.join(nbsubdir, nb)) and nb.endswith('.ipynb')]
    #basename rather than split('/') so the tag is still right when
    # os.path.join has used a different path separator
    tag = '_'.join(os.path.basename(nbsubdir).split()[:2])

    print(nbsubdir,'\n', tag,'\n', fps, '\n\n')

/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks 
 Part_20 
 ['/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks/20.1 The k-nearest neighbours classifier.ipynb', '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 20 Notebooks/20.2 The leave-one-out algorithm.ipynb'] 


/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks 
 Part_15 
 ['/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks/15.4solutions.ipynb', '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks/15.3solutions.ipynb', '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks/15.5 Investigating accident rates.ipynb', '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 15 Notebooks/15.2solutions.ipynb', '/Users/tonyhirst/Documents/GitHub/tm351-undercertainty

In [71]:
#Open browser
driver = webdriver.Chrome()
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 10)

#try/finally so the browser is shut down even if the login or one of
# the uploads raises part way through
try:
    driver.get("http://localhost:3000/")

    #Login
    nbgallery_login(driver, wait, NBGALLERY_EMAIL, NBGALLERY_PASSWORD)

    #Upload two notebooks, one after the other
    #nbgallery_upload(driver, wait, '/Users/tonyhirst/notebooks/Untitled.ipynb')
    #nbgallery_upload(driver, wait, '/Users/tonyhirst/notebooks/Untitled2.ipynb')

    #Upload every notebook in every directory, tagged by directory
    # ("Part 20 Notebooks" -> "Part_20"); sorted so that the upload order
    # does not depend on the order os.listdir happens to return
    for nbsubdir in sorted(nbdirs):
        nbs = sorted(os.listdir(nbsubdir))
        fps = [os.path.join(nbsubdir, nb) for nb in nbs
                 if os.path.isfile(os.path.join(nbsubdir, nb)) and nb.endswith('.ipynb')]
        #basename rather than split('/') so the tag is still right when
        # os.path.join has used a different path separator
        tag = '_'.join(os.path.basename(nbsubdir).split()[:2])
        for fp in fps:
            nbgallery_upload(driver, wait, fp, tags=tag)
finally:
    #Close browser - quit() ends the whole webdriver session, not just
    # the current window
    driver.quit()

In [70]:
#Close the browser window by hand, eg after an interrupted run
# NOTE(review): presumably fails if the driver has already been closed - confirm
driver.close()