In [1]:
from IPython.display import HTML

# Render a script plus a submit button into the notebook output.
# code_toggle() hides every 'div.input' element when code_show is true and
# shows them otherwise, then flips code_show.  It is registered to run once on
# document ready (so inputs start hidden) and again whenever the form is submitted.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

This JavaScript ensures that long output is not collapsed into a small window that must be scrolled through.

In [2]:
%%javascript
// Replace OutputArea's _should_scroll with a function that always returns
// false, regardless of the number of lines passed in.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
# Import necessary packages

import requests as req
import plotly.graph_objs as go
import pandas as pd
import plotly.plotly as py
from IPython.display import display
import io

In [4]:
# Create User GUI Checkbox interface class

import Tkinter
import tkMessageBox

class MyGUI:
    """Checkbox dialog that lets the user tick one or more named options.

    Constructing an instance builds the window and then starts the Tk main
    loop at the end of ``__init__``, so the constructor is expected to return
    only after the window has been closed (e.g. through the OK button).
    Afterwards ``return_choice`` reports which options were ticked.

    Attributes:
        main_window: the Tk root window of the dialog.
        top_frame / bottom_frame: frames holding the checkbuttons and the
            Confirm/OK buttons respectively.
        names: maps "var<i>" to the label of the i-th checkbox.
        var_dict: maps "var<i>" to the IntVar backing the i-th checkbox.
        button_dict: maps "button<i>" to the i-th Checkbutton widget.
    """

    def __init__(self,names,title):
        """Build the dialog and run the Tk main loop.

        Args:
            names: iterable of option labels, one checkbox is created per label.
            title: text shown in the window title bar.
        """
        # Create the main window.
        self.main_window = Tkinter.Tk()
        self.main_window.title(title)
        self.main_window.geometry("600x500")

        # Create two frames. One for the checkbuttons
        # and another for the regular Button widgets.
        self.top_frame = Tkinter.Frame(self.main_window)
        self.bottom_frame = Tkinter.Frame(self.main_window)

        # Create the Checkbuttons, all initially unticked.
        self.names = {}
        self.var_dict = {}
        self.button_dict = {}
        for index, name in enumerate(names):
            var_key = "var{0}".format(index)
            variable = Tkinter.IntVar()
            variable.set(0)
            button = Tkinter.Checkbutton(self.top_frame, text=name, variable=variable)
            button.pack()

            self.var_dict[var_key] = variable
            self.names[var_key] = name
            self.button_dict["button{0}".format(index)] = button

        # Create an OK button and a Confirm button.
        self.confirm_button = Tkinter.Button(self.bottom_frame,
                      text='Confirm', command=self.show_choice)
        self.ok_button = Tkinter.Button(self.bottom_frame,
                      text='OK', command=self.main_window.destroy)

        # Pack the Confirm and OK Buttons.
        self.confirm_button.pack(side='left')
        self.ok_button.pack(side='left')

        # Pack the frames.
        self.top_frame.pack()
        self.bottom_frame.pack()

        # Start the mainloop.
        Tkinter.mainloop()

    def _selected_names(self):
        """Return the labels of all ticked checkboxes, in checkbox order."""
        # Plain dicts do not guarantee iteration order on every Python version,
        # so order the "var<i>" keys by their numeric suffix.  Callers treat
        # element 0 as "the first option the user ticked in the list".
        ordered_keys = sorted(self.var_dict, key=lambda key: int(key[3:]))
        return [self.names[key] for key in ordered_keys
                if self.var_dict[key].get() == 1]

    # The show_choice method is the callback function for the Confirm button.
    def show_choice(self):
        """Show an info dialog listing the currently ticked options."""
        # Build the message string: a header plus one line per ticked option.
        self.message = 'You selected:\n'
        for name in self._selected_names():
            self.message = self.message + name + "\n"

        # Display the message in an info dialog box.
        tkMessageBox.showinfo('Selection', self.message)

    # The return_choice method returns the names of the choices made by the user in an array
    def return_choice(self):
        """Return a list with the labels of the ticked options, in checkbox order.

        The list is empty when nothing was ticked.
        """
        return self._selected_names()

In [5]:
## This code block defines necessary functions used in the main loop.

# This function creates an array containing the titles of all of the experiments using an array of
# BeautifulSoup Tags.  It then returns the array
def get_experiments(data):
    """Return a list with the ``.text`` of every item in ``data``, in order."""
    return [tag.text for tag in data]

# This function creates a Checkbox GUI in which the user chooses one experiment to analyze.  Note that if
# multiple experiments are selected, only the first will be analyzed.  It then returns the choice.
def choose_experiment(experiments):
    """Show a checkbox dialog listing ``experiments`` and return the ticked ones.

    Returns whatever ``MyGUI.return_choice`` reports for the dialog.
    """
    return MyGUI(experiments, "Choose one experiment to analyze").return_choice()

# This function obtains the datasets' titles, descriptions, and links by sifting through the BeautifulSoup
# ResultSet.  It returns a dictionary matching the datasets with their links, along with an array of the datasets.
def get_dataSets(rows):
    """Build dataset labels and their links from a flat sequence of table cells.

    The cells are consumed in groups of five: the first cell of a group
    supplies the label text and the link (``href`` of its first ``a`` element),
    the fifth cell supplies the text appended after ":  ".  Trailing cells that
    do not fill a complete group are ignored.

    Args:
        rows: sequence of cell objects exposing ``.text`` and ``.find("a")``.

    Returns:
        (dataDict, datasets): ``dataDict`` maps each label to its link and
        ``datasets`` lists the labels in table order.
    """
    dataDict = {}
    datasets = []
    # Floor division keeps the group count an integer on both Python 2 and 3;
    # plain "/" yields a float on Python 3, which range() rejects.
    complete_groups = len(rows) // 5
    for start in range(0, complete_groups * 5, 5):
        target = rows[start].text
        link = rows[start].find("a").get("href")
        dataCols = rows[start + 4].text
        info = target + ":  " + dataCols
        dataDict[info] = link
        datasets.append(info)
    return dataDict, datasets

# This function matches the experiment choice with the url of the link to the experiment, and then returns
# the BeautifulSoup object of that link along with setting dataset_loop=True
def Exp_Loop(filtered,exp_choice):
    """Fetch and parse the page of the chosen experiment.

    Args:
        filtered: link elements exposing ``.text`` and ``.get("href")``.
        exp_choice: list of selected experiment names; only element 0 is used.

    Returns:
        (soupData, dataset_loop): the parsed experiment page and ``True``.

    Raises:
        ValueError: if ``exp_choice`` is empty or names no element of ``filtered``.
    """
    if not exp_choice:
        raise ValueError("No experiment was selected.")

    # Resolve the URL before doing any network work.  When several links carry
    # the same text the last one wins, as in the plain loop this replaces.
    url = None
    for item in filtered:
        if item.text == exp_choice[0]:
            url = 'https://misportal.jlab.org%s' %(item.get("href"))

    if url is None:
        raise ValueError("Experiment %r was not found in the experiment list." % (exp_choice[0],))

    r_exp = req.get(url)

    soupData = bs(r_exp.content, "html5lib")
    dataset_loop = True
    return soupData,dataset_loop

# This function uses the function get_dataSets, and then creates a GUI for the user to choose one dataset to analyze.
# It then creates a BeautifulSoup object for the dataset link and downloads the csv file of the data.
# The csv is then converted to a pandas dataframe, which is returned along with plot_loop=True and data_choice.
def Dataset_Loop(soupData):
    """Let the user pick a dataset of the experiment and load it as a DataFrame.

    Looks up the table with id "dsTable" in ``soupData``, offers its datasets
    in a checkbox dialog, follows the chosen dataset's link, and reads the csv
    behind the first link inside the "downloadOptions" div.

    Returns:
        (frame, plot_loop, data_choice): the csv as a pandas DataFrame,
        ``True``, and the list of dataset labels the user ticked.
    """
    def _is_dataset_table(tag):
        return tag.name=='table' and tag.has_attr('id') and tag['id']=="dsTable"

    dataset_table = soupData.find(_is_dataset_table)
    cells = dataset_table.findAll(lambda tag: tag.name=='td')

    dataDict, datasets = get_dataSets(cells)

    data_choice = MyGUI(datasets, "Choose one dataset to analyze").return_choice()

    # Only the first ticked dataset is followed.
    dataset_url = 'https://misportal.jlab.org%s' %(dataDict[data_choice[0]])
    dataset_page = bs(req.get(dataset_url).content, "html5lib")

    download_link = dataset_page.find("div", id = "downloadOptions").find("a")
    csv_url = "https://misportal.jlab.org%s" %(download_link.get("href"))

    csv_bytes = req.get(csv_url).content
    frame = pd.read_csv(io.StringIO(csv_bytes.decode('utf-8')))
    return frame, True, data_choice
    

In [6]:
## This code block defines functions used in the plot_loop

# This function creates GUIs for the user to select most of the plot options.  It then returns x, y, and
# the type of the plot
def Plot_Options(frame=None):
    """Ask the user for the x variable, the y variable(s) and the plot type.

    Three checkbox dialogs are shown one after another: x axis, y axis, and
    plot type.

    Args:
        frame: DataFrame whose columns are offered as axis choices.  When
            omitted, the notebook-level ``pandaDF`` variable is used so that
            existing ``Plot_Options()`` calls keep working.

    Returns:
        (x, y, typeOfPlot): the lists of ticked x and y column names and the
        first ticked plot-type label.
    """
    if frame is None:
        # Backward-compatible fallback to the global the function used to read.
        frame = pandaDF
    columns = frame.columns

    xGui = MyGUI(columns, "Choose one variable for the x axis")
    yGui = MyGUI(columns, "Choose one or multiple variables for the y axis")
    x = xGui.return_choice()
    y = yGui.return_choice()
    typeOfPlotGui = MyGUI(["Line Chart","Scatterplot with Error Bars"],"What kind of plot would you like?")
    typeOfPlot = typeOfPlotGui.return_choice()[0]
    return x,y,typeOfPlot

# This function creates the line chart figure and returns it for display in the loop
def Make_Line_Chart(x,y,exp_choice,data_choice,sortedFrame=None):
    """Build a line-chart figure with one trace per selected y column.

    Args:
        x: list of x column names; only element 0 is used.
        y: list of y column names, one trace is created for each.
        exp_choice: list whose element 0 is the experiment name for the title.
        data_choice: list whose element 0 is the dataset label for the title.
        sortedFrame: DataFrame to plot from, matching the parameter of
            ``Make_Scatterplot``.  When omitted, the notebook-level
            ``sortedFrame`` variable is used so older four-argument calls
            keep working.

    Returns:
        The plotly figure combining the traces and the layout.
    """
    if sortedFrame is None:
        # Backward-compatible fallback to the global the function used to read.
        try:
            sortedFrame = globals()["sortedFrame"]
        except KeyError:
            raise NameError("name 'sortedFrame' is not defined")

    # Plain dict/list are used instead of the go.Line / go.Data wrappers, which
    # newer plotly releases deprecate; the resulting figure is the same.
    dataList = [
        go.Scatter(
            x=sortedFrame[x[0]], y=sortedFrame[yItem],
            line=dict(width=3),
            name='%s vs %s' %(x[0],yItem))
        for yItem in y
    ]

    layout = go.Layout(             # all "layout" attributes: /python/reference/#layout
        title="Experiment:  " + exp_choice[0] + "<br>" + "Data:  " + data_choice[0].replace("\n","<br>"),
        updatemenus=[
            dict(
                x=-0.05,
                y=1,
                yanchor='top',
                buttons=[
                    # Single button that makes every trace visible again.
                    dict(
                        args=['visible', [True]*len(dataList)],
                        label='Total Graph',
                        method='restyle'
                    )
                ],
            )
        ],
    )

    figure = go.Figure(data=dataList, layout=layout)
    return figure

# This function creates the Scatterplot figure and returns it for display in the loop
def Make_Scatterplot(x,y,exp_choice,data_choice,sortedFrame):
    """Build a scatter figure, adding error bars where an error column was chosen.

    A dialog asks which columns hold error-bar data.  An error column is paired
    with a y column when its name starts with the y column's name; if several
    qualify, the last one ticked in the list is used.  A y column without a
    matching error column is plotted without error bars.

    Args:
        x: list of x column names; only element 0 is used.
        y: list of y column names, one trace is created for each.
        exp_choice: list whose element 0 is the experiment name for the title.
        data_choice: list whose element 0 is the dataset label for the title.
        sortedFrame: DataFrame to plot from.

    Returns:
        The plotly figure combining the traces and the layout.
    """
    errors = MyGUI(sortedFrame.columns, "Choose errorbar data").return_choice()

    dataList = []
    for yItem in y:
        error_column = None
        for item in errors:
            if item.startswith(yItem):
                error_column = item

        # Decide explicitly instead of catching an exception: a missing pairing
        # used to surface as KeyError, which the old "except NameError" fallback
        # never caught.
        if error_column is not None:
            obj = go.Scatter(
                x=sortedFrame[x[0]], y=sortedFrame[yItem],
                error_y = dict(
                    type = "data",
                    array = sortedFrame[error_column],
                    visible = True
                ),
                mode='markers',
                name='%s vs %s' %(x[0],yItem)
            )
        else:
            obj = go.Scatter(
                x=sortedFrame[x[0]], y=sortedFrame[yItem],
                name ='%s vs %s' %(x[0],yItem))

        dataList.append(obj)

    layout = go.Layout(             # all "layout" attributes: /python/reference/#layout
        title="Experiment:  " + exp_choice[0] + "<br>" + "Data:  " + data_choice[0].replace("\n","<br>"),
    )

    # A plain list is used instead of the go.Data wrapper, which newer plotly
    # releases deprecate.
    figure = go.Figure(data=dataList, layout=layout)
    return figure

# This function creates a GUI for the user to select what to do next.
# It then assigns the boolean variables according to the choice selected, returning them to the main loop.
def Next_Run(exp_loop,dataset_loop,plot_loop):
    """Ask the user what to do next and return the updated loop flags.

    Only the first ticked option is considered.  Any option other than the
    three handled below (i.e. re-plotting the same dataset) returns the flags
    exactly as they were passed in.

    Returns:
        (exp_loop, dataset_loop, plot_loop) for the next iteration.
    """
    options = ["Choose different experiment","Choose different dataset for same experiment",
               "Choose different plot options for same dataset", "End Run"]
    selection = MyGUI(options, "What would you like to do next?").return_choice()
    first = selection[0]

    if first == "Choose different experiment":
        return True, False, False
    if first == "Choose different dataset for same experiment":
        return True, True, False
    if first == "End Run":
        return False, False, False
    return exp_loop,dataset_loop,plot_loop

In [7]:
# Grab experiment list page
# A timeout keeps the notebook from hanging forever if the portal does not
# answer, and raise_for_status stops here on an HTTP error instead of letting
# an error page be parsed as the experiment list further down.
r = req.get('https://misportal.jlab.org/jamDb/', timeout=30)
r.raise_for_status()

The section between `<tbody>` and `</tbody>` contains all of the information that we need from this page, including the links to follow, the names of the experiments, and their descriptions.  `href` is the link reference.
The name is the text between `<a>` and `</a>`.
The description is on the line below the experiment line, i.e. the next `<td>` entry.

It looks like BeautifulSoup is a better tool for dealing with this data, so we use it to parse the page.

In [8]:
# convert to BeautifulSoup object
# NOTE: the `bs` alias imported here is also the name that Exp_Loop and
# Dataset_Loop (defined in an earlier cell) call, so this cell has to run
# before those functions are used.
from bs4 import BeautifulSoup as bs

# Parse the body of the experiment-list response with the html5lib parser.
soup = bs(r.content, "html5lib")

Get rid of junk data that we don't want

In [9]:
# Every anchor element on the experiment-list page.
g_data = soup.find_all("a")
# Trim the anchors at both ends of the list: drop the first 9 and the last 2,
# then 7 more from the front of what remains.
filtered = g_data[9:-2][7:]
# Display names of the remaining links, in page order.
experiments = get_experiments(filtered)

Now we start the main loop, which allows the user to create plots multiple times.

In [10]:
exp_loop = True

# Outer loop: one pass per chosen experiment.
while exp_loop:
    exp_choice = choose_experiment(experiments)
    soupData,dataset_loop = Exp_Loop(filtered,exp_choice)

    # Middle loop: one pass per chosen dataset of that experiment.
    while dataset_loop:
        pandaDF,plot_loop,data_choice = Dataset_Loop(soupData)

        # Inner loop: one pass per plot of that dataset.
        while plot_loop:
            x,y,typeOfPlot = Plot_Options()
            sortedFrame = pandaDF.sort_values(x[0])

            if typeOfPlot == "Line Chart":
                figure = Make_Line_Chart(x,y,exp_choice,data_choice)
            else:
                figure = Make_Scatterplot(x,y,exp_choice,data_choice,sortedFrame)
            display(py.iplot(figure))

            # The user's answer decides which of the three loops keep running.
            exp_loop,dataset_loop,plot_loop = Next_Run(exp_loop,dataset_loop,plot_loop)