In [1]:
# Imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, clear_output
import ipywidgets
import glob
import csv
import numpy as np


# Module-level state shared by the widget callbacks below
# Parsed batch table; stays None until get_data has loaded or parsed a batch
info = None
# Column-name lists that get_data fills in from id_headers
changers = constants = measures = None

# Initial value shown in the working-directory text box
wd = 'C:/Users/'

# Width applied to every figure drawn by plots()
plotwidth = 850

# Helper function definitions

def id_headers(info):
    """ Sort the column names of the info table into three groups

    PARAMS:
        info, DataFrame:
            The info dataframe returned by the parser.

    RETURNS:
        (indep_vars, constants, measures), tuple:
            Three lists of column names, in column order:
            upper-case columns holding more than one distinct value,
            upper-case columns holding at most one distinct value, and
            every other column except the bookkeeping columns
            "Path" and "Unnamed: 0".
    """
    # Columns that are neither parameters nor calculated stats
    bookkeeping = ("Path", "Unnamed: 0")

    varying, fixed, stats = [], [], []
    for name in info:
        # Parameters are recognised by their all-upper-case names
        if not name.isupper():
            if name not in bookkeeping:
                stats.append(name)
            continue
        # A parameter varies when it takes more than one distinct value
        distinct = info[name].unique()
        bucket = varying if len(distinct) > 1 else fixed
        bucket.append(name)

    return varying, fixed, stats

def plots(runname):
    """ Show the full complement of plots for a given runname

    Reads `{runname}_data.csv` and `{runname}_params.csv` from the folder
    named by the `wdir` and `folder` widgets, then draws one plotly figure
    per tab of an ipywidgets Tab.

    PARAMS:
        runname, string:
            File name of the run without the `_data.csv` / `_params.csv` suffix.
    """
    # Locate the files of the run you want
    run_prefix = f"{wdir.value}/{folder.value}/{runname}"
    d_file = f"{run_prefix}_data.csv"
    p_file = f"{run_prefix}_params.csv"

    # First get the stats we want from the datafile
    stats = pd.read_csv(d_file, engine='python')

    # Then get the parameters.
    # NOTE: no plot below uses them yet; the file is still read so that a run
    # with a missing params file fails here, exactly as it did before.
    with open(p_file, 'r') as param_file:
        params = {rows[0]: rows[1] for rows in csv.reader(param_file)}

    # Axis limits shared by several figures; computed once up front so that no
    # tab depends on another tab having been drawn first.
    iterations = stats["Iteration"].max()
    maxlen = stats["Length"].max()

    plot_names = ["Signed EE of Polymers by Iteration", "Proportion of Bond Types",
                  "Homochirality vs Length",
                  "Homochirality vs Age",
                  "Homochirality vs Length through Time", "Length histogram over time",
                  "Length by homochirality through age", "Age by homochirality through length"]

    # One Output widget per tab, addressable by tab name
    children = [ipywidgets.Output() for _ in plot_names]
    outputs = dict(zip(plot_names, children))

    tb = ipywidgets.Tab()
    tb.children = children
    # Title the tabs before drawing so they are labelled even if a figure fails
    for i, text in enumerate(plot_names):
        tb.set_title(i, text)
    display(tb)

    with outputs["Signed EE of Polymers by Iteration"]:
        clear_output()
        counts = stats.groupby("Iteration")['Signed ee'].value_counts().reset_index(name='count')
        fig = px.scatter(counts, x="Iteration", y="Signed ee", size='count')
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Proportion of Bond Types"]:
        clear_output()
        by_iteration = stats.groupby("Iteration")
        lefts = by_iteration["#LeftHomochiral"].sum().rename("LL")
        rights = by_iteration["#RightHomochiral"].sum().rename("RR")
        # Total = summed Length minus the number of rows, per iteration
        total = (by_iteration['Length'].sum() - by_iteration['Length'].count()).rename("Total")
        bondcounts = pd.DataFrame([lefts, rights, total]).transpose()
        bondcounts["LR"] = bondcounts["Total"] - bondcounts["RR"] - bondcounts["LL"]
        bondcounts = bondcounts.apply(lambda x : x / bondcounts["Total"])

        fig = go.Figure()
        for column, label in (("LL", 'left homochiral'),
                              ("RR", 'right homochiral'),
                              ("LR", 'heterochiral')):
            # Pass x explicitly: without it plotly plots against row position,
            # which only equals the iteration number when iterations are 0..n-1.
            fig.add_trace(go.Scatter(x=bondcounts.index,
                                     y=bondcounts[column],
                                     mode='lines',
                                     name=label))
        fig.update_layout(title='Proportion of LL, RR, and LR bonds by iteration',
                        xaxis_title='Iteration',
                        yaxis_title='Proportion')
        fig.update_yaxes(range=[0, 1])
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Homochirality vs Length through Time"]:
        clear_output()
        fig = px.scatter(stats, x="Length", y="%Homochirality", animation_frame="Iteration",
                        range_x=[-1, maxlen+1], range_y=[-0.01, 1.2])
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Homochirality vs Length"]:
        clear_output()
        counts = stats.groupby("Length")['%Homochirality'].value_counts().reset_index(name='count')
        fig = px.scatter(counts, x="Length", y="%Homochirality", size='count')
        fig.update_traces(marker=dict(line=dict(color='DarkSlateGrey')),
                        selector=dict(mode='markers'))
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Homochirality vs Age"]:
        clear_output()
        counts = stats.groupby("Age")['%Homochirality'].value_counts().reset_index(name='count')
        fig = px.scatter(counts, x="Age", y="%Homochirality", size='count')
        fig.update_traces(marker=dict(line=dict(color='DarkSlateGrey')),
                            selector=dict(mode='markers'))
        # Mode of %Homochirality for each Age: the row with the highest count.
        # It has to come from the 'count' column; running value_counts() on
        # this aggregated frame would see every value exactly once.
        modes = counts.loc[counts.groupby("Age")["count"].idxmax()]
        fig.add_trace(go.Scatter(x=modes["Age"], y=modes["%Homochirality"], mode="markers"))
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Length by homochirality through age"]:
        clear_output()
        fig = px.scatter(stats, x="Length", y="%Homochirality", animation_frame="Age", hover_name="%Homochirality", range_x=[1, maxlen+1])
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Age by homochirality through length"]:
        clear_output()
        fig = px.scatter(stats, x="Age", y="%Homochirality", animation_frame="Length", hover_name="%Homochirality", range_y=(0, 1), range_x=[-1, iterations+1])
        fig.update_layout(width=plotwidth)
        fig.show()

    with outputs["Length histogram over time"]:
        clear_output()
        # Just the polymers please
        polymers = stats[stats["Length"] > 1]
        if polymers.empty:
            # Nothing to bin; max() of an empty column would otherwise raise
            print("No polymers (Length > 1) in this run.")
        else:
            longest = int(polymers["Length"].max())
            fig = px.histogram(polymers, x="Length", animation_frame="Iteration", range_x=(0, longest),
                                nbins=longest)
            fig.update_layout(width=plotwidth)
            fig.show()
        
        
    

    
    
###############################################

# Functions to calculate stats in data parsing

def max_len(df):
    """ Return the largest value in the "Length" column of a run's data

    Uses the pandas reduction rather than the builtin max(), so missing (NaN)
    entries are skipped instead of making the answer depend on row order.
    If the column has no valid values the result is NaN.

    PARAMS:
        df, DataFrame:
            Run data containing a "Length" column.
    """
    return df["Length"].max()

def longest_chain(df):
    """ Return the largest non-missing "Longest chain length" value, or None

    PARAMS:
        df, DataFrame:
            Run data; the "Longest chain length" column is optional.

    RETURNS:
        The column maximum, or None when the column is absent or its values
        cannot be compared.
    """
    try:
        return df["Longest chain length"].dropna().max()
    except (KeyError, TypeError):
        # Only the expected failures are treated as "no value"; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        return None
    
def get_steady_state(pandas_series,epsilon=0.01,window_size=30, step=10):
    """ Estimate the value a series settles to, using sliding windows

    The series is cut into windows of `window_size` consecutive entries whose
    start positions are `step` apart. The steady state begins at the first
    window from which every window up to the end has a standard deviation
    below `epsilon`.

    PARAMS:
        pandas_series, Series:
            The values to analyse, in order.
        epsilon, float:
            Upper bound (exclusive) on a window's standard deviation.
        window_size, int:
            Number of entries per window.
        step, int:
            Distance between the starts of consecutive windows; must be > 0.

    RETURNS:
        (value, index), tuple:
            The mean of the window means from the steady window onward,
            rounded to 3 places, and the position of that window in the list
            of windows (not a position in the series). (None, None) if the
            series has no full window or never settles.

    RAISES:
        ValueError:
            If step is not positive (the window would never advance).
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    # Mean and standard deviation of every full window
    means = []
    stdevs = []
    for start in range(0, len(pandas_series) - window_size + 1, step):
        # .iloc makes the slice positional regardless of the series' index
        frame = pandas_series.iloc[start:start + window_size]
        means.append(frame.mean())
        stdevs.append(frame.std())

    # Walk back from the last window while the windows are still quiet; this
    # finds the earliest window after which all stdevs are below epsilon.
    first_steady = len(stdevs)
    while first_steady > 0 and stdevs[first_steady - 1] < epsilon:
        first_steady -= 1

    if first_steady == len(stdevs):
        # No windows at all, or the final window is not below epsilon
        return (None, None)

    settled = means[first_steady:]
    return round(sum(settled)/len(settled),3), first_steady

def get_chirality_ratio(df,epsilon=0.01,window_size=30, step=10):
    """ Steady states of the LL, RR and LR bond proportions of a run

    PARAMS:
        df, DataFrame:
            Run data with "Iteration", "Length", "#LeftHomochiral" and
            "#RightHomochiral" columns.
        epsilon, window_size, step:
            Passed through unchanged to get_steady_state.

    RETURNS:
        (steady_LL, steady_RR, steady_LR), tuple:
            The get_steady_state result for each proportion, in that order.
    """
    # Per-iteration totals (same table as the bond-type plot)
    per_iteration = df.groupby("Iteration")
    lengths = per_iteration['Length']
    proportions = pd.DataFrame({
        "LL": per_iteration["#LeftHomochiral"].sum(),
        "RR": per_iteration["#RightHomochiral"].sum(),
        # Total = summed Length minus the number of rows
        "Total": lengths.sum() - lengths.count(),
    })
    proportions["LR"] = proportions["Total"] - proportions["RR"] - proportions["LL"]

    # Turn the counts into fractions of the per-iteration total
    proportions = proportions.div(proportions["Total"], axis=0)

    # Steady state of each proportion, using the caller's window settings
    settings = dict(epsilon=epsilon, window_size=window_size, step=step)
    return tuple(get_steady_state(proportions[kind], **settings)
                 for kind in ("LL", "RR", "LR"))
    
###############################################


# Functions for widgets

def get_data(b):
    """Run on click for Get Data button

    Loads `{batch}_info.csv` if it exists (unless "Force reparse" is ticked),
    otherwise parses the batch and writes that file. Then refreshes the
    module-level `info`, `changers`, `constants` and `measures`, rebuilds both
    exploration tabs and lists the detected parameters.

    PARAMS:
        b:
            The argument supplied by on_click; not used.
    """
    global info, changers, constants, measures

    progress_bar.value = 0
    batch_path = f"{wdir.value}/{folder.value}/{batch.value}"
    info_path = f"{batch_path}_info.csv"

    loaded = None
    # Do we want to force a re-parse?
    if not reparse.value:
        with status_text:
            clear_output()
            print("loading info")
        try:
            loaded = pd.read_csv(info_path)
        except FileNotFoundError:
            # No cached table yet; fall through to parsing
            loaded = None

    if loaded is None:
        try:
            # parse and export
            parse(batch_path).to_csv(info_path)
        except FileNotFoundError as err:
            # Report the problem in the status area instead of leaving it
            # stuck on "parsing info"
            with status_text:
                clear_output()
                print(err)
            return
        # Read the export back so a fresh parse and a cached load produce the
        # same table: parse() holds parameters as strings, while read_csv
        # infers their types.
        loaded = pd.read_csv(info_path)

    info = loaded

    # Get changers, constants, measures
    changers, constants, measures = id_headers(info)

    # Update exploration tabs
    with boxplot_box:
        clear_output()
        boxplot()

    with singlerun_box:
        clear_output()
        single_run()

    # Say "I'm done!"
    with status_text:
        clear_output()
        print("ok done")
    progress_bar.value = 100

    with param_print_box:
        # Drop the listing from any previous import before printing this one
        clear_output()
        for col in info:
            if col.isupper():
                print(f"{col}: {info[col].unique()}")
                
    
            
def parse(path):
    """ Parse a batch (called in get_data)

    Each run is a `<run>_data.csv` file paired with the `<run>_params.csv`
    file of the same run. Data files without a params file are left out.

    PARAMS:
        path, string:
            The full path of the batch including the batchname component

    RETURNS:
        info, DataFrame:
            The dataframe used by all the rest of the things: one row per run
            holding "Path", the run's parameters and the calculated stats.

    RAISES:
        FileNotFoundError:
            If the batch has no data files, or none with a params file.
    """
    # Prep the feedback
    with status_text:
        clear_output()
        print("parsing info")

    data_suffix = "_data.csv"
    params_suffix = "_params.csv"

    # Get all the filenames under that batchname
    matches = glob.glob(f"{path}_*.csv")
    data_filenames = sorted(name for name in matches if name.endswith(data_suffix))
    params_available = {name for name in matches if name.endswith(params_suffix)}

    # Are there even any filenames? Let's not try to parse something that doesn't exist.
    if not data_filenames:
        raise FileNotFoundError("Batch not found.")

    # Pair each data file with the params file of the same run by name.
    # Sorting the two lists separately and zipping them can pair a run with
    # another run's parameters (the "_data"/"_params" suffixes sort differently
    # against longer run names) or shift every pair when one file is missing.
    runs = []
    for d_file in data_filenames:
        run_path = d_file[:-len(data_suffix)]
        p_file = run_path + params_suffix
        if p_file in params_available:
            runs.append((run_path, p_file, d_file))

    if not runs:
        raise FileNotFoundError("Batch has data files but no matching params files.")

    # Get the amount to increment the progress bar after each file
    increment = 100 / len(runs)

    info_list = []
    for run_path, p_file, d_file in runs:

        # First get the stats we want from the datafile
        data = pd.read_csv(d_file, engine='python')
        # Get the steady-states of the chiralities
        steadies = get_chirality_ratio(data,epsilon=0.05)
        stats = {"MaxLen":max_len(data),
                 "LongestChain":longest_chain(data),
                 "SteadyLL":steadies[0][0],
                 "SteadyRR":steadies[1][0],
                 "SteadyLR":steadies[2][0]}

        # Then get the parameters (rows without a name and a value are ignored)
        with open(p_file, 'r', newline='') as params:
            param_dict = {rows[0]: rows[1]
                          for rows in csv.reader(params) if len(rows) >= 2}

        # Merge the parameters and the stats into a single row and store it
        info_list.append({"Path":run_path,**param_dict,**stats})

        # Last step is incrementing the progress bar!
        progress_bar.value += increment

    # Turn this list into a DataFrame and return it
    return pd.DataFrame(info_list)


#################################################
# THE BOXPLOT CODE #

def boxplot():
    """ Build and show the boxplot controls for the loaded batch

    Four dropdowns choose the grouping column, the colour column, the
    animation column and the stat to plot; "plot" draws a plotly box plot of
    `info` with those choices and "clear output" clears the output and shows
    the controls again.
    """

    # Widget making and handling
    # Dropdown menus (colour and slider axis may be left unset via None)
    boxplot_groups = ipywidgets.Dropdown(description="group", options=changers)
    boxplot_colors = ipywidgets.Dropdown(description="color", options=[None]+changers)
    boxplot_animation = ipywidgets.Dropdown(description="slider axis", options=[None]+changers)
    boxplot_measures = ipywidgets.Dropdown(description="stat", options=measures)

    # Buttons
    boxplot_button = ipywidgets.Button(description="plot")
    boxplot_clear_button = ipywidgets.Button(description="clear output")

    # Layout: selectors stacked above the row of buttons
    boxplot_selectors = ipywidgets.VBox(
        [boxplot_groups, boxplot_colors, boxplot_animation, boxplot_measures])
    button_row = ipywidgets.HBox([boxplot_button, boxplot_clear_button])
    boxplot_container = ipywidgets.VBox([boxplot_selectors, button_row])

    # Button behavior

    def draw(_clicked):
        """ Make the boxplot using the values selected in boxplot widgets """
        choices = dict(x=boxplot_groups.value,
                       y=boxplot_measures.value,
                       color=boxplot_colors.value,
                       animation_frame=boxplot_animation.value)
        px.box(info, hover_data=["Path"], **choices).show()

    def reset(_clicked):
        """Deletes the plots it's already made"""
        clear_output(wait=True)
        display(boxplot_container)

    boxplot_button.on_click(draw)
    boxplot_clear_button.on_click(reset)

    # Show the widgets
    display(boxplot_container)
    
    
########################################################################
# THE SINGLE RUN CODE #

def single_run():
    """ Build and show the single-run controls for the loaded batch

    One slider per varying parameter narrows down the list of runs; picking a
    run and pressing "Plot this run" draws its plots below the controls.
    """
    # Snapshot of the varying parameters these widgets are built for
    run_vars = list(changers)

    # Single run helpers

    # Get just the filename part of the Paths series
    def filenames_from_paths(df):
        # Paths start with "{wdir}/{folder}/", i.e. two separators
        prefix_len = len(wdir.value) + len(folder.value) + 2
        return [path[prefix_len:] for path in df["Path"]]

    # Get the filenames of the runs that match the selected var values
    def get_values(b):
        df = info
        # Keep only the rows matching every slider. (This used to loop over
        # the builtin `vars`, which raised TypeError on every click.)
        for name, widget in zip(run_vars, var_widgets):
            df = df[df[name] == widget.value]
        file_selector.options = filenames_from_paths(df)

    # Show the plots of the the selected run
    def plot(b):
        if file_selector.value is None:
            # Nothing selected (e.g. no run matches the sliders)
            return
        clear_output(wait=True)
        display(selector_box)
        plots(file_selector.value)

    # Main

    # Widgets
    button = ipywidgets.Button(description="Update file list")
    # Start with every run of the batch selectable
    file_selector = ipywidgets.Select(options=filenames_from_paths(info),description="Select file:")
    plot_button = ipywidgets.Button(description="Plot this run")

    # Make a slider widget for each variable over the sorted values it takes
    var_widgets = []
    for var in run_vars:
        uniques = info[var].unique()
        uniques.sort()
        var_widgets.append(ipywidgets.SelectionSlider(options=uniques,description=var))

    # What the buttons do
    button.on_click(get_values)
    plot_button.on_click(plot)

    # Widget layouts
    selector_box = ipywidgets.VBox([ipywidgets.HBox([ipywidgets.VBox(var_widgets),file_selector]),
                                  ipywidgets.HBox([button,plot_button])])
    # Show the widgets
    display(selector_box)


##################################
# MAIN #
########
  


    
# Widget instantiation:

# --- Import controls ---

# Text boxes that locate the batch: working directory, subfolder, batch name
wdir = ipywidgets.Text(value=wd, description="working directory:")
folder = ipywidgets.Text(placeholder="enter subfolder name", description="subfolder:")
batch = ipywidgets.Text(placeholder="enter batch name", description="batch:")
loc_box = ipywidgets.VBox([wdir, folder, batch])

# Tick to parse again even when a saved info file exists
reparse = ipywidgets.Checkbox(value=False, description="Force reparse")

# Clicking this runs get_data
parse_button = ipywidgets.Button(description="Get data")
parse_button.on_click(get_data)

# Status line stacked above the progress bar
status_text = ipywidgets.Output()
progress_bar = ipywidgets.FloatProgress(min=0, max=100)
progress = ipywidgets.VBox([status_text, progress_bar])

# Where get_data prints the detected params
param_print_box = ipywidgets.Output()

# Import controls on the left, parameter listing on the right
import_interact_box = ipywidgets.VBox([loc_box, reparse, parse_button, progress])
import_box = ipywidgets.HBox([import_interact_box, param_print_box])


# --- Exploration output ---

# One output area per exploration view, each starting with a hint
boxplot_box = ipywidgets.Output()
singlerun_box = ipywidgets.Output()
for _pane in (boxplot_box, singlerun_box):
    with _pane:
        print("Please import a batch.")

# Bundle those in tabs
tabs = ipywidgets.Tab(children=[boxplot_box, singlerun_box])
for _position, _title in enumerate(("Boxplot", "Single run plot")):
    tabs.set_title(_position, _title)

# And here are actual actions.

display(import_box)
display(tabs)





        


HBox(children=(VBox(children=(VBox(children=(Text(value='C:/Users/', description='working directory:'), Text(v…

Tab(children=(Output(), Output()), _titles={'0': 'Boxplot', '1': 'Single run plot'})