<a href="https://colab.research.google.com/github/annikaaross/Homochirality-project/blob/master/History_reader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title **Mount drive**
from google.colab import drive
drive.mount('/content/drive',force_remount=True)
wdir = '/content/drive/Shared drives/Homochirality'

Mounted at /content/drive


In [2]:
# @title **Imports**
import numpy as np
import copy
from google.colab import widgets, output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import time
import datetime
import uuid
from IPython.display import display, Markdown, clear_output
import ipywidgets
import glob
import csv


parameters = ["BASE_BOND_BREAK_PROBABILITY","HOMOCHIRAL_BREAK_FACTOR","HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR","LENGTH_FACTOR","N","LAMBDA","HOMOCHIRAL_BREAK_FACTOR_LEFT","HOMOCHIRAL_BREAK_FACTOR_RIGHT","HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR_LEFT","HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR_RIGHT","POOF_CHANCE","BOND_PROB","POOL_SIZE","iterations","METHOD","POISSON_FACTOR","FUSION"]
statistics = ["MaxLen"]

In [3]:
# Option 3: Pick a single plot to compare over runs

In [12]:
#@title **Helper functions**

# A function to work out which column names are what 
def id_headers(info):
  # Make a list to store the various categories
  indep_vars = [] # parameters that change
  constants = [] # parameters that don't change
  measures = [] # data calculated in parsing
  # Iterate through all the columns in info
  for col in info:
    # And get the ones that are parameters (are all upper-case)
    if col.isupper():
      # Find how many different values that parameter takes over the course of the run
      n=len(info[col].unique())
      # If it's more than one
      if n > 1:
        # Then the parameter varies and we want to store it in indep_vars.
        indep_vars.append(col)
      else: 
        # The parameter doesn't very and we want it in constants.
        constants.append(col)
    else:
      # It's not a parameter
      # Check if it's a "bookkeeping" column
      if (col != "Path") and (col != "Unnamed: 0"):
        # It's a calculated data point and we store it in measures
        measures.append(col)
  # Now we return the lists we've made.
  return indep_vars, constants, measures

def plots(runname):
  # Get the run you want

  d_file = f"{wdir}/{folder.value}/{runname}_data.csv"
  p_file = f"{wdir}/{folder.value}/{runname}_params.csv"

  # First get the stats we want from the datafile
  data = pd.read_csv(d_file, engine='python')

  # Then get the parameters
  param_file = open(p_file,'r')
  reader = csv.reader(param_file)
  params = {rows[0]:rows[1] for rows in reader}
  param_file.close()

  # The plots

  stats = data

  iterations = data["Iteration"].max()


  plots = ["Signed EE of Polymers by Iteration","Proportion of Bond Types",
           "Homochirality vs Length",
           "Homochirality vs Age",
           "Homochirality vs Length through Time","Length histogram over time",
           "Length by homochirality through age","Age by homochirality through length"]
  tb = widgets.TabBar(plots)

  with tb.output_to("Signed EE of Polymers by Iteration", select=True):
    tb.clear_tab()
    df = stats
    counts = df.groupby("Iteration")['Signed ee'].value_counts().reset_index(name='count')
    fig = px.scatter(counts, x="Iteration", y="Signed ee", size='count')
    fig.show()


  with tb.output_to("Proportion of Bond Types", select=False):
    tb.clear_tab()

    df = stats
    lefts = df.groupby("Iteration")["#LeftHomochiral"].sum().rename("LL")
    rights = df.groupby("Iteration")["#RightHomochiral"].sum().rename("RR")
    total = (df.groupby("Iteration")['Length'].sum() - df.groupby("Iteration")['Length'].count()).rename("Total")
    bondcounts = pd.DataFrame([lefts, rights, total]).transpose()
    bondcounts["LR"] = bondcounts["Total"] - bondcounts["RR"] - bondcounts["LL"]
    bondcounts = bondcounts.apply(lambda x : x / bondcounts["Total"])
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=bondcounts["LL"],
                        mode='lines',
                        name='left homochiral'))
    fig.add_trace(go.Scatter(y=bondcounts["RR"],
                        mode='lines',
                        name='right homochiral'))
    fig.add_trace(go.Scatter(y=bondcounts["LR"],
                        mode='lines', name='heterochiral'))
    fig.update_layout(title='Proportion of LL, RR, and LR bonds by iteration',
                    xaxis_title='Iteration',
                    yaxis_title='Proportion')
    fig.update_yaxes(range=[0, 1])

    fig.show()

  with tb.output_to("Homochirality vs Length through Time", select=False):
    tb.clear_tab()
    df = stats
    maxlen = max(df["Length"])

    fig = px.scatter(df, x="Length", y="%Homochirality",animation_frame="Iteration",
                    range_x=[-1,maxlen+1],range_y=[-0.01,1.2])
    fig.show()

  with tb.output_to("Homochirality vs Length", select=False):
    tb.clear_tab()
    df = stats
    df = df.groupby("Length")['%Homochirality'].value_counts().reset_index(name='count')
    fig = px.scatter(df, x="Length",y="%Homochirality",size='count')
    fig.update_traces(marker=dict(line=dict(color='DarkSlateGrey')),
                    selector=dict(mode='markers'))
    fig.show()

  with tb.output_to("Homochirality vs Age", select=False):
    tb.clear_tab()
    df = stats
    df = df.groupby("Age")['%Homochirality'].value_counts().reset_index(name='count')
    fig = px.scatter(df, x="Age",y="%Homochirality",size='count')
    fig.update_traces(marker=dict(line=dict(color='DarkSlateGrey')),
                    selector=dict(mode='markers'))
    # This gets the mode of col b for each value of col a
    modes = df.groupby("Age")["%Homochirality"].agg(lambda x:x.value_counts().index[0])
    fig.add_trace(go.Scatter(y=modes, mode="markers"))
    fig.show()

  with tb.output_to("Length by homochirality through age", select = False):
    tb.clear_tab()
    df = stats
    fig = px.scatter(df, x="Length", y="%Homochirality", animation_frame="Age", hover_name="%Homochirality", range_x=[1,maxlen+1])
    fig.show()

  with tb.output_to("Age by homochirality through length", select = False):
    tb.clear_tab()
    df = stats
    fig = px.scatter(df, x="Age", y="%Homochirality", animation_frame="Length", hover_name="%Homochirality", range_y=(0,1), range_x=[-1,iterations+1])
    fig.show()

  with tb.output_to("Length histogram over time", select=False):
    tb.clear_tab()
    df = stats

    # Just the polymers please
    df = df[df["Length"]>1]

    # Find the largest count overall
    count_iteration = df.groupby("Iteration")["Length"].value_counts()
    biggest = max(count_iteration)

    fig = px.histogram(df, x="Length", animation_frame="Iteration",range_x=(0,max(df["Length"])),
                        nbins=max(df["Length"]))
    fig.show()



###############

# Functions to calculate stats in data parsing

def max_len(df):
  return max(df["Length"])

def longest_chain(df):
  return df["Longest chain length"].dropna().max()



In [5]:
# @title **Import data**

# Get folder name and batch name
folder = ipywidgets.Text(description="subfolder:",placeholder="enter subfolder name",value="small_tests")
batch = ipywidgets.Text(description="batch:",placeholder="enter batch name",value="test")
# Checkbox for if we wanna force a reparse
reparse = ipywidgets.Checkbox(description="Force reparse",value=False)
# Text and int bar widgets for the progress bar
status_text = ipywidgets.Output()
progress_bar = ipywidgets.FloatProgress(min=0, max=100) 
# Make a name for the data to go into
info = None
# Make a spot for storing out the variables that change
changers = None

def get_data(b):
  progress_bar.value = 0
  global info
  batch_path = f"{wdir}/{folder.value}/{batch.value}"
  # Do we want to force a re-parse?
  if not reparse.value:
    try:
      with status_text:
        clear_output()
        print("loading info")
      info = pd.read_csv(f"{batch_path}_info.csv")
    except:
      # parse
      info = parse(batch_path)
      # export
      info.to_csv(f"{batch_path}_info.csv")
  else:
    # parse
    info = parse(batch_path)
    # export
    info.to_csv(f"{batch_path}_info.csv")
  # Say "I'm done!"
  with status_text:
    clear_output()
    print("ok done")
  progress_bar.value = 100
  for col in info:
    if col.isupper():
      print(f"{col}: {info[col].unique()}")
  



  

def parse(path):
  # Prep the feedback
  with status_text:
    clear_output()
    print("parsing info")

  # Get all the filenames under that batchname
  params_filenames = []
  data_filenames = [] 
  for filename in glob.glob(f"{path}_*.csv"):
    if filename[-10:-4] == "params":
      params_filenames.append(filename)
    elif filename[-8:-4] == "data":
      data_filenames.append(filename)

  # Get the amount to increment the progress bar after each file
  increment = 1/len(data_filenames) * 100

  # Sort the filenames lists so they'll definitely sync
  params_filenames.sort()
  data_filenames.sort()

  info_list = []
  for p_file, d_file in zip(params_filenames,data_filenames):

    # First step is getting the general path for this run!
    run_path = d_file[:-9]

    # First get the stats we want from the datafile
    data = pd.read_csv(d_file, engine='python')
    stats = {"MaxLen":max_len(data),
             "LongestChain":longest_chain(data)}

    # Then get the parameters
    params = open(p_file,'r')
    reader = csv.reader(params)
    param_dict = {rows[0]:rows[1] for rows in reader}
    params.close()

    # Merge the parameters and the stats into a single dictionary
    info_row = {"Path":run_path,**param_dict,**stats}
    
    # Store the row
    info_list.append(info_row)

    # Last step is incrementing the progress bar!
    progress_bar.value += increment

  # Turn this list into a DataFrame and return it
  return pd.DataFrame(info_list)


with status_text:
  clear_output()
progress_bar.value = 0
loc_box = ipywidgets.VBox([folder,batch])
display(loc_box)
# Button for running the parsing stuff
parse_button = ipywidgets.Button(description="Get data")
parse_button.on_click(get_data)
display(reparse)
display(parse_button)
# Box for progress bars!
progress = ipywidgets.VBox([status_text,progress_bar])
display(progress)



 



VBox(children=(Text(value='small_tests', description='subfolder:', placeholder='enter subfolder name'), Text(v…

Checkbox(value=False, description='Force reparse')

Button(description='Get data', style=ButtonStyle())

VBox(children=(Output(), FloatProgress(value=0.0)))

BASE_BOND_BREAK_PROBABILITY: ['0.5']
HOMOCHIRAL_BREAK_FACTOR: ['0.5']
HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR: ['0.5']
LENGTH_FACTOR: ['0.6']
N: ['40']
LAMBDA: ['6']
HOMOCHIRAL_BREAK_FACTOR_LEFT: ['0.5']
HOMOCHIRAL_BREAK_FACTOR_RIGHT: ['0.5']
HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR_LEFT: ['0.5']
HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR_RIGHT: ['0.5']
POOF_CHANCE: ['0.33']
BOND_PROB: ['0.33']
POOL_SIZE: ['10' '85' '160' '235' '310' '385' '460' '535' '610' '685' '760' '835'
 '910' '985']
ITERATIONS: ['100']
METHOD: ['standard']
POISSON_FACTOR: ['1']
FUSION: ['True' 'False']
REFILLRANDOM: ['False']
REFILLPERCENT: ['[False']
REFILLNUMBER: ['[False']
REFILLNUMBERDECREASE: ['[False']
REFILLNORMAL: ['False']
REFILLPERCENTDECREASE: ['[False']
BASE_BOND_BREAK_PROBABILITY: ['0.5']
HOMOCHIRAL_BREAK_FACTOR: ['0.5']
HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR: ['0.5']
LENGTH_FACTOR: ['0.6']
N: ['40']
LAMBDA: ['6']
HOMOCHIRAL_BREAK_FACTOR_LEFT: ['0.5']
HOMOCHIRAL_BREAK_FACTOR_RIGHT: ['0.5']
HOMOCHIRAL_NEIGHBOR_IMPROV_FACTOR_LEF

In [9]:
#@title **Boxplot**
# First check if info has been imported
if not isinstance(info,pd.DataFrame):
  print("Please import your batch, then re-run this cell.")

else:
  # An import to allow clearing the output down the line
  import IPython.display


  def make_boxplot(b):
    # Make the boxplot using the values selected in widgets
    fig = px.box(info, 
                 x=boxplot_groups.value,
                 y=boxplot_measures.value,
                 color=boxplot_colors.value,
                 hover_data=["Path"])
    fig.show()


  def clear(b):
    # Deletes the plots it's already made
    IPython.display.clear_output(wait=True)
    display(boxplot_container)


  # Now we're cooking with gas.
  # Run id_headers to get the varying and static parameters
  # and the measured stats
  vars, constants, measures = id_headers(info)

  # Widget making and handling
  # Dropdown menus
  boxplot_groups = ipywidgets.Dropdown(options=vars, description="group")
  boxplot_colors = ipywidgets.Dropdown(options=[None]+vars, description="color")
  boxplot_measures = ipywidgets.Dropdown(options=measures, description="stat")
  # Buttons
  boxplot_button = ipywidgets.Button(description="plot")
  boxplot_clear_button = ipywidgets.Button(description="clear output")
  # Layout
  boxplot_selectors = ipywidgets.VBox([boxplot_groups,boxplot_colors,boxplot_measures])
  boxplot_container = ipywidgets.VBox([boxplot_selectors,ipywidgets.HBox([boxplot_button,boxplot_clear_button])])
  # Button behavior
  boxplot_button.on_click(make_boxplot)
  boxplot_clear_button.on_click(clear)
  # Show the widgets
  display(boxplot_container)
  

VBox(children=(VBox(children=(Dropdown(description='group', options=('POOL_SIZE', 'FUSION'), value='POOL_SIZE'…

In [11]:
#@title **Plot an individual run**

# First check if info has been imported
if not isinstance(info,pd.DataFrame):
  print("Please import your batch, then re-run this cell.")

else:

  # Definitions

  # Get just the filename part of the Paths series
  def filenames_from_paths(df):
    trim = lambda x : x[len(wdir)+len(folder.value)+2:]
    return list(map(trim,list(df["Path"])))

  # Get the filenames of the runs that match the selected var values
  def get_values(b):
    value = lambda x : x.value
    selected_values = list(map(value, var_widgets))
    df = info
    for n in range(len(vars)):
      df = df[df[vars[n]]==selected_values[n]]
    file_selector.options = filenames_from_paths(df)


  def clear(b):
      # Deletes the plots it's already made
      clear_output(wait=True)
      display(boxplot_container)

  # Show the plots of the the selected run
  def plot(b):
    clear_output(wait=True)
    display(selector_box)
    plots(file_selector.value)

  # Main

  # Here's a list of the filenames you can pick.
  filenames = filenames_from_paths(info)

  # Widgets
  button = ipywidgets.Button(description="Update file list")
  out = ipywidgets.Output()
  file_selector = ipywidgets.Select(options=filenames,description="Select file:")
  plot_button = ipywidgets.Button(description="Plot this run")

  # Run id_headers to get the varying and static parameters
  # and the measured stats
  vars, constants, measures = id_headers(info)
  var_vals = []
  var_widgets = []

  # Now we need the values that each of the vars takes
  for var in vars:
    # Get the unique values that are taken by var
    uniques = info[var].unique()
    uniques.sort()
    var_vals.append(uniques)

  # Make a slider widget for each variable
  for n in range(len(vars)):
    var_widgets.append(ipywidgets.SelectionSlider(options=var_vals[n],description=vars[n]))
      
  # What the buttons do
  button.on_click(get_values)
  plot_button.on_click(plot)

  # Widget layouts
  selector_box = ipywidgets.VBox([ipywidgets.HBox([ipywidgets.VBox(var_widgets),file_selector]),
                                  ipywidgets.HBox([button,plot_button])])
  # Show the widgets
  display(selector_box)




VBox(children=(HBox(children=(VBox(children=(SelectionSlider(description='POOL_SIZE', index=9, options=('10', …

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>