# Plotting NanoDSF Data

This script extracts NanoDSF data exported to an Excel spreadsheet or CSV file. The 350/330 nm ratio and the derivative of this ratio are plotted as a function of temperature. Peaks are identified automatically, and Tm is assigned based on the position of the largest peak. The data for each experiment are plotted individually in a large grid. In addition, samples can be overlaid in a single plot for direct comparison.

## Input files

**Required:** Excel (.xlsx) or CSV (.csv) file exported from a Prometheus NanoDSF instrument.<br>

**Optional:** Text file with a list of sample names, one per line. *The number of names should match the number of samples in the data.*

### Initializing the script

Run the following cell to initialize the script.

In [1]:
# Import packages and define functions
import ipywidgets as widgets
import glob, time
import numpy as np
from sys import path, exit
import matplotlib.pyplot as plt
from jupyter_ui_poll import ui_events
import pandas as pd


def read_data(datafile_name):
    """Load an exported ratio table into a float array.

    Parameters
    ----------
    datafile_name : str
        Path to a file whose name ends in ``csv`` or ``xlsx`` (any letter
        case). Excel files are read from the sheet named ``'Ratio'``.

    Returns
    -------
    data : numpy.ndarray
        Every row after the first one of the parsed table, cast to float.
    num_samples : float
        Number of columns divided by three (the rest of this file reads
        columns ``3*i + 1`` and ``3*i + 2`` for sample ``i``).

    Raises
    ------
    SystemExit
        If the file name has neither extension.
    """
    # Compare the extension case-insensitively so files named "RUN.CSV" or
    # "Run.XLSX" are not rejected.
    lowered_name = datafile_name.lower()
    if lowered_name.endswith('csv'):
        data_list = pd.read_csv(datafile_name)
    elif lowered_name.endswith('xlsx'):
        data_list = pd.read_excel(datafile_name, sheet_name='Ratio')
    else:
        exit("Data file needs to be either 'csv' or 'xlsx'")

    # The first parsed row is dropped before the float conversion
    # (presumably a units/sub-header row in the export -- confirm against a
    # real instrument file).
    data = np.array(data_list[1:]).astype(float)

    # Count columns from the frame itself so a table with no rows still works.
    num_samples = len(data_list.columns) / 3
    return data, num_samples

def get_names(exp_names_list):
    """Return the newline-separated entries of ``exp_names_list`` as a list."""
    return exp_names_list.split("\n")

def make_data_df(names, data,
                rl_win = 3,
                ord = 2):
    """Build the per-sample curves and locate their derivative peaks.

    Parameters
    ----------
    names : sequence of str
        One name per sample; sample ``i`` is read from columns ``3*i + 1``
        (used as x) and ``3*i + 2`` (used as y) of ``data``.
    data : numpy.ndarray
        2-D numeric array as returned by ``read_data``.
    rl_win : number
        Passed as both ``left`` and ``right`` to
        ``derivative.SavitzkyGolay``.
    ord : int
        Passed as ``order`` to ``derivative.SavitzkyGolay``.

    Returns
    -------
    data_df : pandas.DataFrame
        Columns keyed by ``(name, 'x')``, ``(name, 'y')`` and
        ``(name, 'dy')``.
    Tm_dict : dict
        ``name + '_Tm'`` maps to the x value at the tallest detected peak
        (``nan`` when no peak was found) and ``name + '_peaks'`` to the
        array of peak indices.
    """
    from scipy.signal import find_peaks
    import derivative
    sg = derivative.SavitzkyGolay(left=rl_win, right=rl_win, order=ord, periodic=False)
    data_df = pd.DataFrame()
    Tm_dict = {}

    for i, name in enumerate(names):
        x = data[:, (i*3) + 1]
        y = data[:, (i*3) + 2]
        dy = sg.d(y, x)
        # Flip the derivative when its dominant excursion is negative so the
        # peak search below always looks for maxima.
        if (-1.0*dy).max() > dy.max():
            dy = -1.0*dy

        # find_peaks requires distance >= 1, which 5% of a short trace
        # would violate.
        min_separation = max(1, len(x)*0.05)
        peaks, _ = find_peaks(dy, height = dy.max()*0.5, distance = min_separation)
        if peaks.size != 0:
            Tm = x[peaks[dy[peaks].argmax()]]
        else:
            # Without this, Tm was either undefined (first sample) or
            # silently carried over from the previous sample.
            Tm = np.nan
        Tm_dict[name + '_Tm'] = Tm
        Tm_dict[name + '_peaks'] = peaks
        data_df[name, 'x'] = x
        data_df[name, 'y'] = y
        data_df[name, 'dy'] = dy
    return data_df, Tm_dict

def make_blank_overlay(overlay_list,
                       bgcol = 'white'):
    """Create an empty two-row Plotly figure with placeholder traces.

    For every entry of ``overlay_list`` three empty traces are appended, in
    this order: a line in row 1, a line in row 2 (the only one shown in the
    legend) and a red marker trace in row 2. ``bgcol`` is used as the
    figure's ``paper_bgcolor``. Returns the figure.
    """
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.)

    # Only the number of entries matters here; the traces stay unnamed.
    for _ in overlay_list:
        top_line = go.Scatter(x=[], y=[], showlegend=False,
                              mode="lines",
                              line=dict(color='black'))
        fig.add_trace(top_line, row=1, col=1)

        bottom_line = go.Scatter(x=[], y=[], showlegend=True,
                                 name='',
                                 mode="lines",
                                 line=dict(color='black'))
        fig.add_trace(bottom_line, row=2, col=1)

        peak_markers = go.Scatter(x=[], y=[], showlegend=False, mode='markers',
                                  marker=dict(color='red', size=6))
        fig.add_trace(peak_markers, row=2, col=1)

    # Both axis directions share the same grid/title styling.
    axis_style = dict(gridcolor='light gray', gridwidth=0.2, showgrid=True,
                      title_font_size=16, mirror=True)
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)

    legend_style = dict(title_text="Samples",
                        orientation='v',
                        yanchor="bottom",
                        y=1.01,
                        xanchor="center",
                        x=0.5)
    fig.update_layout(
        legend=legend_style,
        template='simple_white',
        height=500,
        width=700,
        xaxis2=dict(title_text="Temperature (℃)"),
        yaxis=dict(title_text="350nm/330nm", side="right"),
        yaxis2=dict(title_text="d/dT(350nm/330nm)"),
        paper_bgcolor=bgcol,
    )
    return fig


def response(change):
    """Widget observer: redraw the overlay figure for the current selection.

    Reads the selected names from ``overlay_names_list`` and the colour scale
    from ``color_palette``, blanks the trace triplets used by the previous
    selection, then fills one triplet per selected sample from ``data_df``
    and ``Tm_dict``. ``change`` is not used.
    """
    import plotly.express as px
    global old_list, new_list

    colorscl = color_palette.value
    old_list = new_list
    new_list = list(overlay_names_list.value)

    # Reset every trace triplet that the previous selection occupied.
    with g.batch_update():
        for first in range(0, 3*len(old_list), 3):
            ratio_trace = g.data[first]
            deriv_trace = g.data[first+1]
            marker_trace = g.data[first+2]
            for trace in (ratio_trace, deriv_trace, marker_trace):
                trace['x'] = []
                trace['y'] = []
            ratio_trace['line'] = dict(color='black')
            deriv_trace['line'] = dict(color='black')
            deriv_trace['name'] = ''

    # Sample the colour scale evenly; a single selection uses position 0.
    n_colors = len(new_list)
    if n_colors > 1:
        positions = [n/(n_colors - 1) for n in range(n_colors)]
        plot_colors = px.colors.sample_colorscale(colorscl, positions)
    elif n_colors == 1:
        plot_colors = px.colors.sample_colorscale(colorscl, [0.0])

    with g.batch_update():
        for slot, overlay_name in enumerate(new_list):
            first = 3*slot
            Tm = Tm_dict[overlay_name + '_Tm']
            peaks = Tm_dict[overlay_name + '_peaks']
            line_color = plot_colors[slot]

            temperature = data_df[overlay_name, 'x']
            derivative_curve = data_df[overlay_name, 'dy']

            g.data[first]['x'] = temperature
            g.data[first]['y'] = data_df[overlay_name, 'y']
            g.data[first+1]['x'] = temperature
            g.data[first+1]['y'] = derivative_curve
            # Peak markers sit 10% above the derivative value at each peak.
            g.data[first+2]['x'] = temperature[peaks]
            g.data[first+2]['y'] = derivative_curve[peaks]*1.1
            g.data[first]['line'] = dict(color=line_color)
            g.data[first+1]['line'] = dict(color=line_color)
            g.data[first+1]['name'] = '{0}, Tm = {1:4.1f}'.format(overlay_name.strip(' '), Tm)


def update_background(change):
    """Widget observer: copy the ``background_color`` value onto the figure ``g``.

    ``change`` is not used; the current dropdown value is read directly.
    """
    chosen_color = background_color.value
    g.layout['paper_bgcolor'] = chosen_color


def _add_shared_label_axes(fig):
    """Overlay two invisible full-figure axes that carry the shared labels.

    One axes holds the x label and the left y label, the other the right
    y label. Ticks, spines and background patch are hidden on both.
    """
    # Distinct labels make the two add_subplot(1, 1, 1) calls unambiguous
    # (older Matplotlib releases could hand back an already existing Axes
    # for identical arguments -- NOTE(review): confirm for the version used).
    left_axes = fig.add_subplot(1, 1, 1, label='shared_left_labels')
    right_axes = fig.add_subplot(1, 1, 1, label='shared_right_labels')

    for dummy in (left_axes, right_axes):
        dummy.set_xticks([])
        dummy.set_yticks([])
        for side in ('left', 'top', 'right', 'bottom'):
            dummy.spines[side].set_visible(False)
        dummy.patch.set_visible(False)

    left_axes.set_xlabel('Temperature (℃)', labelpad=40, fontsize = 20)
    left_axes.set_ylabel('d/dT(350nm/330nm)', labelpad=60, fontsize = 20, rotation = 90)

    right_axes.yaxis.set_label_position('right')
    right_axes.set_ylabel('350nm/330nm', labelpad=60, fontsize = 20,fontweight = 'normal', rotation = 270)


def plot_all(names, data_df, Tm_dict,
             rl_win = 3,
             ord = 2):
    """Plot every non-skipped sample in a grid of at most three columns.

    Each grid cell is split into a top panel (``y`` against ``x``) and a
    bottom panel (``dy`` against ``x``) with a star above every detected
    peak and a ``Tm`` annotation when at least one peak exists.

    Parameters
    ----------
    names : sequence of str
        Sample names; entries equal to ``'skip'`` (case-insensitive) are
        not plotted.
    data_df : pandas.DataFrame
        Columns keyed by ``(name, 'x')``, ``(name, 'y')``, ``(name, 'dy')``.
    Tm_dict : dict
        Provides ``name + '_peaks'`` and ``name + '_Tm'`` for each name.
    rl_win, ord
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        ``(plt, fig)``: the pyplot module and the created figure.

    Raises
    ------
    ValueError
        If no sample is left to plot.
    """
    # Use the same skip test for counting and for plotting so the grid size
    # always matches the number of panels drawn.
    plotted = [name for name in names if name.casefold() != 'skip'.casefold()]
    num_plotted = len(plotted)
    if num_plotted == 0:
        raise ValueError("No samples to plot: every name is 'skip' or the list is empty.")

    # Up to three samples share a single row; beyond that use rows of three.
    if num_plotted > 3:
        tot_rows = (num_plotted + 2) // 3
        num_cols = 3
        fig_size = (20, 5*tot_rows)
    else:
        tot_rows = 1
        num_cols = num_plotted
        fig_size = (20*(num_plotted/3), 5*tot_rows)

    fig, ax = plt.subplots(nrows = tot_rows,
                           ncols = num_cols,
                           figsize = fig_size,
                           dpi=300,
                           sharey=True,
                           sharex=True,
                           squeeze=False)
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    # Hide the grid axes themselves; only their inset axes are shown.
    for cell in ax.flat:
        cell.axis('off')

    for count, name in enumerate(plotted):
        # Identify row and column for each plot and extract the appropriate data
        row, col = divmod(count, 3)
        x = data_df[name,'x']
        y = data_df[name,'y']
        dy = data_df[name,'dy']
        peaks = Tm_dict[name + '_peaks']
        Tm = Tm_dict[name + '_Tm']

        # Top half: 350/330 ratio. Bottom half: d/dT(350/330 ratio).
        axtop = ax[row,col].inset_axes([0, 0.5, 1.0, 0.5])
        axbot = ax[row,col].inset_axes([0, 0, 1.0, 0.5])

        # Use a dashed line to separate top and bottom plots
        axtop.spines['bottom'].set_visible(False)
        axbot.spines['top'].set_linestyle((10, (1, 5)))

        # Remove axis labels and use the experiment name as the title
        axbot.set_ylabel('')
        axtop.set_ylabel('')
        axtop.set_title(name, fontsize=16)

        # Show the top panel y-axis labels on the right
        axtop.tick_params(bottom=False,labelbottom=False,right=True,labelright=True,left=False,labelleft=False, labelsize=14)
        axbot.tick_params(bottom=True,labelbottom=True,right=False,labelright=False,left=True,labelleft=True, labelsize=14)

        # Plot the top panel data
        axtop.plot(x,y)

        # Plot the derivative and mark each peak with an asterisk 10% above it
        axbot.plot(x,dy)
        axbot.plot(x[peaks],(1.1*dy[peaks]),"*")

        # Annotate the graph with the Tm of the largest peak
        if peaks.size != 0:
            Tm_y = dy[peaks].max()
            axbot.annotate('Tm = {:3.2f}'.format(Tm), xy=(Tm,Tm_y), xytext=(Tm*1.15, Tm_y * 0.85), fontsize=14)

    # The figure-wide labels are created once, not once per sample.
    _add_shared_label_axes(fig)
    return plt, fig



# Define widget functions
# Buttons
# Module-level flags flipped by the button callbacks below; other cells
# poll or reset them.
select_clicked = False

continue_clicked = False

def on_select_clicked(b):
    """Click handler: set the ``select_clicked`` flag (``b`` is unused)."""
    global select_clicked
    select_clicked = True

def on_continue_clicked(b):
    """Click handler: set the ``continue_clicked`` flag (``b`` is unused)."""
    global continue_clicked
    continue_clicked = True

select_button = widgets.Button(description="Select")
select_button.on_click(on_select_clicked)

continue_button = widgets.Button(description="Continue")
continue_button.on_click(on_continue_clicked)

# File uploading

# Upload widget that accepts several files at once.
file_uploader = widgets.FileUpload(multiple = True)

def on_file_upload(change):
    """Observer for the upload widget: save uploads and refresh the pickers.

    Every entry of ``change['new']`` is written to the working directory
    under its base name. Names ending in ``xlsx``/``csv`` are added to the
    global ``files`` list and names ending in ``txt``/``asc`` to the global
    ``lists`` list, each at most once. The two selection widgets are then
    given the updated option lists, and ``current_file``/``current_list``
    are set from them.
    """
    global select_file, current_file, current_list, select_list
    import os

    for details_dict in change['new']:
        # Strip any directory part so an uploaded name cannot point outside
        # the working directory.
        upload_name = os.path.basename(details_dict['name'])
        if not upload_name:
            continue

        if upload_name[-4:] == 'xlsx' or upload_name[-3:] == 'csv':
            # Re-uploading the same file must not create a second entry.
            if upload_name not in files:
                files.append(upload_name)
        elif upload_name[-3:] == 'txt' or upload_name[-3:] == 'asc':
            if upload_name not in lists:
                lists.append(upload_name)

        with open(upload_name, "wb") as f:
            f.write(details_dict['content'])

    select_file.options = tuple(files)
    select_list.options = tuple(lists)
    # Read the selections after the options were refreshed so the globals
    # mirror what the widgets now hold.
    current_file = select_file.value
    current_list = select_list.value

def on_select_file(change):
    """Observer: store the newly selected value in the global ``current_file``."""
    global current_file
    current_file = change['new']

def on_select_list(change):
    """Observer: store the newly selected value in the global ``current_list``."""
    global current_list
    current_list = change['new']


### Importing data

Run the following cell to select the data file and the sample-names file. <br><br>Choose *Default* in the sample lists to have the names "Sample #1", "Sample #2", … assigned to the samples automatically.

In [2]:
# Selecting files

# Offer data files and sample-list files already present in the working
# directory; "Default" stands for auto-generated sample names.
files = glob.glob("*.xlsx") + glob.glob("*.csv")
lists = ["Default"] + (glob.glob("*.asc") + glob.glob("*.txt"))

current_file = None
current_list = None

select_markdown = widgets.HTML(value="Use the <b>Upload</b> button to "
                               "upload your files if they are not among the choices you see "
                               "listed below.<br/>Click on the file names among the lists below "
                               "to select the <b>Data</b> to plot and list "
                               " of <b>Samples</b>. <br>Click the "
                               " <b>Continue</b> button to proceed.")


select_file = widgets.Select(options = files, description = 'Data files:')
select_list = widgets.Select(options = lists, description = 'Sample lists:')

vbox = widgets.VBox([select_markdown])
hbox = widgets.HBox([file_uploader, select_file, select_list])

display(vbox)
display(hbox)
display(continue_button)

file_uploader.observe(on_file_upload, 'value')
select_file.observe(on_select_file, 'value')
# The sample-list picker must update current_list; it was previously wired
# to on_select_file, which overwrote current_file with a list name.
select_list.observe(on_select_list, 'value')

# Block this cell until the Continue button is clicked, while still letting
# the widgets react to user input.
with ui_events() as poll:
    while not continue_clicked:
        poll(10)          # React to UI events (up to 10 at a time)
        time.sleep(0.1)

# Re-arm the flag so the cell can be run again.
continue_clicked = False
# Importing data

# Take the final selections straight from the widgets.
current_file = select_file.value
current_list = select_list.value

print("Reading data from ", current_file, current_list)

data, num_samples = read_data(current_file)

# Listing sample names for editing
# names_text is always assigned here; the former guard on continue_clicked
# was always true and would have left it undefined otherwise.
if current_list != "Default":
    with open(current_list) as file:
        names_text = file.read()
else:
    # One "Sample #k" line per sample, with no trailing newline.
    names_text = "\n".join("Sample #" + str(i + 1) for i in range(int(num_samples)))

# Editable list of names; one visible row per line of text.
textbox = widgets.Textarea(
    value=names_text,
    placeholder='Type something',
    description='Samples:',
    disabled=False,
    rows = names_text.count('\n') + 1,
    layout=widgets.Layout(height="auto", width="40%")
)

samples_markdown = widgets.HTML(value="The current sample names are listed to the right.<br>"
                                      "You can edit them individually. If you need to <br>"
                                      "skip a particular plot, then replace the name with <br>"
                                      "<b>'skip'</b> (case insensitive). <br><br>"
                                      "You can edit the names and re-plot "
                                      "without <br>running this cell again.")


hbox2 = widgets.HBox([samples_markdown, textbox])
display(hbox2)


VBox(children=(HTML(value='Use the <b>Upload</b> button to upload your files if they are not among the choices…

HBox(children=(FileUpload(value=(), description='Upload', multiple=True), Select(description='Data files:', op…

Button(description='Continue', style=ButtonStyle())

Reading data from  example_excel.xlsx samples.txt


HBox(children=(HTML(value="The current sample names are listed to the right.<br>You can edit them individually…

## Plotting all data

Run the following cell to plot all of the listed experiments.

In [12]:
# Plotting all samples
# Take one name per non-blank line of the text box, trimmed of surrounding
# whitespace.
names = [line.strip() for line in textbox.value.splitlines() if line.strip() != '']
names_text = textbox.value

names_lower = [n.lower() for n in names]
num_skip = names_lower.count('skip')
num_plotted = len(names) - num_skip

# The names are valid when every plotted name is unique and there is exactly
# one name (plotted or skipped) per group of three data columns. Skip markers
# are excluded case-insensitively, so mixing "skip" and "Skip" no longer
# fails the uniqueness check.
unique_plotted = {n for n in names if n.lower() != 'skip'}
validate = (len(unique_plotted) == num_plotted) and (len(names) == data.shape[1]/3)

if validate:
    # Compute derivatives, peaks and Tm values for every listed name.
    data_df, Tm_dict = make_data_df(names, data)

    # Output area that receives the Matplotlib figure.
    plot_all_out = widgets.Output(layout={'border': '1px solid black','width': '50%'},clear_output=True,wait=True)

    @plot_all_out.capture()
    def plotting_all_out(names, data_df, Tm_dict):
        """Draw the grid of all samples inside ``plot_all_out``.

        Stores the created figure in the global ``plot_all_fig`` so that
        ``save_plotall`` can write it to disk later.
        """
        global plot_all_fig
        plt_all, plot_all_fig = plot_all(names, data_df, Tm_dict)
        plt_all.show()

    def save_plotall(b):
        """Click handler: save ``plot_all_fig`` under ``./Saves``.

        The file name comes from ``save_plotall_name``; a trailing
        ``.pdf``/``.png``/``.jpg`` is replaced by the file type selected in
        ``save_plotall_file_type``. ``b`` is unused.
        """
        global save_plotall_name, save_plotall_file_type, plot_all_fig
        
        from pathlib import Path
        Path('./Saves').mkdir(parents=True, exist_ok = True)
        
        file_name = './Saves/' + save_plotall_name.value.rstrip().lstrip()
        # Drop an extension the user typed so it is not doubled below.
        if (file_name[-4:] in ['.pdf','.png','.jpg']):
            file_name = file_name[:-4] + '.' + save_plotall_file_type.value
        else:
            file_name = file_name + '.' + save_plotall_file_type.value
        
        if (save_plotall_file_type.value == 'pdf') or (save_plotall_file_type.value == 'png') or (save_plotall_file_type.value == 'jpg'):
            plot_all_fig.savefig(file_name, dpi = 300)

        print("Saved to: ",file_name)

    def save_table(b):
        """Click handler: write a table of sample names and Tm values.

        Builds a two-column frame from ``names`` and ``Tm_dict`` and saves
        it under ``./Saves`` as csv or xlsx, according to
        ``save_table_file_type``. ``b`` is unused.
        """
        global save_table_name, save_table_file_type, Tm_dict
        from pathlib import Path
        Path('./Saves').mkdir(parents=True, exist_ok = True)
        
        file_name = './Saves/' + save_table_name.value.rstrip().lstrip() 
        # Drop an extension the user typed so it is not doubled below.
        if (file_name[-4:] in ['.csv']):
            file_name = file_name[:-4] + '.' + save_table_file_type.value
        elif (file_name[-5:] in ['.xlsx']):
            file_name = file_name[:-5] + '.' + save_table_file_type.value
        else:
            file_name = file_name + '.' + save_table_file_type.value
        
        # One row per entry of names (entries named 'skip' are not filtered
        # out here).
        Tm_df = pd.DataFrame()
        Tm_df['Samples'] = names
        Tm_values = []
        for name in names:
            Tm_values.append(Tm_dict[name + '_Tm'])
        Tm_df['Tm (℃)'] = Tm_values

        if (save_table_file_type.value == 'csv'):
            Tm_df.to_csv(file_name)
        if (save_table_file_type.value == 'xlsx'):
            Tm_df.to_excel(file_name)
        
        print("Saved to: ",file_name)  

    # Controls for saving the grid figure.
    description_style = {'description_width': 'initial'}
    save_plotall_button = widgets.Button(description="Save")
    save_plotall_name = widgets.Text(
        value='plotall',
        placeholder='name = *',
        description='Name:',
        disabled=False,
        #style = description_style
        )

    save_plotall_file_type = widgets.RadioButtons(
        options=['pdf', 'png', 'jpg'],
        description='Image file type:',
        disabled=False
    )

    # Controls for saving the Tm table.
    save_table_button = widgets.Button(description="Save Tm Table")
    save_table_name = widgets.Text(
        value='Tm_table',
        placeholder='name = *',
        description='Name:',
        disabled=False
        )
    save_table_file_type = widgets.RadioButtons(
        options=['csv', 'xlsx'],
        description='Table file type:',
        disabled=False,
        #style = description_style
    )
    # Hidden HTML widget that injects CSS laying the radio buttons out in a row.
    _style = widgets.HTML(
        "<style>.widget-radio-box {flex-direction: row}.widget-radio-box"
        " label{margin:10px; width: 100px}</style>",
        layout=widgets.Layout(display="none")
    
    )

    # Assemble the layout: figure output on the left, save controls on the right.
    plotall_hbox = widgets.HBox([save_plotall_file_type, _style])
    plotall_vbox = widgets.VBox([save_plotall_name, plotall_hbox, save_plotall_button])
                                 
    save_table_hbox = widgets.HBox([save_table_file_type, _style])
    save_table_vbox = widgets.VBox([save_table_name, save_table_hbox, save_table_button])

    save_table_plotall_hbox = widgets.HBox([plotall_vbox, save_table_vbox], 
                                           layout={'width': '50%'})

    final_hbox = widgets.HBox([plot_all_out, save_table_plotall_hbox])

    #display(plot_all_out)
    display(final_hbox)#, save_table_plotall_hbox)
    # The layout is displayed first; the figure is then drawn into plot_all_out.
    plotting_all_out(names, data_df, Tm_dict)

    save_plotall_button.on_click(save_plotall)
    save_table_button.on_click(save_table)

else:
    # Validation failed: explain what the text box must contain.
    print('The number of unique names does not match the data set size.\n', 
          'Enter one unique name per line in the text box. Enter "skip"\n',
           'for any samples you do not want to plot. Then re-run \n',
           'this cell.')

HBox(children=(Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_rig…

## Choosing samples to overlay

Run the following cell to choose which samples to overlay in a single plot.

In [4]:
# Plotting selected samples on one graph
import plotly.graph_objects as go

def save_plotly(b):
    """Click handler: export the overlay figure ``g`` into ``./Saves``.

    The base name is taken from ``save_plotly_name``; a trailing ``.pdf``,
    ``.png`` or ``.html`` typed by the user is replaced by the type chosen
    in ``save_plotly_file_type``. pdf and png are written with
    ``write_image``, html with ``write_html``. ``b`` is unused.
    """
    from pathlib import Path
    Path('./Saves').mkdir(parents=True, exist_ok = True)

    chosen_type = save_plotly_file_type.value
    base_name = './Saves/' + save_plotly_name.value.strip()

    # Remove at most one recognised extension before appending the chosen one.
    for known_ext in ('.pdf', '.png', '.html'):
        if base_name[-len(known_ext):] == known_ext:
            base_name = base_name[:-len(known_ext)]
            break
    file_name = base_name + '.' + chosen_type

    if chosen_type in ('pdf', 'png'):
        g.write_image(file_name, scale = 4)
    elif chosen_type == 'html':
        g.write_html(file_name)
    print("Saved to: ",file_name)

# Help text shown above the overlay figure (a space was missing between
# "automatically" and "update").
overlay_markdown = widgets.HTML(value="Choose which samples to overlay in a "
                                      "single plot for direct comparison. Use the "
                                      "Shift/Command keys to select multiple files.<br>"
                                      "You can select different samples and it should automatically "
                                      "update without running this cell again.")

# Skipped samples cannot be overlaid.
names_noskip = [n for n in names if n.lower() != "skip"]

# Current selection, read and replaced by the response() observer.
new_list=[]
overlay_names_list = widgets.SelectMultiple(options = names_noskip, 
                                            description = 'Overlay list:',
                                            rows = len(names_noskip),
                                            layout=widgets.Layout(height="auto", width="40%"))

color_palette = widgets.Dropdown(
    options=['jet', 'rainbow', 'viridis', 'fall', 'bluered', 'earth', 'ice'],
    value='jet',
    description='Line Colors:',
    disabled=False
)

background_color = widgets.Dropdown(
    options=['aliceblue', 'white', 'palegoldenrod', 'lightgrey', 'lightskyblue', 'lightgreen', 'honeydew'],
    value='white',
    description='Sheet Color:',
    disabled=False
)

save_plotly_button = widgets.Button(description="Save")
# The placeholder lists the extensions that save_plotly actually recognises.
save_plotly_name = widgets.Text(
    value='overlay',
    placeholder='name = *.pdf, *.png, *.html',
    description='Name of file:',
    disabled=False
    )

save_plotly_file_type = widgets.RadioButtons(
    options=['pdf', 'png', 'html'],
    description='Image file type:',
    disabled=False
)
# Hidden HTML widget that injects CSS laying the radio buttons out in a row.
_style = widgets.HTML(
    "<style>.widget-radio-box {flex-direction: row}.widget-radio-box"
    " label{margin:10px; width: 100px}</style>",
    layout=widgets.Layout(display="none"),
)

# Only selectable samples need placeholder traces; response() fills one
# triplet of traces per selected name, and the selection is always drawn
# from names_noskip.
fig = make_blank_overlay(names_noskip)

g = go.FigureWidget(fig)

vbox4=widgets.VBox([color_palette, background_color, save_plotly_name, save_plotly_file_type, _style, save_plotly_button])
hbox3=widgets.HBox([overlay_names_list,vbox4])
vbox3=widgets.VBox([overlay_markdown, g, hbox3])
display(vbox3)

# Wire the controls: selection and palette redraw the traces, the sheet
# colour only updates the figure background.
save_plotly_button.on_click(save_plotly)
overlay_names_list.observe(response, names="value")
color_palette.observe(response, names = "value")
background_color.observe(update_background, names = "value")


VBox(children=(HTML(value='Choose which samples to overlay in a single plot for direct comparison. Use the Shi…

Saved to:  ./Saves/overlay.pdf
Saved to:  ./Saves/overlay.png
