In [7]:
# Scientific Library
import numpy as np
import pandas as pd
from pathlib import Path
#import plotly as py
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from metadamage import utils

In [8]:
# Collect every fit-result CSV below the working directory. The paths are
# sorted because rglob yields them in filesystem order, and the file order
# decides the legend order and colour assignment in the cells further down.
input_files = sorted(Path("").rglob("./data/fits/*.csv"))

In [9]:
# Read each fit-result CSV, call its first column 'taxid' and tag every row
# with the name that utils.extract_name derives from the file path.
dfs = []
for file in input_files:
    df = pd.read_csv(file)
    df.columns = ['taxid', *df.columns[1:]]
    df['name'] = utils.extract_name(file, max_length=20)
    dfs.append(df)


In [10]:
# Stack the per-file tables into one long frame and add helper columns for
# plotting: log10 / sqrt transforms of the counts and formatted count strings
# for the hover tooltips.
df = pd.concat(dfs, axis=0, ignore_index=True)
df["N_alignments_log10"] = np.log10(df["N_alignments"])
df["N_alignments_sqrt"] = np.sqrt(df["N_alignments"])
df["N_sum_total_log10"] = np.log10(df["N_sum_total"])

# Format the counts element-wise on the single column; a row-wise
# DataFrame.apply(axis=1) builds a full Series per row just to read one value.
df["N_alignments_str"] = df["N_alignments"].map(utils.human_format)
df["N_sum_total_str"] = df["N_sum_total"].map(utils.human_format)

df

Unnamed: 0,taxid,D_max,n_sigma,D_max_lower_hpdi,D_max_upper_hpdi,q_mean,concentration_mean,D_max_marginalized_mean,N_alignments,N_z1_forward,...,asymmetry,normalized_noise,normalized_noise_forward,normalized_noise_reverse,name,N_alignments_log10,N_alignments_sqrt,N_alignments_str,N_sum_total_log10,N_sum_total_str
0,1,0.448445,8.412160,0.435209,0.464514,0.625865,1559.919819,0.447680,60445656,13113435,...,4.819366,0.292090,0.293354,0.290583,KapK-198A-Ext-55-Lib...,7.781365,7774.680444,60.4M,8.532024,340M
1,131567,0.448161,8.422496,0.430949,0.460320,0.626324,1552.312281,0.447992,60445181,13113307,...,4.800994,0.292094,0.293356,0.290588,KapK-198A-Ext-55-Lib...,7.781362,7774.649896,60.4M,8.532019,340M
2,2759,0.451838,8.312691,0.435601,0.464251,0.626059,1527.901714,0.451684,60094660,13001213,...,4.934529,0.293807,0.295013,0.292372,KapK-198A-Ext-55-Lib...,7.778836,7752.074561,60.1M,8.528095,337M
3,33090,0.452352,8.499618,0.438455,0.467702,0.625721,1549.861381,0.451898,60008321,12978386,...,5.308374,0.293940,0.294898,0.292749,KapK-198A-Ext-55-Lib...,7.778211,7746.503792,60M,8.527348,337M
4,35493,0.452262,8.533230,0.440029,0.469333,0.625563,1550.216294,0.452137,60008225,12978366,...,4.739024,0.293940,0.294898,0.292750,KapK-198A-Ext-55-Lib...,7.778211,7746.497596,60M,8.527347,337M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,1396,0.000000,-2.556453,0.000000,0.000000,0.707621,957.671831,1.056107,12,1,...,-2.370465,3.741657,3.741657,3.741657,SJArg-1-Nit__number_...,1.079181,3.464102,12,1.778151,60
3996,2785011,,-0.477179,,,0.539371,970.336541,0.210705,11,0,...,1.979100,5.385165,6.182412,4.422166,SJArg-1-Nit__number_...,1.041393,3.316625,11,1.681241,48
3997,2633480,0.166667,0.431609,0.000000,0.333333,0.496597,963.755151,0.197924,11,6,...,-0.541363,5.147815,5.961543,3.840573,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.071882,118
3998,2623841,0.000000,0.397445,0.000000,0.250000,0.525748,994.856521,0.149555,11,4,...,2.275519,4.580591,6.086050,2.712113,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.075547,119


In [11]:
# https://plotly.com/python/discrete-color/#color-sequences-in-plotly-express
cmap = px.colors.qualitative.D3

# One fixed colour per file name, in order of first appearance in `df`.
# The palette is a finite list, so wrap around with a modulo instead of
# raising IndexError when there are more files than colours.
d_cmap = {
    name: cmap[i % len(cmap)]
    for i, (name, _) in enumerate(df.groupby("name", sort=False))
}

# Alternative palette: px.colors.qualitative.Dark24


In [56]:
# Hover tooltip: file name in bold, then the taxid, the plotted x/y values and
# the formatted counts that are passed along as `customdata`.
hovertemplate = ("<b>%{text}</b><br><br>"
                 "taxid: %{customdata[0]}<br>"
                 "<br>n sigma: %{x:5.2f}<br>"
                 "D max:    %{y:.2f}<br>"
                 "<br>N alignments: %{customdata[1]}<br>"
                 "N sum total:   %{customdata[2]}<br>"
                 "<extra></extra>")

# Marker styling shared by every trace (also reused by later cells).
d_marker = dict(opacity=0.2, line_width=0, sizemode='area', sizeref=10, sizemin=0)

# Columns exposed to the hover template as customdata[0], [1] and [2].
hover_columns = ['taxid', 'N_alignments_str', 'N_sum_total_str']

fig = go.Figure()

# One trace per input file so each file gets its own colour and legend entry.
for name, group in df.groupby("name", sort=False):
    fig.add_trace(go.Scatter(
        x=group['n_sigma'],
        y=group['D_max'],
        name=name, # legend entry

        mode="markers",
        marker_size=group['N_alignments_sqrt'],
        marker_color=d_cmap[name],
        marker=d_marker,

        text=group['name'], # 'text' in hovertemplate
        # customdata is matched to the points by position, so it has to be
        # this group's rows; passing the whole frame would show the taxid and
        # counts of unrelated rows for every file after the first.
        customdata=group[hover_columns],
        hovertemplate=hovertemplate,
        ),
        )

fig.update_layout(
    title='Fit Results',
    xaxis_title=r"$\Large n_\sigma$",
    yaxis_title=r"$\Large D_\mathrm{max}$",
    font_size=16,
    legend=dict(title="Files", title_font_size=20),
    width=1400,
    height=800,
    )

fig.update_yaxes(range=[0, 1])

# include_mathjax is needed for the LaTeX axis titles in the standalone HTML.
fig.write_html('./figures/fig.html', include_mathjax='cdn', auto_open=True)

In [8]:
subtitles = [r'$\Large n_\sigma$',  r'$\Large D_\mathrm{max}$', r'$\Large N_{z=1}$', r'$\Large N_\mathrm{sum}$', r'$\Large \text{Noise (normalized)}$']

# 3x2 grid; only five panels are used, the bottom-right one stays empty.
fig_forward_reverse = make_subplots(rows=3, cols=2,
            subplot_titles=subtitles)

# One entry per panel:
# (row, col, x column, y column, x-axis title, y-axis title)
panels = [
    (1, 1, 'n_sigma_forward', 'n_sigma_reverse',
     r"$\Large n_\sigma \text{ forward}$", r"$\Large n_\sigma \text{ reverse}$"),
    (1, 2, 'D_max_forward', 'D_max_reverse',
     r"$\Large D_\mathrm{max} \text{ forward}$", r"$\Large D_\mathrm{max} \text{ reverse}$"),
    (2, 1, 'N_z1_forward', 'N_z1_reverse',
     r"$\Large N_{z=1} \text{ forward}$", r"$\Large N_{z=1} \text{ reverse}$"),
    (2, 2, 'N_sum_forward', 'N_sum_reverse',
     r"$\Large N_\mathrm{sum} \text{ forward}$", r"$\Large N_\mathrm{sum} \text{ reverse}$"),
    (3, 1, 'normalized_noise_forward', 'normalized_noise_reverse',
     r'$\Large \text{Noise forward}$', r"$\Large \text{Noise reverse}$"),
]

for i, (name, group) in enumerate(df.groupby("name", sort=False)):
    # Same name/colour/legend group in every panel, so one legend click
    # toggles the file everywhere. The palette wraps around when there are
    # more files than colours instead of raising IndexError.
    trace_style = dict(
        name=name,
        mode="markers",
        legendgroup=name,
        marker=dict(color=cmap[i % len(cmap)], opacity=0.2),
    )

    for k, (row, col, x_col, y_col, _, _) in enumerate(panels):
        fig_forward_reverse.add_trace(
            # Only the first panel contributes a legend entry per file.
            go.Scatter(x=group[x_col], y=group[y_col], showlegend=(k == 0), **trace_style),
            row=row, col=col)

# Axis titles for every panel
for row, col, _, _, x_title, y_title in panels:
    fig_forward_reverse.update_xaxes(row=row, col=col, title_text=x_title)
    fig_forward_reverse.update_yaxes(row=row, col=col, title_text=y_title)

# Update title and width, height
fig_forward_reverse.update_layout(height=1100, width=1500, title=dict(text="Forward vs Reverse", font_size=20),
    legend=dict(
        title_text="Files",
        title_font_size=20,
        font_size=16,
        tracegroupgap=2)
    )

fig_forward_reverse.write_html('./figures/plotly_forward_reverse.html', include_mathjax='cdn', auto_open=True)


In [10]:
# Columns shown as panels of the scatter matrix.
dimensions = [
    "n_sigma",
    "D_max",
    "N_alignments_log10",
    "N_sum_total_log10",
    "q_mean",
    "concentration_mean",
    "asymmetry",
    "normalized_noise",
]

# Tooltip content: show the formatted count strings, hide the log10 columns
# and the file name (the name is already the hover title).
hover_data = {
    'name': False,
    'N_alignments_str': True,
    'N_alignments_log10': False,
    "N_sum_total_str": True,
    "N_sum_total_log10": False,
}

fig_scatter_matrix = px.scatter_matrix(
    df,
    dimensions=dimensions,
    color="name",
    color_discrete_sequence=cmap,
    hover_name="name",
    hover_data=hover_data,
    opacity=0.1,
    size_max=10,
    title='Scatter Matrix',
    width=1600,
    height=1200,
)
fig_scatter_matrix.update_traces(diagonal_visible=False)
fig_scatter_matrix.update_layout(legend_title="Files", font_size=16)

fig_scatter_matrix.write_html('./figures/plotly_scatter_matrix.html', include_mathjax='cdn', auto_open=True)



In [123]:
def plotly_histogram(data, name, bins=50, density=True, range=None, ):
    """Return a step-line ``go.Scatter`` trace showing a histogram of ``data``.

    Parameters
    ----------
    data : array-like of numbers
        Values to bin. NaN entries are dropped before binning.
    name : str
        Trace name.
    bins, density, range
        Passed straight through to ``numpy.histogram``.

    Returns
    -------
    go.Scatter
        Line trace with one point per bin edge; hidden from the legend.
    """
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    counts, edges = np.histogram(values, bins=bins, density=density, range=range)

    # np.histogram returns one more edge than counts. Repeat the last count so
    # x and y have equal length; otherwise the final edge is dropped and the
    # last bin is never drawn.
    heights = np.concatenate([counts, counts[-1:]])

    trace = go.Scatter(
        x=edges,
        y=heights,
        mode='lines',
        name=name,
        # 'hv': horizontal across the bin, then vertical to the next height
        # (like matplotlib's 'steps-post').
        line_shape='hv',
        showlegend=False,
    )
    return trace


In [124]:
dimensions = ["n_sigma", "D_max", "N_alignments_log10", "N_sum_total_log10",
              "q_mean", "concentration_mean", "asymmetry", "normalized_noise"]
N_cols = len(dimensions)

# LaTeX axis title for every plotted column.
d_axis_names = {"n_sigma": r'$\Large n_\sigma$',
                "D_max": r"$\Large D_\mathrm{max}$",
                "N_alignments_log10": r"$\Large \log_{10} N_\mathrm{alignments}$",
                "N_sum_total_log10": r"$\Large \log_{10} N_\mathrm{sum total}$",
                "q_mean": r"$\Large q_\mathrm{mean}$",
                "concentration_mean": r"$\Large \phi_\mathrm{mean}$",
                "asymmetry": r"$\Large \alpha$",
                "normalized_noise": r"$\Large \text{noise}$",
                }

# NOTE: this rebinds `fig_forward_reverse` (previously the forward/reverse
# figure) to the hand-built scatter matrix; the name is kept because the
# following inspection cell refers to it.
fig_forward_reverse = make_subplots(rows=N_cols, cols=N_cols,
                                    shared_xaxes=True, shared_yaxes=True,
                                    horizontal_spacing=0.05/N_cols, vertical_spacing=0.05/N_cols,
                                    )

# Split by file once instead of re-running groupby for every panel.
groups = list(df.groupby("name", sort=False))

for i, col_i in enumerate(dimensions):
    # Only the diagonal and the lower triangle (j <= i) are filled.
    for j, col_j in enumerate(dimensions[:i + 1]):

        for name, group in groups:

            if i == j:
                # diagonal: per-file histogram of the column
                trace = plotly_histogram(group[col_j], name, bins=50, density=True, range=None)
                # Use the file's colour; the default colourway would not match
                # the scatter panels.
                trace.line.color = d_cmap[name]
            else:
                # lower triangle: column j on x against column i on y
                trace = go.Scatter(
                    x=group[col_j],
                    y=group[col_i],
                    name=name,
                    # a single legend entry per file, taken from the
                    # bottom-left panel
                    showlegend=(i == N_cols - 1 and j == 0),
                    mode="markers",
                    marker_color=d_cmap[name],
                    marker=d_marker,
                )

            # Group all traces of a file so its legend entry toggles the file
            # in every panel, not just the bottom-left one.
            trace.legendgroup = name
            fig_forward_reverse.add_trace(trace, row=i+1, col=j+1)

        # bottom row
        if i == N_cols-1:
            fig_forward_reverse.update_xaxes(row=i+1, col=j+1, title_text=d_axis_names[col_j])

        # first column
        if j == 0:
            fig_forward_reverse.update_yaxes(row=i+1, col=j+1, title_text=d_axis_names[col_i])


# Update title and width, height
fig_forward_reverse.update_layout(height=1200, width=1600, title=dict(text="Scatter Matrix", font_size=30),
    legend=dict(
        title_text="Files",
        title_font_size=20,
        font_size=16,
        tracegroupgap=2)
    )

fig_forward_reverse.write_html('./figures/plotly_scatter2.html', include_mathjax='cdn', auto_open=True)



In [125]:
# Inspect the layout of the subplot figure (axis domains and `matches` links).
fig_forward_reverse.layout

Layout({
    'height': 1200,
    'legend': {'font': {'size': 16}, 'title': {'font': {'size': 20}, 'text': 'Files'}, 'tracegroupgap': 2},
    'template': '...',
    'title': {'font': {'size': 30}, 'text': 'Scatter Matrix'},
    'width': 1600,
    'xaxis': {'anchor': 'y', 'domain': [0.0, 0.11953125], 'matches': 'x57', 'showticklabels': False},
    'xaxis10': {'anchor': 'y10', 'domain': [0.12578125, 0.24531250000000002], 'matches': 'x58', 'showticklabels': False},
    'xaxis11': {'anchor': 'y11', 'domain': [0.2515625, 0.37109375], 'matches': 'x59', 'showticklabels': False},
    'xaxis12': {'anchor': 'y12',
                'domain': [0.37734375000000003, 0.49687500000000007],
                'matches': 'x60',
                'showticklabels': False},
    'xaxis13': {'anchor': 'y13', 'domain': [0.503125, 0.62265625], 'matches': 'x61', 'showticklabels': False},
    'xaxis14': {'anchor': 'y14', 'domain': [0.62890625, 0.7484375], 'matches': 'x62', 'showticklabels': False},
    'xaxis15': {'anc

In [32]:
# Display whatever frame `df` is currently bound to.
df

Unnamed: 0,Column A,Column B,Column C,Column D,Fruit
0,-0.628858,0.348824,0.74476,1.217626,apple
1,-1.255304,-1.850041,-0.107762,-0.708734,apple
2,-0.280992,-0.149318,0.134042,0.021615,grape
3,0.979794,-1.390203,0.735194,-0.582987,apple
4,1.725568,-0.00519,0.595986,-0.085435,apple
5,-0.779018,-0.339418,-0.086081,-0.265089,grape
6,0.854589,0.374251,0.173451,-0.175383,pear
7,-0.755388,1.160708,0.560554,1.222288,pear
8,-1.242681,-0.763406,-1.963429,-0.353671,apple
9,0.324444,-0.245808,-0.220272,-0.339451,pear


In [None]:
# Scratch cell: experiments with axis linking on whichever figure `fig`
# currently refers to.

# Remove the `matches` link from every y axis.
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2) # assuming second facet


# Link every x axis to 'x' and every y axis to 'y'; the second call overrides
# the matches=None set at the top of this cell.
fig.update_xaxes(matches='x')
fig.update_yaxes(matches='y')


In [97]:

# Minimal example: three stacked subplots with a shared x axis, used to see
# how `matches` is set up and what clearing it on one axis does.
fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=True)

# x and y must have the same length: the original y lists carried a fifth
# value with no matching x, which has been dropped here.
fig.add_trace(go.Scatter(x=[1, 1.75, 2.5, 3.5], y=[-1, 3, 0, 3]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[1, 1.75, 2.5, 3.5], y=[4, 2, 6, 3]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[1, 1.5, 2, 2.5, 3, 3.5], y=[4, 2, 6, 3, 5, 0]),
              row=3, col=1)

fig.update_layout(width=700, height=500)

# Unlink the middle subplot's x axis from the shared axis.
fig.update_layout(xaxis2_matches=None)



In [98]:
# Inspect the layout after clearing `matches` on xaxis2 in the previous cell.
fig.layout

Layout({
    'height': 500,
    'template': '...',
    'width': 700,
    'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'matches': 'x3', 'showticklabels': False},
    'xaxis2': {'anchor': 'y2', 'domain': [0.0, 1.0], 'showticklabels': False},
    'xaxis3': {'anchor': 'y3', 'domain': [0.0, 1.0]},
    'yaxis': {'anchor': 'x', 'domain': [0.7333333333333333, 1.0]},
    'yaxis2': {'anchor': 'x2', 'domain': [0.36666666666666664, 0.6333333333333333]},
    'yaxis3': {'anchor': 'x3', 'domain': [0.0, 0.26666666666666666]}
})

In [126]:
# Reference example: px.scatter_matrix on the iris data set.
df2 = px.data.iris()
fig = px.scatter_matrix(df2,
    dimensions=["sepal_width", "sepal_length", "petal_width", "petal_length"],
    color="species", symbol="species",
    title="Scatter matrix of iris data set",
    # Build the labels from the frame being plotted (df2); using `df` here
    # would leave the iris column names unchanged.
    labels={col: col.replace('_', ' ') for col in df2.columns}) # remove underscore
fig.update_traces(diagonal_visible=False)
fig.show()


In [127]:
# Inspect the layout of the px.scatter_matrix figure.
fig.layout

Layout({
    'dragmode': 'select',
    'legend': {'title': {'text': 'species'}, 'tracegroupgap': 0},
    'template': '...',
    'title': {'text': 'Scatter matrix of iris data set'}
})

In [128]:
# Inspect the layout's first x axis object.
fig.layout.xaxis

layout.XAxis()

In [130]:
# Set `matches` to None on the layout's first x axis.
fig.update_layout(xaxis_matches=None)

In [None]:

# Reference example: a go.Splom (scatter-plot matrix) trace on the iris data,
# loaded from the plotly datasets repository.
df2 = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/iris-data.csv')
# Integer code per class, used below as the marker colour.
index_vals = df2['class'].astype('category').cat.codes

trace = go.Splom(
                dimensions=[dict(label='sepal length',
                                 values=df2['sepal length']),
                            dict(label='sepal width',
                                 values=df2['sepal width']),
                            dict(label='petal length',
                                 values=df2['petal length']),
                            # LaTeX label instead of the column name
                            # (presumably a label-rendering test)
                            dict(label=r'$\mu$',
                                 values=df2['petal width'])],
                showupperhalf=False, # hide the panels above the diagonal
                diagonal_visible=False, # hide the panels on the diagonal
                text=df2['class'],
                marker=dict(color=index_vals,
                            showscale=False, # colors encode categorical variables
                            line_width=0)
                )

fig = go.Figure(data=trace)

fig.update_layout(
    title='Iris Data set',
    width=600,
    height=600,
)

fig.show()

In [138]:
# Show the repr of the Splom trace (dimensions, values and settings).
trace

Splom({
    'diagonal': {'visible': False},
    'dimensions': [{'label': 'sepal length',
                    'values': array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8, 4.3,
                                     5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. , 5. , 5.2,
                                     5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4, 5.1, 5. , 4.5,
                                     4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4, 6.9, 5.5, 6.5, 5.7,
                                     6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6,
                                     5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6. , 5.7, 5.5, 5.5, 5.8, 6. ,
                                     5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2,
                                     5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4,
                                     6.8, 5.7, 5.8, 6.4

In [143]:

# Serve the current `fig` from a minimal Dash app.
# NOTE(review): these imports sit mid-notebook rather than in the import cell,
# and newer Dash releases expose `dcc` / `html` via `from dash import dcc, html`
# - confirm against the installed Dash version before changing.
import dash
import dash_core_components as dcc
import dash_html_components as html

app = dash.Dash()
# Page layout: a single graph component showing `fig`.
app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(debug=True, use_reloader=False)  # Turn off reloader if inside Jupyter

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


In [54]:

# Columns to plot and the LaTeX label shown for each of them.
dimensions = ['D_max', 'n_sigma', 'q_mean']
labels_list = [r'$D max$', r'$n \sigma$', r'$q mean$']
labels = dict(zip(dimensions, labels_list))

# Custom hover tooltip: the columns below are attached to every point and are
# referenced by position (customdata[0] .. customdata[3]) in the template.
custom_data_columns = ['name', 'taxid', 'N_alignments_str', 'N_sum_total_str']
hovertemplate = "".join([
    "<b>%{customdata[0]}</b><br><br>",
    "taxid: %{customdata[1]}<br>",
    "<br>n sigma: %{x:5.2f}<br>",
    "D max:    %{y:.2f}<br>",
    "<br>N alignments: %{customdata[2]}<br>",
    "N sum total:   %{customdata[3]}<br>",
    "<extra></extra>",
])

fig = px.scatter_matrix(
    df,
    dimensions=dimensions,
    labels=labels,
    color="name",
    color_discrete_sequence=cmap,
    hover_name="name",
    custom_data=custom_data_columns,
    opacity=0.1,
    size_max=10,
    title='Scatter Matrix',
    width=800,
    height=600,
)

# Keep only the lower triangle and apply the custom tooltip to every trace.
fig.update_traces(diagonal_visible=False, showupperhalf=False, hovertemplate=hovertemplate)
fig.update_layout(legend_title="Files", font_size=16)



In [55]:
# Shape of the custom data attached to the first trace:
# (points in the trace, number of custom_data columns).
fig.data[0].customdata.shape

(1000, 4)

In [2]:
# Load the iris example data set bundled with plotly express.
# NOTE(review): this rebinds `df`, the name other cells use for the fit results.
df = px.data.iris()

In [3]:
# Display the iris frame.
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,3
146,6.3,2.5,5.0,1.9,virginica,3
147,6.5,3.0,5.2,2.0,virginica,3
148,6.2,3.4,5.4,2.3,virginica,3


In [4]:
# Distinct values of the `species` column.
df.species.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [6]:

# Assign one D3 palette colour to each species, in order of first appearance.
cmap = px.colors.qualitative.D3

d_cmap = {
    name: cmap[i]
    for i, (name, _) in enumerate(df.groupby("species", sort=False))
}
d_cmap

{'setosa': '#1F77B4', 'versicolor': '#FF7F0E', 'virginica': '#2CA02C'}

In [12]:

# Range of the alignment counts and the width of one of N_steps equal,
# linearly spaced steps across that range.
alignment_counts = df["N_alignments"]
N_alignments_min = alignment_counts.min()
N_alignments_max = alignment_counts.max()
N_steps = 1000
step = (N_alignments_max - N_alignments_min) / N_steps


In [13]:
# Smallest alignment count.
N_alignments_min

11

In [14]:
# Largest alignment count.
N_alignments_max

60445656

In [15]:
# Width of one linear step between the smallest and largest count.
step

60445.645

In [16]:
# Load the iris example data set under its own name.
df_iris = px.data.iris()

In [17]:
# Display the iris frame.
df_iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,3
146,6.3,2.5,5.0,1.9,virginica,3
147,6.5,3.0,5.2,2.0,virginica,3
148,6.2,3.4,5.4,2.3,virginica,3


In [18]:
# Display whatever frame `df` is currently bound to.
df

Unnamed: 0,taxid,D_max,n_sigma,D_max_lower_hpdi,D_max_upper_hpdi,q_mean,concentration_mean,D_max_marginalized_mean,N_alignments,N_z1_forward,...,asymmetry,normalized_noise,normalized_noise_forward,normalized_noise_reverse,name,N_alignments_log10,N_alignments_sqrt,N_alignments_str,N_sum_total_log10,N_sum_total_str
0,1,0.448445,8.412160,0.435209,0.464514,0.625865,1559.919819,0.447680,60445656,13113435,...,4.819366,0.292090,0.293354,0.290583,KapK-198A-Ext-55-Lib...,7.781365,7774.680444,60.4M,8.532024,340M
1,131567,0.448161,8.422496,0.430949,0.460320,0.626324,1552.312281,0.447992,60445181,13113307,...,4.800994,0.292094,0.293356,0.290588,KapK-198A-Ext-55-Lib...,7.781362,7774.649896,60.4M,8.532019,340M
2,2759,0.451838,8.312691,0.435601,0.464251,0.626059,1527.901714,0.451684,60094660,13001213,...,4.934529,0.293807,0.295013,0.292372,KapK-198A-Ext-55-Lib...,7.778836,7752.074561,60.1M,8.528095,337M
3,33090,0.452352,8.499618,0.438455,0.467702,0.625721,1549.861381,0.451898,60008321,12978386,...,5.308374,0.293940,0.294898,0.292749,KapK-198A-Ext-55-Lib...,7.778211,7746.503792,60M,8.527348,337M
4,35493,0.452262,8.533230,0.440029,0.469333,0.625563,1550.216294,0.452137,60008225,12978366,...,4.739024,0.293940,0.294898,0.292750,KapK-198A-Ext-55-Lib...,7.778211,7746.497596,60M,8.527347,337M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,1396,0.000000,-2.556453,0.000000,0.000000,0.707621,957.671831,1.056107,12,1,...,-2.370465,3.741657,3.741657,3.741657,SJArg-1-Nit__number_...,1.079181,3.464102,12,1.778151,60
3996,2785011,,-0.477179,,,0.539371,970.336541,0.210705,11,0,...,1.979100,5.385165,6.182412,4.422166,SJArg-1-Nit__number_...,1.041393,3.316625,11,1.681241,48
3997,2633480,0.166667,0.431609,0.000000,0.333333,0.496597,963.755151,0.197924,11,6,...,-0.541363,5.147815,5.961543,3.840573,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.071882,118
3998,2623841,0.000000,0.397445,0.000000,0.250000,0.525748,994.856521,0.149555,11,4,...,2.275519,4.580591,6.086050,2.712113,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.075547,119


In [20]:

# Bounds of the chosen column on a log10 scale, rounded outwards to whole
# powers of ten.
column = 'N_alignments'

N_alignments_log = np.log10(df[column])

log_lowest, log_highest = N_alignments_log.min(), N_alignments_log.max()
N_alignments_min = np.floor(log_lowest)
N_alignments_max = np.ceil(log_highest)

In [21]:
# Lower bound: floor of the smallest log10 value.
N_alignments_min

1.0

In [23]:
# Upper bound: ceiling of the largest log10 value.
N_alignments_max

8.0

In [24]:
# Whole exponents from the lower bound up to, but not including, the upper
# bound (np.arange excludes the stop value).
np.arange(N_alignments_min, N_alignments_max)

array([1., 2., 3., 4., 5., 6., 7.])