In [1]:
# Scientific Library
import numpy as np
import pandas as pd
from pathlib import Path
import plotly as py
import plotly.express as px
import plotly.figure_factory as ff

from metadamage import utils

In [2]:
# Collect every fit-result CSV found under a data/fits directory below the
# current working directory. rglob yields paths in filesystem-dependent order,
# so sort them to keep the concatenation order (and with it the per-file
# colour assignment in the figures) reproducible across machines and runs.
input_files = sorted(Path("").rglob("./data/fits/*.csv"))

In [3]:
# Read each fit-result CSV into its own frame, rename the first column to
# 'taxid' and tag every row with a shortened label derived from the file path.
dfs = []
for csv_path in input_files:
    fit = pd.read_csv(csv_path)
    # Only the first header changes; the remaining column names are kept as read.
    fit.columns = ['taxid', *fit.columns[1:]]
    fit['name'] = utils.extract_name(csv_path, max_length=20)
    dfs.append(fit)


In [4]:
# Stack the per-file frames into one table with a fresh 0..N-1 index, then add
# the derived columns the figures use for marker size, axes and hover text.
df = pd.concat(dfs, axis=0, ignore_index=True)
df["N_alignments_log10"] = np.log10(df["N_alignments"])
df["N_alignments_sqrt"] = np.sqrt(df["N_alignments"])
# Series.map applies the formatter element-wise; a row-wise DataFrame.apply
# would build a full Series per row only to read a single value from it.
df['N_alignments_str'] = df['N_alignments'].map(utils.human_format)
df['N_sum_total_log10'] = np.log10(df["N_sum_total"])
df['N_sum_total_str'] = df['N_sum_total'].map(utils.human_format)

df

Unnamed: 0,taxid,D_max,n_sigma,D_max_lower_hpdi,D_max_upper_hpdi,q_mean,concentration_mean,D_max_marginalized_mean,N_alignments,N_z1_forward,...,D_max_forward,n_sigma_reverse,D_max_reverse,asymmetry,name,N_alignments_log10,N_alignments_sqrt,N_alignments_str,N_sum_total_log10,N_sum_total_str
0,1,0.448445,8.412160,0.435209,0.464514,0.625865,1559.919819,0.447680,60445656,13113435,...,0.452185,4.978596,0.444106,4.819366,KapK-198A-Ext-55-Lib...,7.781365,7774.680444,60.4M,8.532024,340M
1,131567,0.448161,8.422496,0.430949,0.460320,0.626324,1552.312281,0.447992,60445181,13113307,...,0.451769,4.943124,0.444402,4.800994,KapK-198A-Ext-55-Lib...,7.781362,7774.649896,60.4M,8.532019,340M
2,2759,0.451838,8.312691,0.435601,0.464251,0.626059,1527.901714,0.451684,60094660,13001213,...,0.455556,5.190804,0.448406,4.934529,KapK-198A-Ext-55-Lib...,7.778836,7752.074561,60.1M,8.528095,337M
3,33090,0.452352,8.499618,0.438455,0.467702,0.625721,1549.861381,0.451898,60008321,12978386,...,0.456176,5.212990,0.448543,5.308374,KapK-198A-Ext-55-Lib...,7.778211,7746.503792,60M,8.527348,337M
4,35493,0.452262,8.533230,0.440029,0.469333,0.625563,1550.216294,0.452137,60008225,12978366,...,0.455952,4.925043,0.447841,4.739024,KapK-198A-Ext-55-Lib...,7.778211,7746.497596,60M,8.527347,337M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,1396,0.000000,-2.556453,0.000000,0.000000,0.707621,957.671831,1.056107,12,1,...,0.000000,2.561011,0.000000,-2.370465,SJArg-1-Nit__number_...,1.079181,3.464102,12,1.778151,60
3996,1866,0.000000,-0.079891,0.000000,0.500000,0.454435,1006.405936,0.184299,11,2,...,0.000000,-0.255351,0.000000,0.424536,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.033424,108
3997,1707,0.142857,0.343010,0.000000,0.285714,0.490549,980.597713,0.167310,11,7,...,0.142857,1.535959,0.285714,-0.159356,SJArg-1-Nit__number_...,1.041393,3.316625,11,2.100371,126
3998,1405,0.000000,0.643811,0.000000,1.000000,0.556612,988.557643,0.349230,11,1,...,0.000000,1.029279,0.000000,1.956575,SJArg-1-Nit__number_...,1.041393,3.316625,11,1.748188,56


In [5]:
# Show the raw count columns next to their log10-transformed counterparts.
df.loc[
    :,
    [
        "N_alignments",
        "N_sum_total",
        "N_alignments_log10",
        "N_sum_total_log10",
    ],
]

Unnamed: 0,N_alignments,N_sum_total,N_alignments_log10,N_sum_total_log10
0,60445656,340426665,7.781365,8.532024
1,60445181,340422786,7.781362,8.532019
2,60094660,337361306,7.778836,8.528095
3,60008321,336781435,7.778211,8.527348
4,60008225,336780774,7.778211,8.527347
...,...,...,...,...
3995,12,60,1.079181,1.778151
3996,11,108,1.041393,2.033424
3997,11,126,1.041393,2.100371
3998,11,56,1.041393,1.748188


In [6]:
# Qualitative colour sequence shared by the figures below (one colour per file name).
# Palette overview: https://plotly.com/python/discrete-color/#color-sequences-in-plotly-express
cmap = px.colors.qualitative.D3
# Alternative palette: px.colors.qualitative.Dark24


In [7]:

# Hover box contents: formatted n_sigma / D_max plus the human-readable
# alignment count. The file name and the sqrt column are switched off here
# (the name is already the hover title, the sqrt column only drives marker size).
hover_data = dict(
    name=False,
    n_sigma=':.2f',
    D_max=':.2f',
    N_alignments_str=True,
    N_alignments_sqrt=False,
)

# One marker per row: significance on x, damage on y, coloured by file and
# sized by sqrt(N_alignments).
fig_fit_results = px.scatter(
    data_frame=df,
    x="n_sigma",
    y="D_max",
    color="name",
    size="N_alignments_sqrt",
    size_max=30,
    opacity=0.2,
    hover_name="name",
    hover_data=hover_data,
    color_discrete_sequence=cmap,
    range_y=(0, 1),
    title='Fit Results',
    width=1400,
    height=800,
)

# Remove the marker outlines.
fig_fit_results.update_traces(
    marker_line_width=0,
    selector=dict(mode='markers'),
)

# LaTeX axis titles (rendered through MathJax), larger font, legend titled "Files".
fig_fit_results.update_layout(
    font=dict(size=16),
    xaxis=dict(title=r"$\Large n_\sigma$"),
    yaxis=dict(title=r"$\Large D_\mathrm{max}$"),
    legend=dict(title=dict(text="Files", font=dict(size=20))),
)

# Write a standalone HTML file; the returned path is the cell's output.
py.offline.plot(
    fig_fit_results,
    filename='./figures/plotly_fit_results',
    include_mathjax='cdn',
)


'./figures/plotly_fit_results.html'

In [8]:

# Hover box contents: the human-readable counts are shown, while the file name
# and the log10 columns are switched off.
hover_data = dict(
    name=False,
    N_alignments_str=True,
    N_alignments_log10=False,
    N_sum_total_str=True,
    N_sum_total_log10=False,
)

# Pairwise scatter plots of the fit quantities, coloured by file.
fig_scatter_matrix = px.scatter_matrix(
    data_frame=df,
    dimensions=[
        "n_sigma",
        "D_max",
        "N_alignments_log10",
        "q_mean",
        "concentration_mean",
        "asymmetry",
        "N_sum_total_log10",
    ],
    color="name",
    color_discrete_sequence=cmap,
    opacity=0.1,
    size_max=10,
    hover_name="name",
    hover_data=hover_data,
    title='Scatter Matrix',
    width=1600,
    height=1200,
)

# Hide the diagonal panels (each variable plotted against itself).
fig_scatter_matrix.update_traces(diagonal=dict(visible=False))

fig_scatter_matrix.update_layout(
    legend=dict(title="Files"),
    font=dict(size=16),
)

# Write a standalone HTML file; the returned path is the cell's output.
py.offline.plot(
    fig_scatter_matrix,
    filename='./figures/plotly_scatter_matrix',
    include_mathjax='cdn',
)



'./figures/plotly_scatter_matrix.html'

In [9]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Forward-vs-reverse comparison on a 2x2 grid, one quantity per panel.
# Each entry: (row, col, forward column, reverse column, LaTeX symbol for titles).
forward_reverse_panels = [
    (1, 1, 'n_sigma_forward', 'n_sigma_reverse', r"n_\sigma"),
    (1, 2, 'D_max_forward', 'D_max_reverse', r"D_\mathrm{max}"),
    (2, 1, 'N_z1_forward', 'N_z1_reverse', r"N_{z=1}"),
    (2, 2, 'N_sum_forward', 'N_sum_reverse', r"N_\mathrm{sum}"),
]

fig_forward_reverse = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=tuple(
        r"$\Large " + panel[4] + "$" for panel in forward_reverse_panels
    ),
)

# Shared trace styling per file, keyed by file name.
kwargs = {}
for i, (name, group) in enumerate(df.groupby("name", sort=False)):
    # Wrap around the palette so that having more files than colours reuses
    # colours instead of raising IndexError.
    kwargs[name] = dict(
        name=name,
        mode="markers",
        legendgroup=name,
        marker=dict(color=cmap[i % len(cmap)], opacity=0.2),
    )

    for panel_index, (row, col, x_column, y_column, symbol) in enumerate(forward_reverse_panels):
        # Only the first panel's trace contributes a legend entry; the other
        # three share its legendgroup and are explicitly kept out of the legend.
        legend_kwargs = {} if panel_index == 0 else dict(showlegend=False)
        fig_forward_reverse.add_trace(
            go.Scatter(x=group[x_column], y=group[y_column], **legend_kwargs, **kwargs[name]),
            row=row, col=col)

# Axis titles: the same symbol on both axes, suffixed with the strand direction.
for row, col, x_column, y_column, symbol in forward_reverse_panels:
    fig_forward_reverse.update_xaxes(row=row, col=col, title_text=r"$\Large " + symbol + r" \text{ forward}$")
    fig_forward_reverse.update_yaxes(row=row, col=col, title_text=r"$\Large " + symbol + r" \text{ reverse}$")

# Update title and width, height
fig_forward_reverse.update_layout(height=800, width=1400, title=dict(text="Forward vs Reverse", font_size=20),
    legend=dict(
        title_text="Files",
        title_font_size=20,
        font_size=16,
        tracegroupgap=2)
    )

# Write a standalone HTML file; the returned path is the cell's output.
py.offline.plot(fig_forward_reverse, filename='./figures/plotly_forward_reverse', include_mathjax='cdn')



'./figures/plotly_forward_reverse.html'