In [None]:
import pandas as pd

# Per-tag / per-extension metadata table that every later cell plots from.
METADATA_CSV = "./scverse_scanpy_metadata.csv"

df = pd.read_csv(METADATA_CSV)
# Preview the first rows as a sanity check on the load.
df.head()

In [None]:
# Figure sizes in pixels, unpacked into `fig.update_layout(**...)` by the plotting cells.
proportions = {"width": 1600, "height": 1000}
# Larger variant of the same layout keywords.
xl_proportions = {"width": 2000, "height": 1400}

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Tag inspected by this cell and by the per-tag scatter plot that follows.
tag = "0.0"

# Number of rows per (tag, extension) pair, restricted to the selected tag.
rows_for_tag = df[df["tag"] == tag]
by_tag = (
    rows_for_tag
    .groupby(["tag", "extension"])
    .size()
    .reset_index(name="counts")
)
by_tag.head()


In [None]:
# Scatter of file_count (x, also the marker size) against extension (y) for
# the rows whose tag equals `tag`.
# The title previously advertised "file_count vs line_count", but line_count
# is not plotted here; the title now describes the axes actually drawn.
tag_rows = df[df["tag"] == tag]
fig = px.scatter(
    tag_rows,
    x="file_count",
    y="extension",
    color="extension",
    size="file_count",
    title=f"Scatterplot of file_count per extension for tag {tag}",
)
fig.update_layout(**proportions)
fig.show()

In [None]:
# Sunburst of file_count with a dropdown that switches the hierarchy between
# tag -> extension and extension -> tag.
#
# `path` is an argument of plotly express, not an attribute of the sunburst
# trace, so a button that sends {"path": [...]} to the trace changes nothing.
# Instead, each hierarchy is built once with plotly express and the button
# restyles the trace with that hierarchy's precomputed sector arrays.

# Shared keyword arguments so both hierarchies are built and coloured the same way.
# NOTE(review): color_continuous_scale only matters if "extension" is numeric;
# kept as in the original call.
sunburst_kwargs = dict(
    values="file_count",
    color="extension",
    color_continuous_scale=px.colors.sequential.Plasma,
)


def _sunburst_restyle_args(path):
    """Return the restyle payload that redraws the sunburst using ``path``.

    Builds a throw-away sunburst figure for ``path`` and copies the per-sector
    arrays from its single trace. Every array is wrapped in a one-element list
    so the restyle call assigns the whole array to the first trace instead of
    spreading its items across traces.
    """
    trace = px.sunburst(df, path=path, **sunburst_kwargs).data[0]
    candidates = {
        "ids": trace.ids,
        "labels": trace.labels,
        "parents": trace.parents,
        "values": trace.values,
        "customdata": trace.customdata,
        "marker.colors": trace.marker.colors,
    }
    # Skip attributes the trace does not define and convert array-likes to
    # plain lists so the button arguments serialise cleanly.
    return {
        key: [value.tolist() if hasattr(value, "tolist") else list(value)]
        for key, value in candidates.items()
        if value is not None
    }


fig = px.sunburst(df, path=["tag", "extension"], **sunburst_kwargs)
fig.update_layout(
    **proportions,
    updatemenus=[
        {
            "buttons": [
                {
                    "label": "Path: Tag -> Extension",
                    "method": "restyle",
                    "args": [_sunburst_restyle_args(["tag", "extension"])],
                },
                {
                    "label": "Path: Extension -> Tag",
                    "method": "restyle",
                    "args": [_sunburst_restyle_args(["extension", "tag"])],
                },
            ],
            "direction": "down",
            "showactive": True,
        }
    ],
)
fig.show()

In [None]:
# Sunburst of line_count with extensions on the inner ring and tags on the outer ring.
fig = px.sunburst(
    df,
    path=["extension", "tag"],
    values="line_count",
    color="extension",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.update_layout(**proportions)
fig.show()

In [None]:
# Animated scatter: one frame per tag, one marker per extension, file_count on
# a logarithmic x axis whose range is fixed to the global min/max so the axis
# does not rescale between frames.
x_field = "file_count"
size_field = "file_count"

# Build the scaled size column on a NEW frame. The previous
# `df_bumped_size = df` was only an alias, so the column assignment silently
# added a column to the shared `df` used by every other cell.
# The column keeps its historical `_10` suffix although the factor is 100.
df_bumped_size = df.assign(**{f"{size_field}_10": df[size_field] * 100})

# NOTE(review): a log axis needs strictly positive values - confirm that
# file_count never contains 0.
fig = px.scatter(
    df_bumped_size,
    x=x_field,
    y="extension",
    color="extension",
    # size=f'{size_field}_10',  # optional: scale markers by the bumped size column
    animation_frame="tag",
    animation_group="extension",
    log_x=True,
    range_x=df[x_field].agg(["min", "max"]).tolist(),
)

fig["layout"].pop("updatemenus")  # optional, drop animation buttons
fig.update_layout(**proportions)
fig.show()

In [None]:
# Animated scatter: one frame per extension, tags on the x axis, file_count on
# a logarithmic y axis whose range is fixed to the global min/max so the axis
# does not rescale between frames.
size_field = "file_count"

# Build the scaled size column on a NEW frame. The previous
# `df_bumped_size = df` was only an alias, so the column assignment silently
# added a column to the shared `df` used by every other cell.
# The column keeps its historical `_10` suffix although the factor is 100.
df_bumped_size = df.assign(**{f"{size_field}_10": df[size_field] * 100})

# NOTE(review): a log axis needs strictly positive values - confirm that
# file_count never contains 0.
fig = px.scatter(
    df_bumped_size,
    x="tag",
    y="file_count",
    color="extension",
    # size=f'{size_field}_10',  # optional: scale markers by the bumped size column
    animation_frame="extension",
    log_y=True,
    range_y=df["file_count"].agg(["min", "max"]).tolist(),
    title="scatter",
)

fig["layout"].pop("updatemenus")  # optional, drop animation buttons
fig.update_layout(**proportions)
fig.show()

In [None]:
# Animated horizontal histogram: line_count per extension, one frame per tag,
# bars coloured by category. The x range is pinned to the global min/max of
# line_count so the axis stays fixed across frames.
line_count_range = df["line_count"].agg(["min", "max"]).tolist()

fig = px.histogram(
    df,
    y="extension",
    x="line_count",
    animation_frame="tag",
    range_x=line_count_range,
    color="category",
    log_x=True,
)

# Order the extension axis by each category's total, ascending.
fig.update_yaxes(categoryorder="total ascending")
fig.update_layout(**proportions)
fig.show()