In [None]:
# Cross-Dataset: Faith PD vs Taxonomic Richness
import pandas as pd
import plotly.express as px

# Faith PD per sample: keep the largest value recorded for each sample
# (rather than restricting the table to the deepest rarefaction depth).
df_faith = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/faith_pd_long.tsv',
    sep='\t',
)
df_faith_max = df_faith.groupby('sample', as_index=False)['faith_pd'].max()

# Richness: number of taxonomy rows with a non-zero count for each
# (sample, habitat) pair.
df_tax = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/taxonomy_long.tsv',
    sep='\t',
)
present_taxa = df_tax.loc[lambda frame: frame['count'] > 0]
df_richness = (
    present_taxa
    .groupby(['sample', 'habitat'])
    .size()
    .reset_index(name='richness')
)

# Inner join on sample: only samples found in both tables are plotted.
df_modified = pd.merge(df_faith_max, df_richness, on='sample')

# NOTE: plotly's 'ols' trendline relies on statsmodels being installed.
fig = px.scatter(
    df_modified,
    x='richness',
    y='faith_pd',
    color='habitat',
    size='faith_pd',
    hover_data=['sample'],
    title='Faith Phylogenetic Diversity vs Taxonomic Richness',
    labels={'richness': 'Taxonomic Richness (# taxa)', 'faith_pd': 'Faith PD (max)'},
    trendline='ols',
    template='plotly_white',
)
fig.show()

In [None]:
# Differential Abundance: Top Significant ASVs by Phylum
import pandas as pd
import plotly.express as px

df = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/ancom_volcano.tsv',
    sep='\t',
)

# "Significant" here means W is at least 70% of the largest W in the table;
# of those rows, only the 30 with the highest W are kept.
w_threshold = df['W'].max() * 0.7
passes_threshold = df['W'] >= w_threshold
df_significant = df.loc[passes_threshold].nlargest(30, 'W')

# Number of retained ASVs per phylum, smallest first so that the largest
# bar is listed last in the horizontal bar chart.
phylum_counts = df_significant.groupby('Phylum').size().reset_index(name='count')
df_modified = phylum_counts.sort_values('count', ascending=True)

fig = px.bar(
    df_modified,
    x='count',
    y='Phylum',
    orientation='h',
    color='count',
    title='Top 30 Significant ASVs: Distribution by Phylum',
    labels={'count': 'Number of Significant ASVs', 'Phylum': 'Phylum'},
    color_continuous_scale='Viridis',
)
fig.show()

In [None]:
# Taxonomic Composition: Mean Phylum Abundance by Habitat (Top 10)
import pandas as pd
import plotly.express as px

df = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/taxonomy_long.tsv',
    sep='\t',
)

# Total count per (sample, Phylum, habitat), then each row's share of its
# sample's total, expressed in percent.
# NOTE(review): groupby drops rows whose key is missing by default, so rows
# without a Phylum (if any) do not contribute to the per-sample total - confirm.
df_temp = df.groupby(['sample', 'Phylum', 'habitat'], as_index=False)['count'].sum()
sample_totals = df_temp.groupby('sample')['count'].transform('sum')
df_temp['percent'] = 100 * df_temp['count'] / sample_totals

# The ten phyla with the highest mean percentage across all rows.
mean_percent_by_phylum = df_temp.groupby('Phylum')['percent'].mean()
top_phyla = mean_percent_by_phylum.nlargest(10).index

# Restrict to those phyla and average the percentage within each habitat.
in_top_phyla = df_temp['Phylum'].isin(top_phyla)
df_modified = (
    df_temp.loc[in_top_phyla]
    .groupby(['habitat', 'Phylum'], as_index=False)['percent']
    .mean()
)

fig = px.bar(
    df_modified,
    x='habitat',
    y='percent',
    color='Phylum',
    title='Mean Phylum Relative Abundance by Habitat (Top 10)',
    labels={'percent': 'Mean Relative Abundance (%)'},
    barmode='stack',
)
fig.show()

In [None]:
# Alpha Diversity: Faith PD by Habitat (max depth comparison)
import pandas as pd
import plotly.express as px

df_faith = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/faith_pd_long.tsv',
    sep='\t',
)
df_tax = pd.read_csv(
    '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/taxonomy_long.tsv',
    sep='\t',
)

# One row per distinct (sample, habitat) pair, taken from the taxonomy table.
sample_habitat = df_tax[['sample', 'habitat']].drop_duplicates()

# Keep only the rows recorded at the deepest rarefaction depth, then attach
# each sample's habitat (inner join on sample).
# NOTE(review): if the table holds several rows per sample at this depth
# (e.g. one per iteration), each row becomes its own point - confirm intended.
max_depth = df_faith['depth'].max()
faith_at_max_depth = df_faith[df_faith['depth'] == max_depth]
df_modified = faith_at_max_depth.merge(sample_habitat, on='sample')

fig = px.box(
    df_modified,
    x='habitat',
    y='faith_pd',
    color='habitat',
    points='all',
    title='Faith Phylogenetic Diversity by Habitat (max depth)',
    labels={'faith_pd': 'Faith PD', 'habitat': 'Habitat'},
)
fig.show()

In [None]:
import pandas as pd

# Convert the wide alpha-rarefaction table (one column per depth/iteration)
# into a long table with columns: sample, depth, iter, faith_pd.

# Read the CSV file
df = pd.read_csv('/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/alpha-rarefaction/faith_pd.csv')

# Melt ONLY the rarefaction columns. Melting every non-id column would also
# fold any other column of the file (e.g. sample metadata) into 'faith_pd',
# turning it into a mixed text/number column until the rows are dropped below.
rarefaction_cols = df.columns[df.columns.str.contains(r'depth-\d+_iter-\d+')].tolist()
if not rarefaction_cols:
    raise ValueError("faith_pd.csv has no 'depth-<N>_iter-<M>' columns to reshape")

# Melt the dataframe from wide to long format
df_long = df.melt(
    id_vars=['sample-id'],
    value_vars=rarefaction_cols,
    var_name='iteration',
    value_name='faith_pd'
)

# Parse depth and iteration number from the column name in a single pass
# (kept as nullable integers)
parsed = df_long['iteration'].str.extract(r'depth-(\d+)_iter-(\d+)')
df_long['depth'] = parsed[0].astype('Int64')
df_long['iter'] = parsed[1].astype('Int64')

# Rename sample-id to sample for cleaner column name
df_long = df_long.rename(columns={'sample-id': 'sample'})

# Select and reorder columns
df_modified = df_long[['sample', 'depth', 'iter', 'faith_pd']].copy()

# Drop rows with a missing value in any column (e.g. no Faith PD value
# recorded for that sample at that depth/iteration)
df_modified = df_modified.dropna()


df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/faith_pd_long.tsv', sep='\t', index=False)
df_modified

In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

import plotly.express as px


def _translucent(color, alpha=0.3):
    """Return ``color`` as an ``rgba(r,g,b,alpha)`` string.

    Handles ``'#RRGGBB'`` hex strings and ``'rgb(r,g,b)'`` strings, so every
    palette entry gets the same transparency. Any other notation is returned
    unchanged.
    """
    if color.startswith('rgb('):
        red, green, blue = (int(part) for part in color[4:-1].split(','))
    elif color.startswith('#') and len(color) == 7:
        red, green, blue = (int(color[pos:pos + 2], 16) for pos in (1, 3, 5))
    else:
        return color
    return f'rgba({red},{green},{blue},{alpha})'


# Read your CSV
df = pd.read_csv('/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/alpha-rarefaction/faith_pd.csv')

# Check what columns we have
print("Columns:", df.columns.tolist())

# Reshape to long format in one vectorised step; only the 'depth-...' columns
# are rarefaction measurements, every other column is skipped.
depth_cols = [col for col in df.columns if col.startswith('depth-')]
if not depth_cols:
    raise ValueError("faith_pd.csv has no 'depth-<N>_iter-<M>' columns to plot")

melted = df.melt(
    id_vars=['sample-id'],
    value_vars=depth_cols,
    var_name='column',
    value_name='faith_pd'
)
# A column name that does not follow 'depth-<N>_iter-<M>' yields NaN here and
# makes the integer cast below fail, instead of being silently mis-parsed.
parsed = melted['column'].str.extract(r'^depth-(\d+)_iter-(\d+)$')
df_long = pd.DataFrame({
    'sample_id': melted['sample-id'],
    'depth': parsed[0].astype(int),
    'iteration': parsed[1].astype(int),
    'faith_pd': melted['faith_pd'],
})

# Get unique depths and samples
depths = sorted(df_long['depth'].unique())
samples = df_long['sample_id'].unique()

# Create figure
fig = go.Figure()

# Color palette (cycled when there are more samples than colors)
colors = px.colors.qualitative.Plotly + px.colors.qualitative.Set2

# For each sample, create a curve with box plots
for i, sample in enumerate(samples):
    sample_data = df_long[df_long['sample_id'] == sample]
    label = str(sample)
    color = colors[i % len(colors)]
    fill = _translucent(color)

    # Calculate median for each depth for this sample
    medians = sample_data.groupby('depth')['faith_pd'].median().reset_index()

    # Add median line; it shares a legend group with the sample's boxes so
    # that toggling the legend entry hides or shows both together.
    fig.add_trace(go.Scatter(
        x=medians['depth'],
        y=medians['faith_pd'],
        mode='lines+markers',
        name=label,
        legendgroup=label,
        line=dict(color=color, width=2),
        marker=dict(size=6)
    ))

    # Add box plot at each depth for this sample
    for depth in depths:
        depth_vals = sample_data.loc[sample_data['depth'] == depth, 'faith_pd']
        if depth_vals.notna().sum() == 0:
            # Nothing measured for this sample at this depth: no box to draw.
            continue

        fig.add_trace(go.Box(
            x=[depth] * len(depth_vals),
            y=depth_vals,
            boxpoints='all',
            jitter=0.2,
            pointpos=0,
            marker=dict(
                size=3,
                opacity=0.5,
                color=color
            ),
            line=dict(width=1, color=color),
            fillcolor=fill,
            showlegend=False,
            name=label,
            legendgroup=label,
            hovertemplate=f'{sample}<br>Depth: %{{x}}<br>Faith PD: %{{y}}<extra></extra>'
        ))

fig.update_layout(
    title='Faith PD Rarefaction Curves by Sample',
    xaxis_title='Sequencing Depth',
    yaxis_title='Faith PD',
    hovermode='closest',
    height=700,
    width=1200
)

fig.show()

In [None]:
import plotly.express as px
import pandas as pd

# Read the CSV
df = pd.read_csv('/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/alpha-rarefaction/faith_pd.csv')

# Transform to long format in one vectorised step. Only columns named
# 'depth-<N>_iter-<M>' are rarefaction measurements; all others are skipped.
rarefaction_cols = df.columns[df.columns.str.match(r'depth-\d+_iter-\d+')].tolist()
if not rarefaction_cols:
    raise ValueError("faith_pd.csv has no 'depth-<N>_iter-<M>' columns to reshape")

df_long = df.melt(
    id_vars=['sample-id'],
    value_vars=rarefaction_cols,
    var_name='column',
    value_name='faith_pd'
).rename(columns={'sample-id': 'sample'})

parsed = df_long['column'].str.extract(r'^depth-(\d+)_iter-(\d+)')
df_long['depth'] = parsed[0].astype(int)
df_long['iter'] = parsed[1].astype(int)

# Same layout as the other cell that writes faith_pd_long.tsv (sample, depth,
# iter, faith_pd, rows with missing values removed), so the file's contents do
# not depend on which of the two cells ran last.
df_modified = (
    df_long[['sample', 'depth', 'iter', 'faith_pd']]
    .dropna()
    .reset_index(drop=True)
)

# Create rarefaction curve (one point per sample / depth / iteration)
fig = px.line(df_modified, x='depth', y='faith_pd', color='sample', markers=True)
df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/faith_pd_long.tsv', sep='\t', index=False)
fig.show()

In [None]:
# Mean and standard deviation of Faith PD for every (sample, depth) pair,
# drawn as one line per sample with the standard deviation as error bars.
# Relies on `df_modified` and `px` being defined by an earlier cell.
faith_pd_summary = (
    df_modified
    .groupby(['sample', 'depth'])
    .agg(mean=('faith_pd', 'mean'), std=('faith_pd', 'std'))
    .reset_index()
)

fig = px.line(
    faith_pd_summary,
    x='depth',
    y='mean',
    color='sample',
    error_y='std',
    markers=True,
)
fig.show()

In [None]:
import pandas as pd

# Load the level-2 barplot export (wide: one row per sample).
df = pd.read_csv('/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/barplot/level-2.csv')

# Quick look at the layout of the table.
trailing_cols = df.columns[-5:]
print("Total columns:", len(df.columns))
print("Last 5 columns:", trailing_cols.tolist())
print("\nFirst row of last 5 columns:")
print(df[trailing_cols].iloc[0])

# Positional layout assumed by this cell: first column = sample names,
# last four columns = metadata (one of them 'habitat'), everything in
# between = taxonomy columns.
sample_col = df.columns[0]
taxonomy_cols = df.columns[1:-4]
habitat_col = 'habitat'

# Keep the sample column plus the taxonomy columns, then go wide -> long.
df_samples = df.loc[:, [sample_col, *taxonomy_cols]]
df_modified = pd.melt(
    df_samples,
    id_vars=[sample_col],
    var_name='taxonomy',
    value_name='count',
).rename(columns={sample_col: 'sample'})

# Look up each sample's habitat in the original table, and take the first two
# ';'-separated fields of the taxonomy string as Kingdom and Phylum.
sample_to_habitat = df.set_index(sample_col)[habitat_col].to_dict()
taxonomy_fields = df_modified["taxonomy"].str.split(';')
df_modified = df_modified.assign(
    habitat=df_modified['sample'].map(sample_to_habitat),
    Kingdom=taxonomy_fields.str[0],
    Phylum=taxonomy_fields.str[1],
)

df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/taxonomy_long.tsv', sep='\t', index=False)
df_modified

In [None]:
# Relies on `df_modified` (long taxonomy table) and `px` from an earlier cell.
# Sum counts per (sample, taxonomy, habitat), then express each row as a
# percentage of its sample's total count.
taxon_counts = (
    df_modified
    .groupby(['sample', 'taxonomy', 'habitat'])['count']
    .sum()
    .reset_index()
)
sample_totals = taxon_counts.groupby('sample')['count'].transform('sum')
df_modified = taxon_counts.assign(percent=100 * taxon_counts['count'] / sample_totals)

fig = px.bar(
    df_modified,
    x='sample',
    y='percent',
    color='taxonomy',
    title='Taxonomic Composition by Sample',
)
fig.show()

In [None]:
# ANCOM results table; plotted as W against clr, one point per row.
# Relies on `pd` and `px` having been imported by an earlier cell.
df_modified = pd.read_csv(
    '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/ancom/Category-habitat-ASV/data.tsv',
    sep='\t',
)

fig = px.scatter(
    df_modified,
    x='clr',
    y='W',
    hover_data=['id'],
    title='ANCOM Volcano Plot',
    labels={'W': 'W statistic', 'clr': 'CLR (Centered Log-Ratio)'},
    opacity=0.5,
    color_discrete_sequence=['#636EFA'],
    template="plotly_white",
)
fig.show()

In [None]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# Step 1: ANCOM results (the volcano-plot data).
df_ancom = pd.read_csv(
    '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/ancom/Category-habitat-ASV/data.tsv',
    sep='\t',
)

# Step 2: taxonomy / relative-abundance table, with Kingdom and Phylum joined
# into a single 'Kingdom;Phylum' string.
df_tax = pd.read_csv(
    '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/rel_abundance_tables/rel-table-ASV_with-DADA2-tax.tsv',
    sep='\t',
)
df_tax['taxonomy'] = df_tax['Kingdom'] + ';' + df_tax['Phylum']

# Step 3: attach the taxonomy to every ANCOM row (left join: ANCOM 'id' against
# taxonomy 'ID'), keep the columns of interest and discard any row that has a
# missing value in one of them.
taxonomy_lookup = df_tax[['ID', 'taxonomy', 'Kingdom', 'Phylum']]
annotated = pd.merge(df_ancom, taxonomy_lookup, left_on='id', right_on='ID', how='left')
output_columns = ["id", "taxonomy", 'Kingdom', 'Phylum', "W", "clr"]
df_modified = annotated[output_columns].dropna()

df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/ancom_volcano.tsv', sep='\t', index=False)
df_modified

# # 4. Create Dash app with filters
# app = Dash(__name__)

# app.layout = html.Div([
#     dcc.Dropdown(
#         id='phylum-filter',
#         options=[{'label': p, 'value': p} for p in df_modified['Phylum'].dropna().unique()],
#         value=None,
#         placeholder='Filter by Phylum'
#     ),
#     dcc.Slider(
#         id='w-threshold',
#         min=df_modified['W'].min(),
#         max=df_modified['W'].max(),
#         value=2300,
#         marks={i: str(i) for i in range(2000, 2700, 100)}
#     ),
#     dcc.Graph(id='volcano')
# ])

# @app.callback(
#     Output('volcano', 'figure'),
#     [Input('phylum-filter', 'value'),
#      Input('w-threshold', 'value')]
# )
# def update_volcano(selected_phylum, w_threshold):
#     filtered = df_modified[df_modified['W'] >= w_threshold]
    
#     if selected_phylum:
#         filtered = filtered[filtered['Phylum'] == selected_phylum]
    
#     fig = px.scatter(filtered, x='clr', y='W', 
#                      hover_data=['id', 'Phylum', 'Class', 'Genus'],
#                      color='Phylum',
#                      opacity=0.6)
#     fig.update_layout(template='plotly_white')
#     return fig

# app.run(mode='inline')  # or mode='external' for separate tab

In [None]:
# Show `df_modified` as the cell output. The name is reassigned by several
# cells above, so what is displayed depends on which of them ran last.
df_modified