In [None]:
import pandas as pd

# Input and output locations for the Faith PD alpha-rarefaction table.
faith_pd_csv = '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/alpha-rarefaction/faith_pd.csv'
faith_pd_long_tsv = '/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/faith_pd_long.tsv'

# Read the CSV file (wide format: one row per sample)
df = pd.read_csv(faith_pd_csv)

# Only columns labelled "depth-<d>_iter-<i>" carry rarefaction values.
# Melting every non-id column would also pull any other column (e.g. sample
# metadata, if the export carries some) into 'faith_pd', turning it into a
# mixed-type object column that the mean/std aggregation downstream may not
# handle. Select the iteration columns up front instead.
iteration_cols = df.columns[df.columns.str.contains(r'depth-\d+_iter-\d+')].tolist()

# Melt the dataframe from wide to long format
df_long = df[['sample-id'] + iteration_cols].melt(
    id_vars=['sample-id'],
    var_name='iteration',
    value_name='faith_pd'
)

# Parse depth and iteration number from the column label in a single pass
# (kept as nullable integers)
depth_iter = df_long['iteration'].str.extract(r'depth-(\d+)_iter-(\d+)')
df_long['depth'] = depth_iter[0].astype('Int64')
df_long['iter'] = depth_iter[1].astype('Int64')

# Rename sample-id to sample for cleaner column name
df_long = df_long.rename(columns={'sample-id': 'sample'})

# Select and reorder columns
df_modified = df_long[['sample', 'depth', 'iter', 'faith_pd']].copy()

# Drop rows that have no Faith PD value for a given sample/depth/iteration
df_modified = df_modified.dropna()

# Persist the long table as TSV, then display it as the cell output
df_modified.to_csv(faith_pd_long_tsv, sep='\t', index=False)
df_modified

In [None]:
import plotly.express as px
# Mean and standard deviation of Faith PD over all rows that share a sample and depth.
faith_pd_stats = (
    df_modified
    .groupby(['sample', 'depth'])
    .agg({'faith_pd': ['mean', 'std']})
    .reset_index()
    # Flatten the two-level column index produced by the dict-style agg.
    .set_axis(['sample', 'depth', 'mean', 'std'], axis=1)
)

# One line per sample, with the standard deviation drawn as error bars.
fig = px.line(
    faith_pd_stats,
    x='depth',
    y='mean',
    color='sample',
    error_y='std',
    markers=True,
)
fig.show()

In [None]:
import pandas as pd

# Load the level-2 barplot export (wide format: one row per sample).
df = pd.read_csv('/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/barplot/level-2.csv')

# Inspect the tail of the table, where the non-taxonomy columns are expected.
last_five = df.columns[-5:]
print("Total columns:", len(df.columns))
print("Last 5 columns:", last_five.tolist())
print("\nFirst row of last 5 columns:")
print(df[last_five].iloc[0])

# Column layout this cell relies on: sample names in the first column
# ('index'), metadata in the last 4 columns, taxonomy counts in between.
sample_col = df.columns[0]
taxonomy_cols = df.columns[1:-4]
habitat_col = 'habitat'  # one of the trailing metadata columns

# Keep the sample names plus the taxonomy counts only.
df_samples = df[[sample_col, *taxonomy_cols]]

# Wide -> long: one row per (sample, taxonomy) pair; the sample column is
# renamed to 'sample' on the way.
df_modified = (
    df_samples
    .melt(id_vars=[sample_col], var_name='taxonomy', value_name='count')
    .rename(columns={sample_col: 'sample'})
)

# Attach each sample's habitat, looked up in the original wide table.
habitat_by_sample = df.set_index(sample_col)[habitat_col]
sample_to_habitat = habitat_by_sample.to_dict()
df_modified['habitat'] = df_modified['sample'].map(sample_to_habitat)

# The first two ';'-separated fields of the taxonomy label become
# Kingdom and Phylum.
taxonomy_parts = df_modified["taxonomy"].str.split(';')
df_modified["Kingdom"] = taxonomy_parts.str[0]
df_modified["Phylum"] = taxonomy_parts.str[1]

# Persist the long table as TSV, then display it as the cell output.
df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/taxonomy_long.tsv', sep='\t', index=False)
df_modified

In [None]:
# Total count for every (sample, taxonomy, habitat) combination.
taxon_counts = (
    df_modified
    .groupby(['sample', 'taxonomy', 'habitat'])['count']
    .sum()
    .reset_index()
)

# Express each count as a percentage of the total count of its sample.
sample_totals = taxon_counts.groupby('sample')['count'].transform('sum')
df_modified = taxon_counts.assign(percent=100 * taxon_counts['count'] / sample_totals)

# Stacked bars: one bar per sample, coloured by taxonomy label.
fig = px.bar(
    df_modified,
    x='sample',
    y='percent',
    color='taxonomy',
    title='Taxonomic Composition by Sample',
)
fig.show()

In [None]:
# Load the ANCOM result table (tab-separated).
ancom_tsv = '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/ancom/Category-habitat-ASV/data.tsv'
df_modified = pd.read_csv(ancom_tsv, sep='\t')

# Volcano plot: CLR on the x axis against the W statistic on the y axis,
# with the feature id shown on hover.
fig = px.scatter(
    df_modified,
    x='clr',
    y='W',
    hover_data=['id'],
    title='ANCOM Volcano Plot',
    labels={'W': 'W statistic', 'clr': 'CLR (Centered Log-Ratio)'},
    opacity=0.5,
    color_discrete_sequence=['#636EFA'],
    template="plotly_white",
)
fig.show()

In [None]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# Step 1 - ANCOM results (the volcano data: 'id', 'W' and 'clr' are used below).
df_ancom = pd.read_csv(
    '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/ancom/Category-habitat-ASV/data.tsv',
    sep='\t',
)

# Step 2 - taxonomy/abundance table, extended with a combined
# "Kingdom;Phylum" label in a single 'taxonomy' column.
df_tax = pd.read_csv(
    '/Users/tweber/Data/ampliseq-testdata/results-9c52c22f17179b9bd5cb2621c05ec3a931adcb02/qiime2/rel_abundance_tables/rel-table-ASV_with-DADA2-tax.tsv',
    sep='\t',
).assign(taxonomy=lambda tax: tax['Kingdom'] + ';' + tax['Phylum'])

# Step 3 - left-join the taxonomy onto the ANCOM rows by ASV id, keep the
# plotting columns, and drop every row with a missing value in any of them.
tax_lookup = df_tax[['ID', 'taxonomy', 'Kingdom', 'Phylum']]
ancom_with_tax = df_ancom.merge(tax_lookup, left_on='id', right_on='ID', how='left')
df_modified = ancom_with_tax[["id", "taxonomy", 'Kingdom', 'Phylum', "W", "clr"]].dropna()

# Persist the table as TSV, then display it as the cell output.
df_modified.to_csv('/Users/tweber/Gits/workspaces/depictio-workspace/depictio/depictio/api/v1/configs/ampliseq_dataset/ancom_volcano.tsv', sep='\t', index=False)
df_modified

# # 4. Create Dash app with filters
# app = Dash(__name__)

# app.layout = html.Div([
#     dcc.Dropdown(
#         id='phylum-filter',
#         options=[{'label': p, 'value': p} for p in df_modified['Phylum'].dropna().unique()],
#         value=None,
#         placeholder='Filter by Phylum'
#     ),
#     dcc.Slider(
#         id='w-threshold',
#         min=df_modified['W'].min(),
#         max=df_modified['W'].max(),
#         value=2300,
#         marks={i: str(i) for i in range(2000, 2700, 100)}
#     ),
#     dcc.Graph(id='volcano')
# ])

# @app.callback(
#     Output('volcano', 'figure'),
#     [Input('phylum-filter', 'value'),
#      Input('w-threshold', 'value')]
# )
# def update_volcano(selected_phylum, w_threshold):
#     filtered = df_modified[df_modified['W'] >= w_threshold]
    
#     if selected_phylum:
#         filtered = filtered[filtered['Phylum'] == selected_phylum]
    
#     fig = px.scatter(filtered, x='clr', y='W', 
#                      hover_data=['id', 'Phylum', 'Class', 'Genus'],
#                      color='Phylum',
#                      opacity=0.6)
#     fig.update_layout(template='plotly_white')
#     return fig

# app.run(mode='inline')  # or mode='external' for separate tab