## Visualization and Interactive Analysis

### Example A: Sequence Alignment Visualization (matplotlib)
Purpose: Compute a local alignment score for every unique pair of sequences in a FASTA file and visualize the scores as a bar plot.

In [None]:
from skbio import read, DNA
from skbio.alignment import local_pairwise_align_ssw
import matplotlib.pyplot as plt
import itertools

# ---- Step 1: Load every FASTA record as a DNA sequence ----
fasta_file = "../data/fasta_example.fasta"  # Update if needed
sequences = list(read(fasta_file, format="fasta", constructor=DNA))
names = [record.metadata['id'] for record in sequences]

# ---- Step 2: Score each unique pair of sequences ----
# Each element of `pairs` is ((index_a, seq_a), (index_b, seq_b)); the indices
# are kept so the matching sequence ids can be looked up in `names`.
pairs = list(itertools.combinations(enumerate(sequences), 2))  # all unique pairs
labels = [f"{names[a]} vs {names[b]}" for (a, _), (b, _) in pairs]
# local_pairwise_align_ssw returns a 3-tuple; only its second item (the score)
# is plotted.
scores = [
    local_pairwise_align_ssw(left, right)[1]
    for (_, left), (_, right) in pairs
]

# ---- Step 3: Draw one bar per sequence pair ----
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(labels, scores, color="skyblue", edgecolor="black")
ax.set_ylabel("Alignment Score")
ax.set_title("Sequence Alignment Scores")
ax.tick_params(axis="x", labelrotation=45)  # tilt the pair labels
fig.tight_layout()
plt.show()

### Example B: Variant Quality Distribution (seaborn)
Purpose: Visualize variant quality scores from a VCF file.


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import vcfpy

# Open and parse the VCF file, collecting the QUAL value of every record.
# The reader holds an open file handle, so it is closed explicitly once the
# records have been consumed (even if parsing raises part-way through).
reader = vcfpy.Reader.from_path("../data/vcf_example.vcf")
try:
    # Records without a quality value (QUAL is None) are skipped.
    quals = [record.QUAL for record in reader if record.QUAL is not None]
finally:
    reader.close()

# Plot the distribution of quality scores (histogram with a KDE overlay)
sns.histplot(quals, kde=True)
plt.xlabel('Variant Quality Score')
plt.title('Distribution of Variant Quality Scores')
plt.show()

### Example C: Interactive Volcano Plot (plotly)
Purpose: Interactively explore RNA-seq differential expression results as a volcano plot (log2 fold change vs. -log10 p-value).


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load gene expression results. The code below reads the 'gene', 'pvalue',
# 'padj' and 'log2FoldChange' columns of this table.
df = pd.read_csv("../data/example_gene_expression.csv")

# Compute -log10 p-values.
# A p-value of exactly 0 would map to +inf and could not be drawn at a finite
# position, hiding the most significant genes. Replace exact zeros with the
# smallest positive normal float so they land at the top of the plot instead.
# Missing (NaN) p-values are left untouched.
smallest_pval = np.finfo(float).tiny
df['neg_log10_pval'] = -np.log10(df['pvalue'].replace(0, smallest_pval))

# Define significance based on padj and log2FC thresholds
padj_cutoff = 0.05
log2fc_cutoff = 1
# Comparisons against NaN are False, so genes with a missing padj stay
# 'Not Significant'.
passes_padj = df['padj'] < padj_cutoff
df['significance'] = 'Not Significant'
df.loc[passes_padj & (df['log2FoldChange'] > log2fc_cutoff), 'significance'] = 'Upregulated'
df.loc[passes_padj & (df['log2FoldChange'] < -log2fc_cutoff), 'significance'] = 'Downregulated'

# Volcano plot: effect size on x, significance on y, colored by regulation
# status; hovering a point shows the gene name.
fig = px.scatter(df,
                 x='log2FoldChange',
                 y='neg_log10_pval',
                 color='significance',
                 hover_name='gene',
                 title='Interactive Volcano Plot',
                 labels={
                     'log2FoldChange': 'log2(Fold Change)',
                     'neg_log10_pval': '-log10(p-value)',
                     'significance': 'Regulation Status'
                 },
                 color_discrete_map={
                     'Upregulated': 'red',
                     'Downregulated': 'blue',
                     'Not Significant': 'gray'
                 })

# Larger markers with a thin dark outline so overlapping points stay legible
fig.update_traces(marker=dict(size=10, line=dict(width=0.5, color='DarkSlateGrey')))
fig.show()