In [None]:
from viper.utils import load_mutation_data_from_vcf, get_genome_length_from_fasta
from viper.hotspot import sliding_window_mutation_density, detect_hotspots
from viper.visualization import plot_mutation_density

# Read the variant records from the VCF (min_qual=30 is forwarded to the
# loader) and look up the genome length from the companion FASTA file.
mutation_df = load_mutation_data_from_vcf(
    "data/generated_variants.vcf",
    min_qual=30,
)
genome_length = get_genome_length_from_fasta(
    "data/example_sequences.fasta"
)

# Preview the first rows of the loaded mutation table.
print(mutation_df.head())

   position  count
0        20      1
1        43      1


In [10]:
# Sliding-window parameters: the window width and the step between
# consecutive window starts.
window_size, step_size = 30, 5

# Build the per-window mutation density table from the loaded mutations.
density_df = sliding_window_mutation_density(
    mutations_df=mutation_df, genome_length=genome_length,
    window_size=window_size, step_size=step_size,
)

# Preview the first few windows.
print(density_df.head())

   start  end  window_midpoint  mutation_count   density
0      0   30               15               1  0.033333
1      5   35               20               1  0.033333
2     10   40               25               1  0.033333


In [4]:
# Select hotspot windows from the density table.
# NOTE(review): density_threshold=0 is the lowest non-negative cutoff; how it
# is compared against `density` is defined in viper.hotspot — confirm that this
# value is intended rather than a placeholder.
hotspots_df = detect_hotspots(density_df, density_threshold=0)

# Preview the first few rows returned by the hotspot detection.
print(hotspots_df.head())

   start  end  window_midpoint  mutation_count   density
0      0   30               15               1  0.033333
1      5   35               20               1  0.033333
2     10   40               25               1  0.033333


In [None]:
# Plot the windowed mutation density, handing the detected hotspots and the
# genome length to the viper plotting helper.
plot_mutation_density(
    genome_length=genome_length,
    density_df=density_df,
    hotspots_df=hotspots_df,
)