In [1]:
from Bio import SeqIO
import statistics
import time
import csv
import os
class DNAFile:
    """
    A class for analyzing DNA sequences from a FASTA file.
    It calculates per-sequence GC content and summary statistics.
    """
    def __init__(self, filepath, name=None):
        self.filepath = filepath
        self.name = name  # store user-provided name
        self.sequences = [str(record.seq).upper() for record in SeqIO.parse(filepath, "fasta")]

    def count_sequences(self):
        return len(self.sequences)

    def lengths(self):
        return [len(seq) for seq in self.sequences]

    def gc_content(self, seq):
        if not seq:
            return 0
        gc = seq.count("G") + seq.count("C")
        return round((gc / len(seq)) * 100, 2)
    def average_gc_content(self):
        if not self.sequences:
            return 0
        return round(statistics.mean(self.gc_content(seq) for seq in self.sequences), 2)

    def analyze(self, report_every=1000):
        print("🧬 Starting analysis...")
        start = time.time()
        lengths = []
        gc_contents = []
        lengths = self.lengths()
        total = len(lengths)
        for i, seq in enumerate(self.sequences):
            if (i + 1) % report_every == 0:
                print(f"Processed {i + 1}/{total} sequences...")
        total = len(lengths)
        longest = max(lengths) if lengths else 0
        shortest = min(lengths) if lengths else 0
        avg_len = statistics.mean(lengths) if lengths else 0
        median_len = statistics.median(lengths) if lengths else 0
        avg_gc = statistics.mean(gc_contents) if gc_contents else 0
        elapsed = time.time() - start
        elapsed = time.time() - start
        print("\n✅ Analysis complete!")
        print(f"Total sequences: {total}")
        print(f"Longest: {max(lengths)} bp")
        print(f"Shortest: {min(lengths)} bp")
        print(f"Average length: {statistics.mean(lengths):.2f} bp")
        print(f"Median length: {statistics.median(lengths)} bp")
        print(f"Average GC content: {self.average_gc_content()}%")
        print(f"Time elapsed: {elapsed:.2f} seconds")
        # Save results for later
        self.results = [
            ["Total Sequences", total],
            ["Longest", longest],
            ["Shortest", shortest],
            ["Average Length", avg_len],
            ["Median Length", median_len],
            ["Average GC Content", avg_gc],
            ["Time Elapsed (seconds)", elapsed]
        ]
        # 🔽 Call the CSV saving function here
        self.save_to_csv()
    _file_counter = 0  # class variable to count how many results we saved    
    def save_to_csv(self, output_file=None):
        # 🔹 If no file name given, build one automatically based on the FASTA name
        if output_file is None:
            # Increment the class counter
             DNAFile._file_counter += 1
            # Use user's name if provided, otherwise make a generic name
             if self.name:
                 base_name = self.name.replace(" ", "_")  # clean spaces
             else:
                 base_name = f"result_{DNAFile._file_counter}"

             output_file = f"{base_name}.csv"
        # Save in the same folder as the FASTA file
        folder = os.path.dirname(self.filepath)
        full_path = os.path.join(folder, output_file)  
            
        with open(output_file, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Sequence_ID", "Length", "GC_Content(%)"])
            for record in SeqIO.parse(self.filepath, "fasta"):
                seq = str(record.seq).upper()
                writer.writerow([record.id, len(seq), self.gc_content(seq)])    
        print(f"\n💾 Results saved to '{os.path.abspath(full_path)}'")

In [2]:
dna1 = DNAFile(r"C:\Users\QBS-OM\OneDrive\Desktop\AI in Biomedical Sciences course\gene.fasta",name="dna1")
dna1.analyze(report_every=1000)

🧬 Starting analysis...

✅ Analysis complete!
Total sequences: 1
Longest: 1608 bp
Shortest: 1608 bp
Average length: 1608.00 bp
Median length: 1608 bp
Average GC content: 40.11%
Time elapsed: 0.00 seconds

💾 Results saved to 'C:\Users\QBS-OM\OneDrive\Desktop\AI in Biomedical Sciences course\dna1.csv'


In [3]:
dna2=DNAFile(r"C:\Users\QBS-OM\OneDrive\Desktop\Geneious\Sample Importable Files\sampleDNA.fasta")
dna2.analyze(report_every=1000)

🧬 Starting analysis...

✅ Analysis complete!
Total sequences: 10
Longest: 705 bp
Shortest: 684 bp
Average length: 689.40 bp
Median length: 684.0 bp
Average GC content: 41.91%
Time elapsed: 0.00 seconds

💾 Results saved to 'C:\Users\QBS-OM\OneDrive\Desktop\Geneious\Sample Importable Files\result_2.csv'


In [4]:
dna3=DNAFile(r"C:\Users\QBS-OM\OneDrive\Desktop\Geneious\Sample Importable Files\sampleAlignment.fasta")
dna3.analyze(report_every=1000)

🧬 Starting analysis...

✅ Analysis complete!
Total sequences: 5
Longest: 499 bp
Shortest: 499 bp
Average length: 499.00 bp
Median length: 499 bp
Average GC content: 51.06%
Time elapsed: 0.00 seconds

💾 Results saved to 'C:\Users\QBS-OM\OneDrive\Desktop\Geneious\Sample Importable Files\result_3.csv'


In [5]:
dna_scaly_foot_gastropod=DNAFile(r"C:\Users\QBS-OM\OneDrive\Desktop\scaly-foot gastropod (Chrysomallon squamiferum)\GCA_012295275.1_Csq_v1.1.fa_genomic.fna", name="scaly_foot_gastropod")
dna_scaly_foot_gastropod.analyze(report_every=1000)

🧬 Starting analysis...

✅ Analysis complete!
Total sequences: 22
Longest: 49218974 bp
Shortest: 10809 bp
Average length: 18391401.59 bp
Median length: 18302362.5 bp
Average GC content: 32.6%
Time elapsed: 0.00 seconds

💾 Results saved to 'C:\Users\QBS-OM\OneDrive\Desktop\scaly-foot gastropod (Chrysomallon squamiferum)\scaly_foot_gastropod.csv'


In [7]:
example= DNAFile(r"C:\Users\QBS-OM\OneDrive\Desktop\my github projects\dna analyzer\gene.fasta", name="example")
example.analyze(report_every=1000)

🧬 Starting analysis...

✅ Analysis complete!
Total sequences: 1
Longest: 1608 bp
Shortest: 1608 bp
Average length: 1608.00 bp
Median length: 1608 bp
Average GC content: 40.11%
Time elapsed: 0.00 seconds

💾 Results saved to 'C:\Users\QBS-OM\OneDrive\Desktop\my github projects\dna analyzer\example.csv'
