In [5]:
from pathlib import Path
import csv

def _iter_fasta_records(lines):
    """Yield ``(header, sequence)`` pairs parsed from FASTA-style lines.

    Blank lines are ignored and every line is stripped of surrounding
    whitespace. ``header`` is the text after the leading ``>``; it is
    ``None`` for sequence data that appears before the first header line.
    """
    header = None
    fragments = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # A new header closes the record collected so far (if any).
            if header is not None or fragments:
                yield header, "".join(fragments)
            header = line[1:].strip()
            fragments = []
        else:
            fragments.append(line)
    # Flush the last record of the input.
    if header is not None or fragments:
        yield header, "".join(fragments)


def txt_to_csv(input_dir: "str | Path", output_file: "str | Path") -> None:
    """Read all FASTA-style .txt files in input_dir and write them to a CSV.

    Each ``>`` header line starts a new record; the sequence lines that
    follow it are concatenated into one string. The CSV has the columns
    ``clone_name`` and ``dna_sequence``, with files processed in sorted
    path order.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.txt`` files.
        output_file: Destination CSV path; missing parent directories are
            created.

    Returns:
        None. If no ``.txt`` files are found, a message is printed and no
        output file is created.
    """
    input_dir = Path(input_dir)
    txt_files = sorted(input_dir.glob("*.txt"))
    if not txt_files:
        print(f"No .txt files found in {input_dir}")
        return

    output_file = Path(output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output identical across platforms instead
    # of depending on the locale's default codec.
    with output_file.open("w", newline="", encoding="utf-8") as out_f:
        writer = csv.writer(out_f)
        writer.writerow(["clone_name", "dna_sequence"])

        for txt in txt_files:
            # "utf-8-sig" transparently drops a leading byte-order mark, which
            # would otherwise hide the first ">" and lose that record.
            lines = txt.read_text(encoding="utf-8-sig").splitlines()
            for clone_name, sequence in _iter_fasta_records(lines):
                if clone_name is None:
                    # Data without a header has no clone name to file it
                    # under; skip it, but say so rather than drop it silently.
                    print(
                        "Warning: ignored sequence data before the first "
                        f"'>' header in {txt}"
                    )
                    continue
                writer.writerow([clone_name, sequence])

    print(f"✔ Wrote {output_file} from {len(txt_files)} files.")

In [None]:
# Convert every FASTA-style .txt file under data/raw into a single CSV.
txt_to_csv(
    input_dir="data/raw",
    output_file="output/project_sequences.csv",
)