In [1]:
import re
import os
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
import PySimpleGUI as sg

In [2]:
def load_sgRNAs(csv_path):
    """Load the sgRNA table from a semicolon-delimited CSV file.

    Column headers are stripped of surrounding whitespace and lower-cased
    before the required ``name`` and ``sequence`` columns are checked.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        The loaded DataFrame with an extra ``sgRNA_only`` column that holds
        only the upper-case characters of each ``sequence`` entry.

    Raises:
        ValueError: If the ``name`` or ``sequence`` column is missing.
    """
    def keep_uppercase(value):
        # Lower-case characters in the sequence are dropped; only the
        # upper-case characters are kept, in their original order.
        return "".join(filter(str.isupper, str(value)))

    table = pd.read_csv(csv_path, sep=';')
    table.columns = table.columns.str.strip().str.lower()

    required = ("name", "sequence")
    if not all(col in table.columns for col in required):
        raise ValueError(f"CSV must contain 'name' and 'sequence' columns. Found: {table.columns.tolist()}")

    table["sgRNA_only"] = table["sequence"].apply(keep_uppercase)
    return table

In [3]:
def extract_ab1_sequence(file_path):
    """Read one record from a trace file using the ``"abi"`` SeqIO format.

    Args:
        file_path: Path to the trace file.

    Returns:
        A ``(sequence, qualities)`` tuple: the record's sequence as a string
        and its ``phred_quality`` letter annotations, or an empty list when
        that annotation is absent.
    """
    trace = SeqIO.read(file_path, "abi")
    bases = str(trace.seq)
    qualities = trace.letter_annotations.get("phred_quality", [])
    return bases, qualities

In [4]:
def _find_matching_guides(fasta_seq, primers):
    """Return (names, sequences) of the guides that occur in ``fasta_seq``.

    Guides whose ``sgRNA_only`` value is empty or not a string are skipped:
    an empty string is a substring of every sequence and would otherwise be
    reported as a match for every file.
    """
    names = []
    seqs = []
    for _, row in primers.iterrows():
        raw_guide = row["sgRNA_only"]
        guide_seq = raw_guide.upper() if isinstance(raw_guide, str) else ""
        if not guide_seq:
            continue
        if guide_seq in fasta_seq:
            # Names may have been parsed as numbers by pandas; join() needs str.
            names.append(str(row["name"]))
            seqs.append(guide_seq)
    return names, seqs


def process_folder(folder, primers, min_quality=20):
    """Summarise every ``.ab1`` file in ``folder``.

    For each ``.ab1`` file the average of its quality values is computed and
    compared against ``min_quality``. If a ``.fasta`` file with the same base
    name exists next to it, its upper-cased sequence is searched for each
    guide in ``primers``.

    Args:
        folder: Directory containing the ``.ab1`` (and optional ``.fasta``) files.
        primers: DataFrame with ``name`` and ``sgRNA_only`` columns.
        min_quality: Minimum average quality for ``Quality OK`` to be true.

    Returns:
        A DataFrame with one row per ``.ab1`` file and the columns
        ``Filename``, ``Avg Quality``, ``Quality OK``,
        ``Matching sgRNA name(s)`` and ``Matched guide sequence(s)``.
        It is empty when the folder holds no ``.ab1`` files.
    """
    summary = []
    # os.listdir() order is arbitrary; sort so the summary is reproducible.
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".ab1"):
            continue

        ab1_path = os.path.join(folder, file)
        # splitext swaps only the extension; str.replace would also rewrite
        # any other ".ab1" occurring earlier in the file name.
        fasta_path = os.path.join(folder, os.path.splitext(file)[0] + ".fasta")

        _, ab1_qual = extract_ab1_sequence(ab1_path)
        avg_quality = sum(ab1_qual) / len(ab1_qual) if ab1_qual else 0
        quality_ok = avg_quality >= min_quality

        matched_guide_seqs_str = ""
        if os.path.exists(fasta_path):
            fasta_seq = str(SeqIO.read(fasta_path, "fasta").seq).upper()
            matched_guides, matched_guide_seqs = _find_matching_guides(fasta_seq, primers)
            if matched_guides:
                matched_guides_str = ";".join(matched_guides)
                matched_guide_seqs_str = ";".join(matched_guide_seqs)
            else:
                matched_guides_str = "No Match"
        else:
            matched_guides_str = "FASTA Not Found"

        summary.append({
            "Filename": file,
            "Avg Quality": round(avg_quality, 2),
            "Quality OK": quality_ok,
            "Matching sgRNA name(s)": matched_guides_str,
            "Matched guide sequence(s)": matched_guide_seqs_str
        })

    return pd.DataFrame(summary)

In [5]:
def save_summary(df, folder):
    """Write ``df`` to ``summary.csv`` inside ``folder``.

    The DataFrame index is not written to the file.

    Args:
        df: DataFrame to save.
        folder: Directory in which ``summary.csv`` is created.
    """
    df.to_csv(os.path.join(folder, "summary.csv"), index=False)

In [6]:
def gui():
    """Run the "Golden Gate Plasmid Checker" window until it is closed.

    The window asks for an sgRNA CSV file and a folder of ``.ab1``/``.fasta``
    files. Pressing "Run" loads the guides, processes the folder and writes
    the summary into that folder; the outcome is reported in a popup. Errors
    raised while processing are shown in an error popup and the window stays
    open so the inputs can be corrected.
    """
    sg.theme("LightBlue2")
    # Explicit keys keep the value lookup independent of element order.
    layout = [
        [sg.Text("sgRNA CSV:"), sg.InputText(key="-CSV-"), sg.FileBrowse(file_types=(("CSV Files", "*.csv"),))],
        [sg.Text("Folder with .ab1/.fasta files:"), sg.InputText(key="-FOLDER-"), sg.FolderBrowse()],
        [sg.Button("Run"), sg.Button("Exit")]
    ]
    window = sg.Window("Golden Gate Plasmid Checker", layout)

    try:
        while True:
            event, values = window.read()
            if event in (sg.WINDOW_CLOSED, "Exit"):
                break
            if event == "Run":
                primer_file = (values["-CSV-"] or "").strip()
                folder = (values["-FOLDER-"] or "").strip()

                # Validate up front so empty or mistyped inputs produce a
                # clear message instead of a low-level OS/pandas error.
                if not os.path.isfile(primer_file):
                    sg.popup_error("Error", "Please select an existing sgRNA CSV file.")
                    continue
                if not os.path.isdir(folder):
                    sg.popup_error("Error", "Please select an existing folder with .ab1/.fasta files.")
                    continue

                try:
                    primers = load_sgRNAs(primer_file)
                    result_df = process_folder(folder, primers)
                    save_summary(result_df, folder)
                    sg.popup("Done!", f"Summary saved in {folder}")
                except Exception as e:
                    sg.popup_error("Error", str(e))
    finally:
        # Always release the window, even if the event loop is interrupted.
        window.close()

In [7]:
# Start the GUI only when this code runs as the main module, not when it is
# imported from elsewhere.
if __name__ == "__main__":
    gui()