# In [1]:
import tkinter as tk
from tkinter import messagebox, filedialog
import pandas as pd
import numpy as np

def hamming_distance(s1, s2):
    """Count the positions at which two values differ, after ``str()`` conversion.

    Inputs of unequal length are allowed: every position that exists in only
    one of the two strings counts as one difference.

    Args:
        s1: First value; converted with ``str()``.
        s2: Second value; converted with ``str()``.

    Returns:
        The number of differing positions as an ``int``.
    """
    left, right = str(s1), str(s2)
    # Positions present in both strings: count the character mismatches.
    mismatches = sum(1 for a, b in zip(left, right) if a != b)
    # Positions past the end of the shorter string always count as different.
    overhang = abs(len(left) - len(right))
    return mismatches + overhang


def hamming_similarity(s1, s2):
    """Return the Hamming-based similarity of two values as a percentage.

    Both values are converted with ``str()``. The similarity is the share of
    positions (out of the longer string's length) that do not differ
    according to ``hamming_distance``.

    Args:
        s1: First value.
        s2: Second value.

    Returns:
        A float percentage rounded to two decimals; ``100.0`` when both
        strings are empty.
    """
    left = str(s1)
    right = str(s2)
    longest = max(len(left), len(right))
    if not longest:
        # Two empty strings are treated as identical.
        return 100.0
    fraction_different = hamming_distance(left, right) / longest
    return round((1 - fraction_different) * 100, 2)


def jaro_winkler_similarity(s1, s2):
    """Return the Jaro-Winkler similarity of two strings as a percentage.

    The comparison is case-insensitive: both strings are upper-cased first.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A float percentage rounded to two decimals. ``100.0`` when the
        upper-cased strings are equal, ``0.0`` when exactly one is empty or
        when no characters match within the search window.
    """
    a, b = s1.upper(), s2.upper()
    if a == b:
        return 100.0
    if not a or not b:
        return 0.0

    len_a, len_b = len(a), len(b)
    # Characters only count as matching when their positions are at most
    # this far apart.
    window = max(len_a, len_b) // 2 - 1
    used_a = [False] * len_a
    used_b = [False] * len_b

    # Greedily pair each character of `a` with the first unused equal
    # character of `b` inside the window.
    for i, ch in enumerate(a):
        lo = max(0, i - window)
        hi = min(i + window + 1, len_b)
        for j in range(lo, hi):
            if not used_b[j] and b[j] == ch:
                used_a[i] = used_b[j] = True
                break

    common = sum(used_a)
    if not common:
        return 0.0

    # Matched characters in their original order; pairs that disagree are
    # half-transpositions.
    seq_a = [ch for ch, hit in zip(a, used_a) if hit]
    seq_b = [ch for ch, hit in zip(b, used_b) if hit]
    transposed = sum(x != y for x, y in zip(seq_a, seq_b)) / 2

    jaro = (common / len_a + common / len_b + (common - transposed) / common) / 3

    # Winkler boost: length of the common prefix, capped at 4 characters.
    shared_prefix = 0
    for x, y in zip(a[:4], b[:4]):
        if x != y:
            break
        shared_prefix += 1

    boosted = jaro + (shared_prefix * 0.1 * (1 - jaro))
    return round(boosted * 100, 2)


def smith_waterman(seq1, seq2, match=2, mismatch=-1, gap=-1):
    """Return a Smith-Waterman local-alignment similarity as a percentage.

    The best local alignment score is normalised by the score of a perfect
    match over the shorter sequence (``min(len) * match``).

    Args:
        seq1: First sequence.
        seq2: Second sequence.
        match: Score added when two characters are equal.
        mismatch: Score added when two characters differ.
        gap: Score added for an insertion or deletion.

    Returns:
        A float percentage rounded to two decimals. ``0.0`` when either
        sequence is empty or ``match`` is not positive (no meaningful
        normalisation exists).
    """
    max_possible = min(len(seq1), len(seq2)) * match
    if max_possible <= 0:
        # Always return a float so callers get a consistent type.
        return 0.0

    # The scoring is symmetric, so put the shorter sequence on the column
    # axis to keep the rolling rows as small as possible.
    if len(seq2) > len(seq1):
        seq1, seq2 = seq2, seq1

    best = 0
    # Only the previous row is ever read, so keep two rows instead of the
    # full (m+1) x (n+1) matrix.
    prev_row = [0] * (len(seq2) + 1)
    for a in seq1:
        curr_row = [0]
        for j, b in enumerate(seq2, start=1):
            diag = prev_row[j - 1] + (match if a == b else mismatch)
            delete = prev_row[j] + gap
            insert = curr_row[j - 1] + gap
            cell = max(0, diag, delete, insert)
            curr_row.append(cell)
            if cell > best:
                best = cell
        prev_row = curr_row

    return round((best / max_possible) * 100, 2)

def run_analysis(algo):
    """Score the two entered sequences with the chosen algorithm and show it.

    Reads both sequences from the entry widgets (stripped and upper-cased),
    warns if either is empty, and writes the formatted result into
    ``result_text``.

    Args:
        algo: One of ``"Hamming"``, ``"Jaro-Winkler"``, ``"Smith-Waterman"``
            or ``"All"``. Any other value leaves the result unchanged.
    """
    first = seq1_entry.get().strip().upper()
    second = seq2_entry.get().strip().upper()

    if not (first and second):
        messagebox.showwarning("Input Error", "Please enter both DNA sequences.")
        return

    # (algorithm name, label shown in the result, scoring function)
    single_reports = (
        ("Hamming", "Hamming Distance Similarity", hamming_similarity),
        ("Jaro-Winkler", "Jaro-Winkler Similarity", jaro_winkler_similarity),
        ("Smith-Waterman", "Smith-Waterman Similarity", smith_waterman),
    )
    for name, label, scorer in single_reports:
        if algo == name:
            score = scorer(first, second)
            result_text.set(f"{label}: {score:.2f}%")
            return

    if algo != "All":
        return

    # Combined report: all three scores plus their arithmetic mean.
    h_score = hamming_similarity(first, second)
    jw_score = jaro_winkler_similarity(first, second)
    sw_score = smith_waterman(first, second)
    mean_score = round((h_score + jw_score + sw_score) / 3, 2)
    result_text.set(
        f" Hamming Similarity: {h_score:.2f}%\n"
        f" Jaro-Winkler Similarity: {jw_score:.2f}%\n"
        f" Smith-Waterman Similarity: {sw_score:.2f}%\n"
        f"-------------------------------------\n"
        f" Average Similarity: {mean_score:.2f}%"
    )


def load_from_csv():
    """Ask the user for a CSV file and load it into the global ``df``.

    The file must contain a ``Sequence`` column, because ``fill_from_csv``
    reads sequences from that column. If the dialog is cancelled, the file
    cannot be parsed, or the column is missing, the previously loaded data
    (if any) is left untouched and an error dialog explains the problem.
    """
    global df
    filepath = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
    if not filepath:
        # Dialog was dismissed without choosing a file.
        return

    try:
        loaded = pd.read_csv(filepath)
    except Exception as e:  # GUI boundary: report any read/parse failure.
        messagebox.showerror("Error", f"Failed to load CSV file:\n{e}")
        return

    # Validate before committing, so a bad file never replaces good data and
    # the problem is reported here rather than as a later lookup failure.
    if "Sequence" not in loaded.columns:
        messagebox.showerror(
            "Error",
            'Failed to load CSV file:\nNo "Sequence" column found.',
        )
        return

    df = loaded
    messagebox.showinfo("Success", f"Loaded {len(df)} sequences from file!")


def fill_from_csv():
    """Copy two sequences from the loaded CSV into the sequence entry fields.

    The row numbers typed into ``csv1_entry`` and ``csv2_entry`` are
    1-based; each is looked up as index label ``number - 1`` in the
    ``Sequence`` column of the global ``df``. The entry fields are only
    modified once both lookups have succeeded, so a bad row number never
    wipes what the user had already entered.
    """
    if df is None:
        messagebox.showwarning("No Data", "Please load a CSV file first.")
        return

    if "Sequence" not in df.columns:
        messagebox.showerror("Error", 'Loaded CSV has no "Sequence" column.')
        return

    try:
        idx1 = int(csv1_entry.get())
        idx2 = int(csv2_entry.get())
        first = df.loc[idx1 - 1, "Sequence"]
        second = df.loc[idx2 - 1, "Sequence"]
    except (ValueError, TypeError, LookupError):
        # Non-numeric input, or a row number that is not in the table.
        messagebox.showerror("Error", "Invalid indices entered.")
        return

    # Both values resolved: now it is safe to replace the current contents.
    seq1_entry.delete(0, tk.END)
    seq1_entry.insert(0, first)
    seq2_entry.delete(0, tk.END)
    seq2_entry.insert(0, second)

# --- Main window -----------------------------------------------------------
# Everything below runs at module level: it builds the window, wires the
# buttons to the callbacks defined above, and then enters the Tk event loop.
root = tk.Tk()
root.title("🧬GeneCompare3X-Triple method DNA similarity analyzer++")
root.geometry("720x540")
root.config(bg="#f9fafb")

# Module-level state shared with the callbacks:
#   df          - set by load_from_csv, read by fill_from_csv (None until a
#                 CSV has been loaded).
#   result_text - written by run_analysis and displayed by the results label
#                 at the bottom of the window via `textvariable`.
df = None
result_text = tk.StringVar()

# Heading shown at the top of the window.
tk.Label(root, text="Triple method DNA similarity analyzer++", font=("Arial", 18, "bold"),
         bg="#f9fafb", fg="#2a4365").pack(pady=10)

# Input area: a grid-managed frame holding the two sequence entries (rows 0-1)
# and the optional CSV controls (rows 2-3).
frame = tk.Frame(root, bg="#f9fafb")
frame.pack(pady=5)

tk.Label(frame, text="DNA Sequence 1:", font=("Arial", 12),
         bg="#f9fafb").grid(row=0, column=0, sticky="e", padx=5)
seq1_entry = tk.Entry(frame, width=70)
seq1_entry.grid(row=0, column=1, pady=5)

tk.Label(frame, text="DNA Sequence 2:", font=("Arial", 12),
         bg="#f9fafb").grid(row=1, column=0, sticky="e", padx=5)
seq2_entry = tk.Entry(frame, width=70)
seq2_entry.grid(row=1, column=1, pady=5)

# CSV controls: a load button, two small entries for the row numbers to use,
# and a button that copies those rows into the sequence entries.
tk.Label(frame, text="(Optional) Load from CSV:", font=("Arial", 11, "italic"),
         bg="#f9fafb", fg="#4a5568").grid(row=2, column=0, columnspan=2, pady=10)
tk.Button(frame, text=" Load CSV", command=load_from_csv,
          bg="#edf2f7", relief="raised").grid(row=3, column=0, pady=5)

# NOTE: both index entries and the "Use CSV Indexes" button are placed in the
# same grid cell (row 3, column 1), all anchored west; the increasing padx
# values (10, 80, 150) are what keep them from sitting on top of each other.
csv1_entry = tk.Entry(frame, width=5)
csv2_entry = tk.Entry(frame, width=5)
csv1_entry.grid(row=3, column=1, sticky="w", padx=10)
csv2_entry.grid(row=3, column=1, sticky="w", padx=80)
tk.Button(frame, text="Use CSV Indexes", command=fill_from_csv,
          bg="#e2e8f0").grid(row=3, column=1, sticky="w", padx=150)

# Algorithm buttons: each one calls run_analysis with the algorithm name it
# expects ("Hamming", "Jaro-Winkler", "Smith-Waterman" or "All").
btn_frame = tk.Frame(root, bg="#f9fafb")
btn_frame.pack(pady=15)

tk.Button(btn_frame, text=" Hamming", width=15, command=lambda: run_analysis("Hamming"),
          bg="#bee3f8").grid(row=0, column=0, padx=10)
tk.Button(btn_frame, text=" Jaro-Winkler", width=15, command=lambda: run_analysis("Jaro-Winkler"),
          bg="#fbd38d").grid(row=0, column=1, padx=10)
tk.Button(btn_frame, text=" Smith-Waterman", width=15, command=lambda: run_analysis("Smith-Waterman"),
          bg="#c6f6d5").grid(row=0, column=2, padx=10)
tk.Button(btn_frame, text=" Compare All", width=15, command=lambda: run_analysis("All"),
          bg="#fbb6ce").grid(row=1, column=1, pady=10)

# Results area: a heading plus a multi-line label bound to result_text.
tk.Label(root, text="Results:", font=("Arial", 14, "bold"),
         bg="#f9fafb", fg="#2a4365").pack(pady=10)
tk.Label(root, textvariable=result_text, font=("Consolas", 12), bg="#edf2f7",
         fg="#2d3748", width=80, height=8, relief="groove",
         wraplength=600, justify="left").pack(pady=5)

# Hand control to Tk's event loop.
root.mainloop()