# In [None]:
import tkinter as tk
from tkinter import ttk, filedialog, scrolledtext
import pandas as pd
import ast
from itertools import combinations


class DataProcessorGUI:
    """Tkinter front end for the association-rule mining functions in this file.

    The window collects an input file path, the percentage of rows to read,
    a minimum support and a minimum confidence, then writes the frequent
    itemsets, their sub-combinations and the resulting rules into a
    scrolled text box.
    """

    def __init__(self, root):
        """Configure the main window and ttk styles, then build the widgets.

        Args:
            root: the ``tk.Tk`` (or compatible) top-level window to populate.
        """
        self.root = root
        self.root.title("Data Processor")
        self.root.geometry("800x700")

        self.style = ttk.Style()
        self.style.configure('Custom.TButton', font=('Arial', 12))

        # Create a custom style for the frames with pink background
        self.style.configure('Custom.TFrame', background='#FFC0CB')  # Pink color

        self.create_widgets()

    def create_widgets(self):
        """Create and lay out the file picker, parameter entries, button and output box."""
        # File Selection Frame
        self.file_frame = ttk.Frame(self.root, style='Custom.TFrame')
        self.file_frame.pack(pady=20)

        self.file_path_label = ttk.Label(self.file_frame, text="Select Input File:", font=('Arial', 14))
        self.file_path_label.grid(row=0, column=0, padx=10, pady=5)

        self.file_path_entry = ttk.Entry(self.file_frame, width=50, font=('Arial', 12))
        self.file_path_entry.grid(row=0, column=1, padx=10, pady=5)

        self.browse_button = ttk.Button(self.file_frame, text="Browse", command=self.browse_file,
                                        style='Custom.TButton')
        self.browse_button.grid(row=0, column=2, padx=10, pady=5)

        # Data Configuration Frame
        self.config_frame = ttk.Frame(self.root, style='Custom.TFrame')
        self.config_frame.pack(pady=20)

        self.percent_label = ttk.Label(self.config_frame, text="Select Percentage of Data to Read:",
                                       font=('Arial', 14))
        self.percent_label.grid(row=0, column=0, padx=10, pady=5)

        self.percent_entry = ttk.Entry(self.config_frame, font=('Arial', 12))
        self.percent_entry.grid(row=0, column=1, padx=10, pady=5)

        self.min_support_label = ttk.Label(self.config_frame, text="Minimum Support:", font=('Arial', 14))
        self.min_support_label.grid(row=1, column=0, padx=10, pady=5)

        self.min_support_entry = ttk.Entry(self.config_frame, font=('Arial', 12))
        self.min_support_entry.grid(row=1, column=1, padx=10, pady=5)

        self.min_confidence_label = ttk.Label(self.config_frame, text="Minimum Confidence:", font=('Arial', 14))
        self.min_confidence_label.grid(row=2, column=0, padx=10, pady=5)

        self.min_confidence_entry = ttk.Entry(self.config_frame, font=('Arial', 12))
        self.min_confidence_entry.grid(row=2, column=1, padx=10, pady=5)

        # Process Button
        self.process_button = ttk.Button(self.root, text="Process Data", command=self.process_data,
                                         style='Custom.TButton')
        self.process_button.pack(pady=10)

        # Output Text
        self.output_text = scrolledtext.ScrolledText(self.root, height=15, width=70, font=('Arial', 12))
        self.output_text.pack(pady=10)

    def browse_file(self):
        """Open a file dialog and copy the chosen path into the path entry."""
        # NOTE(review): the dialog also offers .txt and .xlsx, but load_data
        # only calls pd.read_csv -- confirm those formats are really supported.
        file_path = filedialog.askopenfilename(
            filetypes=[("CSV Files", "*.csv"), ("Text Files", "*.txt"), ("Excel Files", "*.xlsx")])
        # A cancelled dialog yields an empty value; keep whatever path the
        # user had already entered instead of wiping it.
        if not file_path:
            return
        self.file_path_entry.delete(0, tk.END)
        self.file_path_entry.insert(0, file_path)

    def process_data(self):
        """Run the mining pipeline on the selected file and display the results.

        Reads the three numeric parameters from the entries, loads the data,
        mines frequent itemsets and rules, and writes everything to the
        output box. Any exception raised along the way is reported in the
        output box rather than propagating into the Tk event loop.
        """
        self.output_text.delete('1.0', tk.END)
        file_path = self.file_path_entry.get()
        try:
            percentage = float(self.percent_entry.get())
            min_support = float(self.min_support_entry.get())
            min_confidence = float(self.min_confidence_entry.get())
            Groubed_data = load_data(file_path, percentage)
            last_freq = apriori(Groubed_data, min_support)
            # apriori may hand back a plain list (nothing frequent enough)
            # instead of a DataFrame; normalise both to a list of rows so the
            # "no result" case prints empty sections rather than an
            # AttributeError message.
            if isinstance(last_freq, pd.DataFrame):
                last_freq = last_freq.values.tolist()
            else:
                last_freq = list(last_freq)
            total_conff = generate_strong_association_rules(last_freq, Groubed_data, min_confidence)

            # Output frequent itemsets and support counts
            self.output_text.insert(tk.END, "Frequent Itemsets and Support Counts:\n")
            for itemset in last_freq:
                self.output_text.insert(tk.END, f"{itemset[0]}: {itemset[1]}\n")

            # Output all combinations
            self.output_text.insert(tk.END, "\nAll Combinations:\n")
            all_combinations = generate_all_combinations(last_freq)
            for combination in all_combinations:
                self.output_text.insert(tk.END, f"{combination}\n")

            # Output association rules
            self.output_text.insert(tk.END, "\nAssociation Rules:\n")
            for rule in total_conff:
                self.output_text.insert(tk.END, f"{rule}\n")

        except Exception as e:
            # Top-level GUI boundary: show the failure to the user.
            self.output_text.insert(tk.END, f"An error occurred: {e}")


# Module-level log of every value passed to output(), in call order.
static_list = []


def output(param):
    """Record *param* by adding it to the end of the module-level ``static_list``."""
    static_list.extend([param])


def load_data(file_path, percentage):
    """Load the leading ``percentage`` percent of a CSV and group items per transaction.

    The file is parsed once; the first ``int(percentage / 100 * total_rows)``
    rows are kept and the ``Items`` values are collected into a set for each
    ``TransactionNo``.

    Args:
        file_path: path (or anything ``pd.read_csv`` accepts) of the CSV file,
            which must contain ``TransactionNo`` and ``Items`` columns.
        percentage: share of the rows to keep, expressed in percent. Values
            above 100 simply keep every row.

    Returns:
        DataFrame with columns ``TransactionNo`` and ``ItemsSet``.

    Raises:
        ValueError: if the computed row count is negative.
    """
    percent = percentage / 100
    # Read the file a single time instead of once to count rows and once
    # more to load them.
    input_df = pd.read_csv(file_path)
    row_limit = int(percent * input_df.shape[0])
    if row_limit < 0:
        # A negative slice bound would silently drop rows from the end.
        raise ValueError("percentage must not select a negative number of rows")
    input_df = input_df.iloc[:row_limit]
    Groubed_data = input_df.groupby('TransactionNo')['Items'].apply(set).reset_index(name='ItemsSet')
    return Groubed_data


def calculate_support_count_item(trans_items):
    """Count how often each individual item occurs across the transactions.

    Args:
        trans_items: iterable of item collections, one per transaction.

    Returns:
        DataFrame with columns ``Item`` and ``Count``, ordered by ``Count``
        from highest to lowest.
    """
    tallies = {}
    for basket in trans_items:
        for element in basket:
            tallies[element] = tallies.get(element, 0) + 1
    table = pd.DataFrame(list(tallies.items()), columns=['Item', 'Count'])
    return table.sort_values(by='Count', ascending=False)


def count_itemset_frequent(transactions, itemsets):
    """Count, for each candidate itemset, the transactions that contain all of it.

    Args:
        transactions: iterable of item collections supporting ``in``.
        itemsets: iterable of hashable candidate itemsets (e.g. tuples).

    Returns:
        DataFrame with columns ``Item`` (the itemset) and ``Count``.
        Candidates that occur in no transaction are left out.
    """
    tallies = {}
    for candidate in itemsets:
        hits = 0
        for basket in transactions:
            has_missing_member = any(member not in basket for member in candidate)
            if not has_missing_member:
                hits += 1
        if hits:
            # Accumulate so a candidate listed twice is counted twice.
            tallies[candidate] = tallies.get(candidate, 0) + hits
    return pd.DataFrame(tallies.items(), columns=['Item', 'Count'])


def filter_by_support_count(data, supp):
    """Keep only the rows whose ``Count`` is at least ``supp``.

    Uses a boolean mask instead of iterating row by row, so the original
    index labels, column names and dtypes are preserved even when no row
    qualifies.

    Args:
        data: DataFrame with a ``Count`` column.
        supp: minimum support count (inclusive).

    Returns:
        A new DataFrame containing the qualifying rows.
    """
    if data.empty:
        # Nothing to filter; also tolerates an empty frame with no columns.
        return data.copy()
    return data[data['Count'] >= supp]


def apriori(Groubed_data, supp_count):
    """Find the largest frequent itemsets (size two or more) in the transactions.

    Starts from the single items that meet ``supp_count``, then repeatedly
    builds larger candidate itemsets from the items that survived the
    previous level, counts them and filters by support. Every intermediate
    table is also passed to ``output``.

    Args:
        Groubed_data: object whose ``"ItemsSet"`` entry holds one item
            collection per transaction.
        supp_count: minimum support count an itemset needs to be kept.

    Returns:
        DataFrame of the last non-empty level of frequent itemsets. It is an
        empty DataFrame when no itemset of size two or more is frequent, so
        callers can always treat the result as a DataFrame.
    """
    a = calculate_support_count_item(Groubed_data["ItemsSet"])
    a = filter_by_support_count(a, supp_count)
    output(a)
    # Start from an empty table rather than a bare list so the return type
    # does not depend on how many items turned out to be frequent.
    last_freq = pd.DataFrame(columns=['Item', 'Count'])
    i = 2
    # A level with fewer than two itemsets cannot be combined any further.
    while len(a) > 1:
        if isinstance(a.iloc[0, 0], tuple):
            # Previous level already holds itemsets: grow the size by one and
            # draw candidates from every item that appears in them.
            i += 1
            unique_items = set()
            for itemset in a['Item']:
                unique_items.update(itemset)
            candidates = list(combinations(unique_items, i))
        else:
            # Previous level holds single items: pair them up.
            candidates = list(combinations(a['Item'], 2))
        counted_itemsets = count_itemset_frequent(Groubed_data['ItemsSet'], candidates)
        a = filter_by_support_count(counted_itemsets, supp_count)
        output(a)
        if not a.empty:
            last_freq = a
    if len(a) == 0:
        print("No more frequent item sets found.")
    return last_freq


def get_all_combinations(row):
    """Format every split of an itemset into a left-hand subset and the remaining items.

    Args:
        row: a two-element ``(items, count)`` sequence; only ``items`` is used
            and its members must be strings.

    Returns:
        list of strings of the form ``"<subset tuple>-(<remaining items>)"``,
        one for each non-empty proper subset of ``items``, ordered by subset
        size.
    """
    items, _count = row
    formatted = []
    for size in range(1, len(items)):
        for subset in combinations(items, size):
            remainder = [member for member in items if member not in subset]
            formatted.append("{}-({})".format(subset, ", ".join(remainder)))
    return formatted


def _antecedent_support(transactions, antecedent):
    """Return the number of transactions that contain every item of ``antecedent``."""
    return sum(
        1 for transaction in transactions
        if all(member in transaction for member in antecedent)
    )


def generate_strong_association_rules(last_freq, Groubed_data, min_confidence):
    """Return the rules whose confidence is at least ``min_confidence``.

    For each frequent itemset, every non-empty proper subset is tried as the
    left-hand side of a rule. Confidence is the itemset's count divided by
    the number of transactions that contain the left-hand side.

    The left-hand sides are generated directly rather than being parsed back
    out of a formatted string, so item names containing ``-`` or ``, `` no
    longer break the computation.

    Args:
        last_freq: iterable of ``(items, count)`` pairs.
        Groubed_data: object whose ``"ItemsSet"`` entry iterates over the
            transactions.
        min_confidence: inclusive confidence threshold.

    Returns:
        list of ``(rule, confidence)`` tuples, where ``rule`` uses the same
        ``"<left tuple>-(<remaining items>)"`` text as ``get_all_combinations``.
    """
    transactions = Groubed_data["ItemsSet"]
    total_conff = []
    for items, count in last_freq:
        for length in range(1, len(items)):
            for antecedent in combinations(items, length):
                dem = _antecedent_support(transactions, antecedent)
                if dem == 0:
                    # Left-hand side never occurs: confidence is undefined.
                    continue
                conf_cal = count / dem
                if conf_cal >= min_confidence:
                    remainder = ", ".join(
                        str(member) for member in items if member not in antecedent
                    )
                    total_conff.append((f"{antecedent}-({remainder})", conf_cal))
    return total_conff


def generate_all_combinations(frequent_itemsets):
    """List every non-empty proper subset of each frequent itemset.

    Args:
        frequent_itemsets: iterable of records whose first element is the
            itemset (a sequence of items).

    Returns:
        list of tuples, grouped by source itemset and ordered by subset size
        within each group. Subsets shared by several itemsets appear once per
        itemset.
    """
    subsets = []
    for record in frequent_itemsets:
        members = record[0]
        subsets.extend(
            subset
            for size in range(1, len(members))
            for subset in combinations(members, size)
        )
    return subsets


# Launch the GUI only when this file is executed directly, so importing the
# module (e.g. to reuse the mining functions) does not open a window.
if __name__ == "__main__":
    root = tk.Tk()
    app = DataProcessorGUI(root)
    root.mainloop()
