<a href="https://colab.research.google.com/github/divyani95/Transaction_details_Project/blob/main/Transaction_Script(VS).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import os

def _find_column(possible_names, df_columns):
    """Return the first label in *df_columns* matching one of *possible_names*.

    Matching ignores case and surrounding whitespace. Labels are passed
    through ``str`` first so non-string headers (e.g. integers) cannot raise.
    Returns ``None`` when nothing matches.
    """
    wanted = {str(name).strip().lower() for name in possible_names}
    for name in df_columns:
        if str(name).strip().lower() in wanted:
            return name
    return None


def _load_table(file_path):
    """Read *file_path* into a DataFrame, as CSV or Excel based on its extension."""
    if file_path.lower().endswith(".csv"):
        return pd.read_csv(file_path)
    # Let pandas choose the Excel engine from the file itself: openpyxl
    # cannot open legacy ``.xls`` workbooks, so forcing it breaks them.
    return pd.read_excel(file_path)


def _split_transaction_datetime(df, source_col):
    """Return a copy of *df* with normalized date/time columns.

    When *source_col* is not ``None`` it is split on the first space into
    "Transaction-Date" and "Transaction-Time" and then dropped. Any
    "Transaction-Date" / "Transaction-Time" column present afterwards is
    converted to ``date`` / ``time`` values; unparseable entries become NaT.
    """
    df = df.copy()
    if source_col is not None:
        # n=1 keeps values such as "10:30 AM" in one piece, and reindexing
        # guarantees exactly two columns even when no value contains a time.
        parts = (
            df[source_col]
            .astype(str)
            .str.strip()
            .str.split(" ", n=1, expand=True)
            .reindex(columns=[0, 1])
        )
        df = df.drop(columns=[source_col])
        df["Transaction-Date"] = parts[0]
        df["Transaction-Time"] = parts[1]

    if "Transaction-Date" in df.columns:
        df["Transaction-Date"] = pd.to_datetime(
            df["Transaction-Date"], errors="coerce"
        ).dt.date
    if "Transaction-Time" in df.columns:
        df["Transaction-Time"] = pd.to_datetime(
            df["Transaction-Time"], errors="coerce"
        ).dt.time
    return df


def _filter_debits(df, debit_col, description_col):
    """Return rows with a non-zero debit whose description starts with NEFT/RTGS.

    The debit column is converted to numbers (thousands separators removed,
    unparseable values become NaN). The description filter is skipped when
    *description_col* is ``None``.
    """
    df = df.copy()
    amounts = df[debit_col].astype(str).str.replace(",", "", regex=False).str.strip()
    df[debit_col] = pd.to_numeric(amounts, errors="coerce")
    df = df[df[debit_col] != 0.00]

    if description_col is not None:
        # astype(str) keeps the .str accessor usable on non-string columns;
        # missing values become "nan" and therefore never match.
        keep = df[description_col].astype(str).str.startswith(("NEFT", "RTGS"))
        df = df[keep]
    return df


def read_and_clean_file():
    """Ask the user for a transaction file, clean it, and save the result.

    The selected CSV/Excel file is loaded, its "Transaction Date" column (if
    any) is split into "Transaction-Date" and "Transaction-Time", the
    "Debit Amount" column is made numeric, zero-debit rows are removed and,
    when a "Transaction Description" column exists, only NEFT/RTGS rows are
    kept. The result is written to "Transactions_Details_Cleaned.xlsx" next
    to the input file. Outcomes and errors are reported through message
    boxes; the function returns ``None``.
    """
    file_path = filedialog.askopenfilename(
        title="Select Transaction File",
        # Tk expects space-separated patterns; ";" only works on Windows.
        filetypes=(("Excel files", "*.xls *.xlsx"), ("CSV files", "*.csv"), ("All files", "*.*"))
    )

    if not file_path:
        messagebox.showinfo("No file selected", "Please select a file to process.")
        return

    try:
        df = _load_table(file_path)

        # Identify required columns
        transaction_date_col = _find_column(["Transaction Date"], df.columns)
        debit_amount_col = _find_column(["Debit Amount"], df.columns)
        description_col = _find_column(["Transaction Description"], df.columns)

        if debit_amount_col is None:
            messagebox.showerror("Error", "Debit Amount column not found.")
            return

        # Split exactly once: the source column is dropped by the split, so
        # repeating it later would fail on the missing column.
        df = _split_transaction_datetime(df, transaction_date_col)
        df = _filter_debits(df, debit_amount_col, description_col)

        # Save cleaned dataset
        output_path = os.path.join(os.path.dirname(file_path), "Transactions_Details_Cleaned.xlsx")
        df.to_excel(output_path, index=False, engine='openpyxl')

        messagebox.showinfo("Success", f"Cleaned dataset saved at:\n{output_path}")

    except Exception as e:
        # Top-level GUI boundary: surface any failure to the user instead of
        # letting the Tk callback swallow it silently.
        messagebox.showerror("Error", f"An error occurred:\n{e}")

def create_gui():
    """Build the main window with a single "Read File" button and run the Tk event loop.

    Clicking the button invokes ``read_and_clean_file``. The call blocks until
    the window is closed.
    """
    window = tk.Tk()
    window.title("Transaction Data Cleaner")
    window.minsize(300, 150)

    # The button is not needed after packing, so it is created and placed in one step.
    tk.Button(
        window,
        text="Read File",
        command=read_and_clean_file,
        width=20,
        height=2,
    ).pack(pady=40)

    window.mainloop()

# Launch the GUI only when this file is run directly, not when it is imported.
if __name__ == "__main__":
    create_gui()