In [1]:
%gui tk  # Enable GUI support in Jupyter Notebook

import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
import os

# ETL function
def run_etl(filepath):
    """Preprocess a stroke CSV and save the result next to the input file.

    The file is read with pandas, the ``id`` column is dropped when present,
    and the ``stroke`` column is split off as the target. Numeric features are
    mean-imputed and standardized; categorical features are imputed with the
    most frequent value and one-hot encoded. The transformed features plus the
    untouched target are written to ``processed_stroke_dataset.csv`` in the
    same directory as ``filepath``.

    Args:
        filepath: Path of the CSV file to process.

    Returns:
        Path of the CSV file that was written.

    Raises:
        KeyError: If the ``stroke`` target column is missing from the file.
    """
    df = pd.read_csv(filepath)

    # Drop the 'id' column if it exists (non-mutating; no-op when absent).
    df = df.drop(columns='id', errors='ignore')

    # Split features and target.
    target = 'stroke'
    X = df.drop(target, axis=1)
    y = df[target]

    numeric_features = ['age', 'avg_glucose_level', 'bmi']
    categorical_features = ['gender', 'hypertension', 'heart_disease', 'ever_married',
                            'work_type', 'Residence_type', 'smoking_status']

    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    # sparse_threshold=0 forces a dense ndarray. With the default (0.3) the
    # stacked result becomes a SciPy sparse matrix whenever the one-hot block
    # makes the overall density drop below the threshold, and building the
    # DataFrame below from a sparse matrix with explicit column names breaks.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        sparse_threshold=0,
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor)
    ])

    # Fit and transform the data
    X_processed = pipeline.fit_transform(X)

    # Column order follows the ColumnTransformer: numeric block first, then
    # the expanded one-hot columns of the categorical block.
    onehot = (
        pipeline.named_steps['preprocessor']
        .named_transformers_['cat']
        .named_steps['onehot']
    )
    encoded_cat_names = onehot.get_feature_names_out(categorical_features)
    final_feature_names = numeric_features + list(encoded_cat_names)

    # Create final DataFrame; reset the target's index so it aligns
    # positionally with the freshly built frame.
    processed_df = pd.DataFrame(X_processed, columns=final_feature_names)
    processed_df[target] = y.reset_index(drop=True)

    # Save result alongside the input file
    output_path = os.path.join(os.path.dirname(filepath), 'processed_stroke_dataset.csv')
    processed_df.to_csv(output_path, index=False)

    return output_path

# UI
def upload_and_process():
    """Ask the user for a CSV file, run the ETL on it, and report the outcome.

    Does nothing when the file dialog returns no path. Any exception raised
    while processing is shown to the user in an error dialog instead of
    propagating.
    """
    chosen_csv = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
    if not chosen_csv:
        # No file was picked; nothing to process.
        return

    try:
        saved_to = run_etl(chosen_csv)
        messagebox.showinfo("Success ✅", f"Processed file saved to:\n{saved_to}")
    except Exception as err:
        # UI boundary: surface the failure message in a dialog.
        messagebox.showerror("Error ❌", str(err))

# Main application window.
root = tk.Tk()
root.geometry("400x200")
root.title("Stroke Dataset ETL Processor")

# Create the widgets first, then pack them in top-to-bottom order.
label = tk.Label(
    root,
    font=("Arial", 14),
    text="Upload CSV File for ETL Processing",
)
upload_btn = tk.Button(
    root,
    bg="lightblue",
    font=("Arial", 12),
    text="Upload & Process",
    command=upload_and_process,  # runs the ETL on the file the user picks
)

label.pack(pady=20)
upload_btn.pack()

# Start the Tk event loop.
root.mainloop()


ERROR:root:Invalid GUI request 'tk # Enable GUI support in Jupyter Notebook', valid ones are:dict_keys(['inline', 'nbagg', 'notebook', 'ipympl', 'widget', None, 'qt4', 'qt', 'qt5', 'wx', 'tk', 'gtk', 'gtk3', 'osx', 'asyncio'])
