In [1]:
# Smoke test for the GUI toolkit: the message below is only printed if the
# tkinter import on the previous line succeeded.
import tkinter
print("Tkinter is installed and working!")

Tkinter is installed and working!


In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import tensorflow as tf

# Function to load data from CSV file
def load_data():
    """Prompt for a CSV file, load it into ``impure_data`` and preview it.

    Opens a file-chooser restricted to ``*.csv``. If the dialog returns an
    empty path nothing happens. Otherwise the file is read into the
    module-level ``impure_data`` DataFrame, the ``text`` widget is cleared
    and filled with the first rows (``DataFrame.head()``), and a
    confirmation dialog is shown. Any exception raised along the way is
    reported in an error dialog rather than propagated.
    """
    global impure_data

    chosen_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
    if not chosen_path:
        return

    try:
        impure_data = pd.read_csv(chosen_path)
        # Replace whatever the text widget currently shows with a preview.
        text.delete(1.0, tk.END)
        preview = impure_data.head().to_string()
        text.insert(tk.END, preview)
        messagebox.showinfo("Data Loaded", "Data loaded successfully!")
    except Exception as err:
        messagebox.showerror("Error", f"Failed to load data: {err}")

# Function to run models and show results
def run_models():
    """Train every regressor on the loaded data and report its test metrics.

    Reads the module-level ``impure_data`` DataFrame, standardizes its
    float64/int64 columns, and predicts ``temp_max`` from ``temp_min``,
    ``precipitation`` and ``wind`` using an 80/20 train/test split
    (``random_state=42``). MAE, MSE and R2 for each model are written to the
    ``text`` widget and shown in a message box.

    The metrics table is also stored as ``impure_data.results_df``, which is
    where ``visualize_results`` looks for it. Because it lives on the
    DataFrame object, loading a new file (which rebinds ``impure_data``)
    discards stale results.

    Missing required columns and any failure during preprocessing or
    training are reported in an error dialog; nothing is raised to the
    caller.
    """
    if impure_data is None:
        messagebox.showerror("Error", "Please load data first.")
        return

    feature_cols = ['temp_min', 'precipitation', 'wind']
    target_col = 'temp_max'
    missing_cols = [col for col in (*feature_cols, target_col)
                    if col not in impure_data.columns]
    if missing_cols:
        messagebox.showerror(
            "Error",
            f"Data is missing required column(s): {', '.join(missing_cols)}",
        )
        return

    try:
        # Data Preprocessing
        # NOTE(review): the scaler is fitted on the full dataset before the
        # train/test split, and the target column is standardized as well, so
        # test rows influence the scaling and the metrics are in standardized
        # units. Left unchanged to keep the reported numbers comparable.
        numerical_cols = impure_data.select_dtypes(include=['float64', 'int64']).columns
        scaler = StandardScaler()
        standardized_data = impure_data.copy()
        standardized_data[numerical_cols] = scaler.fit_transform(impure_data[numerical_cols])

        # Prepare features and target
        X = standardized_data[feature_cols].values
        y = standardized_data[target_col].values

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Models to compare, keyed by the label used in the results table.
        models = {
            'Linear Regression': LinearRegression(),
            'Decision Tree': DecisionTreeRegressor(),
            'Random Forest': RandomForestRegressor(n_estimators=100),
            'Gradient Boosting': GradientBoostingRegressor(),
            'SVM': SVR(),
            'Neural Network': tf.keras.Sequential([
                tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
                tf.keras.layers.Dense(32, activation='relu'),
                tf.keras.layers.Dense(1, activation='linear')
            ])
        }

        results = {}
        for model_name, model in models.items():
            if model_name == 'Neural Network':
                # The Keras model must be compiled first, and its predictions
                # come back as a column vector that has to be flattened.
                model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
                model.fit(X_train, y_train, epochs=50, verbose=0, batch_size=16)
                y_pred = model.predict(X_test, verbose=0).flatten()
            else:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

            results[model_name] = {
                'MAE': mean_absolute_error(y_test, y_pred),
                'MSE': mean_squared_error(y_test, y_pred),
                'R2 Score': r2_score(y_test, y_pred),
            }

        # One row per model, one column per metric.
        results_df = pd.DataFrame(results).T
    except Exception as e:
        # Exceptions escaping a Tk callback are not shown to the user, so
        # surface them the same way the other callbacks do.
        messagebox.showerror("Error", f"Failed to run models: {e}")
        return

    # Keep the table where visualize_results reads it. object.__setattr__
    # bypasses DataFrame.__setattr__, which would otherwise emit a pandas
    # warning about creating columns through attribute assignment.
    object.__setattr__(impure_data, 'results_df', results_df)

    # Show results in the text widget
    result_str = results_df.to_string()
    text.delete(1.0, tk.END)
    text.insert(tk.END, result_str)

    # Display results in a message box
    messagebox.showinfo("Model Results", result_str)

# Function to visualize performance
def visualize_results():
    """Plot MAE, MSE and R² Score per model as three side-by-side bar charts.

    Requires data to be loaded (``impure_data`` not ``None``) and a
    non-empty metrics table available as ``impure_data.results_df``;
    otherwise an error dialog is shown and nothing is plotted. Any exception
    raised while reading the table or plotting is reported in an error
    dialog rather than propagated.
    """
    if impure_data is None:
        messagebox.showerror("Error", "Please load data first.")
        return

    try:
        # Results are expected to be attached to the loaded DataFrame.
        has_results = (
            hasattr(impure_data, 'results_df')
            and not impure_data.results_df.empty
        )
        if not has_results:
            messagebox.showerror("Error", "No results to visualize.")
            return

        results_df = impure_data.results_df

        _figure, axes = plt.subplots(1, 3, figsize=(18, 6))

        # (results column, chart title, y-axis label, bar colour) per subplot.
        panels = [
            ('MAE', 'Model Comparison Based on MAE', 'MAE', 'skyblue'),
            ('MSE', 'Model Comparison Based on MSE', 'MSE', 'lightgreen'),
            ('R2 Score', 'Model Comparison Based on R² Score', 'R² Score', 'salmon'),
        ]

        # Read every metric column before drawing anything, so a missing
        # column fails before any bars are added.
        heights = [results_df[column].values for column, _, _, _ in panels]

        for ax, (_, title, y_label, colour), values in zip(axes, panels, heights):
            ax.bar(results_df.index, values, color=colour, edgecolor='black')
            ax.set_title(title, fontsize=16)
            ax.set_xlabel('Models', fontsize=14)
            ax.set_ylabel(y_label, fontsize=14)
            ax.tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()
    except Exception as err:
        messagebox.showerror("Error", f"Error in visualizing: {err}")

# Shared state read by the callbacks: the loaded DataFrame, or None until
# load_data() has successfully read a file.
impure_data = None

# Build the main application window.
root = tk.Tk()
root.title("Data Analysis GUI")

# Action buttons, each wired to its callback.
load_button = tk.Button(root, text="Load Data", command=load_data)
run_button = tk.Button(root, text="Run Models", command=run_models)
visualize_button = tk.Button(root, text="Visualize Results", command=visualize_results)

# Text area used to show the data preview and the model results.
text = tk.Text(root, height=15, width=100)

# Lay the widgets out in this order, with the same vertical padding.
load_button.pack(pady=10)
run_button.pack(pady=10)
visualize_button.pack(pady=10)
text.pack(pady=10)

# Hand control to the Tkinter event loop.
root.mainloop()
