In [None]:
#Random Forest !!

In [None]:
import tkinter as tk
from tkinter import messagebox
import subprocess
import sys

def install_and_import(package, pip_name=None):
    """Import *package*, installing it with pip first if it is missing.

    Args:
        package: Importable module name, e.g. ``"sklearn"``.
        pip_name: Distribution name handed to ``pip install`` when it differs
            from the import name, e.g. ``"scikit-learn"``. Defaults to
            *package*, which matches the previous single-argument behavior.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
        ImportError: If the module still cannot be imported after installing.
    """
    import importlib

    try:
        importlib.import_module(package)
    except ImportError:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", pip_name or package]
        )
        # Packages installed while the interpreter is running may be hidden by
        # the import system's directory caches until they are invalidated.
        importlib.invalidate_caches()
        importlib.import_module(package)
import importlib

# Import names whose pip distribution is published under a different name.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Third-party imports with a best-effort auto-install.  When an import fails,
# the missing distribution is installed and the WHOLE import block is run
# again, so that every alias (pd, np, sns, plt, ...) ends up bound at module
# level.  Each module is only installed once; a second failure for the same
# module is re-raised instead of looping forever.
_install_attempts = set()
while True:
    try:
        import pandas as pd
        import numpy as np
        import seaborn as sns
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import LabelEncoder
        from sklearn.metrics import accuracy_score
        break
    except ImportError as e:
        # ``e.name`` carries the module that failed to import; only its
        # top-level package is something pip can install.
        missing_module = (e.name or "").split(".")[0]
        if not missing_module or missing_module in _install_attempts:
            raise
        _install_attempts.add(missing_module)
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            _PIP_NAMES.get(missing_module, missing_module),
        ])
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()

class AutoDataAnalyzerApp:
    """Tkinter window that loads a dataset, trains a Random Forest and plots it.

    Constructing the app runs the whole pipeline immediately, in this order:
    load the data, clean/encode it, train the classifier, draw the plots and
    show one example prediction. Progress messages are appended to a text
    widget at the top of the window.

    Attributes set by the constructor:
        root: The Tk root window passed in by the caller.
        stats_text: ``tk.Text`` widget that receives the progress messages.
        df: The working ``pandas.DataFrame`` (empty when loading failed).
        model: The fitted ``RandomForestClassifier`` or ``None``.
    """

    def __init__(self, root):
        """Build the widgets and run the full analysis pipeline.

        Args:
            root: The Tk root (or toplevel) window to populate.
        """
        self.root = root
        self.root.title("Análisis Automático de Datos con Python")
        self.root.geometry("800x700")

        self.stats_text = tk.Text(root, height=10, width=80)
        self.stats_text.pack(pady=10)

        self.df = None
        self.model = None

        self.load_data()
        self.preprocess_data()
        self.train_model()
        self.generate_plots()
        self.make_auto_prediction()

    def display_message(self, message):
        """Append *message* followed by a newline to the text widget."""
        self.stats_text.insert(tk.END, message + "\n")

    def load_data(self):
        """Fetch UCI repository dataset id 350 into ``self.df``.

        Features and targets are concatenated column-wise, so the target ends
        up in the last column(s). Any failure (missing ``ucimlrepo`` package,
        download or parsing error) is reported in the text widget and leaves
        ``self.df`` as an empty DataFrame so that the later pipeline steps
        become no-ops instead of crashing the constructor.
        """
        try:
            from ucimlrepo import fetch_ucirepo
            dataset = fetch_ucirepo(id=350)
            self.df = pd.concat(
                [dataset.data.features, dataset.data.targets], axis=1
            )
        except ImportError:
            self.display_message(
                "Error: el paquete 'ucimlrepo' no está instalado."
            )
            self.df = pd.DataFrame()
        except Exception as exc:
            # UI boundary: a failed download must not abort window creation,
            # so report the cause and continue with an empty frame.
            self.display_message(f"Error al cargar el dataset: {exc}")
            self.df = pd.DataFrame()
        else:
            self.display_message("Dataset cargado exitosamente desde UCI.")

        if not self.df.empty:
            self.display_message(
                f"Filas: {self.df.shape[0]}, Columnas: {self.df.shape[1]}"
            )

    def preprocess_data(self):
        """Drop duplicate/incomplete rows and label-encode object columns.

        Does nothing when no data was loaded, so no misleading "cleaned"
        message is shown after a failed load.
        """
        if self.df is None or self.df.empty:
            return

        rows_before = self.df.shape[0]
        self.df = self.df.drop_duplicates().dropna()
        # Counts every removed row: duplicates and rows with missing values.
        rows_removed = rows_before - self.df.shape[0]
        self.display_message(
            f"Datos limpiados: {rows_removed} duplicados y valores faltantes eliminados."
        )

        # Replace each object-typed column with integer codes so the
        # classifier and the correlation matrix can consume it.
        for col in self.df.select_dtypes(include=['object']).columns:
            self.df[col] = LabelEncoder().fit_transform(self.df[col])

    def train_model(self):
        """Train a Random Forest classifier on the loaded data.

        The last column is used as the target and all preceding columns as
        features. 30% of the rows are held out to compute the accuracy that
        is reported in the text widget. Both the split and the forest use a
        fixed seed so repeated runs report the same score.
        """
        if self.df is None or self.df.empty:
            return
        # A split needs at least two rows, and the model at least one feature
        # column besides the target.
        if self.df.shape[0] < 2 or self.df.shape[1] < 2:
            self.display_message(
                "No hay suficientes datos para entrenar el modelo."
            )
            return

        X = self.df.iloc[:, :-1]
        y = self.df.iloc[:, -1]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        self.model = RandomForestClassifier(random_state=42)
        self.model.fit(X_train, y_train)
        accuracy = accuracy_score(y_test, self.model.predict(X_test))
        self.display_message(f"Modelo entrenado. Precisión: {accuracy:.2f}")

    def generate_plots(self):
        """Draw a 2x2 grid of summary plots and embed it in the window.

        Panels: distribution of the last column, correlation heat map,
        scatter plot of the 2nd vs 3rd column, and distribution of the 4th
        column. The last two panels are hidden when the data does not have
        enough columns, instead of raising ``IndexError``.
        """
        if self.df is None or self.df.empty:
            return

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        columns = self.df.columns

        sns.histplot(self.df.iloc[:, -1], kde=True, ax=axes[0, 0])
        axes[0, 0].set_title('Distribución de la Variable Respuesta')

        sns.heatmap(self.df.corr(), annot=False, cmap="coolwarm", ax=axes[0, 1])
        axes[0, 1].set_title('Mapa de Calor de Correlación')

        if len(columns) > 2:
            sns.scatterplot(
                x=columns[1], y=columns[2], data=self.df, ax=axes[1, 0]
            )
            axes[1, 0].set_title('Diagrama de Dispersión entre Dos Variables')
        else:
            axes[1, 0].set_visible(False)

        if len(columns) > 3:
            sns.histplot(self.df[columns[3]], kde=True, ax=axes[1, 1])
            axes[1, 1].set_title(f'Distribución de {columns[3]}')
        else:
            axes[1, 1].set_visible(False)

        fig.tight_layout()

        canvas = FigureCanvasTkAgg(fig, master=self.root)
        canvas.draw()
        canvas.get_tk_widget().pack()

    def make_auto_prediction(self):
        """Predict the first row with the trained model and show the result.

        The prediction is shown in a message box; afterwards a fixed list of
        summary lines is appended to the text widget. Does nothing when no
        model was trained or no data is available.
        """
        if self.model is None or self.df is None or self.df.empty:
            return

        # Slice with a list so the row stays a one-row DataFrame carrying the
        # same column names the model was fitted with.
        input_data = self.df.iloc[[0], :-1]
        prediction = self.model.predict(input_data)[0]
        messagebox.showinfo(
            "Predicción Automática",
            f"Predicción del modelo (ejemplo): {prediction}",
        )

        # NOTE: the lines below are static text; they are not computed from
        # the data or from the model's metrics.
        self.display_message("\nConclusiones del Análisis:")
        self.display_message("- La variable objetivo tiene una distribución balanceada.")
        self.display_message("- Se observaron correlaciones significativas entre algunas variables.")
        self.display_message("- El modelo Random Forest alcanzó una precisión de predicción razonable.")
        self.display_message("- Los gráficos generados ayudan a visualizar la distribución de variables y la relación entre ellas.")

if __name__ == "__main__":
    # Script entry point: create the Tk root window, let the app populate it
    # (the constructor runs the whole analysis), then hand control to Tk's
    # event loop until the window is closed.
    app = AutoDataAnalyzerApp(tk.Tk())
    app.root.mainloop()
