# In [23]:
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

def load_csv():
    """Ask the user for a CSV file, clean it and open the prediction window.

    Returns without doing anything when the file dialog yields no path
    (for example when it is cancelled).
    """
    selected_path = filedialog.askopenfilename(filetypes=[('CSV Files', '*.csv')])
    if not selected_path:
        return

    frame = pd.read_csv(selected_path)
    # Capture the column index before the frame is handed to clean_data.
    column_names = frame.columns
    clean_data(frame)
    predict(frame, column_names)

def clean_data(df):
    """Make every column of ``df`` numeric and free of missing values, in place.

    * Numeric columns are converted to float and their missing values are
      replaced by the column mean. A column with no valid value at all has no
      mean, so it is filled with ``0.0`` instead.
    * Every other column is treated as categorical: missing values become the
      label ``'unknown'``, all values are converted to text, and each distinct
      label is replaced by its position in the sorted list of labels
      (``0, 1, 2, ...``).

    Args:
        df: ``pandas.DataFrame`` to clean. It is modified in place.

    Returns:
        None.
    """
    for column in df.columns:
        series = df[column]
        if pd.api.types.is_numeric_dtype(series):
            values = series.astype(float)
            mean = values.mean()
            # An all-missing column yields a NaN mean; filling with NaN would
            # leave the gaps in place, so fall back to a constant.
            fill_value = 0.0 if pd.isna(mean) else mean
            df[column] = values.fillna(fill_value)
        else:
            # Converting to text keeps columns that mix strings and numbers
            # sortable, so they can be encoded instead of raising.
            labels = series.fillna('unknown').astype(str)
            codes, _ = pd.factorize(labels, sort=True)
            df[column] = codes

def predict(df, columns):
    """Open the window where the user configures and launches a prediction.

    The window contains a model selector, a multi-select list of training
    columns, a single-select list for the target column and a "Calcular"
    button that calls ``perform_prediction`` with the current choices.

    Args:
        df: Data forwarded unchanged to ``perform_prediction``.
        columns: Iterable of column names offered in both list boxes.
    """
    # Named ``window`` so it does not shadow the module-level ``root``.
    window = tk.Toplevel()
    window.title("Prediction")
    window.geometry("500x500")

    model_label = tk.Label(window, text="Escoge un modelo:")
    model_label.pack()

    model_options = ("Linear Regression", "Decision Tree")
    # Pre-select the first model so that pressing "Calcular" straight away
    # never forwards an empty model name.
    model_var = tk.StringVar(value=model_options[0])
    model_dropdown = tk.OptionMenu(window, model_var, *model_options)
    model_dropdown.pack()

    training_label = tk.Label(window, text="Escoge las columnas de entrenamiento:")
    training_label.pack()

    # exportselection=False lets both list boxes keep their selection at the
    # same time.
    training_listbox = tk.Listbox(window, selectmode=tk.MULTIPLE, exportselection=False)
    training_listbox.pack()

    target_label = tk.Label(window, text="Escoge la columna objetivo:")
    target_label.pack()

    target_listbox = tk.Listbox(window, selectmode=tk.SINGLE, exportselection=False)
    target_listbox.pack()

    for column in columns:
        training_listbox.insert(tk.END, column)
        target_listbox.insert(tk.END, column)

    predict_button = tk.Button(
        window,
        text="Calcular",
        # The model name is read when the button is pressed, not when the
        # window is built.
        command=lambda: perform_prediction(df, training_listbox, target_listbox, model_var.get()),
    )
    predict_button.pack()

    # No mainloop() call here: this function is reached from a button callback
    # of the main window, whose event loop is already running and also
    # services this Toplevel.

def perform_prediction(df, training_listbox, target_listbox, model):
    """Train the chosen model on the selected columns and show the results.

    The data is split 80/20 into training and test sets. The predictions for
    the test set and the model's ``score`` on it are shown in a new window.
    Invalid input (no training column, no target column, unknown model name,
    or data the model rejects) is reported in an error dialog and nothing is
    trained or displayed.

    Args:
        df: DataFrame holding the training and target columns.
        training_listbox: ``tk.Listbox`` whose selected entries are the
            feature column names.
        target_listbox: ``tk.Listbox`` whose selected entry is the target
            column name.
        model: ``"Linear Regression"`` or ``"Decision Tree"``.

    Returns:
        None.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from tkinter import messagebox

    selected_training_columns = [training_listbox.get(i) for i in training_listbox.curselection()]
    if not selected_training_columns:
        messagebox.showerror("Error", "Escoge al menos una columna de entrenamiento.")
        return

    # curselection() returns an empty tuple when nothing is selected; passing
    # that to Listbox.get() raises, so check it first.
    target_selection = target_listbox.curselection()
    if not target_selection:
        messagebox.showerror("Error", "Escoge la columna objetivo.")
        return
    target_column = target_listbox.get(target_selection[0])

    model_factories = {
        "Linear Regression": LinearRegression,
        "Decision Tree": DecisionTreeRegressor,
    }
    if model not in model_factories:
        messagebox.showerror("Error", "Escoge un modelo.")
        return
    clf = model_factories[model]()

    X = df[selected_training_columns]
    y = df[target_column]

    try:
        # Hold out 20% of the rows for evaluation.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        # score() is the coefficient of determination for both regressors.
        score = clf.score(X_test, y_test)
    except ValueError as exc:
        # Without this, Tk only prints callback errors to stderr and the user
        # sees nothing (e.g. too few rows to split).
        messagebox.showerror("Error", "No se pudo entrenar el modelo: " + str(exc))
        return

    result_window = tk.Toplevel()
    result_window.title("Resultado de la prediccion")
    result_window.geometry("500x500")

    scrollbar = ttk.Scrollbar(result_window)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    result_text = tk.Text(result_window, wrap=tk.NONE, yscrollcommand=scrollbar.set)
    result_text.pack(fill=tk.BOTH, expand=True)

    scrollbar.config(command=result_text.yview)

    result_text.insert(tk.END, "Predicciones: " + str(predictions) + "\n")
    result_text.insert(tk.END, "Score: " + str(score) + "\n")

    # No mainloop() call here: this runs inside a button callback, so an event
    # loop is already running and will service the new window.

# Build the main application window.
root = tk.Tk()
root.geometry("500x500")
root.title("Proyecto final algoritmica")

# The only control: a button whose callback is load_csv.
load_button = tk.Button(master=root, text="CARGAR CSV", command=load_csv)
load_button.pack()

# Hand control to the Tk event loop.
root.mainloop()

