# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the finance dataset; the relative path is resolved against the
# current working directory.
df = pd.read_csv(filepath_or_buffer="Finanzas.csv")
# Bare expression: rendered as cell output in a notebook, a no-op in a script.
df

"""## Verificación de registros vacíos"""

# True when at least one cell of the frame is missing (the value is only
# visible as notebook cell output).
df.isna().to_numpy().any()
# Heatmap of the boolean missing-value mask.
sns.heatmap(data=df.isna())

# Number of missing values in each column.
df.isna().sum()

"""## Eliminación de registros vacíos"""

# Keep only the rows in which no column is missing.
df = df.dropna(axis=0, how="any")
df

"""## Variable objetivo A: equilibrio financiero"""

# Binary target: 1 when current income plus reinvestment gains cover the
# expenses, 0 otherwise.
df["Equilibrio"] = (
    df["IngresoActual"]
    .add(df["GananciaReinversion"])
    .ge(df["Gastos"])
    .astype(int)
)
df

"""## Variable objetivo B: ingreso necesario"""

# Regression target: expenses not covered by reinvestment gains.
# NOTE(review): the result is negative whenever the gains exceed the
# expenses -- confirm that is the intended meaning of "required income".
df["IngresoNecesario"] = df["Gastos"].sub(df["GananciaReinversion"])

"""## Clasificación del gasto (petición del usuario)"""

def clasificar_gasto(x):
    """Return the spending-level label for the expense amount *x*.

    Each label applies to amounts strictly below its upper bound: 3000,
    9000 and 15000. Anything that is not below 15000 gets the top label;
    that includes NaN, because every comparison against NaN is false.
    """
    # Upper bounds in ascending order; the first one that x is below wins.
    tramos = (
        (3000, "Gasto bajo"),
        (9000, "Gasto normal"),
        (15000, "Gasto un poco alto"),
    )
    for limite, etiqueta in tramos:
        if x < limite:
            return etiqueta
    return "Gasto muy alto"

# Label every row with its spending level, one element at a time.
df["NivelGasto"] = df["Gastos"].map(clasificar_gasto)
df

"""# Árbol de Decisión A (Equilibrio)"""

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Feature matrix and binary target for the balance classifier.
# NOTE(review): "Equilibrio" is computed directly from three of these
# columns, so the tree is learning a known formula.
X = df.loc[:, ["Gastos", "Reinversion", "GananciaReinversion", "IngresoActual"]]
y = df.loc[:, "Equilibrio"]

# 70/30 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
    stratify=y,
)

from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid; every combination is evaluated by the search below.
param_dist = dict(
    max_depth=range(2, 7),
    max_features=range(1, 5),
    min_samples_leaf=[5, 10, 20, 40],
    criterion=["gini", "entropy"],
)

# 5-fold cross-validated search over the grid, using a seeded tree.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    param_dist,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Bare expressions: shown as cell output in a notebook, no-ops in a script.
grid_search.best_params_
grid_search.best_score_

"""## Modelo Equilibrio"""

# Build the final classifier from the hyper-parameters chosen by the grid
# search rather than from hand-copied literals, so the model always matches
# the search result even when the data or the grid changes.
dtree = DecisionTreeClassifier(random_state=0, **grid_search.best_params_)

# Fit on the training split only; the test split is kept for evaluation.
dtree.fit(X_train, y_train)

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Score the fitted classifier on the held-out split.
predictions = dtree.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
f1 = f1_score(y_true=y_test, y_pred=predictions)

print("Exactitud:", accuracy)
print("F1:", f1)
print(confusion_matrix(y_true=y_test, y_pred=predictions))

"""## Visualización del árbol"""

# The standard-library StringIO replaces the `six` compatibility shim, so the
# script needs no third-party package just for an in-memory text buffer.
from io import StringIO

import pydot
from sklearn.tree import export_graphviz

# Column names of the training frame, in training order.
features = list(X)
# Display names for the two classes, in the order 0 then 1.
target = ['No alcanza','Sí alcanza']

# Write the DOT description of the fitted tree into an in-memory buffer.
dot = StringIO()
export_graphviz(
    dtree,
    out_file=dot,
    feature_names=features,
    class_names=target,
    filled=True,
    rounded=True,
)

# graph_from_dot_data returns a list of graphs; render the first one to PNG.
graph = pydot.graph_from_dot_data(dot.getvalue())
graph[0].write_png("Arbol_Finanzas.png")

"""# Árbol B: ingreso necesario (regresión)"""

from sklearn.tree import DecisionTreeRegressor

# Features and target for the "required income" regression.
# NOTE(review): "IngresoNecesario" is exactly Gastos - GananciaReinversion,
# so the tree approximates a known closed-form expression.
X2 = df[["Gastos","GananciaReinversion"]]
y2 = df["IngresoNecesario"]

# 70/30 split with a fixed seed.
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.30, random_state=0)

# random_state is fixed, as for every other estimator and split in this
# file, so that ties between equally good splits are broken the same way on
# each run.
reg = DecisionTreeRegressor(max_depth=5, min_samples_leaf=10, random_state=0)
reg.fit(X2_train, y2_train)

# Predictions on the held-out split; no metric is computed from them here.
pred2 = reg.predict(X2_test)

"""## Predicción para un registro ejemplo"""

# One example record, with columns in the same order as the classifier's
# training features.
test = pd.DataFrame([{
    'Gastos': 8000,
    'Reinversion': 500,
    'GananciaReinversion': 2000,
    'IngresoActual': 6000,
}])

# The classifier uses all four columns; the regressor only the two it was
# trained on.
pred_equilibrio = dtree.predict(test)
pred_ingreso_min = reg.predict(test.loc[:, ["Gastos", "GananciaReinversion"]])[0]

print("¿Alcanza?:", pred_equilibrio)
print("Ingreso mínimo necesario:", pred_ingreso_min)
