In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score
from sklearn import tree

# Load the Titanic passenger data into a DataFrame.

df = pd.read_csv("archivos_csv/titanic.csv")

# Boolean mask marking the rows whose 'Age' value is missing.
age_null_values = df["Age"].isnull()

# Column names, dtypes and non-null counts.
df.info()

# Summing a boolean mask counts its True entries, so no explicit
# True/False -> 1/0 conversion is required (the previous `.replace(...)`
# call discarded its result and had no effect).
null_values = age_null_values.sum()

print(f"\nValores nulos en la columna 'Age': {null_values}")
print("---------------------------------------------------")

In [None]:
# Impute missing values.
# Assign the filled column back instead of calling `fillna(..., inplace=True)`
# on `df[col]`: the chained in-place form is deprecated in pandas and does not
# update `df` when copy-on-write is enabled.

df["Age"] = df["Age"].fillna(df["Age"].mean())

# The column is named "Embarked"; the previous lookup of "Embarke" raised KeyError.
mode = df["Embarked"].mode()
df["Embarked"] = df["Embarked"].fillna(mode[0])

# Encode the nominal categorical variable 'Sex' as integers.

df["Sex_encoded"] = LabelEncoder().fit_transform(df["Sex"])

# Also build a textual version of the binary target variable.

map_survived = {
    0: "no",
    1: "yes",
}
df["Survived_no_encoded"] = df["Survived"].map(map_survived)

# Show only the first rows to compare the numeric and textual target.
df[["Survived", "Survived_no_encoded"]].head()

In [None]:
# Columns used as model inputs (shared by the split, the text export and the plot).
FEATURE_COLUMNS = ["Sex_encoded", "Pclass", "Fare", "Parch", "SibSp"]

# Fixed seed so the train/test split and the fitted tree are reproducible
# after a kernel restart.
RANDOM_STATE = 42

# Scores below this value are shown in red, the rest in green.
SCORE_THRESHOLD = 0.6


def _score_color(score, threshold=SCORE_THRESHOLD):
    """Return "red" when `score` is below `threshold`, otherwise "green"."""
    return "red" if score < threshold else "green"


# Split the data into training and test groups.

x_train, x_test, y_train, y_test = train_test_split(
    df[FEATURE_COLUMNS],
    df["Survived"],
    test_size=0.1,
    random_state=RANDOM_STATE,
)

# Declare and fit the algorithm.

tree_decision = tree.DecisionTreeClassifier(
    criterion="entropy",
    max_depth=3,
    random_state=RANDOM_STATE,
)

model = tree_decision.fit(x_train, y_train)

# Predict the test data.

class_predicts = tree_decision.predict(x_test)

class_real = y_test.values

# Classification performance metrics.
# scikit-learn lays the binary confusion matrix out as [[TN, FP], [FN, TP]]
# (rows = true class, columns = predicted class), so the flattened order is
# TN, FP, FN, TP. `labels=[0, 1]` keeps the matrix 2x2 even if one class is
# absent from the test split.

matrix_confusion = confusion_matrix(class_real, class_predicts, labels=[0, 1])
TN, FP, FN, TP = matrix_confusion.ravel()

accuracy = accuracy_score(class_real, class_predicts)
color_accuracy = _score_color(accuracy)
accuracy_str = str(accuracy)

recall = recall_score(class_real, class_predicts)
color_recall = _score_color(recall)
recall_str = str(recall)

precision = precision_score(class_real, class_predicts)
color_precision = _score_color(precision)
precision_str = str(precision)

F1_score = f1_score(class_real, class_predicts)
color_f1 = _score_color(F1_score)
F1_score_str = str(F1_score)

# Textual and graphical views of the fitted tree.

print(tree.export_text(model, feature_names=FEATURE_COLUMNS))

plt.figure(figsize=(35, 22))
tree.plot_tree(model, feature_names=FEATURE_COLUMNS)
plt.show()

##### El dashboard generado no es una representación para ideas de negocio, sino una muestra gráfica de los procesos realizados y de los patrones tomados en cuenta para la creación del modelo, junto con su rendimiento.

In [7]:
# Dashboard creation.
# Style dictionaries use camelCase keys, as Dash documents for the `style`
# property (e.g. `textAlign` rather than `text-align`).
app = dash.Dash(__name__)

SANS_SERIF = "sans-serif"


def _matrix_cell(value, color, cell_id):
    """Build one confusion-matrix cell showing `value` in bold with `color`."""
    return html.Div(
        [html.B(value, style={"color": color, "fontFamily": SANS_SERIF})],
        id=cell_id,
        className="e1_aciertos",
    )


def _metric_item(label, score_text, color, item_id):
    """Build one list item: `label` followed by the first 4 characters of `score_text` in `color`."""
    return html.Li(
        [label, html.B(score_text[:4], style={"color": color})],
        id=item_id,
        style={"fontFamily": SANS_SERIF, "marginRight": "5px"},
    )


app.layout = html.Div(id="body", className="e1_body", children=[
    html.H1("Titanic", id="title", className="e1_title"),

    # Two interactive charts, each driven by its own dropdown.
    html.Div(className="e1_dashboards", children=[
        html.Div(id="graph_div_1", className="e1_graph_div", children=[
            html.Div(id="dropdown_div_1", className="e1_dropdown_div", children=[
                dcc.Dropdown(
                    id="dropdown_1",
                    className="e1_dropdown",
                    options=[
                        {"label": "Sexo", "value": "Sex"},
                        {"label": "Clase social", "value": "Pclass"},
                        {"label": "Embarcadero", "value": "Embarked"},
                        {"label": "Padres e hijos/as", "value": "Parch"},
                        {"label": "Hermanas/os y esposas/os", "value": "SibSp"},
                    ],
                    value="Sex",
                    multi=False,
                    clearable=False,
                ),
            ]),
            dcc.Graph(id="piechart", className="e1_graph", figure={}),
        ]),
        html.Div(id="graph_div_2", className="e1_graph_div", children=[
            html.Div(id="dropdown_div_2", className="e1_dropdown_div", children=[
                dcc.Dropdown(
                    id="dropdown_2",
                    className="e1_dropdown",
                    options=[
                        {"label": "Edad", "value": "Age"},
                        {"label": "Boleto", "value": "Fare"},
                    ],
                    value="Age",
                    multi=False,
                    clearable=False,
                ),
            ]),
            dcc.Graph(id="bar", className="e1_graph", figure={}),
        ]),
    ]),

    # Static model-performance panel, built from values computed before the app starts.
    html.Div(className="e1_div", children=[
        html.Div(id="rendimiento", className="e1_rendimiento", children=[
            html.P(
                [
                    html.B("Clases reales", style={"color": "blue"}),
                    "   |   ",
                    html.B("Predicciones", style={"color": "red"}),
                ],
                style={"textAlign": "center", "fontFamily": SANS_SERIF},
            ),
            html.P("----------------------------------------------------------------------------------------------------", style={"margin": "0"}),
            html.P(f"{class_real}", className="e1_clases_reales"),
            html.P(f"{class_predicts}", className="e1_predicciones"),
        ]),
        html.Div(id="modelos_metricas", className="e1_metricas", children=[
            html.P(
                "Matriz de confusión",
                # "fontWeight" was previously misspelled, so the heading was never bold.
                style={"fontSize": "0.9em", "textAlign": "center", "fontFamily": SANS_SERIF, "fontWeight": "bold"},
            ),
            html.Div(className="e1_matriz", id="matriz_confusion", children=[
                _matrix_cell(TP, "green", "TP"),
                _matrix_cell(FP, "red", "FP"),
                _matrix_cell(FN, "red", "FN"),
                _matrix_cell(TN, "green", "TN"),
            ]),
            html.Div(className="e1_puntuaciones", children=[
                html.Ul(id="lista", children=[
                    _metric_item("Accuracy: ", accuracy_str, color_accuracy, "accuracy"),
                    _metric_item("Recall: ", recall_str, color_recall, "recall"),
                    _metric_item("Precision: ", precision_str, color_precision, "precision"),
                    _metric_item("F1 Score: ", F1_score_str, color_f1, "f1_score"),
                ]),
            ]),
        ]),
    ]),
])

@app.callback(
    [Output(component_id="piechart", component_property="figure"),
     Output(component_id="bar", component_property="figure")],
    [Input(component_id="dropdown_1", component_property="value"),
     Input(component_id="dropdown_2", component_property="value")],
)
def update_graph(slct_var_cat, slct_var_num):
    """Rebuild both dashboard figures from the current dropdown selections.

    Args:
        slct_var_cat: Name of the `df` column chosen in "dropdown_1"; rows are
            grouped by it for the pie chart.
        slct_var_num: Name of the `df` column chosen in "dropdown_2"; its mean
            per "Survived_no_encoded" group is drawn in the bar chart.

    Returns:
        A `(piechart, barplot)` tuple of Plotly figures, in the same order as
        the callback outputs.
    """
    # Mean of 'Survived' per category, scaled to a 0-100 range.
    df_percentage = df.groupby(slct_var_cat)["Survived"].mean().reset_index()
    df_percentage["Survived"] = df_percentage["Survived"] * 100

    piechart = px.pie(
        df_percentage,
        values="Survived",
        names=slct_var_cat,
        title='Porcentage de sobrevivir',
    )

    # Mean of the selected numeric column for each survival group.
    df_mean = df.groupby("Survived_no_encoded")[slct_var_num].mean().reset_index()

    # Plotly Express `labels` is keyed by column name (not by "x"/"y"), so the
    # axis title is set here rather than patched afterwards with update_layout.
    barplot = px.bar(
        df_mean,
        x="Survived_no_encoded",
        y=slct_var_num,
        title='Medias de Edad y Boleto',
        labels={"Survived_no_encoded": "Sobrevivientes"},
    )

    return piechart, barplot

# Start the Dash development server when the cell/script runs as the main module.
# `app.run` is the current entry point; `app.run_server` is its deprecated alias.
# NOTE(review): `app.run` requires a Dash release that provides it — confirm the
# installed Dash version before relying on this.
if __name__ == "__main__":
    app.run(debug=False)

[2024-07-02 10:29:42,908] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\genar\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\genar\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\genar\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\genar\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 865, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^