In [31]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.metrics import accuracy_score

In [32]:
df_titanic = sns.load_dataset('titanic')
df_titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [33]:
columnas_no_numericas = df_titanic.select_dtypes(exclude=['number']).columns
df_titanic = df_titanic.drop(columns=columnas_no_numericas)
df_titanic

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.2500
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.9250
3,1,1,35.0,1,0,53.1000
4,0,3,35.0,0,0,8.0500
...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000
887,1,1,19.0,0,0,30.0000
888,0,3,,1,2,23.4500
889,1,1,26.0,0,0,30.0000


In [34]:
df_titanic = df_titanic.dropna(subset=['age'])
df_titanic

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.2500
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.9250
3,1,1,35.0,1,0,53.1000
4,0,3,35.0,0,0,8.0500
...,...,...,...,...,...,...
885,0,3,39.0,0,5,29.1250
886,0,2,27.0,0,0,13.0000
887,1,1,19.0,0,0,30.0000
889,1,1,26.0,0,0,30.0000


In [35]:
X =  df_titanic.drop(columns='survived')
X

Unnamed: 0,pclass,age,sibsp,parch,fare
0,3,22.0,1,0,7.2500
1,1,38.0,1,0,71.2833
2,3,26.0,0,0,7.9250
3,1,35.0,1,0,53.1000
4,3,35.0,0,0,8.0500
...,...,...,...,...,...
885,3,39.0,0,5,29.1250
886,2,27.0,0,0,13.0000
887,1,19.0,0,0,30.0000
889,1,26.0,0,0,30.0000


In [36]:
y = df_titanic['survived']
y

Unnamed: 0,survived
0,0
1,1
2,1
3,1
4,0
...,...
885,0
886,0
887,1
889,1


In [37]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [39]:
mlp_all_features = MLPClassifier(hidden_layer_sizes=(50,), random_state=42)
mlp_all_features.fit(X_train, y_train)



In [40]:
y_pred_all = mlp_all_features.predict(X_test)
acurracy_all = accuracy_score(y_test, y_pred_all)
acurracy_all

0.6713286713286714

In [41]:
importance  = permutation_importance(mlp_all_features,X_test, y_test, n_repeats=30, random_state=42)
importance

{'importances_mean': array([ 0.09324009, -0.03916084,  0.06247086,  0.01585082,  0.0004662 ]),
 'importances_std': array([0.03256736, 0.01828909, 0.01466124, 0.02622352, 0.01509943]),
 'importances': array([[ 0.1048951 ,  0.06293706,  0.12587413,  0.08391608,  0.06993007,
          0.05594406,  0.11888112,  0.11888112,  0.06293706,  0.04195804,
          0.08391608,  0.08391608,  0.18181818,  0.08391608,  0.12587413,
          0.07692308,  0.14685315,  0.04195804,  0.07692308,  0.11188811,
          0.08391608,  0.12587413,  0.11888112,  0.09090909,  0.06993007,
          0.06293706,  0.06993007,  0.0979021 ,  0.14685315,  0.06993007],
        [-0.07692308, -0.04195804, -0.04895105, -0.04895105, -0.00699301,
         -0.05594406, -0.02797203, -0.04195804, -0.04895105, -0.04195804,
         -0.04195804, -0.03496503, -0.02797203, -0.06293706, -0.01398601,
         -0.03496503, -0.04195804, -0.02797203, -0.03496503, -0.06293706,
         -0.02797203,  0.01398601, -0.05594406, -0.02797203,

In [42]:
low_importance_features = []
for i in range(len(importance.importances_mean)):
  if(importance.importances_mean[i] < 0.02):
    feature_name = X.columns[i]
    low_importance_features.append(feature_name)

low_importance_features

['age', 'parch', 'fare']

In [43]:
X_filtered = X.drop(columns=low_importance_features)
X_filtered

Unnamed: 0,pclass,sibsp
0,3,1
1,1,1
2,3,0
3,1,1
4,3,0
...,...,...
885,3,0
886,2,0
887,1,0
889,1,0


In [44]:
X_filtered_scaled = scaler.fit_transform(X_filtered)
X_filtered_scaled

array([[ 0.91123237,  0.52457013],
       [-1.47636364,  0.52457013],
       [ 0.91123237, -0.55170307],
       ...,
       [-1.47636364, -0.55170307],
       [-1.47636364, -0.55170307],
       [ 0.91123237, -0.55170307]])

In [45]:
X_train_filtered, X_test_filtered, y_train_filtered, y_test_filtered = train_test_split(X_filtered_scaled, y, test_size=0.2, random_state=42)

In [46]:
# Entrenar el modelo con las características "filtradas"
mlp_filtered_features = MLPClassifier(hidden_layer_sizes=(50,), random_state=42)
mlp_filtered_features.fit(X_train_filtered, y_train_filtered)

In [47]:
y_pred_filtered = mlp_filtered_features.predict(X_test_filtered)
acurracy_filtered = accuracy_score(y_test_filtered, y_pred_filtered)

In [48]:
print("La precisión del modelo con todas las características es: ", acurracy_all)
print("La precisión del modelo con las características filtradas es: ", acurracy_filtered)
# con todas = 80% 30
# filtro = 78% 15

La precisión del modelo con todas las características es:  0.6713286713286714
La precisión del modelo con las características filtradas es:  0.7202797202797203
