# Clasificacion Perceptron multicapa utilizando Sklearn

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
url = 'https://onetwork.org/dataset/2_titanic.csv'
df = pd.read_csv(url)

In [3]:
df.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [4]:
print(df.shape)

(891, 12)


In [5]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [6]:
df['Title'] = df['Name'].str.extract('([A-Za-z]+)\.', expand = True)
df['Title'].unique()

array(['Mr', 'Mrs', 'Miss', 'Master', 'Don', 'Rev', 'Dr', 'Mme', 'Ms',
       'Major', 'Lady', 'Sir', 'Mlle', 'Col', 'Capt', 'Countess',
       'Jonkheer'], dtype=object)

In [7]:
title_reduction = {'Mr': 'Mr', 'Mrs': 'Mrs', 'Miss': 'Miss', 'Master': 'Master', 'Don': 'Mr', 'Rev': 'Rev', 'Dr': 'Dr', 'Mme': 'Miss', 'Ms': 'Miss', 'Major': 'Mr', 'Lady': 'Mrs', 'Sir': 'Mr', 'Mile':'Miss', 'Col': 'Mr','Capt': 'Mr', 'Countess': 'Mrs', 'Jonkheer': 'Mr'}
df['Title'] = df['Title'].map(title_reduction)
df['Title'].unique()

array(['Mr', 'Mrs', 'Miss', 'Master', 'Rev', 'Dr', nan], dtype=object)

In [8]:
for title, age, in df.groupby('Title')['Age'].median().items():
  print(title, age)
  df.loc[(df['Title']==title) & (df['Age'].isnull()), 'Age']= age

Dr 46.5
Master 3.5
Miss 21.0
Mr 30.0
Mrs 35.0
Rev 46.5


In [9]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
Title            2
dtype: int64

In [10]:
df = df.drop(['PassengerId', 'Cabin', 'Embarked', 'Name', 'Ticket', 'Title'], axis = 1)
df.head(7)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,male,22.0,1,0,7.25
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.925
3,1,1,female,35.0,1,0,53.1
4,0,3,male,35.0,0,0,8.05
5,0,3,male,30.0,0,0,8.4583
6,0,1,male,54.0,0,0,51.8625


In [11]:
df['Sex'] = df['Sex'].map({'male': 1, 'female': 0})
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,1,22.0,1,0,7.25
1,1,1,0,38.0,1,0,71.2833
2,1,3,0,26.0,0,0,7.925
3,1,1,0,35.0,1,0,53.1
4,0,3,1,35.0,0,0,8.05


In [12]:
df.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
dtype: int64

In [13]:
df.shape

(891, 7)

In [14]:
X = df.drop('Survived', axis = 1)
y = df['Survived']

In [15]:
X_train, X_test, y_train, y_test =train_test_split(X,y,test_size=0.2, random_state=4)
X_train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare
42,3,1,30.0,0,0,7.8958
684,2,1,60.0,1,1,39.0000
605,3,1,36.0,1,0,15.5500
409,3,0,21.0,3,1,25.4667
740,1,1,30.0,0,0,30.0000
...,...,...,...,...,...,...
360,3,1,40.0,1,4,27.9000
709,3,1,3.5,1,1,15.2458
439,2,1,31.0,0,0,10.5000
174,1,1,56.0,0,0,30.6958


In [16]:
X_train = np.array(X_train, dtype=float)
y_train = np.array(y_train, dtype=float)
X_test = np.array(X_test, dtype=float)
y_test = np.array(y_test, dtype=float)

In [17]:
print("Numero de datos de entrenamiento", len(X_train))
print("Numero de datos de test", len(X_test))

Numero de datos de entrenamiento 712
Numero de datos de test 179


# Clasificacion Perceptron multicapa utilizando Sklearn

In [18]:
from sklearn.neural_network import MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [19]:
from sklearn.metrics import accuracy_score
print('Exactitud: ', accuracy_score(y_test, y_pred))

Exactitud:  0.8044692737430168


In [20]:
import joblib
joblib.dump(model, 'mlp_model_titanic.pkl')

['mlp_model_titanic.pkl']

In [21]:
model = joblib.load('mlp_model_titanic.pkl')

**PREDECIR**

In [22]:
clase = 3
sexo = 1
edad = 22
sibsp = 1
parch = 0
tarifa = 7.25

#1	1	female	38.0	1	0	71.2833
#	3	male	22.0	1	0	7.2500


In [23]:
X_new = np.array([[clase, sexo, edad, sibsp, parch, tarifa]])
y_new = model.predict(X_new)

if y_new[0] == 1:
  print('El pasajero sobrevive')
else:
  print('El pasajero no sobrevive')

El pasajero no sobrevive
