# Modelo de Machine Learning 
Support Vector Machines

In [1]:
# Se importan los modulos
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
# Se lee el dataset previamente procesado y se guarda en la variable "X"
X = pd.read_csv('data/clean_train.csv')

# Se muestra el dataset
X

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Embarked,Age_range,Fare_category
0,1,0,3,0,1,0,0.0,1,0
1,2,1,1,1,1,0,1.0,2,3
2,3,1,3,1,0,0,0.0,1,1
3,4,1,1,1,1,0,0.0,2,3
4,5,0,3,0,0,0,0.0,2,1
...,...,...,...,...,...,...,...,...,...
886,887,0,2,0,0,0,0.0,1,1
887,888,1,1,1,0,0,0.0,1,2
888,889,0,3,1,1,2,0.0,0,2
889,890,1,1,0,0,0,1.0,1,2


In [3]:
# Se extraen las etiquetas (características a predecir) y se guardan en la variable "y"
y = X['Survived']

# Se muestran las etiquetas
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [4]:
# Se elimina toda la columna de las etiquetas (Survived)
X.drop('Survived', axis=1, inplace=True)

# Se muestra el dataset
X

Unnamed: 0,PassengerId,Pclass,Sex,SibSp,Parch,Embarked,Age_range,Fare_category
0,1,3,0,1,0,0.0,1,0
1,2,1,1,1,0,1.0,2,3
2,3,3,1,0,0,0.0,1,1
3,4,1,1,1,0,0.0,2,3
4,5,3,0,0,0,0.0,2,1
...,...,...,...,...,...,...,...,...
886,887,2,0,0,0,0.0,1,1
887,888,1,1,0,0,0.0,1,2
888,889,3,1,1,2,0.0,0,2
889,890,1,0,0,0,1.0,1,2


In [5]:
# Se divide el dataset para posteriormente entrenar el modelo de ML
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [6]:
# Se creal el modelo predictivo SVM
model = svm.SVC(kernel='linear')

# Se entrena el modelo
model.fit(X_train, y_train)

SVC(kernel='linear')

In [7]:
# Se predicen los datos de prueba
y_pred = model.predict(X_test)

# Se calcula la exactitud del modelo
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7821229050279329


In [8]:
# Se cargan los nuevos datos a predecir y se guardan en la variable "new_data"
new_data = pd.read_csv('data/clean_test.csv')

# Se muestran los nuevos datos
new_data

Unnamed: 0,PassengerId,Pclass,Sex,SibSp,Parch,Embarked,Age_range,Fare_category
0,892,3,0,0,0,2,2,0
1,893,3,1,1,0,0,2,0
2,894,2,0,0,0,2,3,1
3,895,3,0,0,0,0,1,1
4,896,3,1,1,1,0,1,1
...,...,...,...,...,...,...,...,...
413,1305,3,0,0,0,0,0,1
414,1306,1,1,0,0,1,2,3
415,1307,3,0,0,0,0,2,0
416,1308,3,0,0,0,0,0,1


In [9]:
# Se predicen los nuevos datos
y_pred_new = model.predict(new_data)

# Se muestran las predicciones donde 0 significa deceso y 1 sobreviviente
y_pred_new

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [10]:
# Se crea un dataframe con las predicciones de los nuevos datos
final_pred = pd.DataFrame({"PassengerId": new_data['PassengerId'], "Survived": y_pred_new})

In [11]:
# Se establece "PassengerId" como el índice
final_pred.set_index('PassengerId', inplace=True)

In [12]:
# Se convierte el dataframe previamente creado en un archivo CSV
final_pred.to_csv('data/final_pred.csv')