# Diccionario de Datos

| **Columna**   | **Descripción**                                                                 |
|---------------|---------------------------------------------------------------------------------|
| `pclass`      | Clase del boleto del pasajero (1 = Primera, 2 = Segunda, 3 = Tercera).          |
| `survived`    | Indicador de supervivencia (0 = No sobrevivió, 1 = Sobrevivió).                |
| `name`        | Nombre completo del pasajero.                                                  |
| `sex`         | Género del pasajero (`male` = Masculino, `female` = Femenino).                 |
| `age`         | Edad del pasajero (en años).                                                   |
| `sibsp`       | Número de hermanos/cónyuges a bordo.                                           |
| `parch`       | Número de padres/hijos a bordo.                                                |
| `ticket`      | Número del boleto.                                                             |
| `fare`        | Tarifa pagada por el boleto.                                                   |
| `cabin`       | Cabina asignada (si aplica).                                                   |
| `embarked`    | Puerto de embarque (`C` = Cherbourg, `Q` = Queenstown, `S` = Southampton).     |
| `boat`        | Número del bote salvavidas asignado (si aplica).                               |
| `body`        | Número del cuerpo recuperado (si aplica).                                      |
| `home.dest`   | Destino o lugar de residencia del pasajero.                                    |


In [62]:
# Carga de librerías
import pandas as pd

In [63]:
# Cargamos los datos
df = pd.read_csv("../data/Titanic_Research_v6.csv", sep= ';')

df.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29,0.0,0,24160,2.113.375,B5,S,2.0,,USA
1,1,1,"Allison, Master. Hudson Trevor",male,92,1.0,2,113781,151.55,C22 C26,S,11.0,,CANADA
2,1,0,"Allison, Miss. Helen Loraine",female,2,1.0,2,113781,151.55,C22 C26,S,,,CANADA
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30,1.0,2,113781,151.55,C22 C26,S,,135.0,CANADA
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1.0,2,113781,151.55,C22 C26,S,,,CANADA


In [64]:
# Revisamos el estado general del dataframe en búsqueda de datos faltantes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1300 non-null   object 
 5   sibsp      1308 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   fare       1308 non-null   object 
 9   cabin      295 non-null    object 
 10  embarked   1307 non-null   object 
 11  boat       486 non-null    object 
 12  body       121 non-null    float64
 13  home.dest  1303 non-null   object 
dtypes: float64(2), int64(3), object(9)
memory usage: 143.3+ KB


## Retiramos columnas innecesarias

In [65]:
columnas_eliminar = ['cabin', 'boat', 'body', 'fare']

df_procesado = df.drop(columns= columnas_eliminar)

df_procesado.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29,0.0,0,24160,S,USA
1,1,1,"Allison, Master. Hudson Trevor",male,92,1.0,2,113781,S,CANADA
2,1,0,"Allison, Miss. Helen Loraine",female,2,1.0,2,113781,S,CANADA
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30,1.0,2,113781,S,CANADA
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1.0,2,113781,S,CANADA


## Imputación de valores

In [66]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1300 non-null   object 
 5   sibsp      1308 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1307 non-null   object 
 9   home.dest  1303 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [67]:
# Verificamos como es el registro de una familia para comprender la valoracion de sibsp y parch 
df_procesado[df_procesado['name'].str.contains("allison", case= False, na= False)]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1,1,1,"Allison, Master. Hudson Trevor",male,92,1.0,2,113781,S,CANADA
2,1,0,"Allison, Miss. Helen Loraine",female,2,1.0,2,113781,S,CANADA
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30,1.0,2,113781,S,CANADA
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1.0,2,113781,S,CANADA


In [68]:
# Elegijo la columna sibsp
df_procesado[df_procesado['sibsp'].isna()]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1137,3,0,"Razi, Mr. Raihed",male,300,,0,2629,C,LEBANON


In [69]:
# Verifico si alguien mas viaja en el mismo camarote con el numero de ticket
df_procesado[df_procesado['ticket'] == '2629']

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1137,3,0,"Razi, Mr. Raihed",male,300,,0,2629,C,LEBANON


In [70]:
# Buscamos posibles familiares por apellido
df_procesado[df_procesado['name'].str.contains('razi', case= False, na= False)]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1137,3,0,"Razi, Mr. Raihed",male,300,,0,2629,C,LEBANON
1258,3,1,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,270,0.0,2,2650,C,LEBANON


In [71]:
# Verificamos como esta compuesta la familia Touma
df_procesado[df_procesado['name'].str.contains('touma', case= False, na= False)]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1256,3,1,"Touma, Master. Georges Youssef",male,70,1.0,1,2650,C,LEBANON
1257,3,1,"Touma, Miss. Maria Youssef",female,90,1.0,1,2650,C,LEBANON
1258,3,1,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,270,0.0,2,2650,C,LEBANON


In [72]:
# Por la edad puede ser que sean hermanos p eorno viajan como familia por eso imputamos con 0
df_procesado['sibsp'] = df_procesado['sibsp'].fillna(0)

df_procesado['sibsp'].isna().value_counts()

sibsp
False    1309
Name: count, dtype: int64

In [73]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1300 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1307 non-null   object 
 9   home.dest  1303 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [74]:
# Revisamos home.dest
df_procesado[df_procesado['home.dest'].isna()]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
871,3,0,"Horgan, Mr. John",male,220.0,0.0,0,370377,Q,
1066,3,0,"Novel, Mr. Mansouer",male,285.0,0.0,0,2697,C,
1135,3,0,"Radeff, Mr. Alexander",male,270.0,0.0,0,349223,S,
1303,3,0,"Yousseff, Mr. Gerious",male,,0.0,0,2627,C,
1304,3,0,"Zabour, Miss. Hileni",female,160.0,1.0,0,2665,C,
1305,3,0,"Zabour, Miss. Thamine",female,190.0,1.0,0,2665,C,


In [75]:
# Revisamos si hay alguna familia Zabour, aunque sibsp sugiere que son hermanas
df_procesado[df_procesado['name'].str.contains('Zabour', case= False, na= False)]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
1304,3,0,"Zabour, Miss. Hileni",female,160,1.0,0,2665,C,
1305,3,0,"Zabour, Miss. Thamine",female,190,1.0,0,2665,C,


In [76]:
# Imputamos los datos como desconocido
df_procesado['home.dest'] = df_procesado['home.dest'].fillna('Desconocido')

In [77]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1300 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1307 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [78]:
# Revisamos embarked
df_procesado[df_procesado['embarked'].isna()]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
168,1,1,"Icard, Miss. Amelie",female,380,0.0,0,113572,,FRANCE
284,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0.0,0,113572,,USA


In [79]:
# Reemplazamos por Desconocido
df_procesado['embarked'].fillna('Desconocido', inplace= True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_procesado['embarked'].fillna('Desconocido', inplace= True)


In [80]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1300 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [81]:
# Cambiamos comas por puntos
df_procesado['age'] = df_procesado['age'].str.replace(',', '.')

In [82]:
# Corregimos la columna de edad
df_procesado[df_procesado['age'].isna()]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
118,1,0,"Franklin, Mr. Thomas Parham",male,,0.0,0,113778,S,UK
134,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1.0,0,17453,C,FRANCE
152,1,1,"Hawksford, Mr. Walter James",male,,0.0,0,16988,S,UK
157,1,0,"Hilliard, Mr. Herbert Henry",male,,0.0,0,17463,S,USA
166,1,0,"Hoyt, Mr. William Fisher",male,,0.0,0,PC 17600,C,USA
410,2,0,"Frost, Mr. Anthony Wood ""Archie""",male,,0.0,0,239854,S,IRELAND
816,3,0,"Gheorgheff, Mr. Stanio",male,,0.0,0,349254,C,BULGARIA
940,3,0,"Kraeff, Mr. Theodor",male,,0.0,0,349253,C,BULGARIA
1303,3,0,"Yousseff, Mr. Gerious",male,,0.0,0,2627,C,Desconocido


In [83]:
# Creamos un df para calcular las edades para la única mujer y los hombre
df_droped = df_procesado.copy()
df_droped.dropna(inplace= True)

In [84]:
# Filtro por Mr y Mrs, para obtener la media
df_age_mr = df_droped[df_droped['name'].str.contains('Mr', case= True, na= False)]
df_age_mrs = df_droped[df_droped['name'].str.contains('Mrs', case= True, na= False)]


In [85]:
# Corrección de formato para realizar los cálculos
df_age_mr['age'] = df_age_mr['age'].astype(float)
df_age_mr['age'] = df_age_mr['age'].astype(int)
df_age_mrs['age'] = df_age_mrs['age'].astype(float)
df_age_mrs['age'] = df_age_mrs['age'].astype(int)

# Calculamos la media para realizar la imputación
edad_media_mr = df_age_mr['age'].astype(int).median()
edad_media_mrs = df_age_mrs['age'].astype(int).median()

print('Edad media de Hombres: ', edad_media_mr)
print('Edad media de Mujeres: ', edad_media_mrs)

Edad media de Hombres:  30.0
Edad media de Mujeres:  35.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_age_mr['age'] = df_age_mr['age'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_age_mr['age'] = df_age_mr['age'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_age_mrs['age'] = df_age_mrs['age'].astype(float)
A value is trying to be set on a copy of a slice from a

In [86]:
# La única mujer tiene el registro 134
df_procesado.iloc[134, 4] = 35

In [87]:
# Imputamos las edad de los hombres
df_procesado['age'] = df_procesado['age'].fillna(30)

In [88]:
# Resultado final
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1309 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [89]:
df_procesado['age'].unique()

array(['29', '0.92', '2', '30', '25', '48', '63', '39', '53', '71', '47',
       '18', '24', '26.0', '80', '50', '32.0', '36', '37', '26', '42.0',
       '29.0', '19', '35.0', '28', '45', '40', '58', '42', '22', '41',
       '44', '59', '60', '41.0', '28.0', '33', '17', '11', '14', '49',
       '76', '46', '27', '36.0', '43', '22.0', '64', '55', '70', '38',
       '51', '33.0', '31', '4', '54', '23', '49.0', '48.0', '30.0', 30,
       '39.0', '35', '24.0', 35, '37.0', '40.0', '27.0', '52', '16',
       '38.0', '32.5', '21', '58.0', '15', '16.0', '65', '28.5', '45.5',
       '56', '45.0', '13', '61', '34', '57', '6', '32', '62', '67',
       '31.0', '1', '12', '20', '0.83', '8', '17.0', '0.67', '7', '3',
       '61.0', '36.5', '19.0', '18.5', '23.0', '5', '66', '9', '18.0',
       '21.0', '0.75', '20.0', '10.0', '66.0', '43.0', '0.33', '0.17',
       '25.0', '40.5', '46.0', '10', '11.0', '2.0', '34.0', '4.0', '1.0',
       '8.0', '7.0', '5.0', '44.0', '50.0', '3.0', '12.0', '62.0', '15.

In [90]:
# Verificamos si hay duplicados
df_procesado.duplicated().value_counts()

False    1309
Name: count, dtype: int64

# Formateo de Columnas

In [91]:
df_procesado.head(3)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0.0,0,24160,S,USA
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1.0,2,113781,S,CANADA
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1.0,2,113781,S,CANADA


In [92]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1309 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [32]:
# Verificamos los datos de pclass y survived
pclass_unique = df_procesado['pclass'].unique()
survived_unique = df_procesado['survived'].unique()

print("Datos unicos de pclass:\n", pclass_unique)
print("Datos unicos de survived:\n", survived_unique)


Datos unicos de pclass:
 [1 2 3]
Datos unicos de survived:
 [1 0]


In [33]:
# Verifico los datos de sex antes de codificar
df_procesado['sex'].unique()

array(['female', 'male'], dtype=object)

In [34]:
# Los voy a codificar como 0 y 1
df_procesado['sex'] = df_procesado['sex'].map({'female': 0, 'male': 1})
df_procesado['sex'].astype(int)

df_procesado.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",0,29,0.0,0,24160,S,USA
1,1,1,"Allison, Master. Hudson Trevor",1,92,1.0,2,113781,S,CANADA
2,1,0,"Allison, Miss. Helen Loraine",0,2,1.0,2,113781,S,CANADA
3,1,0,"Allison, Mr. Hudson Joshua Creighton",1,30,1.0,2,113781,S,CANADA
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",0,25,1.0,2,113781,S,CANADA


In [93]:
# Verificamos los datos de age
df_procesado['age'].unique()

array(['29', '0.92', '2', '30', '25', '48', '63', '39', '53', '71', '47',
       '18', '24', '26.0', '80', '50', '32.0', '36', '37', '26', '42.0',
       '29.0', '19', '35.0', '28', '45', '40', '58', '42', '22', '41',
       '44', '59', '60', '41.0', '28.0', '33', '17', '11', '14', '49',
       '76', '46', '27', '36.0', '43', '22.0', '64', '55', '70', '38',
       '51', '33.0', '31', '4', '54', '23', '49.0', '48.0', '30.0', 30,
       '39.0', '35', '24.0', 35, '37.0', '40.0', '27.0', '52', '16',
       '38.0', '32.5', '21', '58.0', '15', '16.0', '65', '28.5', '45.5',
       '56', '45.0', '13', '61', '34', '57', '6', '32', '62', '67',
       '31.0', '1', '12', '20', '0.83', '8', '17.0', '0.67', '7', '3',
       '61.0', '36.5', '19.0', '18.5', '23.0', '5', '66', '9', '18.0',
       '21.0', '0.75', '20.0', '10.0', '66.0', '43.0', '0.33', '0.17',
       '25.0', '40.5', '46.0', '10', '11.0', '2.0', '34.0', '4.0', '1.0',
       '8.0', '7.0', '5.0', '44.0', '50.0', '3.0', '12.0', '62.0', '15.

In [94]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1309 non-null   object 
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 102.4+ KB


In [96]:
# Cambio la edad a tipo numérico
df_procesado['age'] = df_procesado['age'].astype(float)

df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1309 non-null   float64
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(2), int64(3), object(5)
memory usage: 102.4+ KB


In [97]:
# Voy a redondear las edades a solo enteros, para eso, voy a asegurarme que todos los datos menores a 1 sean 1
df_procesado['age'] = df_procesado['age'].apply(lambda x: 1 if x < 1 else x)

df_procesado['age'] = df_procesado['age'].astype(int)

df_procesado.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,embarked,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29,0.0,0,24160,S,USA
1,1,1,"Allison, Master. Hudson Trevor",male,1,1.0,2,113781,S,CANADA
2,1,0,"Allison, Miss. Helen Loraine",female,2,1.0,2,113781,S,CANADA
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30,1.0,2,113781,S,CANADA
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1.0,2,113781,S,CANADA


In [98]:
df_procesado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1309 non-null   int64  
 5   sibsp      1309 non-null   float64
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   embarked   1309 non-null   object 
 9   home.dest  1309 non-null   object 
dtypes: float64(1), int64(4), object(5)
memory usage: 102.4+ KB


In [100]:
# Guardamos la los datos limpios

df_procesado.to_csv("../data/Titanic_Research_v6_clean.csv", sep= ';', index= False)