# Reto - Titanic: Machine Learning from Disaster

---

### Lautaro Gabriel Coteja - A01571214
### Daniela Jiménez Téllez - A01654798
### Andrés Villarreal González - A00833915
### Héctor Hibran Tapia Fernández - A01661114

---

Link del repositorio:

https://github.com/Lautaro000/Inteligencia-Artificial-Avanzada-para-la-Ciencia-de-Datos-I

---

## Importación de librerías

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
# Apply matplotlib's 'dark_background' style sheet to matplotlib figures.
# The plotly figures in this notebook set their own black backgrounds explicitly.
plt.style.use('dark_background')

### Importamos el dataset (train)

---

In [None]:
# Load the training data from a CSV located relative to the current working directory.
# NOTE(review): the variable name suggests this file is the already-cleaned train set — confirm its provenance.
df_train_cleaned = pd.read_csv('./analisis_de_datos.csv')

Como el dataset no contiene valores nulos (ver la salida de `info()` más abajo), podemos iniciar con el análisis.

<font size = 7 color = '#336EFF'>Análisis de Datos</font>

---


In [None]:
# Print each column's name, non-null count and dtype.
df_train_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          891 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        891 non-null    object 
 11  Embarked     891 non-null    object 
 12  Title        891 non-null    object 
 13  Family       891 non-null    int64  
 14  C            891 non-null    float64
 15  Q            891 non-null    float64
 16  S            891 non-null    float64
 17  Female       891 non-null    float64
 18  Male         891 non-null    float64
dtypes: float

In [None]:
# Summary statistics for every column; include = 'all' also covers the
# non-numeric columns (unique / top / freq rows).
df_train_cleaned.describe(include = 'all')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title,Family,C,Q,S,Female,Male
count,891.0,891.0,891.0,891,891,891.0,891.0,891.0,891.0,891.0,891,891,891,891.0,891.0,891.0,891.0,891.0,891.0
unique,,,,891,2,,,,681.0,,147,3,17,,,,,,
top,,,,"Braund, Mr. Owen Harris",male,,,,347082.0,,B96 B98,S,Mr,,,,,,
freq,,,,1,577,,,,7.0,,691,646,517,,,,,,
mean,446.0,0.383838,2.308642,,,29.807054,0.523008,0.381594,,32.204208,,,,1.904602,0.188552,0.08642,0.725028,0.352413,0.647587
std,257.353842,0.486592,0.836071,,,13.883697,1.102743,0.806057,,49.693429,,,,1.613459,0.391372,0.281141,0.446751,0.47799,0.47799
min,1.0,0.0,1.0,,,0.42,0.0,0.0,,0.0,,,,1.0,0.0,0.0,0.0,0.0,0.0
25%,223.5,0.0,2.0,,,21.0,0.0,0.0,,7.9104,,,,1.0,0.0,0.0,0.0,0.0,0.0
50%,446.0,0.0,3.0,,,28.0,0.0,0.0,,14.4542,,,,1.0,0.0,0.0,1.0,0.0,1.0
75%,668.5,1.0,3.0,,,38.125,1.0,0.0,,31.0,,,,2.0,0.0,0.0,1.0,1.0,1.0


In [None]:
# Grouped bar chart: passenger counts per survival outcome, split by sex.
fig = px.histogram(
    df_train_cleaned,
    x="Survived",
    color="Sex",
    barmode="group",
    text_auto=True,
    color_discrete_map={'male': '#00FFFF', 'female': '#FF69B4'},
)

# Put the count labels above the bars.
fig.update_traces(textposition='outside', textfont_color='white')

# Show readable labels instead of the raw 0/1 codes and hide the axis title.
fig.update_xaxes(tickvals=[0, 1], ticktext=['Not Survived', 'Survived'], title_text='')

# Centered title and black/white theme.
fig.update_layout(
    title_text='Survivors by Sex',
    title_x=0.5,
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    xaxis_color='white',
    yaxis_color='white',
    legend_font_color='white',
)

fig.show()

In [None]:
# Fare histogram coloured by survival outcome, with a marginal box plot.
fig = px.histogram(
    df_train_cleaned,
    x="Fare",
    color="Survived",
    nbins=50,
    marginal="box",
    category_orders={"Survived": [0, 1]},
    color_discrete_map={0: "#ff073a", 1: "#39ff14"},
    title="Distribution of Survival Fees",
)

# Rename the traces so the legend shows labels instead of the 0/1 codes.
fig.for_each_trace(
    lambda trace: trace.update(
        name=trace.name.replace("0", "Not Survived").replace("1", "Survived")
    )
)

# Centered title and black/white theme.
fig.update_layout(
    title_x=0.5,
    title_font_color='white',
    font_color='white',
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend_bgcolor='black',
    legend_font_color='white',
)

fig.show()

In [None]:
# Age histogram coloured by survival outcome, with a marginal box plot.
fig = px.histogram(
    df_train_cleaned,
    x="Age",
    color="Survived",
    nbins=50,
    marginal="box",
    category_orders={"Survived": [0, 1]},
    color_discrete_map={0: "#ff073a", 1: "#39ff14"},
    title="Age Distribution",
)

# Rename the traces so the legend shows labels instead of the 0/1 codes.
fig.for_each_trace(
    lambda trace: trace.update(
        name=trace.name.replace("0", "Not Survived").replace("1", "Survived")
    )
)

# Centered title and black/white theme.
fig.update_layout(
    title_x=0.5,
    title_font_color='white',
    font_color='white',
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend_bgcolor='black',
    legend_font_color='white',
)

fig.show()

In [None]:
# Number of passengers in each ticket class, ordered by class number.
pclass_counts = df_train_cleaned['Pclass'].value_counts().sort_index()

# One colour per bar, in the same order as pclass_counts.
pclass_colors = ['#FFD700', '#C0C0C0', '#CD7F32']

# The bar heights are the real counts. Room for the labels drawn above the
# bars comes from the y-axis range below, not from inflating the plotted
# values (which would make the bars disagree with their own labels).
fig = go.Figure(data = [go.Bar(
    x = pclass_counts.index,
    y = pclass_counts.values,
    text = pclass_counts.values,
    textposition = 'outside',
    textfont = dict(size = 14),
    cliponaxis = False,
    offsetgroup = 0,
    marker_color = pclass_colors,
    marker_line_color = pclass_colors,
    marker_line_width = 2
)])

# Leave 10% headroom above the tallest bar so its label is not cut off.
y_axis_max = float(pclass_counts.max()) * 1.1

fig.update_layout(
    title = 'Number of People per Class',
    xaxis_title = ' ',
    yaxis_title = 'Count',
    xaxis = dict(
        tickmode = 'array',
        tickvals = [1, 2, 3],
        ticktext = ['1st Class', '2nd Class', '3rd Class'],
        linecolor = 'gray',
        gridcolor = 'black'
    ),
    yaxis = dict(
        range = [0, y_axis_max],
        linecolor = 'gray',
        gridcolor = 'gray'
    ),
    bargap = 0.2,
    width = 700,
    height = 500,
    plot_bgcolor = 'black',
    paper_bgcolor = 'black',
    font = dict(color = 'white'),
    title_font = dict(color = 'white')
)

fig.show()

In [None]:
# Grouped bar chart: passenger counts per ticket class, split by sex.
fig = px.histogram(
    df_train_cleaned,
    x='Pclass',
    color='Sex',
    barmode='group',
    text_auto=True,
    title='Number of Women and Men per Class',
    color_discrete_map={'male': '#00FFFF', 'female': '#FF69B4'},
)

# Put the count labels above the bars.
fig.update_traces(textposition='outside', textfont_color='white')

# Show class names instead of the numeric codes 1/2/3.
fig.update_xaxes(
    title_text=' ',
    tickvals=[1, 2, 3],
    ticktext=['1st Class', '2nd Class', '3rd Class'],
)

# Fixed figure size and black/white theme.
fig.update_layout(
    width=1000,
    height=500,
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    xaxis_color='white',
    yaxis_color='white',
    legend_font_color='white',
)

fig.show()

In [None]:
# Passenger count for every (Survived, Pclass) combination.
df_count = df_train_cleaned.groupby(['Survived', 'Pclass']).size().reset_index(name = 'count')

fig = go.Figure()

pclass_names = {1: '1st Class', 2: '2nd Class', 3: '3rd Class'}
survival_labels = {0: 'Not Survived', 1: 'Survived'}

# One trace per ticket class; each trace has one bar per survival outcome.
for pclass in sorted(df_train_cleaned['Pclass'].unique()):
    # .assign() returns a new frame, so relabelling 'Survived' never writes
    # into a slice of df_count (the original column assignment on the filtered
    # frame triggered pandas' SettingWithCopyWarning).
    filtered_df = df_count[df_count['Pclass'] == pclass].assign(
        Survived = lambda rows: rows['Survived'].map(survival_labels)
    )

    fig.add_trace(go.Bar(
        x = filtered_df['Survived'],
        y = filtered_df['count'],
        name = pclass_names[pclass],
        text = filtered_df['count'],
        textposition = 'outside',
        # Colours are indexed by class number (1 -> first entry).
        marker_color = ['#FFD700', '#C0C0C0', '#CD7F32'][pclass-1],
        marker_line_color = ['#FFD700', '#C0C0C0', '#CD7F32'][pclass-1],
        marker_line_width = 2
    ))

fig.update_layout(
    title_text = 'Survivors per Class',
    xaxis_title = ' ',
    barmode = 'group',
    plot_bgcolor = 'black',
    paper_bgcolor = 'black',
    font = dict(color = 'white'),
    title_font = dict(color = 'white')
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Extract the first run of letters that is directly followed by a period
# from each passenger name and store it in a new 'Título' column.
df_train_cleaned['Título'] = df_train_cleaned['Name'].str.extract(r'([A-Za-z]+)\.')

# Count passengers per title, then reverse the row order of the counts table.
title_counts = df_train_cleaned['Título'].value_counts().reset_index()
title_counts.columns = ['Título', 'Count']
title_counts = title_counts.iloc[::-1].reset_index(drop = True)

# Bar colours; this palette is also used by the average-age chart.
neon_colors = [
    '#39FF14', '#0AFF0A', '#00FFFF', '#00BFFF', '#FF00FF', '#FF1493',
    '#FF4500', '#FFD700', '#ADFF2F', '#7FFF00', '#00FF7F', '#32CD32',
    '#00FA9A', '#00CED1', '#1E90FF', '#8A2BE2', '#FF69B4'
]

fig = go.Figure()

# One horizontal bar (and legend entry) per title; colours cycle through the palette.
for idx, (title, count) in enumerate(zip(title_counts['Título'], title_counts['Count'])):
    fig.add_trace(go.Bar(
        x = [count],
        y = [title],
        name = title,
        orientation = 'h',
        marker_color = neon_colors[idx % len(neon_colors)],
        text = count,
        textposition = 'outside'
    ))

fig.update_layout(
    title = 'Number of People per Title',
    xaxis_title = 'Count',
    yaxis_title = 'Title',
    height = 550,
    template = 'plotly_dark',
    plot_bgcolor = '#000000',
    paper_bgcolor = '#000000',
    font_color = 'white'
)

fig.show()

In [None]:
# Mean age per title, without titles whose mean is NaN, sorted from youngest to oldest.
average_age_per_title = (
    df_train_cleaned
    .groupby('Título')['Age']
    .mean()
    .dropna()
    .sort_values(ascending = True)
)

fig = go.Figure()

# One horizontal bar per title, labelled inside the bar with the mean age to two decimals.
for position, (title, age) in enumerate(average_age_per_title.items()):
    bar_color = neon_colors[position % len(neon_colors)]
    fig.add_trace(go.Bar(
        x = [age],
        y = [title],
        name = title,
        orientation = 'h',
        marker_color = bar_color,
        text = f'{age:.2f}',
        textposition = 'inside'
    ))

fig.update_layout(
    title = 'Average Age by Title',
    xaxis_title = 'Age',
    yaxis_title = 'Title',
    height = 550,
    template = 'plotly_dark',
    plot_bgcolor = '#000000',
    paper_bgcolor = '#000000',
    font_color = 'white'
)

fig.show()

In [None]:
# Remove the text columns and the columns that appear to be already encoded
# by other columns in the frame (presumably Sex -> Female/Male,
# Embarked -> C/Q/S, SibSp/Parch -> Family — TODO confirm against the cleaning step).
# errors = 'ignore' keeps this cell safe to re-run: it rebinds df_train_cleaned,
# so on a second run these columns are already gone and a plain drop raises KeyError.
df_train_cleaned = df_train_cleaned.drop(
    columns = ['Sex', 'Embarked', 'Name', 'Ticket', 'Cabin', 'Título', 'Title', 'SibSp', 'Parch'],
    errors = 'ignore'
)

In [None]:
# Preview the first 20 rows of the frame after the column drop.
df_train_cleaned.head(20)

Unnamed: 0,PassengerId,Survived,Pclass,Age,Fare,Family,C,Q,S,Female,Male
0,1,0,3,22.0,7.25,2,0.0,0.0,1.0,0.0,1.0
1,5,0,3,35.0,8.05,1,0.0,0.0,1.0,0.0,1.0
2,6,0,3,44.5,8.4583,1,0.0,1.0,0.0,0.0,1.0
3,7,0,1,54.0,51.8625,1,0.0,0.0,1.0,0.0,1.0
4,13,0,3,20.0,8.05,1,0.0,0.0,1.0,0.0,1.0
5,14,0,3,39.0,31.275,7,0.0,0.0,1.0,0.0,1.0
6,18,1,2,37.0,13.0,1,0.0,0.0,1.0,0.0,1.0
7,21,0,2,35.0,26.0,1,0.0,0.0,1.0,0.0,1.0
8,22,1,2,34.0,13.0,1,0.0,0.0,1.0,0.0,1.0
9,24,1,1,28.0,35.5,1,0.0,0.0,1.0,0.0,1.0
