# Titanic - Machine Learning from Disaster

In [1]:
from pathlib import Path

# Folder that holds the competition files (the downloaded archive and the CSVs read below).
# The path is relative, so it is resolved against the kernel's current working directory.
_DATA_PATH = Path('../Data')

In [2]:
from zipfile import ZipFile

# Function to get competition files using Kaggle API
def get_competition_files(competition: str, path: Path) -> None:
  """Download the files of a Kaggle competition into ``path``.

  The ``kaggle`` package is imported lazily so the notebook can still be
  used when it is not installed, provided the data was placed in the
  folder manually.

  Args:
    competition: Competition identifier passed to the Kaggle API.
    path: Destination folder handed to the Kaggle API as ``path``.

  Returns:
    None. If the ``kaggle`` import fails, installation instructions and
    the import error are printed and nothing is downloaded.

  Raises:
    Exception: Any error raised by the Kaggle client while authenticating
      or downloading is propagated unchanged.
  """
  # Only the import is guarded: a missing module is the one failure that is
  # reported as a message; everything else must reach the caller.
  try:
    import kaggle
  except ModuleNotFoundError as e:
    print("Kaggle not installed, use 'pip install kaggle' or 'conda install -c conda-forge kaggle' and restart Jupyter kernel. Otherwise, download data and place in folder above manually")
    # Python 3 exceptions have no ``message()`` method; printing the
    # exception shows its text without raising AttributeError.
    print(e)
    return

  kaggle.api.authenticate()
  kaggle.api.competition_download_files(competition, path=path)
  
# Extract a downloaded archive next to itself
def unzip_competition(path: Path, fname: str) -> None:
  """Unpack every member of the zip file ``path / fname`` into ``path``.

  Args:
    path: Folder that contains the archive and receives its contents.
    fname: File name of the archive inside ``path``.
  """
  archive_location = path / fname
  with ZipFile(archive_location, mode='r') as archive:
    archive.extractall(path=path)

In [3]:
import os

# Download and unpack the competition files unless they are already present.
# The folder is created first so listing it cannot fail with FileNotFoundError,
# and _DATA_PATH is used throughout instead of repeating the '../Data' literal.
_DATA_PATH.mkdir(parents=True, exist_ok=True)

# Fewer than 3 entries in the folder is treated as "not downloaded yet"
# (presumably train.csv, test.csv and one more competition file - TODO confirm).
if len(os.listdir(_DATA_PATH)) < 3:
  get_competition_files('Titanic', _DATA_PATH)
  unzip_competition(_DATA_PATH, 'Titanic.zip')
  # The archive is no longer needed once its contents are extracted
  os.remove(_DATA_PATH / 'Titanic.zip')
else:
  print('Competition files already present')


Competition files already present


In [4]:
# Libraries for data analysis and visualization
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [19]:
# Load the training and test sets from the competition folder
train_df = pd.read_csv(_DATA_PATH / 'train.csv')
test_df = pd.read_csv(_DATA_PATH / 'test.csv')
# Both frames are also kept together in one list
combine = [train_df, test_df]

# List the column names of the training set, one per line
print('Features:')
for column_name in train_df.columns:
  print(column_name)


Features:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked


In [20]:
# Show the first five rows of the training set
train_df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
