In [None]:
# Importing the necessary libraries
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the Titanic passenger records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='titanic.csv')

# Names of the columns that hold categorical values and will be encoded later
categorical_data = [
    'Sex',
    'Embarked',
    'Pclass',
]

In [None]:
# Build the feature matrix by one-hot encoding the categorical columns.
#
# `transformers` is a list of (name, transformer, columns) tuples. Here a single
# `OneHotEncoder` is applied to the columns listed in `categorical_data`; it turns
# every category of a column into its own 0/1 indicator column, a form that ML
# algorithms can consume.
#
# `remainder='passthrough'` keeps every column that is not listed in `transformers`
# unchanged; those columns are appended to the right of the encoded ones.
# NOTE(review): any free-text columns in the CSV are passed through as-is, so `X`
# may end up with object dtype -- confirm this is what downstream cells expect.
#
# `sparse_threshold=0` makes the transformer always return a dense array, so the
# `np.array(X)` conversion below never wraps a SciPy sparse matrix by accident.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_data)],
    remainder='passthrough',
    sparse_threshold=0,
)

# For a `OneHotEncoder`, `fit` learns the set of categories present in each listed
# column and `transform` replaces each column with its indicator columns.
# `fit_transform` performs both steps in a single call.
#
# The target column `Survived` is dropped before fitting: with
# `remainder='passthrough'` it would otherwise be copied into `X`, leaking the
# label into the feature matrix. `drop` returns a new frame, so `df` is untouched.
X = ct.fit_transform(df.drop(columns=['Survived']))

# Convert the output into a NumPy array
X = np.array(X)

# Encode the target column as integer class labels:
# first learn the distinct classes, then map every row to its class index
le = LabelEncoder().fit(df.loc[:, 'Survived'])
y = le.transform(df.loc[:, 'Survived'])

# Show the resulting feature matrix, followed by the encoded target vector
print("Updated matrix of features: \n", X)
print("Updated dependent variable vector: \n", y)