#### Setting up the machine learning model

In [1]:
# Imports:
import pandas as pd  # For setting up the data frame
from sklearn.linear_model import LogisticRegression  # For the logistic regression model
from sklearn.model_selection import train_test_split  # For setting up the train-test modularization
from sklearn.metrics import accuracy_score  # For testing accuracy
import numpy as np  # For the coefficient data frame
import joblib  # Persistent model

# Constants:
# Path of the CSV file the data frame is read from.
DATASET_FILE_NAME = 'titanic.csv'

# Columns used as model inputs (the independent variables / features) and the
# column being predicted (the dependent variable / target).
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age']
TARGET_COLUMN = 'Survived'

# NOTE: the two legacy names below are inverted relative to standard terminology
# (DEP_VAR holds the *independent* variables, INDEP_VAR the *dependent* one).
# They are kept as aliases so cells that still reference them keep working;
# prefer FEATURE_COLUMNS / TARGET_COLUMN in new code.
DEP_VAR = FEATURE_COLUMNS
INDEP_VAR = TARGET_COLUMN

# Categorical feature columns that get one-hot encoded before fitting.
CAT_VAR = ['Sex']

# File the fitted model is written to (and can be loaded back from) with joblib.
MODEL_NAME = 'titanic-model.joblib'

***
#### Making and cleaning the data frame

In [2]:
# Data frame:
# Read the CSV a single time, loading only the feature columns plus the target
# (previously the file was parsed twice, the second time with every column).
titanic_raw = pd.read_csv(DATASET_FILE_NAME, usecols=[*DEP_VAR, INDEP_VAR])

# y is the target column; X is everything else, kept in the file's column order.
y = titanic_raw[INDEP_VAR]
X = titanic_raw.drop(columns=[INDEP_VAR])

# One-hot encode the categorical columns; drop_first=True drops the first level
# of each so the resulting dummy columns are not redundant with one another.
X = pd.get_dummies(X, columns=CAT_VAR, drop_first=True)

***
#### Setting up the train-test split and logistic regression model

In [3]:
# Testing and training sets:
# 80% of the rows go to training, the remaining 20% to testing. random_state is
# pinned so that Restart & Run All reproduces the same split (and therefore the
# same fitted model and accuracy) instead of a different one on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# Logistic regression model:
# Fit a logistic regression classifier with default settings on the training rows.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# To reuse a previously exported model instead of the one fitted above,
# uncomment the following line:
# log_reg = joblib.load(MODEL_NAME)

***
#### Predicting the test set and calculating the accuracy

In [4]:
# Predictions:
# Predicted labels for the held-out test rows.
predictions = log_reg.predict(X_test)

# Accuracy score:
# Fraction of test rows predicted correctly, scaled to a percentage.
score = accuracy_score(y_test, predictions) * 100
# Use an f-string rather than a '\b' backspace to join the number and the '%':
# the backspace is a control character that ends up in the saved output and only
# looks right on front ends that interpret it.
print(f"Accuracy: {score}%")

Accuracy: 80.33707865168539%


***
#### Exporting the model

In [None]:
# Persistent model:
# Write the fitted classifier to MODEL_NAME so it can be restored later with joblib.load.
joblib.dump(value=log_reg, filename=MODEL_NAME)