**Step 1) Importing and Instantiating Classes**

In [None]:
pip install sweetviz

In [None]:
from utils import open_csv_file, analyze_dataframe, create_report

from correlation_matrix import Correlation
from data import DataFrame
from machine_learning import MachineLearning
from sklearn import metrics, svm

import numpy as np

from graph import Graph
import seaborn as sns

# Read the training CSV and wrap it in the project's DataFrame helper; every
# later cell works through `data_frame_instance`.
training_data = open_csv_file('train.csv')
data_frame_instance = DataFrame(training_data)

# Run the project's analysis helper on the wrapped data and print whatever it
# returns under a heading (heading and result on separate lines).
analysis_result = analyze_dataframe(data_frame_instance)
print("Analysis Result:", analysis_result, sep="\n")

**Step 2) Inspecting and Cleaning Data**

In [None]:
# Fill the missing values of the 'Age' column with that column's median, as
# the helper's name states (implementation lives in the project's data module).
# NOTE(review): presumably this updates data_frame_instance in place, since the
# result is not assigned to anything — confirm against the DataFrame class.
data_frame_instance.replace_missing_value_with_median('Age')


**Step 3) Transforming Data**

In [None]:
# Derive a last-name column from the 'Name' column. The following cells refer
# to the new column as 'Last_name' (it is label-encoded and later removed).
data_frame_instance.create_last_name_column('Name')


In [None]:
# Label encoding of the derived surname column.
# NOTE(review): presumably this is what creates 'Last_name_encoded', which the
# scaling cell below operates on — confirm against the DataFrame class.
data_frame_instance.label_encoding('Last_name')

# One-hot encoding of the categorical columns 'Embarked' and 'Sex'.
# The second call is the cell's last expression, so any non-None return value
# is rendered as the cell output.
data_frame_instance.hot_encoding('Embarked')
data_frame_instance.hot_encoding('Sex')

In [None]:
# Apply the project's min-max scaler to the 'Last_name_encoded' column.
# NOTE(review): presumably rescales the integer labels to a fixed range so they
# are comparable with the other features — confirm the target range.
data_frame_instance.min_max_scaler('Last_name_encoded')


In [None]:
# Ask the helper for the bin edges of 'Fare' using the 'quantile' strategy.
# This is the cell's only expression, so its return value is what the notebook
# displays; nothing is stored for later cells.
data_frame_instance.get_bin_edges('Fare','quantile')

In [None]:
# Bucket the continuous 'Age' and 'Fare' columns by quantile. The raw 'Age'
# and 'Fare' columns are dropped in the next cell.
# NOTE(review): the names of the bucketed output columns are not visible in
# this notebook — check bucket_quantile in the project's data module.
data_frame_instance.bucket_quantile('Age')
data_frame_instance.bucket_quantile('Fare')

In [None]:
# Drop the columns that are no longer needed for modelling.
data_frame_instance.remove_columns(
    [
        # Not used by any other cell of this notebook.
        'PassengerId',
        'Name',
        'Cabin',
        # Raw values that were bucketed by quantile above.
        'Age',
        'Fare',
        # Not used by any other cell of this notebook.
        'Ticket',
        # Raw surname that was label-encoded above.
        'Last_name',
    ]
)

**Step 4) Feature Selection**

In [None]:
# Feature selection: absolute pairwise correlations between the remaining
# columns of the working frame.
corr_mat_type_two = data_frame_instance.get_df().corr().abs()

# Boolean matrix with the same shape as the correlation matrix, True on the
# diagonal and everywhere above it.
mask = np.triu(np.ones(corr_mat_type_two.shape, dtype=bool))

# Blank out the masked cells so every pair of columns appears only once (in
# the lower triangle) and the trivial self-correlations are hidden.
mask_df = corr_mat_type_two.mask(mask)
mask_df

**Step 5) Machine Learning and Evaluation**

In [None]:
# Split the working frame into features and target by column position:
# column 0 is the target, columns 1..12 are the features.
# NOTE(review): the positional slice assumes the label is the first column
# left after remove_columns and that there are at most 12 feature columns;
# any further column would be silently ignored — confirm, or select by name.
# (The bare `data_to_use` / `data_to_target` expressions that used to sit here
# were no-ops: only the last expression of a cell is rendered. Inspect the
# frames in their own cell if needed.)
data_to_use = data_frame_instance.get_df().iloc[:, 1:13]
data_to_target = data_frame_instance.get_df().iloc[:, 0]

ml = MachineLearning(data_to_use, data_to_target)

# Split the data
ml.split_data()

# Fit the KNN classifier
ml.fit_knn_classification()

# Kept as a notebook-level name; it is not read by any cell visible here.
X_test = ml.X_test

# Predict with the fitted KNN model.
y_pred_knn = ml.predict_model(ml.knn_model)

# Project-defined evaluation of the predictions, followed by plain accuracy
# against ml.y_test.
ml.evaluate_model(y_pred_knn)

print('Accuracy', metrics.accuracy_score(ml.y_test, y_pred_knn))



In [None]:
# Fit three decision-tree variants on the data held by `ml`. The next cell
# reads the fitted models from ml.decision_tree_gini_model,
# ml.decision_tree_depth_model and ml.decision_tree_entropy_model.
# NOTE(review): the names suggest Gini criterion, a depth-limited tree and
# entropy criterion — confirm the hyper-parameters in machine_learning.
ml.decision_tree_gini_classifier()
ml.decision_tree_depth_classifier()
ml.decision_tree_entropy_classifier()


In [None]:
# Predictions of the three fitted decision trees, produced in the order
# gini -> depth -> entropy.
y_pred_gini, y_pred_depth, y_pred_entropy = (
    ml.predict_model(ml.decision_tree_gini_model),
    ml.predict_model(ml.decision_tree_depth_model),
    ml.predict_model(ml.decision_tree_entropy_model),
)


In [None]:
# Gini tree: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_gini)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_gini))

In [None]:
# Depth tree: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_depth)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_depth))

In [None]:
# Entropy tree: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_entropy)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_entropy))

In [None]:
# Fit the three SVM variants (linear, sigmoid and RBF, going by the method
# names) on the data held by `ml`.
ml.svm_clf_linear_classifier()
ml.svm_clf_sigmoid_classifier()
ml.svm_clf_rbf_classifier()

# Predictions of the fitted SVM models, produced in the order
# linear -> sigmoid -> rbf.
y_pred_linear, y_pred_sigmoid, y_pred_rbf = (
    ml.predict_model(ml.svm_clf_linear_model),
    ml.predict_model(ml.svm_clf_sigmoid_model),
    ml.predict_model(ml.svm_clf_rbf_model),
)

In [None]:
# Linear SVM: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_linear)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_linear))

In [None]:
# Sigmoid SVM: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_sigmoid)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_sigmoid))

In [None]:
# RBF SVM: project-defined evaluation, then plain accuracy against ml.y_test.
ml.evaluate_model(y_pred_rbf)

print('Accuracy', metrics.accuracy_score(y_true=ml.y_test, y_pred=y_pred_rbf))