In [24]:
import os
import pickle
import numpy as np
import pandas as pd

In [25]:
# Pipelines
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Transformers
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import FunctionTransformer

## Models
from sklearn.ensemble import RandomForestClassifier

## Metrics
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

In [26]:
from data_gadgets.cleaning import Cleaner

# Reading Data

In [27]:
# Test features and labels for task 1, read from the raw data folder
# (the location is relative to the notebook's working directory).
raw_dir = os.path.join('..', '..', 'data', 'raw')
X_test = pd.read_csv(os.path.join(raw_dir, 'X_test_task1.csv'))
y_test = pd.read_csv(os.path.join(raw_dir, 'y_test_task1.csv'))

# Loading Model

In [28]:
# Load the pickled task-1 model object from the models folder.
# NOTE: unpickling can execute arbitrary code, so only load model files
# that come from a trusted source.
model_path = os.path.join('..', '..', 'models', 'model_task1.pkl')
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Prediction Pipeline

In [29]:
def cleaning_pipeline(df):
    """Turn a raw feature frame into the layout passed to ``model.predict``.

    Steps, in order:

    1. Run the project ``Cleaner`` over the frame (``headers`` followed by
       ``categories``).
    2. Reduce ``cabin`` to its first character; any value that is not a
       non-empty string (for example a missing value) becomes ``'N'``.
    3. Add a binary ``family`` column: 1 when ``sibsp >= 1`` or
       ``parch >= 1``, otherwise 0.
    4. Drop the ``sibsp`` and ``parch`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw features. Once the ``Cleaner`` steps have run, the frame must
        contain ``cabin``, ``sibsp`` and ``parch`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``cabin`` reduced to one character, ``family``
        appended as the last column, and ``sibsp`` / ``parch`` removed.

    Raises
    ------
    KeyError
        If ``cabin``, ``sibsp`` or ``parch`` is missing after the
        ``Cleaner`` steps.
    """
    # Work on a copy so the caller's frame is never modified; this keeps
    # the function safe to call repeatedly on the same raw input.
    df = df.copy()

    cleaner = Cleaner()
    df = cleaner.headers(df)
    df = cleaner.categories(df)

    # Only a non-empty string has a first character worth keeping. Checking
    # for `str` (instead of "not a float") also covers None, NumPy float
    # scalars and empty strings, which would otherwise raise.
    df['cabin'] = df['cabin'].map(
        lambda x: x[0] if isinstance(x, str) and x else 'N'
    )

    # Vectorised "has at least one relative aboard" flag. Comparisons with
    # NaN evaluate to False, so missing counts contribute 0.
    has_relatives = (df['sibsp'] >= 1) | (df['parch'] >= 1)
    df['family'] = has_relatives.astype('int64')

    return df.drop(['sibsp', 'parch'], axis=1)

In [30]:
# Bind the cleaned features to a new name instead of overwriting X_test.
# cleaning_pipeline reads the raw `sibsp` / `parch` columns and drops them
# from its result, so feeding its own output back in on a re-run of this
# cell would fail; keeping X_test raw makes the cell safe to re-run.
X_test_clean = cleaning_pipeline(X_test)
y_pred = model.predict(X_test_clean)

# Evaluation

In [31]:
# Fraction of test rows whose predicted label matches the true label.
# NOTE(review): the recorded output is a perfect 1.0 on the test split —
# worth confirming there is no target leakage before trusting this number.
accuracy_score(y_true=y_test, y_pred=y_pred)

1.0

In [32]:
# F1 score with scikit-learn's default settings (binary average, positive label 1).
f1_score(y_true=y_test, y_pred=y_pred)

1.0

In [33]:
# Per-class precision, recall and F1, plus the overall averages.
# The report is a multi-line string, so it is printed rather than displayed.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        45
           1       1.00      1.00      1.00        39

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84



In [34]:
# Counts of (true label, predicted label) pairs: rows are true classes,
# columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[45,  0],
       [ 0, 39]], dtype=int64)