In [1]:
import os
import pickle
import numpy as np
import pandas as pd

In [2]:
# Pipelines
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Transformers
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import FunctionTransformer

## Models
from sklearn.linear_model import LogisticRegression

## Metrics
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

In [3]:
from data_gadgets.cleaning import Cleaner

# Reading Data

In [4]:
# Both held-out files sit in the raw-data folder, two levels above this notebook.
raw_dir = os.path.join('..', '..', 'data', 'raw')

X_test = pd.read_csv(os.path.join(raw_dir, 'X_test_task3.csv'))
y_test = pd.read_csv(os.path.join(raw_dir, 'y_test_task3.csv'))

# Loading Model

In [5]:
# Location of the serialized model, relative to this notebook.
path = os.path.join('..', '..', 'models', 'model_task3.pkl')

# NOTE(review): unpickling can execute arbitrary code stored in the file,
# so only load model artifacts that come from a trusted source.
with open(path, mode='rb') as model_file:
    model = pickle.load(model_file)

# Prediction Pipeline

In [6]:
def cleaning_pipeline(df):
    """Clean a raw feature frame before it is handed to the model.

    The frame is passed through ``Cleaner.headers`` and then
    ``Cleaner.categories``; afterwards the ``petal_length`` and
    ``petal_width`` columns are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw feature table. The exact schema ``Cleaner`` expects is not
        visible in this notebook -- TODO confirm against data_gadgets.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame without the two petal columns (assumes the
        ``Cleaner`` methods return DataFrames -- TODO confirm).
    """
    dropped_columns = ['petal_length', 'petal_width']

    cleaner = Cleaner()
    # NOTE(review): presumably these normalise column names and category
    # values respectively -- verify in data_gadgets.cleaning.
    with_headers = cleaner.headers(df)
    with_categories = cleaner.categories(with_headers)

    return with_categories.drop(columns=dropped_columns)

In [7]:
# Bind the cleaned frame to a new name instead of overwriting X_test, so this
# cell can be re-run: feeding an already-cleaned frame back through
# cleaning_pipeline would try to drop petal columns that are no longer there
# (DataFrame.drop raises KeyError for missing labels by default).
X_test_clean = cleaning_pipeline(X_test)
y_pred = model.predict(X_test_clean)

# Evaluation

In [8]:
# Share of test samples whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7333333333333333

In [12]:
# Per-class F1, averaged with weights proportional to each class's support.
f1_score(y_true=y_test, y_pred=y_pred, average='weighted')

0.7412698412698414

In [13]:
# The report is a preformatted multi-line string, so print it rather than
# relying on the cell's repr (which would show escaped newlines).
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       0.73      0.62      0.67        13
 Iris-virginica       0.38      0.50      0.43         6

       accuracy                           0.73        30
      macro avg       0.70      0.71      0.70        30
   weighted avg       0.76      0.73      0.74        30



In [14]:
# Rows are true classes, columns are predicted classes (labels in sorted order).
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[11,  0,  0],
       [ 0,  8,  5],
       [ 0,  3,  3]], dtype=int64)