## Packages

In [1]:
import warnings
# Ignore every warning for the rest of the session. No category is given, so
# the filter matches all Warning subclasses; warnings raised by libraries in
# later cells are hidden as well.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

# read data
from sklearn.datasets import load_iris

# data manipulation
from sklearn.model_selection import train_test_split

# Machine Learning Algorithms
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Evaluating tools
from sklearn.model_selection import cross_val_score

## Functions

In [2]:
# Read a sklearn dataset and convert it to a DataFrame
def sklearn_dataset_to_df(dataset):
    """Return the dataset's features as a DataFrame with a ``target`` column.

    Parameters
    ----------
    dataset : object
        Anything exposing ``data``, ``feature_names`` and ``target``
        attributes (for example the value returned by ``load_iris()``).

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``dataset.feature_names``, followed by a
        ``target`` column built from ``dataset.target``.
    """
    features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    labels = pd.Series(dataset.target)
    return features.assign(target=labels)

# Convert dtype
def convert_dtype_in_df(df, cols, dtype):
    """Cast the listed columns of ``df`` to ``dtype`` and return ``df``.

    The conversion is written back into ``df`` itself, so the caller's
    DataFrame is modified and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are converted.
    cols : str or iterable of str
        Column name(s) to convert. A single name may be passed as a plain
        string.
    dtype : str or type
        Target dtype, forwarded to ``Series.astype``.

    Returns
    -------
    pandas.DataFrame
        ``df``, with the requested columns converted.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    # A bare string would otherwise be iterated character by character,
    # so 'species' would look up the columns 's', 'p', 'e', ...
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        df[col] = df[col].astype(dtype)
    return df

# Mean cross-validated accuracy, as a percentage
def accuracy_calculator(model, x, y, cv):
    """Return the mean cross-validation accuracy of ``model`` in percent.

    Parameters
    ----------
    model : estimator
        Passed to ``cross_val_score`` as the estimator to evaluate.
    x, y : features and labels
        Passed to ``cross_val_score`` unchanged.
    cv : int or cross-validation splitter
        Forwarded as the ``cv`` argument of ``cross_val_score``.

    Returns
    -------
    float
        Mean of the per-fold accuracy scores multiplied by 100 and rounded
        to four decimal places.
    """
    fold_scores = cross_val_score(model, x, y, cv=cv, scoring='accuracy')
    mean_percent = fold_scores.mean() * 100
    return round(mean_percent, 4)

## Read Data

In [3]:
# Load the iris dataset into a DataFrame and preview the first five rows
iris = sklearn_dataset_to_df(load_iris())
iris.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## Data Processing

In [4]:
# Rename the columns to snake_case; the label column becomes 'species'.
feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
iris.columns = feature_names + ['species']

# Store the label column as a pandas categorical.
category_type = ['species']
iris = convert_dtype_in_df(df=iris, cols=category_type, dtype='category')

# Print column dtypes and non-null counts.
iris.info()

# Separate features from labels, then hold out 10% of the rows as a test set.
# random_state pins the split so it is the same on every run.
iris_x = iris[feature_names]
iris_y = iris['species']
x_train, x_test, y_train, y_test = train_test_split(
    iris_x,
    iris_y,
    test_size=0.1,
    random_state=0,
)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   sepal_length  150 non-null    float64 
 1   sepal_width   150 non-null    float64 
 2   petal_length  150 non-null    float64 
 3   petal_width   150 non-null    float64 
 4   species       150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


## Modeling

In [5]:
# Logistic Regression: fit on the training split, then score it with 3-fold CV
logistic_reg = LogisticRegression().fit(x_train, y_train)
logistic_reg_acc = accuracy_calculator(logistic_reg, x_train, y_train, cv=3)


In [6]:
# K Nearest Neighbors: fit on the training split, then score it with 3-fold CV
knn = KNeighborsClassifier().fit(x_train, y_train)
knn_acc = accuracy_calculator(knn, x_train, y_train, cv=3)


In [7]:
# Gaussian Naïve Bayes: fit on the training split, then score it with 3-fold CV
gaussian_naive_bayes = GaussianNB().fit(x_train, y_train)
gaussian_naive_bayes_acc = accuracy_calculator(
    gaussian_naive_bayes,
    x_train,
    y_train,
    cv=3,
)


In [8]:
# Decision Tree
# random_state is pinned (same value as the train/test split) so the tree,
# and therefore its cross-validation score, is the same on every run.
decision_tree = DecisionTreeClassifier(random_state=0)
decision_tree.fit(x_train, y_train)
decision_tree_acc = accuracy_calculator(model=decision_tree, x=x_train, y=y_train, cv=3)


In [9]:
# Random Forest
# random_state is pinned (same value as the train/test split) so the forest,
# and therefore its cross-validation score, is the same on every run.
random_forest = RandomForestClassifier(random_state=0)
random_forest.fit(x_train, y_train)
random_forest_acc = accuracy_calculator(model=random_forest, x=x_train, y=y_train, cv=3)


## Evaluation

In [10]:
# One (model label, cross-validation accuracy in %) pair per model,
# listed in the order the models were trained above.
score_rows = [
    ('Logistic Regression', logistic_reg_acc),
    ('K Nearest Neighbors', knn_acc),
    ('Gaussian Naïve Bayes', gaussian_naive_bayes_acc),
    ('Decision Tree', decision_tree_acc),
    ('Random Forest', random_forest_acc),
]
model_training_score = pd.DataFrame(
    score_rows,
    columns=['Model', 'Training_Score'],
)

model_training_score

Unnamed: 0,Model,Training_Score
0,Logistic Regression,94.0741
1,K Nearest Neighbors,96.2963
2,Gaussian Naïve Bayes,94.8148
3,Decision Tree,95.5556
4,Random Forest,94.8148


## Advanced: Hyperparameter Tuning

In [11]:
# TODO