## 1) Import all required modules

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

## 2) Load the dataset

In [2]:
dt = pd.read_csv('palmer_penguins.csv')

## 3) Display dataset, its shape, and missing values

In [3]:
dt.head()

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,species
0,39.1,18.7,181,3750,Adelie
1,39.5,17.4,186,3800,Adelie
2,40.3,18.0,195,3250,Adelie
3,36.7,19.3,193,3450,Adelie
4,39.3,20.6,190,3650,Adelie


In [4]:
dt.shape

(342, 5)

In [5]:
dt.isna().sum()

bill_length_mm       0
bill_depth_mm        0
flipper_length_mm    0
body_mass_g          0
species              0
dtype: int64

## 4) Define our fields as x, and target as y variable

In [6]:
x = dt.drop(['species'], axis=1)
y = dt[['species']]

In [7]:
x.head()

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
0,39.1,18.7,181,3750
1,39.5,17.4,186,3800
2,40.3,18.0,195,3250
3,36.7,19.3,193,3450
4,39.3,20.6,190,3650


In [8]:
y.head()

Unnamed: 0,species
0,Adelie
1,Adelie
2,Adelie
3,Adelie
4,Adelie


## 5) Split the dataset

In [9]:
trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.20, random_state=101)

In [10]:
print("trainX: ", trainX.shape)
print("testX: ", testX.shape)
print("trainY: ", trainY.shape)
print("testY: ", testY.shape)

trainX:  (273, 4)
testX:  (69, 4)
trainY:  (273, 1)
testY:  (69, 1)


## 5) Creating the model

In [11]:
classifier = DecisionTreeClassifier()

## 6) Training and testing

In [12]:
classifier.fit(trainX, trainY)

DecisionTreeClassifier()

In [13]:
newX = np.array([[39.5, 20.10, 195, 3655]])

In [14]:
newY = classifier.predict(newX)
print(newY)

['Adelie']




In [15]:
y_predicted = classifier.predict(testX)

## 7) Performance evaluation

In [16]:
print(classification_report(testY, y_predicted))

              precision    recall  f1-score   support

      Adelie       0.96      0.89      0.93        28
   Chinstrap       0.80      1.00      0.89        12
      Gentoo       1.00      0.97      0.98        29

    accuracy                           0.94        69
   macro avg       0.92      0.95      0.93        69
weighted avg       0.95      0.94      0.94        69



## 8) Try other algorithms

In [17]:
classifier = KNeighborsClassifier()

In [18]:
classifier.fit(trainX, trainY)

  return self._fit(X, y)


KNeighborsClassifier()

In [19]:
y_predicted = classifier.predict(testX)

In [20]:
print(classification_report(testY, y_predicted))

              precision    recall  f1-score   support

      Adelie       0.76      0.89      0.82        28
   Chinstrap       0.60      0.25      0.35        12
      Gentoo       0.94      1.00      0.97        29

    accuracy                           0.83        69
   macro avg       0.76      0.71      0.71        69
weighted avg       0.80      0.83      0.80        69

