In [None]:
from google.colab import drive

# Mount Google Drive into the Colab runtime at a named mount point.
GDRIVE_MOUNT_POINT = "/content/gdrive"
drive.mount(GDRIVE_MOUNT_POINT)

In [None]:
import numpy as np 
import os 
import pandas as pd

In [None]:
# Seed NumPy's global random generator so NumPy-driven randomness is repeatable.
SEED = 42
np.random.seed(SEED)

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [None]:
# Cross-validation configuration shared by the k-fold cells below.
NUM_FOLD = 5

# Shuffle before splitting: the preview of Fish.csv shows rows grouped by
# species with increasing weight, so unshuffled stratified folds would be
# contiguous, size-ordered slices of each class rather than representative
# samples. A fixed random_state keeps the fold assignment reproducible.
kf = StratifiedKFold(n_splits=NUM_FOLD, shuffle=True, random_state=42)

### Training and testing dataset

In [None]:
# Load the training split and preview its first five rows.
TRAIN_CSV_PATH = "train_fish.csv"
fish_train_dataset = pd.read_csv(TRAIN_CSV_PATH)
fish_train_dataset.head(n=5)

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width,Species
0,145.0,20.5,22.0,24.3,6.6339,3.5478,Roach
1,8.7,10.8,11.3,12.6,1.9782,1.2852,Smelt
2,1000.0,41.1,44.0,46.6,12.4888,7.5958,Perch
3,12.2,12.1,13.0,13.8,2.277,1.2558,Smelt
4,6.7,9.3,9.8,10.8,1.7388,1.0476,Smelt


In [None]:
# Load the held-out test split and preview its first five rows.
TEST_CSV_PATH = "test_fish.csv"
fish_test_dataset = pd.read_csv(TEST_CSV_PATH)
fish_test_dataset.head(n=5)

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width,Species
0,110.0,20.0,22.0,23.5,5.5225,3.995,Perch
1,650.0,36.5,39.0,41.4,11.1366,6.003,Perch
2,567.0,43.2,46.0,48.7,7.792,4.87,Pike
3,161.0,22.0,23.4,26.7,6.9153,3.6312,Roach
4,700.0,30.4,33.0,38.3,14.8604,5.2854,Bream


### Training part

In [None]:
# Species labels present in the fish data (not referenced by the cells shown here).
classes = [
    'Bream',
    'Roach',
    'Whitefish',
    'Parkki',
    'Perch',
    'Pike',
    'Smelt',
]

In [None]:
# Columns removed from both splits before modelling: the target itself plus
# the measurements excluded from this experiment.
# NOTE(review): if the CSVs carry a saved index column (the previews show a
# leading "Unnamed: 0" header), it is kept as a feature here — confirm.
NON_FEATURE_COLUMNS = ["Species", "Width", "Length1", "Weight"]
TARGET_COLUMN = "Species"

# Feature matrices: every column that is not listed above.
X_train = fish_train_dataset.drop(columns=NON_FEATURE_COLUMNS)
X_test = fish_test_dataset.drop(columns=NON_FEATURE_COLUMNS)

# Targets: the species label of each row.
y_train = fish_train_dataset[TARGET_COLUMN]
y_test = fish_test_dataset[TARGET_COLUMN]

In [None]:
# Standardise the features. The scaler statistics come from the training
# split only and are then re-used, unchanged, on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
transform = scaler  # fit() returns the scaler itself; keep the original alias
X_test = transform.transform(X_test)

In [None]:
# Fit a 5-nearest-neighbours classifier on the scaled training split and
# report per-class metrics on the scaled test split.
knn = KNN(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

# zero_division=0 reports 0.0 for classes that are never predicted (the same
# value the default produces) without emitting an undefined-metric warning
# for each average.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

       Bream       0.91      1.00      0.95        10
      Parkki       1.00      0.50      0.67         2
       Perch       0.67      0.93      0.78        15
        Pike       1.00      1.00      1.00         7
       Roach       1.00      0.38      0.55         8
       Smelt       0.80      1.00      0.89         4
   Whitefish       0.00      0.00      0.00         2

    accuracy                           0.81        48
   macro avg       0.77      0.69      0.69        48
weighted avg       0.82      0.81      0.78        48



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Load the complete fish dataset (used for cross-validation below) and
# preview its first five rows.
FULL_CSV_PATH = "../Fish.csv"
fish_dataset = pd.read_csv(FULL_CSV_PATH)
fish_dataset.head(n=5)

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [None]:
# Stratified k-fold evaluation of a 5-NN classifier using every column of
# the full dataset except the target.
# NOTE(review): if Fish.csv carries a saved index column (the preview shows a
# leading "Unnamed: 0" header), it is kept as a feature here — confirm.
X = fish_dataset.drop(columns=["Species"]).to_numpy()
y = fish_dataset["Species"].to_numpy()

fold_accuracies = []
for fold_number, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
    # Fold-local names: the hold-out X_train / X_test / y_train / y_test,
    # scaler and knn created by earlier cells must not be overwritten here,
    # otherwise re-running those cells would silently use fold data.
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Scaling statistics are computed on the training part of the fold only.
    fold_scaler = StandardScaler().fit(X_fold_train)
    X_fold_train = fold_scaler.transform(X_fold_train)
    X_fold_test = fold_scaler.transform(X_fold_test)

    fold_knn = KNN(n_neighbors=5)
    fold_knn.fit(X_fold_train, y_fold_train)
    y_fold_pred = fold_knn.predict(X_fold_test)

    print("Fold {}".format(fold_number))
    # zero_division=0 keeps the 0.0 scores for never-predicted classes but
    # suppresses the per-fold undefined-metric warnings.
    print(classification_report(y_fold_test, y_fold_pred, zero_division=0))

    fold_accuracies.append(accuracy_score(y_fold_test, y_fold_pred))

# Mean accuracy over the folds that were actually evaluated.
print(sum(fold_accuracies) / len(fold_accuracies))

Fold 1
              precision    recall  f1-score   support

       Bream       1.00      0.86      0.92         7
      Parkki       0.67      1.00      0.80         2
       Perch       0.40      0.18      0.25        11
        Pike       1.00      0.67      0.80         3
       Roach       0.38      0.75      0.50         4
       Smelt       0.38      1.00      0.55         3
   Whitefish       0.00      0.00      0.00         2

    accuracy                           0.56        32
   macro avg       0.55      0.64      0.55        32
weighted avg       0.57      0.56      0.53        32

Fold 2
              precision    recall  f1-score   support

       Bream       1.00      1.00      1.00         7
      Parkki       1.00      1.00      1.00         3
       Perch       0.50      0.09      0.15        11
        Pike       1.00      1.00      1.00         3
       Roach       0.29      1.00      0.44         4
       Smelt       1.00      1.00      1.00         3
   Whitefi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Stratified k-fold evaluation of a 5-NN classifier on a reduced feature set.
# NOTE(review): this cell drops Length2 and keeps Length1, whereas the
# hold-out experiment above drops Length1 and keeps Length2 — confirm which
# feature set is intended before comparing the two results.
X = fish_dataset.drop(columns=["Species", "Width", "Length2", "Weight"]).to_numpy()
y = fish_dataset["Species"].to_numpy()

fold_accuracies = []
for fold_number, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
    # Fold-local names: the hold-out X_train / X_test / y_train / y_test,
    # scaler and knn created by earlier cells must not be overwritten here,
    # otherwise re-running those cells would silently use fold data.
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Scaling statistics are computed on the training part of the fold only.
    fold_scaler = StandardScaler().fit(X_fold_train)
    X_fold_train = fold_scaler.transform(X_fold_train)
    X_fold_test = fold_scaler.transform(X_fold_test)

    fold_knn = KNN(n_neighbors=5)
    fold_knn.fit(X_fold_train, y_fold_train)
    y_fold_pred = fold_knn.predict(X_fold_test)

    print("Fold {}".format(fold_number))
    # zero_division=0 keeps the 0.0 scores for never-predicted classes but
    # suppresses the per-fold undefined-metric warnings.
    print(classification_report(y_fold_test, y_fold_pred, zero_division=0))

    fold_accuracies.append(accuracy_score(y_fold_test, y_fold_pred))

# Mean accuracy over the folds that were actually evaluated.
print(sum(fold_accuracies) / len(fold_accuracies))
    

Fold 1
              precision    recall  f1-score   support

       Bream       1.00      0.86      0.92         7
      Parkki       0.67      1.00      0.80         2
       Perch       0.67      0.36      0.47        11
        Pike       1.00      0.67      0.80         3
       Roach       0.43      0.75      0.55         4
       Smelt       0.38      1.00      0.55         3
   Whitefish       0.00      0.00      0.00         2

    accuracy                           0.62        32
   macro avg       0.59      0.66      0.58        32
weighted avg       0.67      0.62      0.61        32

Fold 2
              precision    recall  f1-score   support

       Bream       1.00      1.00      1.00         7
      Parkki       1.00      1.00      1.00         3
       Perch       0.67      0.18      0.29        11
        Pike       1.00      1.00      1.00         3
       Roach       0.31      1.00      0.47         4
       Smelt       1.00      1.00      1.00         3
   Whitefi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
