In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
df = pd.read_csv("Fish_dataset.csv")
df.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [10]:
df.columns

Index(['Species', 'Weight', 'Length1', 'Length2', 'Length3', 'Height',
       'Width'],
      dtype='object')

In [4]:
df['Species'].unique()

array(['Bream', 'Roach', 'Whitefish', 'Parkki', 'Perch', 'Pike', 'Smelt'],
      dtype=object)

In [5]:
df.isnull().sum()

Species    0
Weight     0
Length1    0
Length2    0
Length3    0
Height     0
Width      0
dtype: int64

In [7]:
colName = []
for col in df.columns:
    if df[col].dtypes == 'object':
        colName.append(col)
colName

['Species']

In [9]:
le = LabelEncoder()

for i in colName:
    df[i] = le.fit_transform(df[i])

    le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    print("Feature : ", i)
    print("Mapping : ", le_name_mapping)

Feature :  Width
Mapping :  {'Bream': 0, 'Parkki': 1, 'Perch': 2, 'Pike': 3, 'Roach': 4, 'Smelt': 5, 'Whitefish': 6}


In [12]:
x = df[['Weight', 'Length1', 'Length2', 'Length3', 'Height','Width']]
y = df[['Species']]
print(x.shape)
print(y.shape)

(159, 6)
(159, 1)


In [15]:
scaler = StandardScaler()
scaler.fit(x)
x = scaler.transform(x)
x

array([[-4.38072172e-01, -3.05788578e-01, -2.82303007e-01,
        -1.06020232e-01,  5.96578670e-01, -2.36528948e-01],
       [-3.03562184e-01, -2.25507242e-01, -1.98053663e-01,
        -2.33668373e-03,  8.21260549e-01, -6.65789457e-02],
       [-1.63447613e-01, -2.35542409e-01, -1.79331587e-01,
        -1.09769794e-02,  7.97341291e-01,  1.65793169e-01],
       [-9.89949100e-02,  5.30159764e-03,  5.46943678e-02,
         1.96390116e-01,  8.79771455e-01,  2.26210031e-02],
       [ 8.87586153e-02,  2.53719316e-02,  5.46943678e-02,
         2.39591594e-01,  8.12834979e-01,  4.26371272e-01],
       [ 1.44804444e-01,  5.54774324e-02,  1.20221635e-01,
         3.00073664e-01,  1.08395111e+00,  3.03431249e-01],
       [ 2.84919015e-01,  5.54774324e-02,  1.20221635e-01,
         2.82793073e-01,  1.21901769e+00,  5.12357880e-01],
       [-2.33330416e-02,  1.35758768e-01,  1.48304750e-01,
         3.25994551e-01,  8.65728838e-01,  1.62163285e-01],
       [ 1.44804444e-01,  1.35758768e-01,  1.483

In [17]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=10)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(127, 6)
(32, 6)
(127, 1)
(32, 1)


In [22]:
model = KNeighborsClassifier(n_neighbors=int(np.sqrt(len(x_train))), metric='euclidean')

In [23]:
model.fit(x_train,y_train)

  return self._fit(X, y)


In [25]:
y_pred = model.predict(x_test)
print(list(zip(y_test,y_pred)))

[('Species', 2)]


In [29]:
cfm = confusion_matrix(y_test,y_pred)
print(cfm)

print("Classification report")
print(classification_report(y_test,y_pred))

acc = accuracy_score(y_test,y_pred)
print("Accuracy score : ", acc)

[[6 0 0 0 0 0 0]
 [0 1 1 0 2 0 0]
 [0 0 8 0 1 0 0]
 [0 0 2 1 0 0 0]
 [0 0 5 0 0 0 0]
 [0 0 0 0 0 3 0]
 [0 0 2 0 0 0 0]]
Classification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      0.25      0.40         4
           2       0.44      0.89      0.59         9
           3       1.00      0.33      0.50         3
           4       0.00      0.00      0.00         5
           5       1.00      1.00      1.00         3
           6       0.00      0.00      0.00         2

    accuracy                           0.59        32
   macro avg       0.63      0.50      0.50        32
weighted avg       0.62      0.59      0.54        32

Accuracy score :  0.59375


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
