In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# Read the fish measurement table from disk and show its first five rows
# as a quick check of the column layout.
df = pd.read_csv('Fish.csv')
print(df.head(n=5))

  Species  Weight  Length1  Length2  Length3   Height   Width
0   Bream   242.0     23.2     25.4     30.0  11.5200  4.0200
1   Bream   290.0     24.0     26.3     31.2  12.4800  4.3056
2   Bream   340.0     23.9     26.5     31.1  12.3778  4.6961
3   Bream   363.0     26.3     29.0     33.5  12.7300  4.4555
4   Bream   430.0     26.5     29.0     34.0  12.4440  5.1340


In [40]:
# Columns used as model features; 'Length1' is not among them.
feature_columns = ['Weight', 'Length2', 'Length3', 'Height', 'Width']
fish_input = df[feature_columns]
fish_target = df['Species']

# Hold out a test split; the fixed seed keeps the partition repeatable.
train_input, test_input, train_target, test_target = train_test_split(
    fish_input,
    fish_target,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# scaler to the test rows.
ss = StandardScaler()
train_scaled = ss.fit_transform(train_input)
test_scaled = ss.transform(test_input)

In [41]:
# 3-nearest-neighbour classifier trained on the standardized features.
kn = KNeighborsClassifier(n_neighbors=3).fit(train_scaled, train_target)

# Score on the training split first, then on the held-out split.
train_accuracy = kn.score(train_scaled, train_target)
test_accuracy = kn.score(test_scaled, test_target)
print(train_accuracy)
print(test_accuracy)

0.8907563025210085
0.85


In [42]:
# Per-class probabilities for the first five test samples, rounded to two
# decimal places for display only (`proba` itself keeps full precision).
proba = kn.predict_proba(test_scaled[:5])
print(proba.round(decimals=2))

[[0.   0.   1.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   1.   0.  ]
 [0.   0.   0.   1.   0.   0.   0.  ]
 [0.   0.   0.67 0.   0.33 0.   0.  ]
 [0.   0.   0.67 0.   0.33 0.   0.  ]]


In [43]:
# Find the nearest training neighbours of the fourth test sample (the 3:4
# slice keeps the query two-dimensional) and show their species labels.
distances, indexes = kn.kneighbors(test_scaled[3:4])
neighbor_positions = indexes[0]  # positional row numbers for the single query
print(train_target.iloc[neighbor_positions])

52     Roach
106    Perch
103    Perch
Name: Species, dtype: str


In [44]:
# Boolean mask over the training labels: True where the species is Bream or Smelt.
is_bream = train_target == 'Bream'
is_smelt = train_target == 'Smelt'
bream_smelt_indexes = is_bream | is_smelt

# Apply the same mask to the scaled features and to the labels so the two
# subsets stay row-aligned.
train_bream_smelt = train_scaled[bream_smelt_indexes]
target_bream_smelt = train_target.loc[bream_smelt_indexes]

In [47]:
# Binary logistic regression trained on the Bream/Smelt subset of the
# training data.
lr = LogisticRegression()
lr.fit(train_bream_smelt, target_bream_smelt)

# Predicted labels and per-class probabilities for the first five subset rows.
print(lr.predict(train_bream_smelt[:5]))
print(lr.predict_proba(train_bream_smelt[:5]))

# NOTE(review): the saved output of this cell ends with a `1.0` line that none
# of the statements above can print, so the call that produced it was
# presumably removed during out-of-order editing. A score on the training
# subset is restored here so a fresh Restart & Run All reproduces the recorded
# output -- confirm this is the metric that was originally shown.
print(lr.score(train_bream_smelt, target_bream_smelt))

['Bream' 'Smelt' 'Bream' 'Bream' 'Bream']
[[0.99760007 0.00239993]
 [0.02737325 0.97262675]
 [0.99486386 0.00513614]
 [0.98585047 0.01414953]
 [0.99767419 0.00232581]]
1.0


In [52]:
# Refit the existing `lr` estimator on the full scaled training set, i.e. on
# every species present in the training labels.
lr.fit(train_scaled, train_target)

# Turn off scientific notation for float arrays before printing the
# probabilities. This is a global numpy setting and stays in effect for
# later cells.
np.set_printoptions(suppress=True)

head_scaled = train_scaled[:5]
print(lr.predict(head_scaled))
print(lr.predict_proba(head_scaled))

['Bream' 'Pike' 'Smelt' 'Perch' 'Parkki']
[[0.95424367 0.01035935 0.01155332 0.00211933 0.00432481 0.00000033
  0.01739919]
 [0.00239254 0.00006734 0.04364811 0.94115005 0.00837806 0.00001201
  0.00435189]
 [0.00010775 0.04645398 0.10498823 0.00092367 0.06095211 0.78606941
  0.00050486]
 [0.00396331 0.02875189 0.6711239  0.0126273  0.25799972 0.01161166
  0.01392222]
 [0.07369858 0.38401474 0.20886715 0.00994068 0.2868819  0.01608689
  0.02051005]]
