In [1]:
import pandas as pd
from tensorflow.keras.models import load_model
import numpy as np

from sklearn.preprocessing import StandardScaler

In [2]:
# Trait lookup table, written one row per species so each fish's description
# can be read across a single line.  Note that 'NA' here is a literal string
# category, not a missing value.
trait_columns = ['species', 'eye_size', 'snout_shape', 'parr_marks',
                 'parr_marks_length', 'spotting_density', 'fin_type',
                 'parr_marks_spacing', 'spotting_characteristic']

species_traits = [
    ('ck',   'large',      'pointy',            'slightly faded', 'long',      'medium', 'anal fin',   'wider than interspaces',    'circle'),
    ('co',   'large',      'short and blunt',   'slightly faded', 'long',      'medium', 'anal fin',   'narrower than interspaces', 'circle'),
    ('cm',   'medium',     'NA',                'faded',          'short',     'medium', 'caudal fin', 'NA',                        'variable'),
    ('pink', 'medium',     'NA',                'NA',             'NA',        'NA',     'caudal fin', 'half',                      'NA'),
    ('so',   'very large', 'NA',                'slightly faded', 'irregular', 'NA',     'caudal fin', 'variable',                  'row'),
    ('stl',  'small',      'short and rounded', 'faded',          'short',     'high',   'caudal fin', 'variable',                  'irregular'),
    ('ct',   'small',      'long and pointy',   'faded',          'short',     'high',   'caudal fin', 'variable',                  'irregular'),
    ('rbt',  'small',      'short and rounded', 'NA',             'short',     'high',   'caudal fin', 'NA',                        'NA'),
]

# Pivot the rows into the column-major mapping (trait name -> list of values,
# one entry per species, in the row order above).
fish_features = {
    trait: [row[position] for row in species_traits]
    for position, trait in enumerate(trait_columns)
}

unique = pd.DataFrame(fish_features)

In [3]:
# Load the field observations from a repo-relative path instead of an absolute
# path on one developer's Desktop, so the notebook can run on other machines.
# NOTE(review): assumes the notebook is executed from a directory that sits
# next to `data/` (the same layout the `../model/...` path below relies on) --
# confirm against the repository structure.
FIELD_DATA_CSV = "../data/field_big.csv"
df = pd.read_csv(FIELD_DATA_CSV)

In [4]:
# Attach the per-species trait columns to every field record; the left join
# keeps all field rows, leaving trait columns empty for any species that is
# not present in the lookup table.
full_processed = pd.merge(df, unique, on='species', how='left')

In [5]:
# Draw a small demo batch.  The seed is fixed so that Restart & Run All
# reproduces the same rows (and therefore the same predictions) every time.
sampled = full_processed.sample(n=10, random_state=42)

In [6]:
# Swap NaN placeholders for Python None across the sampled rows.
sampled = sampled.replace(to_replace=np.nan, value=None)

In [7]:
def one_hot_encoding(df, col, prefix=None):
    """Replace one categorical column with integer 0/1 indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    col : str
        Name of the column to encode.
    prefix : str, optional
        Prefix for the generated columns (named ``<prefix>_<category>``).
        Defaults to ``col`` when omitted.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the remaining original columns followed by the
        indicator columns; ``col`` itself is removed.
    """
    # The argument used to be accepted but ignored (the column name was always
    # used as the prefix); honour it, falling back to the column name.
    if prefix is None:
        prefix = col

    dummies = pd.get_dummies(df[col], prefix=prefix, dtype='int')

    # drop() and concat() both return new objects, so the caller's frame is
    # left untouched without needing an explicit copy.
    return pd.concat([df.drop(columns=col), dummies], axis=1)

In [8]:
# Columns that must stay as they are: the target plus the numeric fields.
passthrough_cols = {'species', 'fork_length_mm', 'water_temp_start'}

# Snapshot the column names first, because `sampled` is rebound to a new frame
# on every pass of the loop.
categorical_cols = [name for name in sampled.columns if name not in passthrough_cols]

for col in categorical_cols:
  print(col)
  sampled = one_hot_encoding(sampled, col, col)

watershed
river
site
method
local
eye_size
snout_shape
parr_marks
parr_marks_length
spotting_density
fin_type
parr_marks_spacing
spotting_characteristic


In [9]:
# Feature matrix: everything except the target (`species`) and the
# `water_temp_start` column.
sampled_X = sampled.drop(columns=["water_temp_start", "species"])

In [10]:
# Standardise fork length to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the sampled rows themselves, so the
# scaled values depend on which rows were drawn. If the model was trained with
# a scaler fitted on the training data, that fitted scaler should be reused
# here instead -- confirm against the training notebook.
scaler = StandardScaler()
fork_length_scaled = scaler.fit_transform(sampled_X[['fork_length_mm']])
sampled_X['fork_length_mm'] = fork_length_scaled

In [11]:
# Display the one-hot encoded, scaled feature frame for a visual check.
sampled_X

Unnamed: 0,fork_length_mm,watershed_cowichan,watershed_englishman,watershed_nanaimo,watershed_puntledge,river_center creek,river_cowichan,river_nanaimo,river_puntledge,site_center creek,...,parr_marks_length_short,spotting_density_high,spotting_density_medium,fin_type_anal fin,fin_type_caudal fin,parr_marks_spacing_narrower than interspaces,parr_marks_spacing_variable,parr_marks_spacing_wider than interspaces,spotting_characteristic_circle,spotting_characteristic_irregular
16537,2.870987,0,0,0,1,0,0,0,1,0,...,1,1,0,0,1,0,1,0,0,1
57162,-0.411391,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,1,1,0
16079,-0.301979,0,0,0,1,0,0,0,1,0,...,0,0,1,1,0,0,0,1,1,0
46030,-0.433274,0,0,1,0,0,0,1,0,0,...,0,0,1,1,0,0,0,1,1,0
9517,-0.455156,0,1,0,0,1,0,0,0,1,...,0,0,1,1,0,1,0,0,1,0
39792,-0.652099,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,1,1,0
62109,0.354497,0,1,0,0,1,0,0,0,1,...,0,0,1,1,0,1,0,0,1,0
11953,-0.477039,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,1,1,0
13231,-0.564569,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,1,1,0
53526,0.070024,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,1,0,0,1,0


`sampled_X` is the feature DataFrame we run the model's predictions on.

In [12]:
# Column layout, in order, that the feature frame is aligned to before it is
# passed to the model.  Entries are grouped by the source feature they encode.
required_cols = [
    # numeric
    'fork_length_mm',
    # watershed
    'watershed_cowichan', 'watershed_englishman', 'watershed_nanaimo',
    'watershed_puntledge',
    # river
    'river_center creek', 'river_cowichan', 'river_englishman',
    'river_haslam creek', 'river_nanaimo', 'river_puntledge',
    'river_shelly creek',
    # site
    'site_70.2', 'site_above tsolum', 'site_cedar bridge',
    'site_center creek', 'site_condensory bridge', 'site_cow bay',
    'site_hamilton ave', 'site_jack point', 'site_little mexico',
    'site_living forest', 'site_mainstem fence', 'site_martindale rd',
    'site_newcastle', 'site_side channel', 'site_skutz',
    'site_t-bone road', 'site_tsolum confluence', 'site_vimy pool',
    # method
    'method_beach seine', 'method_g-trap', 'method_rst',
    'method_smolt trap',
    # local
    'local_in-river', 'local_marine',
    # eye_size
    'eye_size_large', 'eye_size_medium', 'eye_size_small',
    # snout_shape
    'snout_shape_NA', 'snout_shape_long and pointy', 'snout_shape_pointy',
    'snout_shape_short and blunt', 'snout_shape_short and rounded',
    # parr_marks
    'parr_marks_NA', 'parr_marks_faded', 'parr_marks_slightly faded',
    # parr_marks_length
    'parr_marks_length_long', 'parr_marks_length_short',
    # spotting_density
    'spotting_density_high', 'spotting_density_medium',
    # fin_type
    'fin_type_anal fin', 'fin_type_caudal fin',
    # parr_marks_spacing
    'parr_marks_spacing_NA',
    'parr_marks_spacing_narrower than interspaces',
    'parr_marks_spacing_variable',
    'parr_marks_spacing_wider than interspaces',
    # spotting_characteristic
    'spotting_characteristic_NA', 'spotting_characteristic_circle',
    'spotting_characteristic_irregular',
    'spotting_characteristic_variable',
]

In [13]:
# Align the features to the required layout in one step: columns the sample
# did not produce are created and filled with 0, columns outside the list are
# dropped, and the result follows the order of `required_cols`.
sampled_X = sampled_X.reindex(columns=required_cols, fill_value=0)

Prediction - DL model

In [14]:
# Saved Keras model, located relative to the notebook's working directory.
model_path = '../model/dl_riya_new.h5'
dl_model = load_model(model_path)



In [15]:
# Run the loaded model on the aligned feature frame and print the raw output
# array (one row of scores per input row).
pred = dl_model.predict(sampled_X)
print(pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[[1.4839918e-04 1.6293728e-03 2.1519612e-04 9.8940533e-01 3.8365563e-04
  8.2179606e-03]
 [9.7813946e-01 1.1984156e-03 1.6287616e-02 1.1570280e-03 1.6349318e-03
  1.5825643e-03]
 [9.8942274e-01 1.7293497e-03 5.0455672e-03 1.6107832e-03 1.5979180e-03
  5.9354544e-04]
 [9.9327952e-01 8.9539343e-04 2.9229168e-03 1.1256774e-03 1.2858394e-03
  4.9077132e-04]
 [1.1859083e-03 9.1783330e-04 9.9454391e-01 2.2275990e-03 6.2615098e-04
  4.9858110e-04]
 [9.8290724e-01 9.6524775e-04 1.2776569e-02 9.4536931e-04 1.3251822e-03
  1.0803759e-03]
 [8.3615864e-04 1.0119668e-03 9.9326479e-01 3.0788104e-03 7.6655456e-04
  1.0416886e-03]
 [9.9487585e-01 7.3249155e-04 2.3197574e-03 5.4305396e-04 1.0206222e-03
  5.0821371e-04]
 [9.9525815e-01 6.8281859e-04 2.1517519e-03 5.0760549e-04 9.4786368e-04
  4.5188013e-04]
 [4.7278120e-03 1.8425673e-03 9.8725235e-01 2.3410362e-03 1.0160225e-03
  2.8202750e-03]]


In [16]:
# Label each output column with a species code.
# NOTE(review): this assumes the model's output units are ordered exactly like
# this list -- confirm against the label encoding used at training time.
species_labels = ['ck', 'cm', 'co', 'ct', 'rbt', 'stl']
prediction = pd.DataFrame(data=pred, columns=species_labels)
prediction.head()

Unnamed: 0,ck,cm,co,ct,rbt,stl
0,0.000148,0.001629,0.000215,0.989405,0.000384,0.008218
1,0.978139,0.001198,0.016288,0.001157,0.001635,0.001583
2,0.989423,0.001729,0.005046,0.001611,0.001598,0.000594
3,0.99328,0.000895,0.002923,0.001126,0.001286,0.000491
4,0.001186,0.000918,0.994544,0.002228,0.000626,0.000499


In [17]:
# Row-wise winner: the highest score in each row and the column label it
# belongs to.  Both are kept as plain lists (not index-carrying Series) so they
# can later be placed positionally next to data with a different index.
max_val = list(prediction.max(axis=1).to_numpy())
max_label = list(prediction.idxmax(axis=1))

In [18]:
# Side-by-side comparison of the true species and the model's top prediction.
# The frame takes its index from `sampled["species"]`; the two lists are
# matched to it by position.
comparison_columns = dict(
    species=sampled["species"],
    predicted_label=max_label,
    confidence=max_val,
)
final_pred_df = pd.DataFrame(comparison_columns)
final_pred_df

Unnamed: 0,species,predicted_label,confidence
16537,ct,ct,0.989405
57162,ck,ck,0.978139
16079,ck,ck,0.989423
46030,ck,ck,0.99328
9517,co,co,0.994544
39792,ck,ck,0.982907
62109,co,co,0.993265
11953,ck,ck,0.994876
13231,ck,ck,0.995258
53526,co,co,0.987252
