### IMPORT LIBRARIES - SGD Classifier

In [1]:
import os
from tensorflow.keras.preprocessing.image import DirectoryIterator
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import SGDClassifier


### GET DATA AND TRAIN/SPLIT IT

In [2]:
def get_data(val_split = False, test_size = 0.3):
    '''This function returns the '''
    path = '../catchafish/data'
    batch_size = 100000
    
    dir_iterator = DirectoryIterator(directory = path, 
                                     image_data_generator = None, 
                                     target_size = (32, 32), 
                                     batch_size = batch_size,
                                     shuffle = False, 
                                     dtype = int)
    
    X = dir_iterator[0][0]
    y = dir_iterator.labels
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)
    
    if val_split:
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.3)
        return X_train, X_val, X_test, y_train, y_val, y_test
    else:
        return X_train, X_test, y_train, y_test

In [15]:
X_train, X_test, y_train, y_test = get_data()

Found 27682 images belonging to 9 classes.


### CHECK SHAPES


In [16]:
X_train.shape

(19377, 32, 32, 3)

In [17]:
y_train.shape

(19377,)

In [18]:
y_train

array([4, 4, 4, ..., 4, 4, 4], dtype=int32)

In [19]:
y_test.shape

(8305,)

In [20]:
X_test.shape

(8305, 32, 32, 3)

### RESHAPE X_TRAIN and X_TEST

SGD has to be fitted with an array X of shape (n_samples, n_features) holding the training samples

In [21]:
X_train_reshaped = np.reshape(X_train, (X_train.shape[0], 3072)) # (32*32*3 = 3072)

In [22]:
X_train_reshaped.shape

(19377, 3072)

and an array y of shape (n_samples,) holding the target values (class labels) for the training samples

In [23]:
y_train.shape

(19377,)

In [24]:
X_test_reshaped = np.reshape(X_test, (X_test.shape[0], 3072))

In [25]:
X_test_reshaped.shape

(8305, 3072)

### SCALING

In [26]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [27]:

clf = make_pipeline(StandardScaler(), SGDClassifier(loss="hinge", penalty="l2", n_jobs=-1, n_iter_no_change=20, max_iter=200))
clf.fit(X_train_reshaped,y_train)




Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('sgdclassifier', SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', ma...m_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False))])

In [28]:
clf.predict(X_test_reshaped)



array([4, 4, 4, ..., 4, 4, 4], dtype=int32)

### MODEL INSTANCING

### FIT THE MODEL

### SCORE THE MODEL


In [29]:
clf.score(X_test_reshaped, y_test)



0.9875978326309452

### Mapping function species to label number


Classes à garder dans votre dataset en local :
01, 02, 03, 04, 05, 07, 08, 09, 10, 16



Classes à supprimer du coup :
06, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23

In [None]:
def get_name(key):
    fish = {0 : ('fish_01','Dascyllus reticulatus '), 
            1 : ('fish_02', 'Plectroglyphidodon dickii'),
            2 : ('fish_03', 'Chromis chrysura'),
            3 : ('fish_04', 'Amphiprion clarkii'),
            4 : ('fish_05', 'Chaetodon lunulatus'),
            5 : ('fish_07', 'Myripristis kuntee'),
            6 : ('fish_08', 'Acanthurus nigrofuscus'),
            7 : ('fish_09', 'Hemigymnus fasciatus'),
            8 : ('fish_10', 'Neoniphon sammara'),
            9 : ('fish_16', 'Lutjanus fulvus ')}
    liste = 

In [49]:
def get_name(key):
    fish = {0 : ('fish_01','Dascyllus reticulatus '), 
            1 : ('fish_02', 'Plectroglyphidodon dickii'),
            2 : ('fish_03', 'Chromis chrysura'),
            3 : ('fish_04', 'Amphiprion clarkii'),
            4 : ('fish_05', 'Chaetodon lunulatus'),
            5 : ('fish_07', 'Myripristis kuntee'),
            6 : ('fish_08', 'Acanthurus nigrofuscus'),
            7 : ('fish_09', 'Hemigymnus fasciatus'),
            8 : ('fish_10', 'Neoniphon sammara'),
            9 : ('fish_16', 'Lutjanus fulvus ')}
    if key in fish:
        return fish.get(key)
        

In [50]:
np.unique(y_test)

array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int32)

In [64]:
get_name(5)


('fish_07', 'Myripristis kuntee')

In [63]:
y_test.apply(get_name)

AttributeError: 'numpy.ndarray' object has no attribute 'apply'