In [4]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
import os
from tensorflow.keras.preprocessing.image import DirectoryIterator
from sklearn.model_selection import train_test_split

In [6]:
def get_data(val_split = False, val_size = 0.3, test_size = 0.3, random_state = 1):
    '''Load the training images and split them into train / (validation) / test sets.

    Every image found under ``../catchafish/data`` is resized to 32x32 and
    returned as an integer NumPy array, together with the integer class labels
    reported by the ``DirectoryIterator``.

    Parameters
    ----------
    val_split : bool
        When True, the training part is split a second time to carve out a
        validation set.
    val_size : float
        Fraction of the *training* part moved to the validation set
        (only used when ``val_split`` is True).
    test_size : float
        Fraction of all images held out as the test set (default 0.3,
        i.e. a 70% / 30% train/test split).
    random_state : int
        Seed used for both splits so that results are reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` when ``val_split`` is False,
        otherwise ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

    Raises
    ------
    ValueError
        If no image is found under the data directory.
    '''
    path = '../catchafish/data'
    batch_size = 100000

    dir_iterator = DirectoryIterator(directory = path,
                                     image_data_generator = None,
                                     target_size = (32, 32),
                                     batch_size = batch_size,
                                     shuffle = False,
                                     dtype = int)

    # Read *every* batch, not just the first one: `labels` always covers the
    # whole directory, so taking only batch 0 would silently produce X and y
    # of different lengths once the dataset exceeds `batch_size` images.
    # shuffle=False keeps the batches in the same order as `labels`.
    batches = [dir_iterator[i][0] for i in range(len(dir_iterator))]
    if not batches:
        raise ValueError(f'No images found under {path!r}')
    # Avoid an unnecessary copy in the common single-batch case.
    X = batches[0] if len(batches) == 1 else np.concatenate(batches)
    y = dir_iterator.labels

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size = test_size, random_state = random_state)

    if not val_split:
        return X_train, X_test, y_train, y_test

    # Seed the second split as well so the validation set is reproducible.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size = val_size, random_state = random_state)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [7]:
# Load the images and keep the default train/test split
# (val_split=False, so no validation set is created).
X_train, X_test, y_train, y_test = get_data()

Found 26385 images belonging to 10 classes.


In [8]:
def get_data_test():
    '''Load the separate hold-out images stored under ``../catchafish/data_test``.

    Images are resized to 32x32 and returned as an integer NumPy array, using
    the same ``DirectoryIterator`` settings as the training loader so that the
    arrays can be fed to a model trained on the training data.

    Returns
    -------
    tuple
        ``(X_test, y_test)``: the image array and the integer class labels
        reported by the ``DirectoryIterator``, in the same order.

    Raises
    ------
    ValueError
        If no image is found under the test directory.
    '''
    path = '../catchafish/data_test'
    batch_size = 1000

    dir_iterator = DirectoryIterator(directory = path,
                                     image_data_generator = None,
                                     target_size = (32, 32),
                                     batch_size = batch_size,
                                     shuffle = False,
                                     dtype = int)

    # Read *every* batch, not just the first one: `labels` always covers the
    # whole directory, so taking only batch 0 would silently produce X and y
    # of different lengths once the folder holds more than `batch_size` images.
    # shuffle=False keeps the batches in the same order as `labels`.
    batches = [dir_iterator[i][0] for i in range(len(dir_iterator))]
    if not batches:
        raise ValueError(f'No images found under {path!r}')
    # Avoid an unnecessary copy in the common single-batch case.
    X_test = batches[0] if len(batches) == 1 else np.concatenate(batches)
    y_test = dir_iterator.labels
    return X_test, y_test

In [9]:
# Load the independent hold-out set (images from the separate data_test folder).
X_test_last, y_test_last = get_data_test()

Found 253 images belonging to 10 classes.


In [34]:
from sklearn.linear_model import LogisticRegression

# Baseline: multinomial logistic regression on the raw, flattened pixel values.
logreg = LogisticRegression(n_jobs = -1,
                            multi_class = 'multinomial',
                            solver = 'lbfgs')

# Flatten each image into one feature vector. Using len(...) and -1 instead of
# hard-coded sizes keeps this cell valid when the dataset or the split changes.
logreg.fit(X_train.reshape(len(X_train), -1), y_train)

LogisticRegression(multi_class='multinomial', n_jobs=-1)

In [35]:
# Accuracy on the test part of the random split (sample count inferred, not hard-coded).
logreg.score(X_test.reshape(len(X_test), -1), y_test)

0.9243304699343102

In [36]:
# Accuracy on the independent hold-out folder (sample count inferred, not hard-coded).
# NOTE(review): the recorded score here (~0.13) is close to chance for 10 classes,
# versus ~0.92 on the random split above. This presumably points to a distribution
# shift between the two folders or to near-duplicate images leaking across the
# random split -- TODO confirm before trusting the 0.92 figure.
logreg.score(X_test_last.reshape(len(X_test_last), -1), y_test_last)

0.12648221343873517

In [37]:
from sklearn.metrics import classification_report

# Per-class precision / recall on the hold-out folder. The images are flattened
# with an inferred sample count so the cell does not depend on the folder size.
print(classification_report(y_test_last, logreg.predict(X_test_last.reshape(len(X_test_last), -1))))

              precision    recall  f1-score   support

           0       0.07      0.14      0.10        21
           1       0.08      0.10      0.09        21
           2       0.00      0.00      0.00        20
           3       0.10      0.45      0.17        20
           4       0.22      0.16      0.19        25
           5       0.35      0.17      0.23        35
           6       0.00      0.00      0.00        34
           7       1.00      0.05      0.10        37
           8       0.40      0.10      0.16        20
           9       0.08      0.20      0.11        20

    accuracy                           0.13       253
   macro avg       0.23      0.14      0.11       253
weighted avg       0.28      0.13      0.12       253

