### IMPORT LIBRARIES

In [1]:
import os
from tensorflow.keras.preprocessing.image import DirectoryIterator
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression

### GET DATA AND SPLIT IT INTO TRAIN/TEST SETS

In [48]:
def get_data(val_split = False, test_size = 0.3, val_size = 0.3, random_state = 1,
             path = '../catchafish/data'):
    '''Load every image under `path` and split it into train/test(/validation) sets.

    Images are read with a Keras `DirectoryIterator`, resized to 32x32 and
    returned as integer arrays; labels are taken from `DirectoryIterator.labels`.

    Parameters
    ----------
    val_split : bool
        When True, a validation set is additionally carved out of the
        training portion.
    test_size : float
        Fraction of the whole dataset held out as the test set.
    val_size : float
        Fraction of the training portion held out as the validation set
        (only used when `val_split` is True).
    random_state : int
        Seed passed to both splits so that the result is reproducible.
    path : str
        Root directory of the image dataset.

    Returns
    -------
    (X_train, X_test, y_train, y_test) when `val_split` is False,
    (X_train, X_val, X_test, y_train, y_val, y_test) otherwise.
    '''
    # Only the first batch is read below, so the batch size must be at least
    # the number of images on disk; otherwise X would hold fewer samples
    # than y and the split would fail.
    batch_size = 100000

    dir_iterator = DirectoryIterator(directory = path,
                                     image_data_generator = None,
                                     target_size = (32, 32),
                                     batch_size = batch_size,
                                     shuffle = False,
                                     dtype = int)

    # shuffle = False keeps the image order aligned with dir_iterator.labels.
    X = dir_iterator[0][0]
    y = dir_iterator.labels

    # Honour the caller's test_size (it used to be hard-coded to 0.3 here).
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size = test_size,
                                                        random_state = random_state)

    if not val_split:
        return X_train, X_test, y_train, y_test

    # Seeded as well, so the validation set is identical on every run.
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
                                                      test_size = val_size,
                                                      random_state = random_state)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [49]:
# Load the images and keep a plain train/test split (no validation set).
X_train, X_test, y_train, y_test = get_data(val_split = False)

Found 27370 images belonging to 23 classes.


### CHECK SHAPES

In [50]:
# Training images: (n_samples, height, width, channels).
X_train.shape

(19159, 32, 32, 3)

In [51]:
# Training labels: one entry per training image.
y_train.shape

(19159,)

In [52]:
# Test labels: one entry per test image.
y_test.shape

(8211,)

In [53]:
# Test images: (n_samples, height, width, channels).
X_test.shape

(8211, 32, 32, 3)

### RESHAPE X_TRAIN AND X_TEST

In [None]:
# Flatten each image into one feature vector. -1 lets numpy infer the
# per-image length (32*32*3 = 3072 here) instead of hard-coding it, so the
# cell keeps working if the image size changes.
X_train_reshaped = X_train.reshape(X_train.shape[0], -1)

In [None]:
# Flatten each test image into one feature vector; -1 infers the per-image
# length (32*32*3 = 3072 here) rather than hard-coding it.
X_test_reshaped = X_test.reshape(X_test.shape[0], -1)

### MODEL INSTANTIATION

In [57]:
# Logistic-regression classifier using the L-BFGS solver on all available
# cores; every other hyper-parameter is left at the library default.
logreg = LogisticRegression(solver = 'lbfgs', multi_class = 'auto', n_jobs = -1)

### FIT THE MODEL

In [61]:
# Train on the flattened training images.
logreg.fit(X = X_train_reshaped, y = y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='auto', n_jobs=-1,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False)

### SCORE THE MODEL

In [62]:
# Mean accuracy on the held-out test images.
logreg.score(X = X_test_reshaped, y = y_test)

0.8987943003288272