# Imports

In [13]:
from win10toast import ToastNotifier
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import os

toaster = ToastNotifier()
# toaster.show_toast("Test",
#                    duration=10)

# Load and Preprocess data

In [14]:
# Load data
# Set up directories
train_dir = 'dataset/train'
test_dir = 'dataset/test'




# Set Up Data Generators

In [None]:
# Data generators
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode=None,  # For predictions
    shuffle=False
)



Found 0 images belonging to 0 classes.


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset/val'

# Define feature extraction

In [8]:
def extract_features(generator, model):
    features, labels = [], []
    for X_batch, y_batch in generator:
        features_batch = model.predict(X_batch)  # Extract features
        features.append(features_batch)
        labels.append(y_batch)
        if len(features) * generator.batch_size >= generator.samples:
            break
    return np.vstack(features), np.vstack(labels)


Found 5 validated image filenames belonging to 5 classes.
Found 1 validated image filenames belonging to 1 classes.
Found 3 validated image filenames.


# Load Pre-trained model 

In [None]:
# Load MobileNet for feature extraction
base_model = MobileNet(weights='imagenet', include_top=False, pooling='avg')


In [None]:
# Extract features from train and validation data
X_train, y_train = extract_features(train_generator, base_model)
X_val, y_val = extract_features(val_generator, base_model)


# Pipeline building

In [9]:
# Create the pipeline
pipe = Pipeline([
    ('classifier', LogisticRegression(max_iter=1000))
])

# Fit the pipeline
pipe.fit(X_train, np.argmax(y_train, axis=1))

# Evaluate the pipeline
accuracy = pipe.score(X_val, np.argmax(y_val, axis=1))
print(f"Validation Accuracy: {accuracy:.4f}")


ValueError: Found array with dim 4. RandomForestClassifier expected <= 2.

# Grid Search

# Results and predictions

In [None]:
# Extract features from test data
X_test, _ = extract_features(test_generator, base_model)

# Make predictions
predictions = pipe.predict(X_test)

# Map predictions to class names
class_indices = {v: k for k, v in train_generator.class_indices.items()}
predicted_classes = [class_indices[p] for p in predictions]

print(predicted_classes[:10])  # Print the first 10 predicted classes

