# Imports

In [6]:
from win10toast import ToastNotifier
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
toaster = ToastNotifier()
# toaster.show_toast("Test",
#                    duration=10)

# Load and Preprocess data

In [7]:
# Load the dataframes
train_df = pd.read_csv('dataset/Training_set.csv')
test_df = pd.read_csv('dataset/Testing_set.csv')
# Fill in null values for test data frame
test_df['label'] = ''

# Use 20% of the data
train_df = train_df.sample(frac=0.2, random_state=42)
test_df = test_df.sample(frac=0.2, random_state=42)

# Split training data
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)


# Set Up Data Generators

In [8]:
# Data generators
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Data generators for training, validation, and testing
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='dataset/train',
    x_col='filename',
    y_col='label',
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

val_generator = train_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='dataset/train',
    x_col='filename',
    y_col='label',
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='dataset/test',
    x_col='filename',
    y_col='label',
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)


Found 1040 validated image filenames belonging to 75 classes.
Found 260 validated image filenames belonging to 75 classes.
Found 557 validated image filenames belonging to 1 classes.


# Define feature extraction

In [9]:
def extract_features(generator, model):
    features, labels = [], []
    for X_batch, y_batch in generator:
        features_batch = model.predict(X_batch)  # Extract features
        features.append(features_batch)
        if y_batch is not None:
            labels.append(y_batch)
        if len(features) * generator.batch_size >= generator.samples:
            break
    return np.vstack(features), np.vstack(labels)


# Load Pre-trained model 

In [10]:
# Load MobileNet for feature extraction
base_model = MobileNet(weights='imagenet', include_top=False, pooling='avg')


  base_model = MobileNet(weights='imagenet', include_top=False, pooling='avg')


In [11]:
# Extract features from train and validation data
X_train, y_train = extract_features(train_generator, base_model)
X_val, y_val = extract_features(val_generator, base_model)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

# Pipeline building

In [12]:
# Create the pipeline
pipe = Pipeline([
    ('classifier', LogisticRegression(max_iter=1000))
])

# Fit the pipeline
pipe.fit(X_train, np.argmax(y_train, axis=1))

# Evaluate the pipeline
accuracy = pipe.score(X_val, np.argmax(y_val, axis=1))

pipe.predict(X_val)
print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7692


# Grid Search

# Results and predictions

In [13]:
# Extract features from test data
X_test, _= extract_features(test_generator, base_model)

# Make predictions
predictions = pipe.predict(X_test)

# Map predictions to class names
class_indices = {v: k for k, v in train_generator.class_indices.items()}
print(class_indices)
predicted_classes = [class_indices[p] for p in predictions]

print(predicted_classes[:10])  # Print the first 10 predicted classes



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 