In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import VGG16
from tensorflow.keras.datasets import mnist
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from sklearn.model_selection import train_test_split
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Genetic Algorithm for Feature Selection
class GeneticAlgorithm:
    def __init__(self, classifier, features, labels, population_size=50, generations=10, crossover_prob=0.8, mutation_prob=0.1):
        self.classifier = classifier
        self.features = features
        self.labels = labels
        self.population_size = population_size
        self.generations = generations
        self.crossover_prob = crossover_prob
        self.mutation_prob = mutation_prob

    def initialize_population(self):
        return np.random.choice([0, 1], size=(self.population_size, self.features.shape[1]))

    def evaluate_population(self, population):
        accuracies = []
        for chromosome in population:
            selected_features = np.where(chromosome == 1)[0]
            if len(selected_features) == 0:
                accuracies.append(0.0)
            else:
                self.classifier.fit(self.features[:, selected_features], self.labels)
                predictions = self.classifier.predict(self.features[:, selected_features])
                accuracies.append(accuracy_score(self.labels, predictions))
        return accuracies

    def select_parents(self, population, accuracies):
        num_parents = int(self.population_size * 0.2)
        parents_indices = []
        for _ in range(num_parents):
            tournament_indices = np.random.choice(self.population_size, size=5, replace=False)
            selected_index = tournament_indices[np.argmax([accuracies[i] for i in tournament_indices])]
            parents_indices.append(selected_index)
        return population[parents_indices]

    def crossover(self, parents):
        children = []
        for i in range(0, len(parents), 2):
            parent1, parent2 = parents[i], parents[i + 1]
            crossover_point = np.random.randint(1, len(parent1) - 1)
            child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
            child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
            children.extend([child1, child2])
        return np.array(children)

    def mutate(self, population):
        for i in range(len(population)):
            for j in range(len(population[i])):
                if np.random.rand() < self.mutation_prob:
                    population[i, j] = 1 - population[i, j]
        return population

    def run(self):
        population = self.initialize_population()

        for generation in range(self.generations):
            accuracies = self.evaluate_population(population)
            parents = self.select_parents(population, accuracies)
            offspring = self.crossover(parents)
            mutated_offspring = self.mutate(offspring)
            population = np.vstack((population, mutated_offspring))
            population = population[np.argsort(accuracies)[-self.population_size:]]

        best_chromosome = population[np.argmax(accuracies)]
        selected_features = np.where(best_chromosome == 1)[0]
        return selected_features



In [3]:

train_dir = 'train'
test_dir = 'test'
target_shape=(64,64,3)
datagen = ImageDataGenerator(rescale=1./255)  

train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(64,64),
    batch_size=28709 ,
    class_mode='categorical',
    
)

test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=(64,64),
    batch_size=7178,
    class_mode='categorical',
    
)



Found 28709 images belonging to 7 classes.
Found 7178 images belonging to 7 classes.


In [4]:
x_train,y_train=train_generator.next()
x_test,y_test=test_generator.next()
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)


In [5]:
print(x_train.shape)
print(x_test.shape)

print(y_train.shape)
print(y_test.shape)

(28709, 64, 64, 3)
(7178, 64, 64, 3)
(28709,)
(7178,)


In [6]:
target_shape=(64,64,3)
# Use VGG16 to get features
base_model = VGG16(weights='imagenet', include_top=False, input_shape=target_shape)
x_train_features = base_model.predict(x_train)
x_test_features = base_model.predict(x_test)

x_train_features_flat = x_train_features.reshape(x_train_features.shape[0], -1)
x_test_features_flat = x_test_features.reshape(x_test_features.shape[0], -1)




In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_features_flat = scaler.fit_transform(x_train_features_flat)
x_test_features_flat = scaler.transform(x_test_features_flat)


In [8]:
from sklearn.tree import DecisionTreeClassifier

# Genetic Algorithm for Feature Selection
classifier_dt = DecisionTreeClassifier()

genetic_algorithm = GeneticAlgorithm(classifier_dt, x_train_features_flat, y_train, population_size=10, generations=5)
selected_features = genetic_algorithm.run()

In [9]:
print(selected_features)
print(selected_features.shape)

[   4    8    9 ... 2044 2046 2047]
(1064,)


In [10]:

classifier_dt.fit(x_train_features_flat[:, selected_features], y_train)
x_test_features_selected = x_test_features_flat[:, selected_features]
y_test_pred = classifier_dt.predict(x_test_features_selected)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')

Test accuracy: 32.46%


In [13]:
from sklearn.metrics import classification_report

y_train_pred = classifier_dt.predict(x_train_features_flat[:, selected_features])
y_test_pred


array([5, 6, 5, ..., 2, 4, 3], dtype=int64)

In [15]:
print("Classification Report on Test Set:")
print(classification_report(y_test, y_test_pred))

Classification Report on Test Set:
              precision    recall  f1-score   support

           0       0.24      0.25      0.24       958
           1       0.28      0.34      0.31       111
           2       0.29      0.28      0.29      1024
           3       0.40      0.38      0.39      1774
           4       0.28      0.29      0.28      1233
           5       0.27      0.26      0.27      1247
           6       0.46      0.49      0.48       831

    accuracy                           0.32      7178
   macro avg       0.32      0.33      0.32      7178
weighted avg       0.33      0.32      0.32      7178



In [16]:
print("Classification Report on Training Set:")
print(classification_report(y_train, y_train_pred))

Classification Report on Training Set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3995
           1       1.00      1.00      1.00       436
           2       1.00      1.00      1.00      4097
           3       1.00      1.00      1.00      7215
           4       1.00      1.00      1.00      4965
           5       1.00      1.00      1.00      4830
           6       1.00      1.00      1.00      3171

    accuracy                           1.00     28709
   macro avg       1.00      1.00      1.00     28709
weighted avg       1.00      1.00      1.00     28709

