In [1]:
from google.colab import drive
import zipfile

# Mount Google Drive if the zip file is located there
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Define the path to the zip file
zip_file_path = '/content/drive/MyDrive/project/dataset.zip'  # Update the path accordingly

# Define the extraction directory
extract_to = '/content/drive/MyDrive/project'  # You can change this to your desired extraction directory

# Unzip the file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f'Files extracted to {extract_to}')

In [4]:
import os
import numpy as np
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

def load_data(image_dir, labels_file, target_size=(299, 299)):
    with open(labels_file, 'r') as f:
        labels = [int(line.strip()) - 1 for line in f.readlines()]  

    image_files = sorted(os.listdir(image_dir))
    images = []
    image_labels = []

    for img_file, label in zip(image_files, labels):
        img_path = os.path.join(image_dir, img_file)
        try:
            img = Image.open(img_path).resize(target_size)
            images.append(np.array(img))
            image_labels.append(label)
        except (UnidentifiedImageError, IOError):
            print(f"Skipping file {img_path}, not a valid image.")

    images = np.array(images)
    image_labels = np.array(image_labels)
    return images, image_labels

def normalize_images(images):
    return (images / 127.5) - 1


train_dir = '/content/drive/MyDrive/project/dataset/training'
train_labels_file = '/content/drive/MyDrive/project/dataset/training_labels.txt'
test_dir = '/content/drive/MyDrive/project/dataset/test'
test_labels_file = '/content/drive/MyDrive/project/dataset/test_labels.txt'


train_images, train_labels = load_data(train_dir, train_labels_file)
test_images, test_labels = load_data(test_dir, test_labels_file)


train_images = normalize_images(train_images)
test_images = normalize_images(test_images)


train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, random_state=42, stratify=train_labels)


n_samples_train, width, height, n_channels = train_images.shape
train_images_flat = train_images.reshape(n_samples_train, -1)

n_samples_val = val_images.shape[0]
val_images_flat = val_images.reshape(n_samples_val, -1)

n_samples_test = test_images.shape[0]
test_images_flat = test_images.reshape(n_samples_test, -1)

ros = RandomOverSampler(random_state=42)
train_images_resampled, train_labels_resampled = ros.fit_resample(train_images_flat, train_labels)


train_images_resampled = train_images_resampled.reshape(-1, width, height, n_channels)

unique_labels_train, counts_train = np.unique(train_labels_resampled, return_counts=True)
print("Oversampled Training Class Counts:")
for label, count in zip(unique_labels_train, counts_train):
    print(f"Class {label + 1}: {count}")


unique_labels_val, counts_val = np.unique(val_labels, return_counts=True)
print("Validation Class Counts:")
for label, count in zip(unique_labels_val, counts_val):
    print(f"Class {label + 1}: {count}")


unique_labels_test, counts_test = np.unique(test_labels, return_counts=True)
print("Test Class Counts:")
for label, count in zip(unique_labels_test, counts_test):
    print(f"Class {label + 1}: {count}")


Skipping file /content/drive/MyDrive/project/dataset/test/.DS_Store, not a valid image.
Oversampled Training Class Counts:
Class 1: 70
Class 2: 70
Class 3: 70
Validation Class Counts:
Class 1: 12
Class 2: 18
Class 3: 8
Test Class Counts:
Class 1: 47
Class 2: 57
Class 3: 39


In [6]:
import os
import numpy as np
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [7]:

base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))


x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(3, activation='softmax')(x)  


model = Model(inputs=base_model.input, outputs=predictions)


for layer in base_model.layers:
    layer.trainable = False


model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


history = model.fit(train_images_resampled, train_labels_resampled,
                    epochs=10,
                    batch_size=32,
                    validation_data=(val_images, val_labels))


for layer in base_model.layers[-30:]:  
    layer.trainable = True


model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


fine_tune_history = model.fit(train_images_resampled, train_labels_resampled,
                              epochs=10,
                              batch_size=32,
                              validation_data=(val_images, val_labels))


test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 2.325242519378662
Test accuracy: 0.4825174808502197


In [10]:
from sklearn.metrics import classification_report, confusion_matrix

test_predictions = model.predict(test_images)


test_pred_labels = np.argmax(test_predictions, axis=1)


print("Classification Report:")
print(classification_report(test_labels, test_pred_labels, target_names=['Class 1', 'Class 2', 'Class 3']))


print("Confusion Matrix:")
print(confusion_matrix(test_labels, test_pred_labels))

Classification Report:
              precision    recall  f1-score   support

     Class 1       0.28      0.26      0.27        47
     Class 2       0.51      0.68      0.59        57
     Class 3       0.75      0.46      0.57        39

    accuracy                           0.48       143
   macro avg       0.51      0.47      0.47       143
weighted avg       0.50      0.48      0.48       143

Confusion Matrix:
[[12 33  2]
 [14 39  4]
 [17  4 18]]
