<a href="https://colab.research.google.com/github/briggsrr/BST-Movie-Sorter/blob/master/Clothing_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# later cells can read the dataset folders stored under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




--- 

<h1> Loading Packages </h1>


In [3]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input, InputLayer, Flatten
from keras.models import Sequential, Model
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.callbacks import EarlyStopping
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
import random
from sklearn.metrics import accuracy_score
from tqdm import tqdm # progress bar thing
%matplotlib inline

In [5]:
# Dataset note: each (28 pixels * 28 pixels) image can be viewed as a 28*28
# 2-dimensional matrix whose elements are integers within 0~255.

# Human-readable class names; a name's position in this list is its integer label.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle Boot',
]

# Reverse lookup: class name -> integer label.
class_names_label = dict(zip(class_names, range(len(class_names))))

# Nominal image size (height, width); not referenced by the cells shown here.
ISIZE = (28, 28)

print(class_names_label)


{'T-shirt/top': 0, 'Trouser': 1, 'Pullover': 2, 'Dress': 3, 'Coat': 4, 'Sandal': 5, 'Shirt': 6, 'Sneaker': 7, 'Bag': 8, 'Ankle Boot': 9}



---
<h1> Data Processing</h1>

In [6]:
def load(path):
    """Load every image under ``path`` together with its integer class label.

    ``path`` is expected to contain one sub-folder per class. The first
    character of each sub-folder's name is used as the label for every image
    inside it, so class folders must start with a digit.

    Entries of ``path`` that are not directories, or whose name does not start
    with a digit (for example ``.ipynb_checkpoints``), are skipped. Files that
    OpenCV cannot decode are skipped as well and counted in a printed warning.

    Args:
        path: Directory containing the per-class sub-folders.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float32 NumPy array
        of the decoded images as returned by ``cv2.imread`` and ``labels`` is
        an int32 NumPy array holding one label per image.
    """
    images = []
    labels = []
    skipped = 0

    print(f"Loading {path}")

    # os.listdir returns entries in arbitrary order; sort so that the order of
    # the samples is reproducible between runs.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)

        # Ignore stray files and folders that do not encode a class label.
        if not os.path.isdir(folder_path) or folder[0] not in '0123456789':
            continue
        label = int(folder[0])

        # Iterate through each image with a progress bar.
        for file in tqdm(sorted(os.listdir(folder_path))):
            img = cv2.imread(os.path.join(folder_path, file))
            # cv2.imread signals an unreadable file by returning None instead
            # of raising; keeping it would break the array conversion below.
            if img is None:
                skipped += 1
                continue
            images.append(img)
            labels.append(label)

    if skipped:
        print(f"Skipped {skipped} unreadable file(s) in {path}")

    images = np.array(images, dtype='float32')
    labels = np.array(labels, dtype='int32')

    return images, labels

In [None]:
# Locations of the training and test image folders on the mounted Drive.
train_path = '/content/drive/MyDrive/hw4_train'
test_path = '/content/drive/MyDrive/hw4_test'

train_images, train_labels = load(train_path)
test_images, test_labels = load(test_path)

# Normalize pixel values from 0~255 down to 0~1.
# Both splits must be scaled identically: features extracted from unscaled
# test images would not be comparable to the ones the classifiers are
# trained on.
train_images = train_images / 255.0
test_images = test_images / 255.0

--- 
<h1> Previewing the Training Data </h1>

In [None]:
# Quick visual sanity check: draw the training image at index 1 on a bare
# axes (no ticks, no grid).
fig, ax = plt.subplots()
ax.imshow(train_images[1])
ax.set_xticks([])
ax.set_yticks([])
ax.grid(False)
plt.show()


---
<h1> Feature Extraction </h1>


In [None]:
# VGG16 with ImageNet weights and without its classification head, used
# only to turn images into feature maps.
fmodel = VGG16(include_top=False, weights='imagenet')

# Run both splits through the extractor (training set first, then test set).
train_features, test_features = (
    fmodel.predict(split) for split in (train_images, test_images)
)

# Number of samples per split, plus the per-sample feature-map dimensions
# (x, y, z) that the downstream classifiers take as their input shape.
n_train = train_features.shape[0]
n_test, x, y, z = test_features.shape

---
<h1> Model Ensembling with Bagging </h1>

In [None]:

model_count = 5  # number of classifiers in the ensemble

# Number of samples used to train each model: 80% of the training-set size.
max_samples = int(0.8 * n_train)

models = []
histories = []

for _ in range(model_count):
    # Small CNN classifier stacked on top of the extracted feature maps.
    # NOTE(review): the convolutions use the default (no) padding, so every
    # conv/pool stage shrinks the map -- confirm that (x, y) is large enough
    # for a 7x7 conv followed by two 3x3 convs and three 2x2 poolings.
    model = keras.Sequential([
        tf.keras.layers.Conv2D(filters=64, kernel_size=(7, 7), input_shape=(x, y, z), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(.5),
        # One output unit per class. With sparse categorical cross-entropy the
        # integer labels must all be valid output indices, so the layer needs
        # len(class_names) units rather than a hard-coded 9.
        tf.keras.layers.Dense(units=len(class_names), activation='softmax')
    ])
    # compile() only configures optimizer, loss and metrics. Callbacks such as
    # EarlyStopping are not a compile() argument; they are given to fit().
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    models.append(model)

In [None]:
# Bagging: train every model on its own bootstrap sample of the training features.
# Start from an empty list so that re-running this cell does not pile up
# duplicate history objects.
histories = []
for i in range(model_count):
    # Bootstrap sample: max_samples indices drawn uniformly with replacement.
    train_idx = np.random.choice(len(train_features), size=max_samples, replace=True)

    # Stop training once val_loss has not improved for 3 consecutive epochs.
    # NOTE(review): validation_split holds out the tail of the sampled arrays;
    # because indices are drawn with replacement, held-out samples can also
    # appear in the training part, so val_loss may be optimistic.
    early_stop = EarlyStopping(monitor='val_loss', patience=3)

    history = models[i].fit(
        train_features[train_idx],
        train_labels[train_idx],
        batch_size=128,
        epochs=20,
        validation_split=0.2,
        callbacks=[early_stop],
    )
    histories.append(history)

In [None]:
# Aggregation (soft voting): collect each model's class-probability matrix
# for the test features and stack them along a new leading "model" axis.
predictions = np.array(
    [models[k].predict(test_features) for k in range(model_count)]
)

# Add the probabilities over the model axis, then choose the class with the
# largest summed probability for every test sample.
predictions = np.sum(predictions, axis=0)
pred_labels = np.argmax(predictions, axis=1)

ensemble_accuracy = accuracy_score(test_labels, pred_labels)
print("Accuracy : {}".format(ensemble_accuracy))