## Comparing SVM and VGG16


In [40]:
#mount drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [41]:
import zipfile
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import LinearSVC
from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

## Data Preprocessing


In [42]:
m_files = os.listdir('/content/drive/MyDrive/fashion-data/men')
w_files = os.listdir('/content/drive/MyDrive/fashion-data/women')
all_files = m_files + w_files

#convert image files into arrays
path = '/content/drive/MyDrive/fashion-data'
def read_img(file,name,pth):
    images = np.zeros((14700))
    for image in file: 
        arr = Image.open(pth+'/'+name+'/'+image) #get img array 
        img = arr.resize((70,70)) #resize for standard sizing
        arr = np.asarray(img) #turn into array
        img.close()
        flatten = arr.flatten() #flatten
        images = np.vstack((images,flatten)) #stack 
    images = np.delete(images, 0, 0)
    return images

#read images from files 
men = read_img(m_files[:500],'men',path)
women = read_img(w_files[:500],'women',path)

#turn into dataframes and add class labels
men = pd.DataFrame(men)
men['label'] = 0
women = pd.DataFrame(women)
women['label'] = 1

#merge and shuffle
fashion = men.append(women,ignore_index=True)
fashion = shuffle(fashion)

#separate dependent and independent variables
X_ = fashion.loc[:, fashion.columns != 'label']
y_ = fashion['label']

#see dataframe
fashion.head()

#split dataset
X_train_, X_test_, y_train_, y_test_ = train_test_split(X_, y_, test_size=0.50, random_state=42)


## 1. Support Vector Machines
- Train a support vector classifier using each of the following kernels:
    - Linear
    - Poly (degree = 2)
    - RBF

- If you encounter any issues with training time or memory issues, then you may use a reduced dataset, but carefully detail why and how you reduced the dataset. Unnecessarily reducing the dataset will result in reduced grades!
- Report your error rates on the testing dataset for the different kernels.


In [50]:
## LINEAR
svm_linear = svm.SVC(kernel='linear')
svm_linear.fit(X_train_,y_train_)
## POLY
svm_poly = svm.SVC(kernel='poly', degree=2)
svm_poly.fit(X_train_,y_train_)
## RBF
svm_rbf = svm.SVC(kernel='rbf')
svm_rbf.fit(X_train_,y_train_)

print("Accuracy for kernel=POLY",svm_poly.score(X_test_,y_test_)*100,'%')
print("Accuracy for kernel=LINEAR",svm_linear.score(X_test_,y_test_)*100,'%')
print("Accuracy for kernel=POLY",svm_rbf.score(X_test_,y_test_)*100,'%')

Accuracy for kernel=POLY 62.2 %
Accuracy for kernel=LINEAR 56.8 %
Accuracy for kernel=POLY 66.0 %


## 2. Deep Neural Networks

I perform transfer learning with VGG16, which is the convolutional neural network that won ImageNet 2014.. The chosen loss function is binary cross entropy, which adds the log probabilities of belonging to each category for each datapoint. It is effective in accurately classifying data because it penalizes smaller probabilities more. Binary cross entropy is an appropriate loss function for our task because we are classifying photos into binary categories of male and female fashion. 


In [44]:
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
from keras import models
from keras import optimizers
import tensorflow as tf

In [45]:
#prepaare images in bulk for VGG model
def get_features(files,class_,path_):
  features_ = []

  for i in files: 
    ipath = path_+class_+i
    im = load_img(ipath, target_size=(224, 224))
    # convert the image pixels to a numpy array
    im = img_to_array(im)
    # reshape data for the model
    im = im.reshape((1, im.shape[0], im.shape[1], im.shape[2]))
    # prepare the image for the VGG model
    im = preprocess_input(im)

    features = model.predict(im)
    features_.append(features)
  
  return features_

#get features from files
path_ = '/content/drive/MyDrive/fashion-data'
men_ = get_features(m_files[:500],'/men/',path_)
women_ = get_features(w_files[:500],'/women/',path_)

#convert list to arrays
men_ = np.asarray(men_)
women_ = np.asarray(women_)
#reshape into compatible format
men_ = men_.reshape(len(men_), 10, 10, 10)
women_ = women_.reshape(len(women_), 10, 10, 10)
#define X and y variables
X = np.append(men_, women_, axis = 0)
y = np.append(np.ones(len(men_)), np.zeros(len(women_)), axis = 0)
#split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [46]:
neuralnetwork = models.Sequential()
neuralnetwork.compile(optimizer=tf.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])
neuralnetwork.add(layers.Flatten(input_shape=(10,10,10)))
neuralnetwork.add(layers.Dense(256, activation='relu', input_dim=(10*10*10)))
neuralnetwork.add(layers.Dropout(0.5))
neuralnetwork.add(layers.Dense(1, activation='sigmoid'))
history = neuralnetwork.fit(X_train, y_train, epochs=50, validation_data = (X_test,y_test))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [49]:
accuracy = history.history['val_accuracy'][-1]
error = 1-accuracy
print ('Neural Net Accuracy:', accuracy*100,'%')


Neural Net Accuracy: 74.00000095367432 %


## Comparison 

