### In the RNN_CNN.ipynb notebook, we build a CNN model for the image data and RNN models for the noisyTextDescription field. After training, we get 3 model files (best_model.h5, char_rnn_classification_model_lstm.pt and char_rnn_classification_model_gru.pt).


# Load Data from Google Drive



In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/drive so the dataset archive is reachable.
drive.mount('/content/drive')
# Shell escape: unpack the competition archive stored on Drive (path is relative to the
# current working directory).
!unzip 'drive/MyDrive/Colab Notebooks/uw-cs480-fall20.zip'

# Using CNN to predict categories with images

## Process Data

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

def load_data(train_file="uw-cs480-fall20/train.csv", test_file="uw-cs480-fall20/test.csv"):
    """Read the training and test CSV tables.

    Args:
        train_file: Path of the training CSV. Defaults to the location the
            archive is unzipped to in this notebook.
        test_file: Path of the test CSV. Same default location.

    Returns:
        A ``(df_train, df_test)`` tuple of pandas DataFrames.
    """
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)
    # The previous `df_test.head()` call was dropped: inside a function its
    # result is discarded, so it never displayed anything.
    return df_train, df_test

In [None]:
from keras.preprocessing.image import img_to_array, load_img

df_train, df_test = load_data()

# Row counts come from the tables themselves instead of being hard-coded, so the
# cell keeps working if the CSVs are replaced by a subset or a newer release.
n_train_data = len(df_train)
n_test_data = len(df_test)
train_id = df_train['id']
test_id = df_test['id']
train_category = df_train['category']

# Directory (as laid out inside the archive) holding one "<id>.jpg" per sample.
IMAGE_DIR = 'uw-cs480-fall20/suffled-images/shuffled-images/'


def _load_images(ids):
    """Load "<id>.jpg" from IMAGE_DIR for every id, in order, and stack the results.

    Stacking with np.array assumes every image decodes to the same shape.
    """
    return np.array([img_to_array(load_img(IMAGE_DIR + str(image_id) + '.jpg'))
                     for image_id in ids])


# The intermediate per-image Python lists are not kept around: once stacked they
# would only hold a second copy of every image in memory.
x_train = _load_images(train_id)
x_test = _load_images(test_id)

In [None]:
train_category = df_train["category"]

# Sort the distinct labels so the class-index mapping is deterministic.
# list(set(...)) orders strings by hash, which changes between kernel sessions,
# and it disagrees with the alphabetical order that np.unique produces when
# `all_categories` is rebuilt in the RNN section -- the list that
# bin_to_category2 later uses to decode the CNN's predictions.
all_categories = sorted(set(train_category))
n_categories = len(all_categories)

# One dictionary lookup per row instead of a linear list.index() scan.
category_to_index = {name: idx for idx, name in enumerate(all_categories)}
y_train = np.array([category_to_index[name] for name in train_category])

In [None]:
# libraries
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
import math
import matplotlib.pyplot as plt

# Settings shared by the CNN cells below.
batch_size = 32
num_classes = 27

# One-hot encode the integer class ids.
y_train = keras.utils.to_categorical(y_train, num_classes)

# Scale pixel values by 1/255 (done in place on float32 copies).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Hold out everything from row `n_fit` onwards for validation; the rows before
# it remain the training set.
n_fit = 17301
x_train, x_validate = x_train[:n_fit], x_train[n_fit:]
y_train, y_validate = y_train[:n_fit], y_train[n_fit:]

## Create CNN model

In [None]:
# Define a convolutional neural network
def create_model():
    """Build the (uncompiled) convolutional classifier.

    The input shape is taken from the global ``x_train`` (everything after the
    sample axis) and the width of the final layer from the global
    ``num_classes``.

    Returns:
        A keras ``Sequential`` model: two conv-conv-pool-dropout stages
        (32 then 64 filters), followed by a 512-unit dense layer and a softmax
        output.
    """
    image_shape = x_train.shape[1:]

    first_stage = [
        Conv2D(32, (3, 3), padding='same', input_shape=image_shape),
        Activation('relu'),
        Conv2D(32, (3, 3), padding='same'),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    second_stage = [
        Conv2D(64, (3, 3), padding='same'),
        Activation('relu'),
        Conv2D(64, (3, 3), padding='same'),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    classifier_head = [
        Flatten(),
        Dense(512),
        Activation('relu'),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ]

    return Sequential(first_stage + second_stage + classifier_head)

## Train the model

In [None]:
def train(model, epochs, data_augmentation, opt):
    """Compile ``model`` and fit it on the notebook's training split.

    Reads ``x_train``, ``y_train``, ``x_validate``, ``y_validate`` and
    ``batch_size`` from the notebook globals. The weights with the highest
    validation accuracy seen so far are written to ``best_model.h5``.

    NOTE: the RNN section later defines another function that is also called
    ``train``; once that cell has run, this definition is shadowed.

    Args:
        model: Uncompiled keras model (e.g. from ``create_model()``).
        epochs: Number of passes over the training data.
        data_augmentation: If true, train on randomly shifted / horizontally
            flipped batches; otherwise train on the raw arrays.
        opt: Keras optimizer instance passed to ``model.compile``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Compile the model before using it
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()

    # create a callback that will save the best model while training
    save_best_model = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max',
                                      save_best_only=True, verbose=1)

    # train without data augmentation
    if not data_augmentation:
        print('Not using data augmentation.')
        return model.fit(x_train, y_train,
                         batch_size=batch_size,
                         epochs=epochs,
                         validation_data=(x_validate, y_validate),
                         shuffle=True,
                         callbacks=[save_best_model],
                         verbose=1)

    # train with data augmentation
    print('Using real-time data augmentation.')
    # Only the options that differ from the ImageDataGenerator defaults are
    # spelled out (fill_mode is kept for clarity): shift up to 10% of the
    # width/height and flip horizontally at random.
    datagen = ImageDataGenerator(
        width_shift_range=0.1,
        height_shift_range=0.1,
        fill_mode='nearest',
        horizontal_flip=True)

    # No datagen.fit(x_train) here: it only computes statistics for
    # feature-wise centering/normalisation and ZCA whitening, none of which is
    # enabled, so it would just copy the whole training array for nothing.

    # Fit the model on the batches generated by datagen.flow().
    return model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
                     steps_per_epoch=math.ceil(x_train.shape[0] / batch_size),
                     epochs=epochs,
                     validation_data=(x_validate, y_validate),
                     callbacks=[save_best_model],
                     verbose=0)

In [None]:
# train and evaluate conv model with Adam
# Adam() is constructed without arguments, i.e. with the library's default settings.
optimizer = keras.optimizers.Adam()
model = create_model()
# Run 40 epochs through the augmentation branch of train(); the object returned
# by model.fit is kept in `history`.
history = train(model, epochs=40, data_augmentation=True, opt=optimizer)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 80, 60, 32)        896       
_________________________________________________________________
activation_6 (Activation)    (None, 80, 60, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 80, 60, 32)        9248      
_________________________________________________________________
activation_7 (Activation)    (None, 80, 60, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 40, 30, 32)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 40, 30, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 40, 30, 64)       

# Using RNN to predict categories with noisyTextDescription

In [None]:
import string 

df_train, df_test = load_data()

# Sizes are read from the tables rather than hard-coded so they cannot drift
# out of sync with the CSV files.
n_train_data = len(df_train)
n_test_data = len(df_test)

# Alphabet for the one-hot character encoding: ASCII letters, digits and a few
# punctuation marks / space.
all_letters = string.ascii_letters + '0123456789&-\' '
n_letters = len(all_letters)

# np.unique returns the distinct category names in sorted order.
all_categories = list(np.unique(np.array(df_train['category'])))
n_categories = len(all_categories)

# (category, description) pairs, plus the raw description / id columns.
train_overall = np.array(df_train[['category', 'noisyTextDescription']])
train_data = np.array(df_train['noisyTextDescription'])
test_id = np.array(df_test['id'])
test_data = np.array(df_test['noisyTextDescription'])

In [None]:
import torch

# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    """Return the position of ``letter`` in the global ``all_letters`` string.

    Uses ``str.find``, so the result is -1 when ``letter`` does not occur in
    ``all_letters``; callers that index with the result must handle that case.
    """
    return all_letters.find(letter)

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    """One-hot encode a single character as a ``<1 x n_letters>`` tensor.

    A character that is not part of ``all_letters`` yields an all-zero row.
    Previously the -1 returned by ``letterToIndex`` was used directly as an
    index, which silently switched on the *last* column (the space character).
    """
    tensor = torch.zeros(1, n_letters)
    index = letterToIndex(letter)
    if index >= 0:
        tensor[0][index] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    """One-hot encode every character of ``line``.

    Args:
        line: The text to encode.

    Returns:
        A float tensor of shape ``<len(line) x 1 x n_letters>``. Characters
        that are not in ``all_letters`` become all-zero rows; before this fix
        the -1 from ``letterToIndex`` wrapped around and encoded them as the
        last letter of the alphabet (the space character).
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for position, letter in enumerate(line):
        index = letterToIndex(letter)
        if index >= 0:
            tensor[position][0][index] = 1
    return tensor


In [None]:
import random
import math

# Build the category_lines dictionary: every description, grouped by category.
# NOTE: train_data and test_data are rebound here to per-category dictionaries,
# replacing the flat arrays of the same names created in the previous cell.
category_lines = {}
train_data = {}
validation_data = {}
test_data = {}

for category in all_categories:
    category_lines[category] = []

# Iterate over the rows actually present instead of a hard-coded row count.
for category, description in train_overall:
    category_lines[category].append(description)

for category in all_categories:
    lines = category_lines[category]
    random.shuffle(lines)

    # 70% / 15% / 15% split. The slices are contiguous: the earlier "+1" on the
    # start of the validation and test slices silently dropped two samples per
    # category.
    train_end = int(math.floor(0.7 * len(lines)))
    validation_end = int(math.floor(0.85 * len(lines)))
    train_data[category] = lines[:train_end]
    validation_data[category] = lines[train_end:validation_end]
    test_data[category] = lines[validation_end:]


## Create RNN models (LSTM and GRU)

In [None]:
import torch.nn as nn
from torch.autograd import Variable

class RNN_LSTM(nn.Module):
    """Single-step character classifier built around one ``nn.LSTMCell``.

    Each call consumes one input vector plus the current (hidden, cell) state
    and returns log-probabilities over the output classes together with the
    updated state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm_cell = nn.LSTMCell(input_size, hidden_size)
        # Read-out layer over the concatenation [input, hidden].
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden, cell):
        """Run one time step.

        The class scores are computed from ``input`` and the hidden state that
        was passed in, i.e. the state from before this step's cell update.

        Returns:
            ``(log_probs, new_hidden, new_cell)``.
        """
        log_probs = self.softmax(self.i2o(torch.cat((input, hidden), 1)))
        new_hidden, new_cell = self.lstm_cell(input, (hidden, cell))
        return log_probs, new_hidden, new_cell

    def initHidden(self):
        """Return fresh all-zero ``(hidden, cell)`` tensors of shape (1, hidden_size)."""
        state_shape = (1, self.hidden_size)
        return torch.zeros(*state_shape), torch.zeros(*state_shape)
    
class RNN_GRU(nn.Module):
    """Single-step character classifier built around one ``nn.GRUCell``.

    Each call consumes one input vector plus the current hidden state and
    returns log-probabilities over the output classes together with the
    updated hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru_cell = nn.GRUCell(input_size, hidden_size)
        # Read-out layer over the concatenation [input, hidden].
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one time step.

        The class scores are computed from ``input`` and the hidden state that
        was passed in, i.e. the state from before this step's cell update.

        Returns:
            ``(log_probs, new_hidden)``.
        """
        log_probs = self.softmax(self.i2o(torch.cat((input, hidden), 1)))
        new_hidden = self.gru_cell(input, hidden)
        return log_probs, new_hidden

    def initHidden(self):
        """Return a fresh all-zero hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)
    
# Size of the recurrent hidden state, shared by both models.
n_hidden = 128
# Both classifiers take n_letters-wide inputs (the one-hot character encoding)
# and produce one log-probability per category.
rnn_lstm = RNN_LSTM(n_letters, n_hidden, n_categories)
rnn_gru = RNN_GRU(n_letters, n_hidden, n_categories)

## Training

### Prepare for training

In [None]:
# Negative log-likelihood loss for each model; both networks end in a
# LogSoftmax layer, so their outputs are already log-probabilities.
criterion_lstm = nn.NLLLoss()
criterion_gru = nn.NLLLoss()

# One Adam optimizer per model (constructed with default settings), so the two
# networks are updated independently.
lstm_optimizer = torch.optim.Adam(rnn_lstm.parameters())
gru_optimizer = torch.optim.Adam(rnn_gru.parameters())

def train(category_tensor, line_tensor):
    """Run one optimisation step of both RNNs on a single example.

    Each model is fed ``line_tensor`` one time step at a time, starting from a
    zero state; the loss is taken on the output of the last step only.

    NOTE: this definition reuses the name of the Keras ``train`` helper from
    the CNN section and replaces it once this cell has run.

    Args:
        category_tensor: Target class index tensor for the example.
        line_tensor: Encoded line, iterated along its first dimension.

    Returns:
        ``(output_lstm, loss_lstm, output_gru, loss_gru)`` where the outputs
        are the final-step log-probabilities and the losses are Python floats.
    """
    # --- LSTM: forward over the sequence, then backward and update ---
    rnn_lstm.zero_grad()
    state_h, state_c = rnn_lstm.initHidden()
    for char_tensor in line_tensor:
        output_lstm, state_h, state_c = rnn_lstm(char_tensor, state_h, state_c)
    loss_lstm = criterion_lstm(output_lstm, category_tensor)
    loss_lstm.backward()
    lstm_optimizer.step()

    # --- GRU: same procedure, fully independent of the LSTM ---
    rnn_gru.zero_grad()
    state_g = rnn_gru.initHidden()
    for char_tensor in line_tensor:
        output_gru, state_g = rnn_gru(char_tensor, state_g)
    loss_gru = criterion_gru(output_gru, category_tensor)
    loss_gru.backward()
    gru_optimizer.step()

    return output_lstm, loss_lstm.item(), output_gru, loss_gru.item()

In [None]:
def categoryFromOutput(output):
    """Translate a model output into its most likely category.

    Args:
        output: Score tensor whose first row holds one value per category.

    Returns:
        ``(category_name, category_index)`` for the highest-scoring entry of
        the first row, with the name looked up in ``all_categories``.
    """
    best_indices = output.topk(1)[1]
    category_i = best_indices[0].item()
    category_name = all_categories[category_i]
    return category_name, category_i
    
import random

def randomChoice(l):
    """Return one uniformly random element of the non-empty sequence ``l``.

    Delegates to the standard library's ``random.choice`` instead of a
    hand-rolled ``randint`` index.
    """
    return random.choice(l)

def randomTrainingExample():
    """Draw one random training example.

    A category is picked uniformly from ``all_categories`` first, and then a
    line is picked uniformly from that category's entries in ``train_data``.

    Returns:
        ``(category, line, category_tensor, line_tensor)`` where
        ``category_tensor`` is a one-element long tensor holding the category's
        position in ``all_categories`` and ``line_tensor`` is
        ``lineToTensor(line)``.
    """
    category = randomChoice(all_categories)
    line = randomChoice(train_data[category])
    label = all_categories.index(category)
    return (
        category,
        line,
        torch.tensor([label], dtype=torch.long),
        lineToTensor(line),
    )

# Sanity check: draw ten random training examples and print their category and text.
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line)

category = Dress / line = Custom White Floral Design Dress
category = Cufflinks / line = Jamaica Men Steel Cufflinks
category = Topwear / line = UCB N9336SL03 NA2394SL02 Hazel Cl Top
category = Bags / line = Nike Noisy Blue Lt.khaki France 661 Backpack
category = Innerwear / line = Amante Women Tango Seamless Briefs PCSN02
category = Eyewear / line = Polaroid Unisex Sunglasses
category = Dress / line = Tonga Women Honeysuckle Dress
category = Innerwear / line = Lovable Women Tease Pink Bra
category = Dress / line = Avirate Women toned Dress
category = Sandal / line = Crocs Dora Boots Pink I071


In [None]:
def evaluate(line_tensor):
    """Run both RNNs over one encoded line without tracking gradients.

    Args:
        line_tensor: Encoded line; it is consumed one index at a time along its
            first dimension and must contain at least one step.

    Returns:
        ``(output_lstm, output_gru)``: the log-probabilities each model emits
        at the final time step.
    """
    # Inference only: without no_grad() every call would record an autograd
    # graph that is never used for a backward pass.
    with torch.no_grad():
        hidden_lstm, cell_lstm = rnn_lstm.initHidden()
        hidden_gru = rnn_gru.initHidden()
        for i in range(line_tensor.size()[0]):
            output_lstm, hidden_lstm, cell_lstm = rnn_lstm(line_tensor[i], hidden_lstm, cell_lstm)
            output_gru, hidden_gru = rnn_gru(line_tensor[i], hidden_gru)
    return output_lstm, output_gru

def eval_dataset(dataset):
    """Score both RNNs on a ``{category: [line, ...]}`` mapping.

    Args:
        dataset: Mapping with an entry for every name in ``all_categories``;
            each value is the list of text lines belonging to that category.

    Returns:
        ``(mean_loss_lstm, confusion_lstm, mean_loss_gru, confusion_gru)``.
        The mean losses are Python floats averaged over all lines (NaN when the
        dataset contains no lines). Each confusion matrix is
        ``n_categories x n_categories`` with rows indexed by the true category
        and normalised to sum to 1; rows of categories without any lines stay
        all-zero.
    """
    # Accumulate plain floats: summing the loss tensors themselves would keep
    # every sample's autograd graph alive for the whole pass, and left the
    # totals as ints (no .item()) when the dataset was empty.
    loss_lstm = 0.0
    loss_gru = 0.0
    n_instances = 0
    confusion_lstm = torch.zeros(n_categories, n_categories)
    confusion_gru = torch.zeros(n_categories, n_categories)

    for category in all_categories:
        category_i = all_categories.index(category)
        category_tensor = torch.tensor([category_i], dtype=torch.long)
        lines = dataset[category]
        n_instances += len(lines)

        for line in lines:
            output_lstm, output_gru = evaluate(lineToTensor(line))

            loss_lstm += criterion_lstm(output_lstm, category_tensor).item()
            _, guess_i_lstm = categoryFromOutput(output_lstm)
            confusion_lstm[category_i][guess_i_lstm] += 1

            loss_gru += criterion_gru(output_gru, category_tensor).item()
            _, guess_i_gru = categoryFromOutput(output_gru)
            confusion_gru[category_i][guess_i_gru] += 1

    # Normalize by dividing every row by its sum; empty rows are skipped so
    # they do not turn into NaN through a 0/0 division.
    for confusion in (confusion_lstm, confusion_gru):
        for i in range(n_categories):
            row_total = confusion[i].sum()
            if row_total > 0:
                confusion[i] = confusion[i] / row_total

    if n_instances == 0:
        return float('nan'), confusion_lstm, float('nan'), confusion_gru
    return loss_lstm / n_instances, confusion_lstm, loss_gru / n_instances, confusion_gru

In [None]:
import time
import math

# Number of single-example training steps, and how often (in steps) to print a
# progress line / record an averaged loss and run validation.
n_iters = 20000
print_every = 5000
plot_every = 1000

# Keep track of losses for plotting
# Running sums of the per-step training loss since the last recording point.
train_loss_lstm = 0
train_loss_gru = 0
# One averaged training loss / validation loss per recording point.
all_train_losses_lstm = []
all_train_losses_gru = []
all_validation_losses_lstm = []
all_validation_losses_gru = []

def timeSince(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    Args:
        since: A timestamp previously obtained from ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)
  
print('\nIter \tTrain% \tTime \t \tTrain_loss_LSTM \tTrain_loss_GRU \tExample')
start = time.time()
# The loop variable is called `step`: naming it `iter` shadowed the builtin
# iter() for every cell executed afterwards.
for step in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output_lstm, loss_lstm, output_gru, loss_gru = train(category_tensor, line_tensor)
    train_loss_lstm += loss_lstm
    train_loss_gru += loss_gru

    # Print step number, train loss average, description and guess
    if step % print_every == 0:
        # Steps accumulated since the running sums were last reset. This equals
        # plot_every whenever print_every is a multiple of plot_every, and
        # keeps the printed average correct when it is not.
        steps_in_window = step % plot_every or plot_every
        guess, guess_i = categoryFromOutput(output_lstm)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d \t%d%% \t(%s) \t%.4f \t\t\t%.4f \t\t%s / %s %s' % (
            step, step / n_iters * 100, timeSince(start),
            train_loss_lstm / steps_in_window, train_loss_gru / steps_in_window,
            line, guess, correct))

    if step % plot_every == 0:
        # Record the average training loss of this window and start a new one
        all_train_losses_lstm.append(train_loss_lstm / plot_every)
        all_train_losses_gru.append(train_loss_gru / plot_every)
        train_loss_lstm = 0
        train_loss_gru = 0

        # Compute loss based on validation data
        average_validation_loss_lstm, _, average_validation_loss_gru, _ = eval_dataset(validation_data)

        # Save each model whenever it reaches its best validation loss so far.
        # The whole module object is saved (not just a state_dict), so the file
        # names and format stay what downstream loading code expects.
        if not all_validation_losses_lstm or average_validation_loss_lstm < min(all_validation_losses_lstm):
            torch.save(rnn_lstm, 'char_rnn_classification_model_lstm.pt')
        all_validation_losses_lstm.append(average_validation_loss_lstm)

        if not all_validation_losses_gru or average_validation_loss_gru < min(all_validation_losses_gru):
            torch.save(rnn_gru, 'char_rnn_classification_model_gru.pt')
        all_validation_losses_gru.append(average_validation_loss_gru)


Iter 	Train% 	Time 	 	Train_loss_LSTM 	Train_loss_GRU 	Example
5000 	25% 	(5m 47s) 	1.1422 			1.0697 		Water Chronograph Sandwichtost MW-600F-7AVDF(A507) Watch / Watches ✗ (Free Gifts)
10000 	50% 	(12m 34s) 	0.9300 			0.7907 		Fossil Men Brown Wallet / Wallets ✓
15000 	75% 	(19m 26s) 	0.7321 			0.6133 		Murcia Women Brown & White Bag / Accessories ✗ (Bags)
20000 	100% 	(26m 20s) 	0.6608 			0.5355 		Lotus ace Plum Mist Nail Polish 952 / Nails ✓


In [None]:
# Raw final-step outputs of each model for every test description, plus the
# (id, predicted category) rows written to the submission file.
gru_data = []
lstm_data = []
result_data = []

# Read ids and descriptions straight from df_test: by this point `test_data`
# has been rebound to a per-category dictionary by the train/validation/test
# split cell, so indexing it with an integer row number fails.
test_ids = df_test['id'].to_numpy()
test_lines = df_test['noisyTextDescription'].to_numpy()

for sample_id, line in zip(test_ids, test_lines):
  line_tensor = lineToTensor(line)
  output_lstm, output_gru = evaluate(line_tensor)
  lstm_data.append(output_lstm)
  gru_data.append(output_gru)
  # The submission uses the LSTM's top-scoring category (the file is named
  # submission_lstm.csv); `result_data` was previously never defined.
  guess, _ = categoryFromOutput(output_lstm)
  result_data.append([sample_id, guess])

df = pd.DataFrame(result_data, columns=['id', 'category'])
df.to_csv('submission_lstm.csv', index=False)

In [None]:
def bin_to_category(predict_data, lstm_data):
  """Decode per-class scores into category names, using a second model to break near-ties.

  For each row the top-scoring class is chosen, unless the runner-up scores
  more than 80% of the top score; in that case, if ``lstm_data[i]`` equals
  either of those two class indices, that index wins instead.

  Args:
    predict_data: Sequence of per-class score rows (one row per sample).
    lstm_data: Sequence of class indices predicted by the other model, aligned
      with ``predict_data``.

  Returns:
    List of category names (looked up in ``all_categories``), one per row.
    ``predict_data`` is left untouched; the previous version zeroed the top
    score of every row in the caller's array.
  """
  y_test = []
  for i in range(len(predict_data)):
    # Work on a private copy so that masking the winner does not leak out.
    scores = np.array(predict_data[i], copy=True)
    index = np.argmax(scores)
    largest_value = scores[index]

    # Mask the winner with 0 to find the runner-up.
    scores[index] = 0
    index2 = np.argmax(scores)
    largest_value_2 = scores[index2]

    is_close_call = largest_value * 0.8 < largest_value_2
    if is_close_call and (lstm_data[i] == index or lstm_data[i] == index2):
      y_test.append(all_categories[lstm_data[i]])
    else:
      y_test.append(all_categories[index])
  return y_test

In [None]:
def bin_to_category2(predict_data):
  """Map each row of per-class scores to the name of its highest-scoring category.

  Args:
    predict_data: Sequence of per-class score rows (one row per sample).

  Returns:
    List with one entry of ``all_categories`` per row, in input order.
  """
  return [all_categories[np.argmax(scores)] for scores in predict_data]

In [None]:
# Reload the checkpoint file that the ModelCheckpoint callback writes during CNN training.
saved_model = load_model('best_model.h5')
predict_val = saved_model.predict(x_test)
# NOTE(review): bin_to_category2 looks up names in whatever the global
# `all_categories` is at this point -- confirm it has the same ordering as the
# list used to build y_train for the CNN.
y_test = bin_to_category2(predict_val)
cnn_data = []  # not used anywhere in this cell

# Show the first 15 decoded predictions as a quick sanity check.
print(y_test[: 15])


['Apparel Set', 'Apparel Set', 'Scarves', 'Flip Flops', 'Free Gifts', 'Flip Flops', 'Apparel Set', 'Saree', 'Ties', 'Flip Flops', 'Free Gifts', 'Apparel Set', 'Bags', 'Free Gifts', 'Dress']
