<h3>Custom Neural Net for ILSVRC Submission, CSE 144, UC Santa Cruz, Winter 2021 Final Project</h3>


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers , datasets , layers, models
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from tensorflow import keras
from PIL import Image
import numpy
import imageio

In [None]:
from google.colab import files

Next we build helper functions to import the dataset from the downloaded files and split it into:

```
 (x_train , y_train , x_val , y_val , x_test , y_test)
```



In [None]:
def LoadDataset():
  """Prompt for a file upload through google.colab's `files.upload()`.

  The value returned by `files.upload()` is discarded; this function
  returns None. Only usable where `google.colab.files` is importable.
  """

  files.upload()
  

In [None]:
def GetLabelDict(path="wnids.txt"):
  """Build a mapping from class id (one per line of `path`) to integer label.

  Labels are assigned in file order, starting at 0. Blank lines are skipped
  so that a trailing newline does not create a bogus '' class.

  Args:
    path: text file with one class id per line. Defaults to "wnids.txt",
      resolved against the current working directory.

  Returns:
    dict mapping class-id string -> int label.

  Raises:
    OSError: if `path` cannot be opened.
  """
  label_dict = {}

  # `with` guarantees the file handle is closed, even if reading fails.
  with open(path, "r") as wnids_file:
    for line in wnids_file:
      wnid = line.strip()
      if wnid:
        # Next free index, so labels stay contiguous when blanks are skipped.
        label_dict[wnid] = len(label_dict)

  return label_dict

Here we need to upload 'ucsc-cse144-tiny-imagenet-competition.zip'

In [None]:
# Opens the upload prompt; select 'ucsc-cse144-tiny-imagenet-competition.zip'.
LoadDataset()

Saving ucsc-cse144-tiny-imagenet-competition.zip to ucsc-cse144-tiny-imagenet-competition.zip


In [None]:
!unzip ucsc-cse144-tiny-imagenet-competition.zip

In [None]:
# Work from inside the dataset folder: the cells below use paths relative to it
# ("wnids.txt", "train/...", "test/images/...").
%cd tiny-imagenet-100

/content/tiny-imagenet-100


In [None]:
# Class id -> integer label, in the order the ids appear in wnids.txt.
label_dict = GetLabelDict()

Now we load our training images and convert them to Numpy arrays

In [None]:
def LoadTrainingData(label_dict, images_per_class=500):
  """Load the training images and their integer labels as NumPy arrays.

  For every class id in `label_dict`, reads the files
  'train/<id>/images/<id>_<i>.JPEG' for i in range(images_per_class).
  Each image is converted to RGB first, so grayscale (or other non-RGB)
  files are kept instead of being silently dropped. Images that are still
  not 64x64x3 after conversion are skipped.

  Args:
    label_dict: mapping of class id -> integer label.
    images_per_class: number of consecutively numbered images to read per
      class (default 500).

  Returns:
    (x_train, y_train): x_train has one (64, 64, 3) entry per kept image,
    y_train holds the matching integer labels in the same order.

  Raises:
    OSError: if an expected image file is missing or unreadable.
  """
  x_train = []
  y_train = []

  for wnid, label in label_dict.items():
    for i in range(images_per_class):
      path = 'train/{}/images/{}_{}.JPEG'.format(wnid, wnid, i)

      # The context manager closes the file as soon as the pixels are copied.
      with Image.open(path) as img:
        pixels = numpy.asarray(img.convert('RGB'))

      if pixels.shape == (64, 64, 3):
        x_train.append(pixels)
        y_train.append(label)

  return numpy.array(x_train), numpy.array(y_train)


In [None]:
# Images and their integer labels, in matching order.
x_train , y_train = LoadTrainingData(label_dict)

Normalizing

In [None]:
# Scale pixel values from [0, 255] to [0, 1]. Casting to float32 first keeps the
# result float32; dividing the raw integer array would produce float64 and use
# twice as much memory for the whole training set.
# NOTE: not idempotent - re-running this cell divides by 255 again.
x_train = x_train.astype('float32') / 255

In [None]:
# Hold out 10% of the training images for validation.
# random_state makes the split reproducible across runs; stratify keeps the
# class proportions the same in both parts.
# NOTE: re-running this cell splits the already-reduced training set again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=.1, random_state=42, stratify=y_train)

Here is my (best performing) model from Homework 4, adapted for this project.


In [None]:

# Two convolutional layers followed by one wide fully connected layer.
# Layers created without an `activation` are followed by a PReLU layer,
# which acts as their activation.
MyModel = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.Conv2D(128, (3, 3)),
    layers.PReLU(),
    layers.Dropout(.5),  # experimenting with this
    layers.Flatten(),
    layers.Dense(1024),
    layers.PReLU(),
    layers.Dense(100, activation='softmax'),  # one output per class
])

# Sparse categorical cross-entropy: the labels are plain integers, not one-hot.
MyModel.compile(
    optimizer=SGD(learning_rate=0.01, momentum=0.9),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

MyHistory = MyModel.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_val, y_val),
)
   

Epoch 1/10
Epoch 2/10
  64/1384 [>.............................] - ETA: 12:12 - loss: 3.9161 - accuracy: 0.1147

## Now we load the test data

In [None]:
import glob

# glob returns paths in arbitrary, filesystem-dependent order; sorting makes the
# order of the test set (and therefore of the submission rows) deterministic.
filelist = sorted(glob.glob('test/images/*.JPEG'))

In [None]:
def LoadTestData(filelist):
  """Load the test images listed in `filelist` as a NumPy array.

  Every image is converted to RGB before the shape check, so grayscale (or
  other non-RGB) files become valid (64, 64, 3) inputs instead of being
  rejected. Only images that are still not 64x64x3 after conversion are
  rejected.

  Args:
    filelist: iterable of image file paths.

  Returns:
    (test_data, final_filenames, rejected_filenames):
      test_data: array with one (64, 64, 3) entry per accepted image.
      final_filenames: paths of the accepted images, in the same order as
        the rows of test_data.
      rejected_filenames: paths of the images that failed the shape check.

  Raises:
    OSError: if a file is missing or unreadable.
  """
  test_data = []
  final_filenames = []
  rejected_filenames = []

  for filename in filelist:
    # The context manager closes the file as soon as the pixels are copied.
    with Image.open(filename) as img:
      pixels = numpy.asarray(img.convert('RGB'))

    if pixels.shape == (64, 64, 3):
      test_data.append(pixels)
      final_filenames.append(filename)
    else:
      rejected_filenames.append(filename)

  return numpy.array(test_data), final_filenames, rejected_filenames

In [None]:
# final_filenames lines up row-for-row with test_data; rejected_filenames holds
# the files that did not pass the (64, 64, 3) shape check.
test_data , final_filenames , rejected_filenames = LoadTestData(filelist)

In [None]:
# Same scaling as the training images. Casting to float32 first avoids the
# float64 array that dividing the raw integer pixels would create.
# NOTE: not idempotent - re-running this cell divides by 255 again.
test_data = test_data.astype('float32') / 255

And we predict the labels of the test set

In [None]:
def GetPredictedLabels(test_data, model=None):
  """Return the most probable class index for every row of `test_data`.

  Args:
    test_data: batch of inputs accepted by the model's `predict` method.
    model: object with a `predict` method returning one row of class scores
      per input. Defaults to the notebook-level `MyModel`.

  Returns:
    list of Python ints, one per input. When several classes tie for the
    highest score, the lowest index wins.
  """
  if model is None:
    model = MyModel

  prediction = numpy.asarray(model.predict(test_data))
  if prediction.size == 0:
    return []

  # argmax always yields exactly one index per row; comparing each row against
  # its maximum can yield several indices when scores tie, which cannot be
  # converted to a single int.
  return numpy.argmax(prediction, axis=1).tolist()


In [None]:
# One integer class label per row of test_data.
predicted_labels = GetPredictedLabels(test_data)

And finally format the CSV

In [None]:
import csv
import random

In [None]:
# Images that could not be fed to the model still need a row in the submission,
# so they get a random class.
# randrange(100) yields 0..99, matching the 100 model outputs; randint(0, 100)
# includes 100, which is not a valid label.
# Slicing back to len(final_filenames) first drops any fallback labels added by
# a previous run, so re-running this cell does not keep growing the list.
predicted_labels = predicted_labels[:len(final_filenames)] + [
    random.randrange(100) for _ in rejected_filenames
]

In [None]:
# Accepted files first, rejected files last.
aggregate_filenames = final_filenames + rejected_filenames

In [None]:
# Reduce each path to its bare image id by removing the directory prefix and
# the file extension. Building a new list avoids the manual index counter and
# writing into the list while iterating over it.
aggregate_filenames = [
    filename.replace('test/images/', '').replace('.JPEG', '')
    for filename in aggregate_filenames
]


In [None]:
def WritePredictionsCSV(predictions_list, image_ids_list,
                        csv_filename='prediction_submission.csv'):
  """Write the predictions to a CSV file with header 'image_id,label'.

  Args:
    predictions_list: predicted label for each image.
    image_ids_list: image ids, in the same order as `predictions_list`.
    csv_filename: output path (default 'prediction_submission.csv').

  Raises:
    ValueError: if the two lists differ in length, since the rows would be
      misaligned or truncated.
    OSError: if the output file cannot be written.
  """
  if len(predictions_list) != len(image_ids_list):
    raise ValueError(
        'got {} predictions for {} image ids'.format(
            len(predictions_list), len(image_ids_list)))

  # newline='' lets the csv module control line endings itself; without it,
  # extra blank rows appear on platforms that translate '\n'.
  with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['image_id', 'label'])
    writer.writerows(zip(image_ids_list, predictions_list))


In [None]:
# Writes 'prediction_submission.csv' in the current working directory.
WritePredictionsCSV(predicted_labels,aggregate_filenames)