In [0]:
# Change directory to VSCode workspace root so that relative path loads work correctly. Turn this addition off with the DataScience.changeDirOnImportExport setting
# ms-python.python added
import os
# Best-effort move to the parent directory so relative paths resolve from the
# workspace root; the new working directory is printed on success.
try:
	os.chdir(os.path.join(os.getcwd(), '..'))
	print(os.getcwd())
except OSError:
	# Only filesystem failures are ignored (e.g. the directory is missing or
	# not accessible). A bare `except:` would also hide KeyboardInterrupt,
	# SystemExit and genuine programming errors.
	pass


 # Softmax Image Classifier

 In this exercise, I will:
 - implement a fully-vectorized **loss function** for the Softmax classifier
 - implement the fully-vectorized expression for its **analytic gradient**
 - use a validation set to **tune the learning rate and regularization** strength
 - **optimize** the loss function with **SGD**
 - **visualize** the final learned weights


 ### Importing the necessary libraries

In [1]:
from __future__ import print_function

import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt

# Enable inline figures first, then apply the plotting defaults (same order as before).
get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),  # default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# Auto-reload external modules so edits to imported code are picked up;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


ModuleNotFoundError: No module named 'cs231n'

 ### Preprocessing
 Here we first use the `load_CIFAR10()` function from `data_utils` to parse the CIFAR-10 pickle files. The dataset comes with
 separate training and test sets. Since the data is already shuffled, no reshuffling is needed:
 we take the first 49,000 training images for the final model training and the remaining 1,000 (images 49,001-50,000) for the validation stage.
 The development set, a small random sample of the training images, is used to check that the loss and gradient functions we have implemented work correctly.

In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500,
                     cifar10_dir='cs231n/datasets/cifar-10-batches-py', seed=None):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.

    Parameters
    ----------
    num_training : int
        Number of leading training images kept as the training split.
    num_validation : int
        Number of images taken right after the training split for validation.
    num_test : int
        Number of leading test images kept as the test split.
    num_dev : int
        Size of the development split, sampled without replacement from the
        training split.
    cifar10_dir : str
        Directory handed to ``load_CIFAR10``. Defaults to the path that was
        previously hard-coded.
    seed : int or None
        If given, the development sample is drawn from a dedicated
        ``np.random.RandomState(seed)`` so it is reproducible. If None, the
        global NumPy random state is used, as before.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev)``.
        Every ``X_*`` array is 2-D: one flattened, mean-subtracted image per
        row, with a trailing column of ones appended for the bias.

    Notes
    -----
    Assumes ``load_CIFAR10`` returns NumPy arrays and that the image arrays
    have a float dtype (the in-place mean subtraction relies on it) - TODO
    confirm against ``cs231n.data_utils``.
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data. Index arrays (not slices) are used on purpose: they
    # produce copies, so the in-place normalization below never writes into
    # the arrays returned by the loader, and out-of-range sizes raise instead
    # of being silently truncated.
    val_idx = np.arange(num_training, num_training + num_validation)
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    train_idx = np.arange(num_training)
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    test_idx = np.arange(num_test)
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Development split: random subset of the training split.
    rng = np.random if seed is None else np.random.RandomState(seed)
    dev_idx = rng.choice(num_training, num_dev, replace=False)
    X_dev, y_dev = X_train[dev_idx], y_train[dev_idx]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image of the training split from
    # every split (validation/test/dev statistics are never used).
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # Add the bias dimension: append a column of ones to every split. The
    # data stays row-major (one example per row); nothing is transposed.
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev


# Load every split, then report the shape of each returned array.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
for label, arr in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
    ('dev data shape: ', X_dev),
    ('dev labels shape: ', y_dev),
):
    print(label, arr.shape)


NameError: name 'load_CIFAR10' is not defined

 ### Loss Function and Gradient
 Given the datasets, we now need to formulate a loss function and an expression for computing its
 gradient. The `softmax_loss_vectorized` function in `softmax.py` carries out these computations in an entirely
 vectorized manner. Training a classifier on our development dataset (500 images) takes less than 0.005 seconds.

 ### Cross Validation
 Here we use cross validation to find the best hyperparameters for our model. To this end, we train the
 classifier on the training data, and then test its accuracy on both the training and validation sets. We experiment with
 many values for the learning rate and regularization strength until we find a combination that is optimal.

In [3]:
from cs231n.classifiers import Softmax
# Hyperparameter grid explored by the search below.
learning_rates = [4.7e-7, 5e-7, 5.5e-7]
regularization_strengths = [7.2e4, 7.5e4, 8e4]

# Search bookkeeping.
results = {}         # (lr, reg) -> (train_accuracy, val_accuracy)
best_val = -1        # highest validation accuracy seen so far
best_softmax = None  # classifier that achieved best_val

# Accuracy helper shared by the training and validation evaluations
def compute_accuracy(y, y_pred):
    """Return the mean of the element-wise comparison ``y == y_pred``."""
    matches = (y == y_pred)
    return np.mean(matches)

# Grid search over every (learning rate, regularization strength) pair.
for lr in learning_rates:
    for reg in regularization_strengths:
        # Build a fresh classifier for each combination. Reusing one shared
        # instance made `best_softmax` an alias of whatever model was trained
        # last (not the best one), and - presumably, depending on how
        # Softmax.train initializes its weights - let later runs continue
        # from the weights of earlier ones. TODO confirm against Softmax.train.
        sftmx = Softmax()

        # Train the W and predict.
        # NOTE(review): the trailing 100 is presumably the number of training
        # iterations - confirm against the Softmax.train signature.
        sftmx.train(X_train, y_train, lr, reg, 100)
        y_pred_train = sftmx.predict(X_train)
        y_pred_val = sftmx.predict(X_val)

        # Compute the training and validation set accuracies
        train_accuracy = compute_accuracy(y_train, y_pred_train)
        val_accuracy = compute_accuracy(y_val, y_pred_val)

        # Store accuracy
        results[(lr, reg)] = (train_accuracy, val_accuracy)

        # Keep the model with the highest validation accuracy seen so far.
        if val_accuracy > best_val:
            best_softmax = sftmx
            best_val = val_accuracy

# Print out results, ordered by (lr, reg).
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
                lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)


ModuleNotFoundError: No module named 'cs231n'

 ### Evaluation
 We now evaluate the classifier on the test set to see the final performance of the model.

In [4]:
# Score the selected softmax model on the test split
y_test_pred = best_softmax.predict(X_test)
test_accuracy = np.mean(y_test_pred == y_test)  # fraction of matching labels
print('softmax on raw pixels final test set accuracy: %f' % test_accuracy)



NameError: name 'best_softmax' is not defined

In [5]:
# Show the learned weights of each class as an image
# Strip out the bias row, then view the rest as a 32x32x3 image per class.
w = best_softmax.W[:-1, :].reshape(32, 32, 3, 10)

# One global range for all classes, so the templates share the same scale.
w_min, w_max = w.min(), w.max()

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for i, class_name in enumerate(classes):
    plt.subplot(2, 5, i + 1)

    # Map the weights linearly onto [0, 255] before casting to 8-bit pixels
    wimg = 255.0 * (w[..., i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(class_name)




NameError: name 'best_softmax' is not defined