In [0]:
import matplotlib.pyplot as plt     # ultimate plotting tool
import numpy as np      # effective math
import pandas as pd     # allow us to make dataframes to store our data cleanly
import torch.nn as nn   # base layers subclassed by the custom-initialised Linear / Conv2d
from mpl_toolkits.mplot3d import Axes3D     # 3D plots

In [0]:
CSV_PATH = 'Iris.csv'   # dataset file, expected in the working directory

try:
    data = pd.read_csv(CSV_PATH)
except FileNotFoundError:
    # Ask for an upload only when the file is actually missing, so re-running
    # this cell does not block on the Colab upload dialog every time.
    from google.colab import files
    files.upload()
    data = pd.read_csv(CSV_PATH)

data = data.set_index('Id')     # the CSV already has an 'Id' column, so use it as the index
print(data.head())      # preview only: last column is the label, all other columns are features

X = data.iloc[:, :-1].to_numpy()    # design matrix: every column except the last
label_dict = {'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2}  # species name -> integer class label
# look every species name up in label_dict; an unknown name raises KeyError
Y = np.array([label_dict[species] for species in data.iloc[:, -1]])
print(Y)

print(X.shape)      # 150 rows (datapoints), 4 columns (features)
print(Y.shape)      # 150 single dimension labels

m = X.shape[0]      # number of datapoints (rows of X)

In [0]:
def normalise(x):
    """Standardise every feature (column) to zero mean and unit standard deviation.

    Parameters
    ----------
    x : array-like
        Data with one row per datapoint and one column per feature.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``x``. Each column has its mean
        subtracted and is then divided by its (population) standard deviation.
        A constant column has zero standard deviation; it is returned as all
        zeros instead of NaN/inf.
    """
    x = np.asarray(x, dtype=float)
    centred = x - np.mean(x, axis=0)        # subtract the per-feature mean
    spread = np.std(centred, axis=0)        # per-feature standard deviation
    spread = np.where(spread == 0, 1.0, spread)     # constant feature: divide by 1 to avoid 0/0
    return centred / spread

X_std = normalise(X)    # centre each feature on its mean, then divide it by its standard deviation

In [0]:
def decompose(x):
    """Compute the sample covariance matrix of the data and its eigen-decomposition.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        One row per datapoint. The columns are centred here, so the result is
        a covariance matrix whether or not the caller centred the data.

    Returns
    -------
    eig_vals : numpy.ndarray, shape (n_features,)
        Real eigenvalues, sorted from largest to smallest.
    eig_vecs : numpy.ndarray, shape (n_features, n_features)
        Column ``k`` is the unit eigenvector belonging to ``eig_vals[k]``.
    cov : numpy.ndarray, shape (n_features, n_features)
        Covariance matrix with an ``n_samples - 1`` denominator.

    The matrix, eigenvectors and eigenvalues are also printed.
    """
    x = np.asarray(x, dtype=float)
    centred = x - x.mean(axis=0)
    dof = max(x.shape[0] - 1, 1)    # n - 1, kept at 1 for a single row so we never divide by zero
    cov = np.matmul(centred.T, centred) / dof
    print('\nCovariance matrix')
    print(cov)

    # cov is symmetric, so use eigh: it always returns real values in ascending order.
    # Reverse so the direction of largest variance comes first.
    eig_vals, eig_vecs = np.linalg.eigh(cov)
    eig_vals, eig_vecs = eig_vals[::-1], eig_vecs[:, ::-1]
    print('\nEigenvectors')
    print(eig_vecs)
    print('\nEigenvalues')
    print(eig_vals)
    return eig_vals, eig_vecs, cov

eig_vals, eig_vecs, covariance = decompose(X_std)      # eigenvalues, eigenvectors and the matrix they came from; decompose also prints all three

In [0]:
def whicheigs(eig_vals):
    """Plot the variance accounted for by each principal component and the cumulative sum.

    Parameters
    ----------
    eig_vals : array-like
        Eigenvalues of the covariance matrix, in any order. They are ranked
        largest-first here so that bar ``PC1`` is the largest share and the
        cumulative curve is meaningful.

    Draws a bar per component (its percentage of the eigenvalue total) with
    the cumulative percentage overlaid as a red line, then shows the figure.
    Returns nothing.
    """
    ranked = np.sort(np.asarray(eig_vals))[::-1]    # largest eigenvalue first
    var_percent = ranked / ranked.sum() * 100       # each eigenvalue as a percentage of the total
    cum_var_percent = np.cumsum(var_percent)        # running total of those percentages
    positions = np.arange(len(ranked))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title('Variance along different principal components')
    ax.grid()
    ax.set_xlabel('Principal Component')
    ax.set_ylabel('Percentage total variance accounted for')

    ax.plot(positions, cum_var_percent, '-ro')      # cumulative percentage
    ax.bar(positions, var_percent)                  # individual percentage per component
    ax.set_xticks(positions)
    ax.set_xticklabels(['PC{}'.format(k + 1) for k in positions])   # components are numbered from 1
    plt.show()
    
whicheigs(eig_vals)     # bar chart of each eigenvalue's percentage of the total, with the running total overlaid

In [0]:
colour_dict = {0: 'r', 1: 'g', 2: 'b'}     # class label -> matplotlib colour code
colour_list = [colour_dict[label] for label in Y.tolist()]     # one colour per datapoint, in the same order as Y

dim = 2     # how many dimensions the data is projected onto for plotting

def plotreduced(x, n_dims=None, colours=None):
    """Scatter-plot data that has been projected onto 1, 2 or 3 dimensions.

    Parameters
    ----------
    x : numpy.ndarray
        Projected data. For 2 or 3 dimensions, columns 0, 1 (and 2) are read;
        for 1 dimension the whole array is plotted against zeros.
    n_dims : int, optional
        Number of dimensions to draw. Defaults to the module-level ``dim``.
    colours : sequence, optional
        One colour per datapoint. Defaults to the module-level ``colour_list``.

    Returns
    -------
    The matplotlib axes the points were drawn on.

    Raises
    ------
    ValueError
        If ``n_dims`` is not 1, 2 or 3.
    """
    n_dims = dim if n_dims is None else n_dims
    colours = colour_list if colours is None else colours
    if n_dims not in (1, 2, 3):
        raise ValueError('plotreduced can only draw 1, 2 or 3 dimensions, got {}'.format(n_dims))

    fig = plt.figure()
    if n_dims == 3:
        ax = fig.add_subplot(111, projection='3d')      # 3d set of axes
        ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=colours)
        ax.set_xlabel('PC1 value')
        ax.set_ylabel('PC2 value')
        ax.set_zlabel('PC3 value')
    elif n_dims == 2:
        ax = fig.add_subplot(111)       # 2d set of axes
        ax.scatter(x[:, 0], x[:, 1], c=colours)
        ax.set_xlabel('PC1 value')
        ax.set_ylabel('PC2 value')
    else:
        ax = fig.add_subplot(111)       # 2d axes; the single dimension runs along x
        ax.scatter(x, np.zeros_like(x), c=colours)      # zero for every y value
        ax.set_xlabel('PC1 Value')
    # Switch the grid on only once the real axes exist; doing it earlier makes
    # pyplot create an extra, empty axes on the figure.
    ax.grid(True)
    plt.show()
    return ax

# Project the standardised data onto the `dim` eigenvectors with the largest
# eigenvalues (eigenvectors are the columns of eig_vecs).
leading = np.argsort(eig_vals)[::-1][:dim]
X_reduced = np.matmul(X_std, eig_vecs[:, leading])

ax = plotreduced(X_reduced)      # check out how the data looks in a visualisable dimension

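# Weight initialization
If all of our weights have very high positive or negative values initially, then our neurons will saturate and we will get very small gradients, leading to slow learning or no learning at all. The layers below therefore draw their weights from a zero-mean normal distribution with variance $2 / (n_{in} + n_{out})$ and set their biases to zero.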

In [0]:
class Linear(nn.Linear):
    """nn.Linear with its parameter initialisation overridden."""

    def reset_parameters(self):
        """Draw weights from N(0, 2 / (in_features + out_features)) and zero the bias."""
        fan_total = self.in_features + self.out_features
        std = np.sqrt(2 / fan_total)    # the initialiser takes a standard deviation, not a variance
        nn.init.normal_(self.weight, mean=0, std=std)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

class Conv2d(nn.Conv2d):
    """nn.Conv2d with its parameter initialisation overridden."""

    def reset_parameters(self):
        """Initialise weights with a zero-mean normal and set the bias to zero.

        The weight variance is ``2 / (fan_in + fan_out)``. Both fans are taken
        from the weight tensor's own shape: ``fan_in`` is its second dimension
        times the kernel area and ``fan_out`` is its first dimension times the
        kernel area. With the default ``groups=1`` this equals
        ``2 / ((in_channels + out_channels) * prod(kernel_size))``. For grouped
        convolutions it uses the input channels each filter actually sees
        instead of the full ``in_channels``.
        """
        nn.init.xavier_normal_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)