<a href="https://colab.research.google.com/github/faithrts/COMP-551/blob/Joey-added-experiments/A3_Draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set-up

In [2]:
### importing libraries and setting the random seed

import numpy as np
import pandas as pd
import sys

%matplotlib notebook
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
#from matplotlib import cm

import re
import math
from importlib import reload

from sklearn.feature_extraction.text import CountVectorizer

import warnings
warnings.filterwarnings('ignore')

# fixes NumPy's global random state so shuffles and samples are reproducible
np.random.seed(1234)

# a folder to store the saved graphs; created from Python with exist_ok=True so
# that re-running this cell is silent when the folder already exists (the shell
# `mkdir` printed an error on every re-run)
import os
os.makedirs('images', exist_ok=True)

# Data Handling

### Helper functions

#### Loading and cleaning data

In [None]:
def standardize_array(arr):
  """Standardizes the entries of an array to zero mean and unit spread.

  Args:
    arr: an array exposing `.mean()` and `.std()` (e.g. a NumPy array).

  Returns:
    A Python list holding one standardized entry per element of `arr`.
  """
  # the mean and standard deviation of the input array
  mean_val = arr.mean()
  standard_dev = arr.std()

  # a constant array has zero spread; dividing by it would turn every entry
  # into NaN, so the centred values (all zeros) are left unscaled instead
  if standard_dev == 0:
    standard_dev = 1.0

  # vectorized form of standardizing element by element; list() keeps the
  # return type a list, as callers of this helper received before
  return list((arr - mean_val) / standard_dev)

In [None]:
def standardize_list(list):
  """Standardizes the entries of a list to zero mean and unit spread.

  Args:
    list: a sequence of numbers. NOTE: the parameter name shadows the built-in
      `list` inside this function; it is kept so keyword callers keep working.

  Returns:
    A new list with one standardized entry per input element; an empty input
    gives an empty list.
  """
  # nothing to standardize (also avoids dividing by len(list) == 0 below)
  if len(list) == 0:
    return []

  # the mean and standard deviation of the input list
  mean_val = sum(list) / len(list)
  standard_dev = np.std(list)

  # a constant list has zero spread; dividing by it would turn every entry
  # into NaN, so the centred values (all zeros) are left unscaled instead
  if standard_dev == 0:
    standard_dev = 1.0

  return [(i - mean_val) / standard_dev for i in list]

In [None]:
# standardizes columns in input dataframe df;
# assumes last column is the target labels
def standardize_df(df):
  """Returns a copy of `df` whose feature columns are standardized.

  Every column except the last one is shifted by its mean and divided by its
  standard deviation (as computed by pandas' `Series.std`). The last column
  and the input dataframe itself are left untouched.

  Args:
    df: dataframe whose last column holds the target labels.

  Returns:
    A new dataframe with the same columns as `df`.
  """
  standardized_df = df.copy()

  # iterates through each column label, excluding the last column
  for col in standardized_df.columns[:-1]:
    column = standardized_df[col]

    # calculates mean value and standard deviation of column
    mean_val = column.mean()
    standard_dev = column.std()

    # a constant column has zero spread (and a single-row frame has an
    # undefined one); dividing would fill the column with NaN, so the centred
    # values (all zeros) are left unscaled instead
    if standard_dev == 0 or pd.isna(standard_dev):
      standard_dev = 1.0

    # standardizes the values in the current column
    standardized_df[col] = (column - mean_val) / standard_dev

  return standardized_df

In [None]:
# custom pre-processor for CountVectorizer: lowercases the text, then strips
# ASCII digits and a fixed set of punctuation marks (, . ! ? ; : _);
# any other character (e.g. hyphens or quotes) is left in place
def keep_only_letters_preprocessor(text):
  lowered = text.lower()
  return re.sub('([0-9,.!?;:_])', '', lowered)

In [None]:
# splits a dataframe into a feature matrix and a label vector;
# the last column is taken to be the labels, everything before it the features
def x_y_from_df(df):
  features = df.iloc[:, :-1]
  labels = df.iloc[:, -1]
  return features.to_numpy(), labels.to_numpy()

#### Plotting graphs

## Importing

In [None]:
# clones the github repo
!git clone https://github.com/zalandoresearch/fashion-mnist
sys.path.insert(1, 'fashion-mnist/utils')

# imports the mnist reader from the repo
import mnist_reader

Cloning into 'fashion-mnist'...
remote: Enumerating objects: 762, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 762 (delta 0), reused 3 (delta 0), pack-reused 758[K
Receiving objects: 100% (762/762), 105.85 MiB | 62.95 MiB/s, done.
Resolving deltas: 100% (444/444), done.


### Loading and cleaning data

In [None]:
### loading the data using the github repo's mnist_reader.load_mnist

# both splits are read from the same path inside the cloned repo
fashion_data_path = 'fashion-mnist/data/fashion'

# training split
x_train_temp, y_train = mnist_reader.load_mnist(fashion_data_path, kind='train')

# testing split (kind 't10k')
x_test_temp, y_test = mnist_reader.load_mnist(fashion_data_path, kind='t10k')

In [None]:
### creating dataframes to better understand data

# wraps a feature array in a dataframe and appends its labels as the
# last column, named 'LABEL'
def _labelled_frame(features, labels):
  frame = pd.DataFrame(features)
  frame['LABEL'] = labels
  return frame

# one dataframe per split
train_df = _labelled_frame(x_train_temp, y_train)
test_df = _labelled_frame(x_test_temp, y_test)

In [None]:
# displays the training dataframe: the feature columns followed by 'LABEL'
train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,LABEL
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
1,0,0,0,0,0,1,0,0,0,0,...,114,130,76,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,22,...,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,33,96,...,0,0,0,0,0,0,0,0,0,3
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
59996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
59997,0,0,0,0,0,0,0,0,0,5,...,0,0,0,0,0,0,0,0,0,3
59998,0,0,0,0,0,0,0,0,0,0,...,54,50,5,0,1,0,0,0,0,0


# Implementing models

## Helper functions

### Accuracy 

In [4]:
def evaluate_acc(y_pred, y):
  """Returns the fraction of predictions that equal the true labels.

  Both inputs are converted to arrays and flattened before being compared, so
  a column vector of predictions (N, 1) and a flat label vector (N,) are
  compared entry by entry. Without the flattening, NumPy would broadcast the
  two into an (N, N) comparison and the result could exceed 1.

  Args:
    y_pred: predicted class labels (array-like).
    y: true class labels (array-like) with the same number of entries.

  Returns:
    The accuracy as a float in [0, 1].

  Raises:
    ValueError: if the two inputs hold a different number of labels.
  """
  y_pred = np.asarray(y_pred).ravel()
  y = np.asarray(y).ravel()

  if y_pred.size != y.size:
    raise ValueError(
        f"y_pred has {y_pred.size} entries but y has {y.size}")

  return np.sum(y_pred == y) / y.size

### Mini-batch gradient descent

In [8]:
class StochasticGradientDescent:
    """Mini-batch stochastic gradient descent.

    Each epoch shuffles the training examples, cuts them into consecutive
    mini-batches of `batch_size` rows (the last one may be smaller) and takes
    one gradient step per mini-batch.
    """

    def __init__(self, batch_size, learning_rate=1e-4, max_iters=1e4, epsilon=1e-8, record_history=False):
        """Stores the optimizer settings.

        Args:
            batch_size: number of training examples per mini-batch (>= 1).
            learning_rate: step size applied to every gradient.
            max_iters: upper bound on the iteration counter, which starts at 1
                and grows by one per weight update.
            epsilon: training stops once the norm of the most recent gradient
                is no longer above this value (checked between epochs).
            record_history: if True, every weight vector is kept in
                `self.w_history`.

        Raises:
            ValueError: if `batch_size` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.record_history = record_history
        self.epsilon = epsilon
        self.batch_size = int(batch_size)
        if record_history:
            self.w_history = []                  # to store the weight history for visualization

    def run(self, gradient_fn, x, y, w):
        """Minimizes a loss by mini-batch gradient steps.

        Args:
            gradient_fn: callable `gradient_fn(x_batch, y_batch, w)` returning
                the gradient with respect to `w` on that mini-batch.
            x: training inputs, one example per row.
            y: training targets, indexed along the first axis like `x`.
            w: initial weights.

        Returns:
            The weights after the last update.
        """
        grad = np.inf
        t = 1
        n_samples = x.shape[0]
        if self.record_history:
            self.w_history.append(w)                 # store initial weights before the iterative update

        # without data no update can ever happen, so the loop below would never end
        if n_samples == 0:
            return w

        while np.linalg.norm(grad) > self.epsilon and t < self.max_iters: #stopping conditions
            # a fresh random order every epoch, drawn from NumPy's global random state
            indices = np.random.permutation(n_samples)

            for start in range(0, n_samples, self.batch_size):
                # row indices of the current mini-batch; the final slice is
                # shorter when batch_size does not divide n_samples
                batch = indices[start:start + self.batch_size]

                grad = gradient_fn(x[batch], y[batch], w)
                w = w - self.learning_rate * grad     # weight update step
                if self.record_history:
                    self.w_history.append(w)
                t += 1

                # honour max_iters in the middle of an epoch as well
                if t >= self.max_iters:
                    break
        return w

In [36]:
import random

# toy data to try out the mini-batch slicing: row r of `a` belongs to row r of `b`
a = np.array([[1,2,3],[6,7,8],[11,12,13]])
b = np.array([[4,5],[9,10],[14,15]])

# explicit batch size for the demo (this cell is not inside the optimizer
# class, so there is no `self.batch_size` to read)
batch_size = 2

indices = np.arange(0, a.shape[0], 1, dtype=int) #create indices
np.random.shuffle(indices)

# cuts the shuffled indices into consecutive chunks of `batch_size`; the last
# chunk is shorter when batch_size does not divide the number of rows
mini_batches = []
for start in range(0, a.shape[0], batch_size):
  batch = indices[start:start + batch_size]
  mini_batches.append((a[batch], b[batch]))

# shows the rows of `a` that ended up in the first mini-batch
mini_batches[0][0]

array([[ 6,  7,  8],
       [11, 12, 13]])

## MLP

# Running experiments

### Helper functions