### Setup code 
Library installation, imports, and shared helper definitions used by the rest of the notebook.

In [0]:
#############################
# Library Installation Cell #
#############################

# Removes any already-installed mlrose and installs the fork hosted at
# github.com/pipsqueaker/mlrose instead. The stock PyPI install is kept
# commented out for reference.
# NOTE(review): `@master` is a moving branch, so a re-run may pull different
# code — consider pinning a commit hash for reproducibility.
# !pip install mlrose
!pip uninstall -y mlrose
!pip install git+https://github.com/pipsqueaker/mlrose.git@master

In [2]:
import mlrose
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
import time
import matplotlib.pyplot as plt
%matplotlib inline
import pylab
import copy
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import nltk
import os.path
import scipy
import sklearn
import keras
import pylab
from keras.utils import np_utils 
try:
    from google.colab import files
except ImportError:
  print("Not running on colab")
  
def jagged_average(history_list):
    """Average several 1-D histories of differing lengths, position by position.

    Shorter histories simply stop contributing once they run out: position ``i``
    of the result is the mean of element ``i`` over every history that has at
    least ``i + 1`` elements.

    Args:
        history_list: Non-empty iterable of 1-D numeric sequences (NumPy arrays
            or anything ``np.asarray`` accepts).

    Returns:
        1-D float ``np.ndarray`` whose length equals the longest history.

    Raises:
        ValueError: If ``history_list`` is empty.
    """
    histories = [np.asarray(h, dtype=float).ravel() for h in history_list]
    max_history_length = max(h.size for h in histories)

    # Pad every history with NaN so they stack into one rectangular array.
    extended_histories = np.array([
        np.append(h, [np.nan] * (max_history_length - h.size))
        for h in histories
    ])

    # Boolean mask of real (non-padding) entries. Padded slots must count as 0
    # in the denominator; leaving them as NaN would poison the column sums.
    history_mask = ~np.isnan(extended_histories)
    totals = np.where(history_mask, extended_histories, 0.0).sum(axis=0)
    counts = history_mask.sum(axis=0)

    return totals / counts
  
# Empty placeholders for the per-algorithm run specifications
# (three distinct lists, one per optimisation algorithm).
RHC_metadict, SA_metadict, GA_metadict = [], [], []

Using TensorFlow backend.


In [0]:
############################
# Load IMDb Review Dataset #
############################


#################################################################
# Configurable parameters (don't look at anything outside here) #
#################################################################

# Used as `test_size` when the non-test data is split into train/validation.
VAL_OVER_TRAIN = .2
# Used as `test_size` when the loaded data is split into train+val vs. test.
TEST_OVER_TOTAL = .2
# Fraction of the rows (counted from the start of the dataset) that is kept.
DATA_FRACTION = .5

#########################################
# End stuff you should need to look at! #
#########################################

# Cache file names. The feature matrix is written with scipy.sparse.save_npz
# and looked up / loaded as ONEHOTS_FILENAME + ".npz"; the labels are written
# and read with np.savetxt / np.loadtxt under LABELS_FILENAME.
ONEHOTS_FILENAME = "imdb-onehots.gz"
LABELS_FILENAME = "imdb-labels.gz"

# Changeable parameters
# Passed to CountVectorizer as `max_features` (caps the vocabulary size).
MAX_WORD_FEATURES = 10000

def clean_text(raw_review):
    """Turn one raw review into a single string of lower-case, stemmed words.

    Steps: strip HTML, replace every non-letter with a space, lower-case and
    split on whitespace, drop English stop words, Porter-stem what remains and
    join the stems with single spaces.
    """
    # Function-local imports, so these packages are only needed when cleaning runs.
    import re
    from bs4 import BeautifulSoup
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer

    plain_text = BeautifulSoup(raw_review, 'html.parser').get_text()
    alpha_only = re.sub("[^a-zA-Z]", " ", plain_text)
    tokens = alpha_only.lower().split()

    # A set gives constant-time membership tests while filtering.
    stop_set = set(stopwords.words("english"))
    kept_tokens = [tok for tok in tokens if tok not in stop_set]

    stemmer = PorterStemmer()
    return " ".join(stemmer.stem(tok) for tok in kept_tokens)

def apply_cleaning_function_to_series(X):
    """Run ``clean_text`` over every element of ``X`` and report the elapsed time.

    Args:
        X: Iterable of raw review strings.

    Returns:
        List of cleaned strings, in the same order as ``X``.
    """
    print('Cleaning data')
    began_at = time.time()
    cleaned_items = [clean_text(item) for item in X]
    elapsed_minutes = (time.time() - began_at)/60
    print ('Finished in ', str(elapsed_minutes), " minutes")
    return cleaned_items

# Build the bag-of-words features once and cache them on disk; later runs just
# reload the cache. Both cache files must exist for the cache to be usable,
# otherwise the load branch would fail on the missing labels file.
cache_is_complete = (os.path.isfile(ONEHOTS_FILENAME + ".npz")
                     and os.path.isfile(LABELS_FILENAME))

if not cache_is_complete:
    nltk.download('stopwords')
    print("one-hots not created yet: cleaning and saving to file")
    print("Expect this to take about 10-15 minutes")
    data = pd.read_csv('https://gitlab.com/michaelallen1966/00_python_snippets_and_recipes/raw/master/machine_learning/data/IMDb.csv')
    
    x_cleaned = apply_cleaning_function_to_series(data["review"])
    all_y = np.array(data["sentiment"]).ravel()
    
    # Free up memory!
    data = None
    vectorizer = CountVectorizer(analyzer="word",
                                 tokenizer=None,
                                 preprocessor=None,
                                 stop_words=None,
                                 ngram_range=(1,1),
                                 max_features=MAX_WORD_FEATURES)
    vectorizer.fit(x_cleaned)
    # Bound to `all_x` (not `x_all`) because the code after this if/else reads
    # `all_x` regardless of which branch ran.
    all_x = vectorizer.transform(x_cleaned)
    x_cleaned = None
    # save_npz appends ".npz" to the name, which is the path checked above.
    scipy.sparse.save_npz(ONEHOTS_FILENAME, all_x)
    np.savetxt(LABELS_FILENAME, all_y)
else:
    print("loading one-hots from file")
    start_time = time.time()
    all_x = scipy.sparse.load_npz(ONEHOTS_FILENAME + ".npz")
    all_y = np.loadtxt(LABELS_FILENAME)
    end_time = time.time()
    print("Finished loading one-hots in ", (end_time - start_time)/60, " minutes")
    
# Keep only the leading DATA_FRACTION of the rows and densify the features.
cutoff_len = int(DATA_FRACTION * all_x.shape[0])
all_x = all_x[:cutoff_len].toarray()
all_y = all_y[:cutoff_len]
# files.upload()
print("All X shape: ", all_x.shape)
print("All y shape: ", all_y.shape)

In [0]:
# all_x, all_y = np.concatenate([x_train, x_test]), np.concatenate([y_train, y_test])
# all_x = np.reshape(all_x, (all_x.shape[0], -1))
all_y = all_y.ravel()
# all_y = np_utils.to_categorical(all_y)
# One-hot encode the labels. `.toarray()` yields a plain ndarray; `.todense()`
# would yield the deprecated np.matrix type.
# NOTE: this cell rebinds `all_y`, so it is meant to run once per data load.
all_y = OneHotEncoder().fit_transform(all_y.reshape(-1, 1)).toarray()

# Fixed seed so the train/val/test partition is the same on every run.
SPLIT_SEED = 0

# First carve off the test set, then split the remainder into train/validation.
all_train_x, test_x, all_train_y, test_y = train_test_split(all_x, all_y, 
                                                            test_size=TEST_OVER_TOTAL,
                                                            random_state=SPLIT_SEED)
train_x, val_x, train_y, val_y = train_test_split(all_train_x, all_train_y, 
                                                  test_size=VAL_OVER_TRAIN,
                                                  random_state=SPLIT_SEED)

num_classes = all_y.shape[1]

# Fit the scaler on the training split only, then apply it to val/test.
scaler = sklearn.preprocessing.StandardScaler()
train_x = scaler.fit_transform(train_x)
val_x = scaler.transform(val_x)
test_x = scaler.transform(test_x)
    
# Model architecture: 3x (dense = 50, dropout) 

print("Full Data, Label shapes = ", all_x.shape, ", ", all_y.shape)
print("All data finite? ", np.isfinite(all_x).all())
print("All labels finite? ", np.isfinite(all_y).all())

### Algorithm & Hyperparameter Selection

Each algorithm's cell below defines a list of hyperparameter sets; every set in those lists will be run.

In [0]:
################################
# Configurable constant thingy #
################################
RHC_TRIALS = 1
#################################

# Hyperparams to test
RHC_hyperparams = [
    # all no better than random?
    # {"max_iters": 1000, "learning_rate": 0.05, "early_stopping": True, "clip_max": 100, "max_attempts": 500000,},
    # {"max_iters": 1000, "learning_rate": 0.1, "early_stopping": True, "clip_max": 10**10, "max_attempts": 100,},
    # {"max_iters": 1000, "learning_rate": 1, "early_stopping": True, "clip_max": 10**10, "max_attempts": 200,},
    # {"max_iters": 1000, "learning_rate": 10, "early_stopping": True, "clip_max": 10**10, "max_attempts": 1000,},
]

# Tag every hyperparameter set with its algorithm, then wrap each one in a
# run specification (name, number of trials, parameters).
for r_p in RHC_hyperparams:
    r_p.update(algorithm="random_hill_climb")
RHC_metadict = [
    {"name": "RHC", "trials": RHC_TRIALS, "params": r_p}
    for r_p in RHC_hyperparams
]

In [0]:
################################
# Configurable constant thingy #
################################
SA_TRIALS = 1
#################################

# Hyperparams to test
SA_hyperparams = [
    {"max_iters": 2000, "learning_rate": 0.01, "schedule": mlrose.GeomDecay(1, .99, .01),
     "early_stopping": True, "clip_max": 100, "max_attempts": 100,},
   #  {"max_iters": 2000, "learning_rate": 0.01, "schedule": mlrose.ExpDecay(1, .01, .01),
   #   "early_stopping": True, "clip_max": 100, "max_attempts": 100,},
]

# Tag every hyperparameter set with its algorithm, then wrap each one in a
# run specification (name, number of trials, parameters).
for s_p in SA_hyperparams:
    s_p.update(algorithm="simulated_annealing")
SA_metadict = [
    {"name": "SA", "trials": SA_TRIALS, "params": s_p}
    for s_p in SA_hyperparams
]

In [0]:
################################
# Configurable constant thingy #
################################
GA_TRIALS = 1
#################################

# Hyperparams to test
GA_hyperparams = [
    {"max_iters": 1000, "learning_rate": 0.1, "pop_size": 200, "mutation_prob": 0.1,
     "early_stopping": True, "clip_max": 5, "max_attempts": 100,},
    # {"max_iters": 1000, "learning_rate": 0.1, "pop_size": 100, "mutation_prob": 0.3,
    #  "early_stopping": True, "clip_max": 5, "max_attempts": 100,},
    # {"max_iters": 1000, "learning_rate": 0.1, "pop_size": 2000, "mutation_prob": 0.2,
    #  "early_stopping": True, "clip_max": 5, "max_attempts": 100,},
]

# Tag every hyperparameter set with its algorithm, then wrap each one in a
# run specification (name, number of trials, parameters).
for g_p in GA_hyperparams:
    g_p.update(algorithm="genetic_alg")
GA_metadict = [
    {"name": "GA", "trials": GA_TRIALS, "params": g_p}
    for g_p in GA_hyperparams
]

### Evaluate all sets of hyperparameters and graph losses

In [0]:
def eval_algos(train_tuple, val_tuple, algo_specs, architecture=[50, 50, 50], verbose=True):
    """Train and score an mlrose neural network for every run specification.

    For each spec, ``spec["trials"]`` independent networks are built from
    ``spec["params"]``, fitted on the training data and scored on both the
    training and the validation data; the per-trial accuracies are averaged.

    Args:
        train_tuple: ``(train_x, train_y)`` used for fitting and train accuracy.
        val_tuple: ``(val_x, val_y)`` used for validation accuracy.
        algo_specs: List of dicts with keys ``"name"``, ``"trials"`` and
            ``"params"`` (keyword arguments for ``mlrose.NeuralNetwork``).
            The caller's list is deep-copied and never modified.
        architecture: Hidden-layer sizes, passed to every network as
            ``hidden_nodes``. Each spec gets its own copy of the list.
        verbose: When true, progress and results are printed.

    Returns:
        List with one ``[average train accuracy, average val accuracy]`` pair
        per spec, in the order of ``algo_specs``.
    """

    def print_v(*args):
        # Print only when the caller asked for verbose output.
        if verbose:
            print(*args)

    train_x, train_y = train_tuple
    val_x, val_y = val_tuple

    acc_list = []

    # Work on a private copy so the caller's specs are left untouched.
    algo_specs = copy.deepcopy(algo_specs)

    for s in algo_specs:
        # Fresh list per spec: nothing shares (or can mutate) the default argument.
        s["params"]["hidden_nodes"] = list(architecture)

    for index, algo_spec in enumerate(algo_specs):
        print_v("\n========================================================")
        print_v(algo_spec["name"] + " " + str(index))
        print_v("Hyperparams ", algo_spec["params"])

        start_time = time.time()

        num_trials = algo_spec["trials"]
        train_accuracies = []
        val_accuracies = []

        for _ in range(num_trials):
            network = mlrose.NeuralNetwork(**algo_spec["params"])
            network.fit(train_x, train_y)

            # nan_to_num swaps NaN/inf predictions for finite numbers so that
            # scoring does not fail on them.
            train_predicts = np.nan_to_num(network.predict(train_x))
            train_accuracies.append(accuracy_score(train_y, train_predicts))
            val_predicts = np.nan_to_num(network.predict(val_x))
            val_accuracies.append(accuracy_score(val_y, val_predicts))

        avg_train_accuracy = np.average(train_accuracies)
        avg_val_accuracy = np.average(val_accuracies)
        acc_list.append([avg_train_accuracy, avg_val_accuracy])

        end_time = time.time()
        print_v("--------------------------------------------------------")
        print_v("Total time spent training is " + str(end_time - start_time) + " seconds")
        # Report the averages over trials. Indexing the per-trial lists with the
        # spec index would be out of range whenever index >= num_trials.
        print_v("Average train accuracy achieved was " + str(avg_train_accuracy))
        print_v("Average val accuracy achieved was " + str(avg_val_accuracy))

    return acc_list

In [0]:
# Run every configured RHC, SA and GA specification. `c` receives one
# [average train accuracy, average val accuracy] pair per specification.
c = eval_algos((train_x, train_y), (val_x, val_y), RHC_metadict + SA_metadict + GA_metadict)
print("C is ", c)

### Graph the performance of any set of hyperparameters vs num iterations

In [0]:
# Iteration vs Time Code
# Each entry sweeps one hyperparameter (`pkey`) of the first spec in `baseline`
# over np.arange(min_val, max_val, pstep) and plots train/val accuracy.
# `tform` converts each swept value before use (defaults to int) and `trials`
# is the number of trials per value (defaults to 1).
size_changer_params = [
    # {"baseline": RHC_metadict, 
    #  "pkey": "max_iters", "flavor": "Training Epochs",
    #  "tform": (lambda x: int(x)), "trials": 1,
    #  "min_val": 1, "max_val": 501, "pstep": 100},
    
    # {"baseline": SA_metadict, 
    #  "pkey": "max_iters", "flavor": "Training Epochs",
    #  "tform": (lambda x: int(x)), "trials": 1,
    # "min_val": 1, "max_val": 501, "pstep": 100},
    
    # {"baseline": SA_metadict, 
    #  "pkey": "max_iters", "flavor": "Training Iterations",
    #  "min_val": 1, "max_val": 501, "pstep": 100},

    {"baseline": GA_metadict, 
     "pkey": "max_iters", "flavor": "Training Epochs",
     "tform": (lambda x: int(x)), "trials": 1,
    "min_val": 1, "max_val": 501, "pstep": 100},
]

for index, cp in enumerate(size_changer_params):

    param_range = np.arange(cp["min_val"], cp["max_val"], cp["pstep"])
    train_accs = [None] * len(param_range)
    val_accs = [None] * len(param_range)
    for learner_index, pval in enumerate(param_range):
        params = copy.deepcopy(cp["baseline"][0])
        params["params"][cp["pkey"]] = cp.get("tform", int)(pval)
        params["trials"] = cp.get("trials", 1)
        
        res = eval_algos((train_x, train_y), (val_x, val_y), [params], verbose=True)
        # res[0] is the [train, val] accuracy pair for the single spec passed
        # in; store each half at this sweep position (learner_index, not the
        # outer `index`, which identifies the sweep configuration).
        train_accs[learner_index], val_accs[learner_index] = res[0]
        
    # Graphing the Results!!!
    plt.figure(index + 1)
    #ax = plt.subplot(len(changer_params), 1, index + 1)
    plt.title(cp["baseline"][0]["name"] + ": Average Fitness vs " + cp["flavor"])
    plt.xlabel(cp["flavor"])
    plt.ylabel('Average Accuracy')
    # plt.ylim(ymin=0)
    # avg_fitnesses = np.transpose(np.array(avg_fitnesses))
    plt.plot(param_range, train_accs, label="Train")
    plt.plot(param_range, val_accs, label="Val")
    # The legend must be created after the labelled lines exist.
    pylab.legend(loc='lower right')
    plt.show()


GA 0
Hyperparams  {'max_iters': 1, 'learning_rate': 0.1, 'pop_size': 200, 'mutation_prob': 0.1, 'early_stopping': True, 'clip_max': 5, 'max_attempts': 100, 'algorithm': 'genetic_alg', 'hidden_nodes': [50, 50, 50]}


  fx = np.exp(x)/np.reshape(np.sum(np.exp(x), axis=1), [len(x), 1])
  fx = np.exp(x)/np.reshape(np.sum(np.exp(x), axis=1), [len(x), 1])


In [0]:
# The automated plotting cell above is known to be buggy, so the accuracies
# printed during training were transcribed by hand and are plotted here.
param_range = np.arange(1, 501, 100)


# RHC results
# NAME = "RHC"
# train_accs = np.array([.483, .5039, .495, .505, .4911])
# val_accs = np.array([.492, .489, .505, .502, .484])

# Simulated Annealing Results
NAME = "SA"
train_accs = np.array([.495, .5039, .497, .497, .49675])
val_accs = np.array([.495, .487, .505, .496, .48875])

#####################
# Graph the things! #
#####################

# Draw on the current axes (created on demand), one labelled line per series.
ax = plt.gca()
ax.set_title(NAME + ": Accuracy vs Iterations")
ax.set_xlabel("Number training iterations")
ax.set_ylabel('Accuracy')
# plt.ylim(ymin=0)
# avg_fitnesses = np.transpose(np.array(avg_fitnesses))
for series, series_label in ((train_accs, "Train"), (val_accs, "Val")):
    ax.plot(param_range, series, label=series_label)
ax.legend(loc='lower right')
plt.show()
