In [1]:
# Import modules and data
import pandas as pd
import numpy as np

# Load the raw encounter table. Missing values in this file show up as the literal
# string "?" (see the `weight` column in the preview), not as NaN.
df = pd.read_csv("data/diabetic_data.csv")
df.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,...,No,Steady,No,No,No,No,No,Ch,Yes,NO


In [2]:
# Flag encounters where any of the three recorded diagnoses is a diabetes code
# (a code starting with "250"), then drop the raw diagnosis columns because they
# are too scattered to be used directly.
import re

DIABETES_REGEX = re.compile("250")


def isDiabetes(s):
    """Return True when the diagnosis code `s` is a string starting with "250"."""
    return isinstance(s, str) and DIABETES_REGEX.match(s) is not None


# na=False makes a missing code count as "not diabetes" instead of propagating NaN
# into the boolean OR (which would break the integer cast below).
df["primary_diag"] = (df["diag_1"].str.contains("^250", na=False) |
                      df["diag_2"].str.contains("^250", na=False) |
                      df["diag_3"].str.contains("^250", na=False)).astype("int")
df.drop(["diag_1", "diag_2", "diag_3"], axis=1, inplace=True)
df.groupby("primary_diag").size()

primary_diag
0    63742
1    38024
dtype: int64

In [3]:
# Discard columns that are not used as features:
#  * encounter_id, patient_nbr           -> identifiers
#  * weight, payer_code, medical_specialty -> too sparse
# (diag_1, diag_2, diag_3 were too specific and have already been dropped above.)
df = df.drop(
    columns=["encounter_id", "patient_nbr", "weight", "payer_code", "medical_specialty"]
)
df.head()

Unnamed: 0,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,...,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted,primary_diag
0,Caucasian,Female,[0-10),6,25,1,1,41,0,1,...,No,No,No,No,No,No,No,No,NO,1
1,Caucasian,Female,[10-20),1,1,7,3,59,0,18,...,Up,No,No,No,No,No,Ch,Yes,>30,1
2,AfricanAmerican,Female,[20-30),1,1,7,2,11,5,13,...,No,No,No,No,No,No,No,Yes,NO,1
3,Caucasian,Male,[30-40),1,1,7,2,44,1,16,...,Up,No,No,No,No,No,Ch,Yes,NO,1
4,Caucasian,Male,[40-50),1,1,7,1,51,0,8,...,Steady,No,No,No,No,No,Ch,Yes,NO,1


In [4]:
# Keep a row only when BOTH conditions hold:
#  * the gender is known
#  * the discharge disposition is not one of the death / hospice codes
df = df[
    (df["gender"] != "Unknown/Invalid")
    & ~df["discharge_disposition_id"].isin([11, 13, 14, 19, 20, 21])
]

df.shape

(99340, 43)

In [5]:
# Normalize continuous columns to [0, 1] (min-max scaling)
cols_continuous = ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications',
       'number_outpatient', 'number_emergency', 'number_inpatient','number_diagnoses']

for col in cols_continuous:
    lo, hi = df[col].min(), df[col].max()
    span = hi - lo
    if span != 0:
        df[col] = (df[col] - lo) / span
    else:
        # A constant column would otherwise become 0/0 = NaN everywhere.
        df[col] = 0.0

# Normalize numerically-categorical columns (i.e. age):
# each decade bracket "[a-b)" is mapped to its midpoint scaled to [0, 1].
AGE_MAPPING = {"[{}-{})".format(i * 10, (i + 1) * 10): i * 0.1 + 0.05 for i in range(10)}
df["age"] = df["age"].apply(lambda s: AGE_MAPPING[s])

# Manipulate categorical columns
# (names are the post-rename ones, see df.rename at the end of this cell)
cols_categorical = ['race', 'gender',
        'admission_type', 'admission_source',
        'max_glu_serum', 'A1Cresult',
        'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
        'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
        'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
        'tolazamide', 'insulin',
        'glyburide-metformin', 'glipizide-metformin',
        'glimepiride-pioglitazone', 'metformin-rosiglitazone',
        'metformin-pioglitazone', 'change', 'diabetesMed',
        'examide', 'citoglipton']

# Create an "Unknown" category for race
df["race"] = df["race"].replace("?", "Unknown")

# Consolidate admissions types: ids 1-3 keep a name, everything else is "Other"
ADMISSION_TYPE_MAPPING = {
    1: "Emergency",
    2: "Urgent",
    3: "Elective"
}
df["admission_type_id"] = df["admission_type_id"].apply(lambda s: ADMISSION_TYPE_MAPPING.get(s, "Other"))

# Discharge disposition is either going home OK (id 1) or not going home OK
df["discharge_disposition_id"] = (df["discharge_disposition_id"] == 1).astype("int")

# Admission source is either Emergency, Referral, or Other
ADMISSION_SOURCE_MAPPING = {
    1: "EmergencyRoom",
    7: "Referral"
}
# FIXME(review): this reads "admission_type_id", which was replaced just above by the
# strings "Emergency"/"Urgent"/"Elective"/"Other". No integer key of
# ADMISSION_SOURCE_MAPPING can match, so every row becomes "Other" and the feature
# carries no information. The intended input is almost certainly
# df["admission_source_id"]. It is left unchanged here on purpose: correcting it adds
# one-hot columns, and downstream code depends on the resulting feature count (see
# N_INPUTS), so the change has to be validated end-to-end.
# NOTE(review): the labels also look swapped relative to the dataset's ID mapping
# (1 = physician referral, 7 = emergency room) -- confirm before enabling the fix.
df["admission_source_id"] = df["admission_type_id"].apply(lambda s: ADMISSION_SOURCE_MAPPING.get(s, "Other"))

df.rename(columns={
    "admission_type_id": "admission_type",
    "discharge_disposition_id": "discharged_home",
    "admission_source_id": "admission_source"}, inplace=True)
df.head()

Unnamed: 0,race,gender,age,admission_type,discharged_home,admission_source,time_in_hospital,num_lab_procedures,num_procedures,num_medications,...,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted,primary_diag
0,Caucasian,Female,0.05,Other,0,Other,0.0,0.305344,0.0,0.0,...,No,No,No,No,No,No,No,No,NO,1
1,Caucasian,Female,0.15,Emergency,1,Other,0.153846,0.442748,0.0,0.2125,...,Up,No,No,No,No,No,Ch,Yes,>30,1
2,AfricanAmerican,Female,0.25,Emergency,1,Other,0.076923,0.076336,0.833333,0.15,...,No,No,No,No,No,No,No,Yes,NO,1
3,Caucasian,Male,0.35,Emergency,1,Other,0.076923,0.328244,0.166667,0.1875,...,Up,No,No,No,No,No,Ch,Yes,NO,1
4,Caucasian,Male,0.45,Emergency,1,Other,0.0,0.381679,0.0,0.0875,...,Steady,No,No,No,No,No,Ch,Yes,NO,1


In [6]:
# Handle categorical columns...
#  1. Remove categorical columns that are almost uniform (>99% of entries are one thing)
#  2. Split other columns into multiple 0/1 indicator columns
n_rows = df.shape[0]
cols_insignificant = []
cols_significant = []
for col in cols_categorical:
    # Count of each distinct value (missing values are not counted)
    freqs = df[col].value_counts()

    # Remove this column
    if (freqs / float(n_rows) > 0.99).any():
        cols_insignificant.append(col)

    else:
        # Sanity check
        assert len(freqs) > 1
        cols_significant.append(col)

        # get_dummies only expands string/object columns, so force string type
        df[col] = df[col].astype(str)

# Extract output
# dtype is pinned to uint8: newer pandas defaults to bool indicators, and a frame that
# mixes float and bool columns turns into an object array under .to_numpy(), which the
# network code cannot consume. uint8 matches the historical default.
df_output = pd.get_dummies(df["readmitted"], dtype=np.uint8)
df.drop(["readmitted"], axis=1, inplace=True)

df_cat = pd.get_dummies(df[cols_significant], drop_first=False, dtype=np.uint8)
df_input = pd.concat([df, df_cat], axis=1)

# The original categorical columns are superseded by their indicator columns
df_input.drop(cols_insignificant + cols_significant, axis=1, inplace=True)

print("Insignificant: {}".format(cols_insignificant))
print("Significant: {}".format(cols_significant))
df_input.head()

Insignificant: ['admission_source', 'nateglinide', 'chlorpropamide', 'acetohexamide', 'tolbutamide', 'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'glyburide-metformin', 'glipizide-metformin', 'glimepiride-pioglitazone', 'metformin-rosiglitazone', 'metformin-pioglitazone', 'examide', 'citoglipton']
Significant: ['race', 'gender', 'admission_type', 'max_glu_serum', 'A1Cresult', 'metformin', 'repaglinide', 'glimepiride', 'glipizide', 'glyburide', 'pioglitazone', 'rosiglitazone', 'insulin', 'change', 'diabetesMed']


Unnamed: 0,age,discharged_home,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,...,rosiglitazone_Steady,rosiglitazone_Up,insulin_Down,insulin_No,insulin_Steady,insulin_Up,change_Ch,change_No,diabetesMed_No,diabetesMed_Yes
0,0.05,0,0.0,0.305344,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,1,1,0
1,0.15,1,0.153846,0.442748,0.0,0.2125,0.0,0.0,0.0,0.533333,...,0,0,0,0,0,1,1,0,0,1
2,0.25,1,0.076923,0.076336,0.833333,0.15,0.047619,0.0,0.047619,0.333333,...,0,0,0,1,0,0,0,1,0,1
3,0.35,1,0.076923,0.328244,0.166667,0.1875,0.0,0.0,0.0,0.4,...,0,0,0,0,0,1,1,0,0,1
4,0.45,1,0.0,0.381679,0.0,0.0875,0.0,0.0,0.0,0.266667,...,0,0,0,0,1,0,1,0,0,1


In [7]:
# Re-attach the one-hot targets to the right of the features and give them
# an OUTPUT_ prefix.
df = pd.concat([df_input, df_output], axis=1).rename(
    columns={"<30": "OUTPUT_<30", ">30": "OUTPUT_>30", "NO": "OUTPUT_NO"}
)
df.head()

Unnamed: 0,age,discharged_home,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,...,insulin_No,insulin_Steady,insulin_Up,change_Ch,change_No,diabetesMed_No,diabetesMed_Yes,OUTPUT_<30,OUTPUT_>30,OUTPUT_NO
0,0.05,0,0.0,0.305344,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,1,1,0,0,0,1
1,0.15,1,0.153846,0.442748,0.0,0.2125,0.0,0.0,0.0,0.533333,...,0,0,1,1,0,0,1,0,1,0
2,0.25,1,0.076923,0.076336,0.833333,0.15,0.047619,0.0,0.047619,0.333333,...,1,0,0,0,1,0,1,0,0,1
3,0.35,1,0.076923,0.328244,0.166667,0.1875,0.0,0.0,0.0,0.4,...,0,0,1,1,0,0,1,0,0,1
4,0.45,1,0.0,0.381679,0.0,0.0875,0.0,0.0,0.0,0.266667,...,0,1,0,1,0,0,1,0,0,1


In [8]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pickle
import sys
import os

# Hyperparameters
# Number of network inputs = every column of the assembled frame that is not a one-hot
# target. Derived instead of hard-coded so it cannot drift from the preprocessing
# (it evaluates to 67 for the run recorded in this notebook).
_output_cols = [c for c in df.columns if str(c).startswith("OUTPUT_")]
N_INPUTS = df.shape[1] - len(_output_cols)

# Downstream code splits arrays as [:, :N_INPUTS] / [:, N_INPUTS:], which is only valid
# when the targets are the trailing columns.
assert list(df.columns[N_INPUTS:]) == _output_cols, "OUTPUT_* columns must be the last columns"

In [9]:
# Hold out 30% of the rows, then cut that hold-out in half:
# 70% training / 15% validation / 15% test. Seeds are fixed for reproducibility.
df_valid_test = df.sample(frac=0.3, random_state=0xda)
df_training = df.drop(index=df_valid_test.index)

df_valid = df_valid_test.sample(frac=0.5, random_state=0xdb)
df_test = df_valid_test.drop(index=df_valid.index)

(df_training.shape[0], df_valid.shape[0], df_test.shape[0])

(69538, 14901, 14901)

In [10]:
class TrainingBatches:
    """Iterator yielding class-balanced (inputs, targets) mini-batches.

    `nn.training_data` is a sequence of 2-D arrays, one per output class. Every
    step takes `batch_size` consecutive rows from each array, wrapping around
    arrays that are shorter than the longest one, and stacks them vertically.
    Smaller classes are therefore repeated (oversampled). Iteration ends once the
    longest array has been traversed.

    Each item is a pair `(rows[:, :N_INPUTS], rows[:, N_INPUTS:])`, i.e. the
    feature columns and the remaining (target) columns.
    """

    def __init__(self, nn, batch_size=16):
        """
        Args:
            nn: object exposing `training_data` (sequence of 2-D arrays).
            batch_size: number of rows drawn from EACH array per step.
        """
        self.nn = nn
        self.batch_size = batch_size
        # Stop after one pass over the largest class
        self.bounds = max(d.shape[0] for d in nn.training_data)
        self.it = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.it >= self.bounds:
            raise StopIteration

        output = np.concatenate([self.getSlice(d, self.it, self.batch_size) for d in self.nn.training_data])
        self.it += self.batch_size
        return output[:, :N_INPUTS], output[:, N_INPUTS:]

    # Code must work with Python 2 and 3
    next = __next__

    def getSlice(self, array, start, length):
        """Return exactly `length` rows of `array` starting at `start`, cycling
        through the rows as often as needed.

        Modular indexing (rather than a single wrap-around) guarantees the
        requested number of rows even when `length` exceeds the array length, so
        a very small class still contributes a full share to every batch.

        Raises:
            ValueError: if `array` has no rows.
        """
        n_rows = array.shape[0]
        if n_rows == 0:
            raise ValueError("cannot draw a batch from an empty array")
        indices = (start + np.arange(length)) % n_rows
        return np.take(array, indices, axis=0)

In [11]:
class CategoricalPreprocessor:
    """Collapse the one-hot OUTPUT_* columns according to a labelling scheme.

    Schemes (`output_categories`):
      * "three": keep OUTPUT_<30, OUTPUT_>30 and OUTPUT_NO untouched.
      * "any":   OUTPUT_<30 and OUTPUT_>30 are summed into a new OUTPUT_ANY
                 column (appended last) and removed.
      * "rapid": OUTPUT_>30 is folded into OUTPUT_NO and removed, leaving
                 OUTPUT_<30 vs OUTPUT_NO.
    Any other value leaves the frame unmodified and is treated like "rapid"
    when splitting rows by class.
    """

    def __init__(self, df, output_categories):
        self.df = df
        self.output_categories = output_categories
        self.is_modified = False

    def modifyDatafile(self):
        """Return the frame with targets collapsed; the work is done once on a copy
        of the frame passed to the constructor and cached afterwards."""
        if not self.is_modified:
            self.df = self.df.copy()
            self.is_modified = True
            frame = self.df
            scheme = self.output_categories

            if scheme == "any":
                frame["OUTPUT_ANY"] = frame["OUTPUT_<30"] + frame["OUTPUT_>30"]
                frame.drop(["OUTPUT_<30", "OUTPUT_>30"], axis=1, inplace=True)
            elif scheme == "rapid":
                frame["OUTPUT_NO"] = frame["OUTPUT_>30"] + frame["OUTPUT_NO"]
                frame.drop(["OUTPUT_>30"], axis=1, inplace=True)

        return self.df

    def getArraysByOutput(self):
        """Return a tuple of NumPy arrays, one per target class, each holding the
        rows whose indicator for that class equals 1."""
        if not self.is_modified:
            self.modifyDatafile()

        scheme = self.output_categories
        if scheme == "three":
            class_columns = ("OUTPUT_<30", "OUTPUT_>30", "OUTPUT_NO")
        elif scheme == "any":
            class_columns = ("OUTPUT_ANY", "OUTPUT_NO")
        else:
            class_columns = ("OUTPUT_<30", "OUTPUT_NO")

        return tuple(self.df[self.df[name] == 1].to_numpy() for name in class_columns)

In [12]:
class SparseDense(tf.keras.layers.Dense):
    """Dense layer whose kernel is multiplied element-wise by a 0/1 mask.

    The mask is stored as a non-trainable weight (`sparsity_matrix_tensor`) so it
    is part of `get_weights()` / `set_weights()`; `sparsity_matrix` is a NumPy
    mirror of the last mask written by `makeSparse`. Entries set to 0 remove the
    corresponding connection from the forward pass.
    """

    def __init__(self,
            units, activation=None, use_bias=True,
            kernel_initializer='glorot_uniform', bias_initializer='zeros',
            kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None,
            kernel_constraint=None, bias_constraint=None, **kwargs):

        # Keyword arguments keep this independent of the base-class parameter order.
        super(SparseDense, self).__init__(
            units,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)

        # An initializer instance (not the class) is what add_weight expects.
        self.sparsity_matrix_initializer = tf.keras.initializers.Ones()

    def build(self, input_shape):
        """Create the Dense weights plus an all-ones mask of the kernel's shape."""
        super(SparseDense, self).build(input_shape)
        self.sparsity_matrix_tensor = self.add_weight(
            name="sparsity_matrix",
            shape=self.kernel.shape,
            initializer=self.sparsity_matrix_initializer,
            regularizer=None,
            constraint=None,
            dtype=self.dtype,
            trainable=False)
        self.sparsity_matrix = np.ones(tuple(int(d) for d in self.kernel.shape))

    def call(self, inputs):
        """Forward pass: inputs @ (kernel * mask) [+ bias] [-> activation]."""

        # Mask with the weight tensor, NOT the NumPy mirror: a NumPy array is baked
        # in as a constant when Keras traces the layer into a graph, so later
        # pruning (or a mask loaded through set_weights) would be silently ignored.
        masked_kernel = self.kernel * self.sparsity_matrix_tensor
        output = tf.keras.backend.dot(inputs, masked_kernel)
        if self.use_bias:
            output = tf.keras.backend.bias_add(output, self.bias, data_format="channels_last")
        if self.activation is not None:
            output = self.activation(output)
        return output

    def makeSparse(self, p):
        """Zero out the fraction `p` of the still-active weights with the smallest
        absolute value.

        Args:
            p: fraction (0..1) of the currently unmasked connections to remove.

        Returns:
            The layer's sparsity after pruning: the share of kernel entries that
            are now masked out.
        """
        kernel = tf.keras.backend.get_value(self.kernel)

        # Start from the live mask so pruning is cumulative even when the mask was
        # restored through set_weights() rather than by a previous call here.
        mask = np.array(tf.keras.backend.get_value(self.sparsity_matrix_tensor), dtype=float)

        # Flat (row-major) positions of the connections that are still active
        active = np.flatnonzero(mask > 0)

        # Stable sort keeps row-major order among equal magnitudes
        order = np.argsort(np.abs(kernel).ravel()[active], kind="stable")
        num_to_remove = int(np.ceil(len(active) * p))
        num_to_remove = min(max(num_to_remove, 0), len(active))
        mask.flat[active[order[:num_to_remove]]] = 0

        self.sparsity_matrix = mask
        tf.keras.backend.set_value(self.sparsity_matrix_tensor, mask)

        return 1 - float(len(active) - num_to_remove) / int(kernel.shape[0] * kernel.shape[1])

In [13]:
class NeuralNetwork:
    """Feed-forward classifier together with its training/validation/test arrays.

    The first hidden layer is a `SparseDense` (prunable); further hidden layers
    and the output layer are ordinary Dense layers. Hidden activations are a
    sigmoid whose input is scaled by `activation_param` ("steepness").
    """

    def __init__(self, df, df_validation, df_testing, layer_sizes, output_categories, dropout, batch_size):
        """
        Args:
            df, df_validation, df_testing: frames whose first N_INPUTS columns are
                features and whose remaining columns are OUTPUT_* indicators.
            layer_sizes: sequence of hidden layer widths (at least one).
            output_categories: labelling scheme passed to CategoricalPreprocessor.
            dropout: dropout rate applied after every hidden activation.
            batch_size: batch size used by `train`.

        Side effects: writes the validation and test arrays to
        data/validation-<scheme>.pickle and data/testing-<scheme>.pickle.
        """
        self.dropout = dropout
        self.output_categories = output_categories
        self.batch_size = batch_size
        self.layer_sizes = layer_sizes

        # Parameter to horizontally compress the sigmoid activation
        self.activation_param = 1.0

        self.sparsity = 0.0

        # Prepare training data (one array per output class)
        training_processor = CategoricalPreprocessor(df, output_categories)
        self.training_data = training_processor.getArraysByOutput()

        # Prepare training batches: draw 32 rows per class per step so the
        # concatenated training set is class-balanced
        training_in_out = list(iter(TrainingBatches(self, 32)))
        self.training_in = np.concatenate([i for i, o in training_in_out])
        self.training_out = np.concatenate([o for i, o in training_in_out])

        # Prepare validation data (not rebalanced)
        validation_processor = CategoricalPreprocessor(df_validation, output_categories)
        self.validation_data = validation_processor.modifyDatafile().to_numpy()
        self.validation_in = self.validation_data[:, :N_INPUTS]
        self.validation_out = self.validation_data[:, N_INPUTS:]

        with open("data/validation-{}.pickle".format(output_categories), "wb") as output_file:
            pickle.dump({"in": self.validation_in, "out": self.validation_out}, output_file)

        # Prepare testing data (not rebalanced)
        testing_processor = CategoricalPreprocessor(df_testing, output_categories)
        self.testing_data = testing_processor.modifyDatafile().to_numpy()
        self.testing_in = self.testing_data[:, :N_INPUTS]
        self.testing_out = self.testing_data[:, N_INPUTS:]

        with open("data/testing-{}.pickle".format(output_categories), "wb") as output_file:
            pickle.dump({"in": self.testing_in, "out": self.testing_out}, output_file)

        self.classifier = self.makeClassifier()


    def parametrizedSigmoid(self):
        """Return an activation computing sigmoid(activation_param * x)."""

        def sigmoid(x):
            # In the original Tensorflow code, x is a Tensor
            # For efficiency, x will be scalar-multiplied by the sigmoid
            # parameter and then passed to tf.math.sigmoid
            scaled = tf.math.scalar_mul(self.activation_param, x)
            return tf.math.sigmoid(scaled)

        return sigmoid


    def makeClassifier(self):
        """Build and compile a new Keras model; resets `self.sparse_layers`."""

        self.sparse_layers = []

        # Create layers
        classifier = tf.keras.Sequential()

        # Only the first hidden layer is prunable
        sparse_layer = SparseDense(self.layer_sizes[0], kernel_initializer="random_normal", input_dim=N_INPUTS)
        self.sparse_layers.append(sparse_layer)
        classifier.add(sparse_layer)
        classifier.add(tf.keras.layers.Activation(self.parametrizedSigmoid()))
        classifier.add(tf.keras.layers.Dropout(self.dropout))

        for size in self.layer_sizes[1:]:
            hidden_layer = tf.keras.layers.Dense(size, kernel_initializer="random_normal")
            classifier.add(hidden_layer)
            classifier.add(tf.keras.layers.Activation(self.parametrizedSigmoid()))
            classifier.add(tf.keras.layers.Dropout(self.dropout))

        # One output unit per class array in the training data
        output_layer = tf.keras.layers.Dense(len(self.training_data), kernel_initializer="random_normal")
        classifier.add(output_layer)
        classifier.add(tf.keras.layers.Softmax())

        # Compile model
        # NOTE(review): binary_crossentropy on a softmax output is only a natural
        # fit for the two-class schemes; confirm before using "three".
        classifier.compile(optimizer="adam", loss="binary_crossentropy",
                                metrics=["accuracy", tf.keras.metrics.AUC(name="auc")])

        return classifier


    def train(self, epochs=20):
        """Fit the classifier with early stopping on validation accuracy.

        Prints one summary line per epoch and returns the Keras History object.
        """
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_accuracy", mode="max", patience=5, restore_best_weights=False)

        class Logger(tf.keras.callbacks.Callback):
            """Prints the epoch metrics together with sparsity and steepness."""

            def __init__(self, sparsity, steepness):
                super(Logger, self).__init__()
                self.sparsity = sparsity
                self.steepness = steepness
                self.epoch_number = 1

            def on_epoch_end(self, epoch, logs=None):
                logs = logs or {}
                stats = ", ".join(["{} = {:.3f}".format(k, v) for k, v in logs.items()])
                print("Epoch {} (Sparsity = {}, Steepness = {}): {}".format(
                        self.epoch_number, self.sparsity, self.steepness, stats))
                self.epoch_number += 1

        logger = Logger(self.sparsity, self.activation_param)

        return self.classifier.fit(self.training_in, self.training_out,
                                   validation_data=(self.validation_in, self.validation_out),
                                   batch_size=self.batch_size,
                                   epochs=epochs,
                                   callbacks=[early_stopping, logger], verbose=0)


    def evaluate(self):
        """Evaluate on the validation and test sets.

        Returns:
            dict with "validation_eval" / "testing_eval" (the values returned by
            Keras `evaluate`) and "validation_confusion" / "testing_confusion"
            (square matrices indexed [actual class][predicted class]).
        """

        def confusionMatrix(predicted, actual):
            matrix = np.zeros((actual.shape[1], actual.shape[1]))
            # Index of the largest entry per row (first one on ties)
            predicted_encodings = np.argmax(predicted, axis=1)
            actual_encodings = np.argmax(actual, axis=1)

            for predicted_int, actual_int in zip(predicted_encodings, actual_encodings):
                matrix[actual_int][predicted_int] += 1

            return matrix

        # Keras progress output is silenced with verbose=0. Swapping sys.stdout for
        # os.devnull and restoring sys.__stdout__ leaks a file handle and, inside a
        # notebook, permanently detaches print() from the cell output.
        output = {}

        output["validation_eval"] = self.classifier.evaluate(
            self.validation_in, self.validation_out, batch_size=512, verbose=0)
        output["testing_eval"] = self.classifier.evaluate(
            self.testing_in, self.testing_out, batch_size=512, verbose=0)
        output["validation_confusion"] = confusionMatrix(
            self.classifier.predict(self.validation_in, batch_size=512, verbose=0), self.validation_out)
        output["testing_confusion"] = confusionMatrix(
            self.classifier.predict(self.testing_in, batch_size=512, verbose=0), self.testing_out)

        return output


    def steepen(self):
        """Double the sigmoid steepness, rebuilding the model but keeping weights."""
        # Preserve old parameters
        params = self.getParams()

        # Construct a new model (the activation closes over activation_param)
        self.activation_param *= 2
        self.classifier = self.makeClassifier()

        # Apply old parameters
        self.setParams(params)


    def configStr(self):
        """Return a one-line description of the network configuration."""
        return "Layers: {}; Output Categories: {}; Dropout: {}; Batch Size: {}".format(
            self.layer_sizes, self.output_categories, self.dropout, self.batch_size)


    def getParams(self):
        """Return the weight lists of every Dense-derived layer, in model order."""
        return [layer.get_weights() for layer in self.classifier.layers if isinstance(layer, tf.keras.layers.Dense)]


    def setParams(self, params):
        """Load weight lists produced by `getParams` into the Dense-derived layers."""
        layers = [layer for layer in self.classifier.layers if isinstance(layer, tf.keras.layers.Dense)]
        for layer, param in zip(layers, params):
            layer.set_weights(param)


    def makeSparse(self, p):
        """Prune fraction `p` of the remaining weights in every sparse layer and
        return (and store) the list of resulting per-layer sparsities."""
        self.sparsity = [layer.makeSparse(p) for layer in self.sparse_layers]
        return self.sparsity

In [14]:
# Constructor arguments shared by every NeuralNetwork created in this notebook.
# The three splits are stored as copies of the frames built above.
HYPERPARAMETERS = dict(
    df=df_training.copy(),
    df_validation=df_valid.copy(),
    df_testing=df_test.copy(),
    layer_sizes=(70,),
    output_categories="rapid",
    dropout=0.1,
    batch_size=64,
)


def newNeuralNetwork():
    """Build a fresh NeuralNetwork from HYPERPARAMETERS."""
    return NeuralNetwork(**HYPERPARAMETERS)

In [15]:
class SparseTraining:
    """Runs the iterative prune-and-steepen experiment.

    Constructing an instance performs the whole experiment: `n_cycles` cycles are
    run on the given network, and each cycle's results are pickled to
    sparsity_pickles/sparsity-checkpoint-<i>.pickle as soon as it finishes.
    """


    ## Dummy class to make data more structured and break up logical flow of process
    class Cycle:
        """One train -> prune -> evaluate step, plus an optional steepening sweep."""

        def __init__(self, nn):

            self.output = {}

            ## Train until the accuracy of the validation data decays
            self.output["training_history"] = nn.train(100).history

            ## Increase the sparsity (reduce number of weights by 20%)
            self.output["final_sparsity"] = nn.makeSparse(0.2)

            ## Store final weights and biases and sparsity matrix
            self.output["final_params"] = nn.getParams()

            ## Store final evaluation after increase of sparsity
            self.output["final_eval"] = nn.evaluate()


        def steepen(self):
            """Retrain a copy of the pruned network at 9 successively doubled
            sigmoid steepness values, recording each stage under "steepening".
            The network used for the main pruning loop is not modified."""

            ## Create a new neural network and initialize to old parameters
            nn = newNeuralNetwork()
            nn.setParams(self.output["final_params"])
            nn.sparsity = self.output["final_sparsity"]

            self.output["steepening"] = []

            ## Steepen the activation function and train
            for i in range(9):
                nn.steepen()
                training_history = nn.train(100).history
                self.output["steepening"].append({
                    "training_history": training_history,
                    "final_params": nn.getParams(),
                    "final_eval": nn.evaluate(),
                    "steepness": nn.activation_param
                })


        def serialize(self):
            """Return the plain dict of results collected for this cycle."""
            return self.output


    def __init__(self, nn, n_cycles=30):
        """
        Args:
            nn: the NeuralNetwork to train and prune in place.
            n_cycles: number of prune cycles. Each removes 20% of the remaining
                weights, so the default of 30 leaves about 0.8**30 (roughly 0.1%)
                of them.
        """

        self.nn_config = nn.classifier.get_config()

        # Train 30 cycles (by default) until sparsity is approx. 99.9%
        self.results = []
        for i in range(n_cycles):
            cycle = SparseTraining.Cycle(nn)
            cycle.steepen()
            self.results.append(cycle)

            with open("sparsity_pickles/sparsity-checkpoint-{}.pickle".format(i), "wb") as output_file:
                pickle.dump(cycle.serialize(), output_file)


    def serialize(self):
        """Return all cycle results plus the scalar hyperparameters and model config."""

        # Each entry is read from its own key (the data frames are deliberately
        # left out of the serialized hyperparameters).
        hyperparameter_keys = ("layer_sizes", "output_categories", "dropout", "batch_size")

        return {
            "results": [cycle.serialize() for cycle in self.results],
            "hyperparameters": {key: HYPERPARAMETERS[key] for key in hyperparameter_keys},
            "nn_config": self.nn_config}

In [16]:
import os
# Refuse to run when results from a previous run are still in place, so existing
# checkpoints are never overwritten; otherwise create the output directory.
if os.path.exists("sparsity_pickles"):
    raise ValueError("The output directory has not been vacated!")
os.makedirs("sparsity_pickles")

In [17]:
# Build the network from HYPERPARAMETERS and run the whole experiment.
# SparseTraining does all of its work in its constructor and writes one checkpoint
# pickle per cycle into sparsity_pickles/, so this cell is the long-running one.
nn = newNeuralNetwork()
sparsity_trainer = SparseTraining(nn)

Epoch 1 (Sparsity = 0.0, Steepness = 1.0): loss = 0.677, accuracy = 0.574, auc = 0.603, val_loss = 0.677, val_accuracy = 0.592, val_auc = 0.612
Epoch 2 (Sparsity = 0.0, Steepness = 1.0): loss = 0.664, accuracy = 0.594, auc = 0.633, val_loss = 0.702, val_accuracy = 0.560, val_auc = 0.562
Epoch 3 (Sparsity = 0.0, Steepness = 1.0): loss = 0.661, accuracy = 0.599, auc = 0.640, val_loss = 0.641, val_accuracy = 0.658, val_auc = 0.714
Epoch 4 (Sparsity = 0.0, Steepness = 1.0): loss = 0.660, accuracy = 0.602, auc = 0.643, val_loss = 0.659, val_accuracy = 0.624, val_auc = 0.670
Epoch 5 (Sparsity = 0.0, Steepness = 1.0): loss = 0.660, accuracy = 0.602, auc = 0.643, val_loss = 0.694, val_accuracy = 0.563, val_auc = 0.588
Epoch 6 (Sparsity = 0.0, Steepness = 1.0): loss = 0.659, accuracy = 0.602, auc = 0.645, val_loss = 0.681, val_accuracy = 0.583, val_auc = 0.620
Epoch 7 (Sparsity = 0.0, Steepness = 1.0): loss = 0.658, accuracy = 0.604, auc = 0.647, val_loss = 0.655, val_accuracy = 0.617, val_auc 

In [18]:
import pickle
# Persist the results of every cycle together with the hyperparameters and the
# model config (the dict built by SparseTraining.serialize).
with open("sparsity_pickles/sparsity.pickle", "wb") as output_file:
    pickle.dump(sparsity_trainer.serialize(), output_file)