In [16]:
#>1#(71524626):(71524626)
import scikitplot as skplt
import matplotlib.pyplot as plt
#<1#(71524626)~%(448899864) #>1#(85844285):(85844285)
from tensorflow.keras.losses import sparse_categorical_crossentropy
#<1#(85844285)~%(-1952786418) #>1#(33299708):(33299708)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
#<1#(33299708)~%(533338258) #>1#(16183209):(16183209)
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
#<1#(16183209)~%(696278483)
# Leave for imports

ModuleNotFoundError: No module named 'scikitplot'

Run the following !pip3 cells to install scikit-plot and tensorflow which are needed for this sandbox.

In [None]:
# Install scikit-plot, the package behind the `scikitplot` import that failed in the imports cell.
# NOTE(review): `!pip3` runs in a shell; confirm it installs into the same environment the kernel uses.
!pip3 install scikit-plot

In [None]:
# Install TensorFlow, which supplies the `tensorflow` / `tensorflow.keras` imports used in this notebook.
# NOTE(review): `!pip3` runs in a shell; confirm it installs into the same environment the kernel uses.
!pip3 install tensorflow

Welcome to automating neural networks with Arctic Fox! The focus of these automations is to make developing AI / ML solutions more akin to how we think about AI / ML: get data, build the network, train the model, and view the results.

# Automations with Neural Network Classifier


We all love AI! Well, most of us... And it's amazing how much easier it is to develop AI solutions today than in the past. But what we found is that whenever we take on a new project, try to recycle an old notebook, etc., there are always more edits / refactoring than we expected. So, we figured it would be easier if a lot of it was automated!

To do this, we created 4 automations - Data, NeuralNetwork (NN), Train, and Visualize. We placed them each below, with some description, and pre-populated some parameters that worked for us. Play around with it and enjoy!

##### Note
The examples in codespace use simple CSVs and neural networks due to processing power constraints. Automations exist for convolutional neural networks as well. However, in non-GPU environments, training is too slow to show meaningful examples.

### Data, Load CSV

The first automation will load in the data. In this case, our source is a csv. Other times, the source could be a library in an imported source, a collection of images, etc. Additionally, we can tell it what to predict - we could tell it the name of a column or the column index. Our example uses the popular wine dataset, and since there are no headers, we give it the column index. 

Additionally, by providing column index 0, Arctic Fox will look at the data and determine that the desired model is a classifier. 

In [None]:
#[Data wine.csv --predict 0]#@16183209 #>1#(16183209):(16183209)
# Load the wine data set. The notebook text states that wine.csv has no header
# row, so header=None is required: with the default header inference pandas
# would consume the first sample as column names and silently drop it.
wine = pd.read_csv('wine.csv', header=None)
wineAllFeatures = wine.copy()

# Column 0 holds the class to predict as 1, 2 or 3; map it to the zero-based
# indices that tf.one_hot expects.
column0ToNumber = {
    1: 0, 2: 1, 3: 2
}

wineAllFeatures.iloc[:,0] = wineAllFeatures.iloc[:,0].apply(lambda cell : column0ToNumber[cell])

# Separate the label column from the features; the remaining columns become a
# plain NumPy array.
wineAllLabels = wineAllFeatures.iloc[:,0]
wineAllFeatures.drop(wineAllFeatures.columns[0], axis=1, inplace=True)
wineAllFeatures = np.array(wineAllFeatures)


# Inverse of column0ToNumber: turns a class index back into the original label.
numberToWineAllLabels = {
    0: 1, 1: 2, 2: 3
}

# Hold out 20% of the rows as the test set.
wineTrainingFeatures, wineTestFeatures, wineTrainingLabels, wineTestLabels = train_test_split(wineAllFeatures, wineAllLabels, test_size=0.2)

# One-hot encode the class indices into 3-wide vectors.
wineTrainingLabels = tf.one_hot(wineTrainingLabels, 3)
wineTestLabels = tf.one_hot(wineTestLabels, 3)
#<1#(16183209)~%(352366057)

### Build Neural Network

Next, we use the NeuralNetwork automation, or NN for short. We have provided some example parameters, but if you didn't have these, you could always just use #[NN --help] to see the available options. `--denseStart` and `--denseEnd` specify the number of nodes in the first layer and in the second-to-last layer (the last one before the output). `--denseLayers` specifies how many dense layers should be in the model.

Play around with the parameters to see how the model changes.  

In [None]:
#[NN --denseLayers 20 --denseStart 96 --denseEnd 32 --dropOutRatio 1:5]#@33299708 #>1#(33299708):(33299708)
# Create the model
# Node counts of the ReLU layers, in order, from the first layer (96) down to
# the last layer before the output (32).
hiddenLayerWidths = [
    96, 90, 84, 79, 75,
    70, 66, 62, 58, 55,
    52, 49, 46, 43, 40,
    38, 36, 34, 32,
]

model = Sequential()

for layerPosition, layerWidth in enumerate(hiddenLayerWidths, start=1):
    if layerPosition == 1:
        # Only the first layer declares the input shape (13 features per sample).
        model.add(Dense(layerWidth, activation='relu', input_shape=(13,)))
    else:
        model.add(Dense(layerWidth, activation='relu'))
    # A dropout layer follows every fifth dense layer (after layers 5, 10 and 15).
    if layerPosition % 5 == 0:
        model.add(Dropout(0.2))

# Output layer: one softmax unit per class.
model.add(Dense(3, activation='softmax'))
#<1#(33299708)~%(-543925969)

In [None]:
#>1#(33299708):(33299708)
# Compile the model
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# accuracy is tracked alongside the loss.
compileSettings = {
    'loss': 'categorical_crossentropy',
    'optimizer': Adam(),
    'metrics': ['accuracy'],
}
model.compile(**compileSettings)
#<1#(33299708)~%(277325532)

In [None]:
#>1#(33299708):(33299708)
# Show the layer-by-layer summary of the compiled model.
model.summary()
#<1#(33299708)~%(1975814818)

### Train Model


Pretty much the only thing to specify when training is how long to train, or the number of epochs. You can always re-run the training, or fit, cell to do more training. Train, retrain, keep training, that's all pretty normal. 

In [None]:
#[Train --epochs 300]#@85844285 #>1#(85844285):(85844285)
# Fit data to model
# Trains one sample per batch for 300 epochs; 20% of the training data is
# held back for validation.
fitSettings = {
    'batch_size': 1,
    'epochs': 300,
    'verbose': 1,
    'validation_split': 0.2,
}
history = model.fit(wineTrainingFeatures, wineTrainingLabels, **fitSettings)
#<1#(85844285)~%(-1821562651)

In [None]:
#>1#(85844285):(85844285)
# Generate generalization metrics
# Evaluate on the held-out test split; index 0 of the result is printed as the
# loss and index 1 as the accuracy.
score = model.evaluate(
    wineTestFeatures,
    wineTestLabels,
    verbose=0,
    batch_size=1,
)
testLoss = score[0]
testAccuracy = score[1]
print(f'Test loss: {testLoss} / Test accuracy: {testAccuracy}')
#<1#(85844285)~%(-1738228711)

### Visualize

Finally, it's always important to visualize a model's performance. Seeing the training loss and accuracy evolve over time lets you get a feel for how the model trained. We think that, for a classifier, the confusion matrix is one of the most telling visuals.

In [None]:
#>1#(71524626):(71524626)
#***Info: Generating graph code for the accuracy and loss for training and validation results
#<1#(71524626)~%(0)
#[Visualize --loss --accuracy --confusionMatrix]#@71524626 #>1#(71524626):(71524626)
#***Plot history: Accuracy
# Plot the per-epoch training and validation accuracy recorded by model.fit.
plt.plot(history.history['accuracy'], color='blue', label='train')
plt.plot(history.history['val_accuracy'], color='orange', label='val')
plt.title('Train and Validation Accuracy History')
# Keras reports accuracy as a fraction between 0 and 1, not a percentage, so
# the axis label carries no '%' unit.
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.show()
#<1#(71524626)~%(126940084)

In [None]:
#>1#(71524626):(71524626)
#***Plot history: Loss
# Draw the per-epoch training and validation loss curves on the same axes.
lossSeries = (
    ('loss', 'blue', 'train'),
    ('val_loss', 'orange', 'val'),
)
for historyKey, lineColor, lineLabel in lossSeries:
    plt.plot(history.history[historyKey], color=lineColor, label=lineLabel)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train and Validation Loss History')
plt.legend()
plt.show()
#<1#(71524626)~%(-52552)

In [None]:
#>1#(71524626):(71524626)
#***Visualize Confusion Matrix for Test Data
# Per-class scores predicted for every test sample.
predictedScores = model.predict(wineTestFeatures, batch_size=1)

# Take the arg-max class index of each row, then map it back to the original
# label through numberToWineAllLabels.
confusionMatrixActual = [numberToWineAllLabels[np.argmax(row)] for row in wineTestLabels]
confusionMatrixPrediction = [numberToWineAllLabels[np.argmax(row)] for row in predictedScores]

# Plot the raw-count matrix first, then the normalized one.
matrixInputs = (confusionMatrixActual, confusionMatrixPrediction)
skplt.metrics.plot_confusion_matrix(*matrixInputs, normalize=False, title='Confusion Matrix for wine.csv')
skplt.metrics.plot_confusion_matrix(*matrixInputs, normalize=True, title='Normalized Confusion Matrix for wine.csv')
#<1#(71524626)~%(1746290697)

That's a wrap on the basics of automating neural networks. The next sandbox makes a small change, and instantly your notebook is set up for regression. Check it out!