In [2]:
# perceptron in scikit with single tlu network

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
X = iris.data[:, (2, 3)] # petal length and width
# binary target: 1 where iris.target == 0, otherwise 0.
# the builtin int is used because the np.int alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24 (np.int was only ever an alias for the builtin int)
y = (iris.target == 0).astype(int)

per_clf = Perceptron()
per_clf.fit(X, y)
# predict the class of a single sample with petal length 2 and petal width 0.5
y_pred = per_clf.predict([[2, .5]])
print(y_pred)
# a perceptron does not output a class probability like logistic regression does;
# it makes its prediction with a hard threshold instead

[0]


In [3]:
# we use nonlinear activation functions in MLPs (sigmoid, hyperbolic tangent, ReLU) because
# a composition of linear functions is still linear. by applying one of these nonlinear
# activation functions in each layer, a large enough network can approximate basically any continuous function

# a typical regression mlp follows this format
# input neurons : one per feature
# hidden layers: typically 1 to 5
# neurons per hidden layer : typically 10 to 100
# output neurons : 1 per prediction dimension
# hidden activation : ReLU
# output activation: None, or relu/softplus if positive only outputs or logistic/tanh if bounded outputs
# loss function: MSE or MAE/Huber if outliers


# classification mlp
# input and hidden layers: same as regression
# output neurons: 1 for binary classification, 1 per label for multilabel binary, 1 per class for multiclass
# output layer activation: logistic for binary and multilabel binary, softmax for multiclass
# loss function: cross entropy
# use softmax for multiclass because the classes are exclusive: softmax makes the
# outputs of all output neurons sum to 1. for binary and multilabel binary classification, you can use
# the logistic function on each output neuron, because the labels may not be exclusive



In [6]:
# tensorflow and keras time

import tensorflow as tf
from tensorflow import keras

# report the version of tensorflow and of the keras module imported from it
for module in (tf, keras):
    print(module.__version__)

2.7.0
2.7.0


In [8]:
# now, we will build an image classifier, starting from the fashion mnist dataset

fashion_mnist = keras.datasets.fashion_mnist
# load_data() returns a (train, test) pair, each of which is an (images, labels) pair
train_split, test_split = fashion_mnist.load_data()
X_train_full, y_train_full = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [11]:
# X_train_full.shape
# X_train_full.dtype

# carve a validation set off the front of the full training set and
# scale the pixel intensities by dividing by 255.0
# NOTE(review): X_test is not scaled anywhere in the cells shown here - confirm it is
# divided by 255.0 as well before it is used for evaluation

n_valid = 5000  # number of leading training samples held out for validation
X_valid = X_train_full[:n_valid] / 255.0
X_train = X_train_full[n_valid:] / 255.0
y_valid = y_train_full[:n_valid]
y_train = y_train_full[n_valid:]

# human-readable class names; the label value is used as the index into this list
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]
first_label = y_train[0]
print(class_names[first_label])

Coat


In [12]:
# classification mlp with 2 hidden layers, assembled one layer at a time with add()

model = keras.models.Sequential() # start from an empty sequential model
for layer in (
    keras.layers.Flatten(input_shape=[28, 28]), # turns each 28x28 image into a 1d array
    keras.layers.Dense(300, activation='relu'), # first hidden layer: 300 neurons, relu
    keras.layers.Dense(100, activation='relu'), # second hidden layer: 100 neurons, relu
    keras.layers.Dense(10, activation='softmax'), # output layer: 10 neurons, softmax because the classes are exclusive
):
    model.add(layer)
# every dense layer owns a weight matrix holding the connection weights between its neurons
# and their inputs, plus a vector of bias terms (one per neuron)

In [13]:
# the same network again, this time handing the whole list of layers to the
# Sequential constructor instead of calling add() once per layer
layer_stack = [
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
]
model = keras.models.Sequential(layer_stack)


In [14]:
# print a table of the model's layers with their output shapes and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 300)               235500    
                                                                 
 dense_4 (Dense)             (None, 100)               30100     
                                                                 
 dense_5 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
_________________________________________________________________


In [15]:
# compile the model: choose the optimizer, the loss and the metric to report
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# sparse categorical crossentropy is used because the labels are sparse:
# each label is a single integer class index rather than a one-hot vector