# Implementing a neural network in Python

In this project you will learn to build a neural network in Python that can predict
the phases of the Ising model.  
  
The network architecture we propose will consist of an input layer with $L \times L$ neurons,
two hidden layers and an output layer of two neurons. For the output layer we interpret the two neurons as the probability of being in class 0 or 1. This architecture can be easily extended to classifying multiple categories.

In [1]:
%load_ext blackcellmagic

In [2]:
import pickle
import numpy as np

ising_data = pickle.load(open("../data/ising/Ising2DFM_reSample_L40_T=All.pkl", "rb"))
ising_data = np.unpackbits(ising_data).reshape(-1, 1600)
ising_data = ising_data.astype("int")
ising_data[np.where(ising_data == 0)] = -1

T = np.linspace(0.25, 4.0, 16)
T_C = 2.26
labels = np.ones((16, 10000), dtype=np.int8)

for i, T_ in enumerate(T):
    if T_ > T_C:
        labels[i] = np.zeros(10000, dtype=np.int8)
labels = labels.reshape(16 * 10000)


In [3]:
from sklearn.model_selection import train_test_split

train_to_test_ratio = 0.8
X_train, X_test, Y_train, Y_test = train_test_split(ising_data, labels, train_size=train_to_test_ratio)




In [4]:
from scipy.special import expit
from tqdm import tqdm

class BinaryNeuralNetwork:
    def __init__(
        self,
        X_data,
        Y_data,
        epochs=1,
        batch_size=100,
        eta=1e-2,
        lmbd=0.0,
        n_neurons_layer1=100,
        n_neurons_layer2=50,
        n_categories=2,
    ):
        self.X_data_full = X_data
        self.Y_data_full = Y_data

        self.n_inputs = X_data.shape[0]
        self.n_features = X_data.shape[1]
        self.n_neurons_layer1 = n_neurons_layer1
        self.n_neurons_layer2 = n_neurons_layer2
        self.n_categories = n_categories

        self.epochs = epochs
        self.batch_size = batch_size
        self.iterations = self.n_inputs // self.batch_size
        self.eta = eta
        self.lmbd = lmbd

        self.create_biases_and_weights()

    def create_biases_and_weights(self):
        self.weights_layer1 = np.random.randn(self.n_features, self.n_neurons_layer1)
        self.bias_layer1 = np.zeros(self.n_neurons_layer1)

        self.weights_layer2 = np.random.randn(self.n_neurons_layer1, self.n_neurons_layer2)
        self.bias_layer2 = np.zeros(self.n_neurons_layer2)

        self.weights_output = np.random.randn(self.n_neurons_layer2, self.n_categories)
        self.bias_output = np.zeros(self.n_categories)

    def feed_forward(self):
        self.z1 = np.dot(self.X_data, self.weights_layer1) + self.bias_layer1
        self.a1 = expit(self.z1)

        self.z2 = np.dot(self.a1, self.weights_layer2) + self.bias_layer2
        self.a2 = expit(self.z2)

        self.z3 = np.dot(self.a2, self.weights_output) + self.bias_output

        exp_term = np.exp(self.z3)
        self.probabilities = exp_term / np.sum(exp_term, axis=1, keepdims=True)

    def feed_forward_out(self, X):
        z1 = np.dot(X, self.weights_layer1) + self.bias_layer1
        a1 = expit(z1)

        z2 = np.dot(a1, self.weights_layer2) + self.bias_layer2
        a2 = expit(z2)

        z3 = np.dot(a2, self.weights_output) + self.bias_output

        exp_term = np.exp(z3)
        probabilities = exp_term / np.sum(exp_term, axis=1, keepdims=True)
        return probabilities

    def backpropagation(self):
        error_output = self.probabilities
        error_output[range(self.n_inputs), self.Y_data] -= 1
        error_layer2 = (np.dot(error_output, self.weights_output.T) * self.a2 * (1 - self.a2))
        error_layer1 = (np.dot(error_layer2, self.weights_layer2.T) * self.a1 * (1 - self.a1))

        self.weights_output_gradient = np.dot(self.a2.T, error_output)
        self.bias_output_gradient = np.sum(error_output)

        self.weights_layer2_gradient = np.dot(self.a1.T, error_layer2)
        self.bias_layer2_gradient = np.sum(error_layer2)

        self.weights_layer1_gradient = np.dot(self.X_data.T, error_layer1)
        self.bias_layer1_gradient = np.sum(error_layer1)

        if self.lmbd > 0.0:
            self.weights_output_gradient += self.lmbd * self.weights_output
            self.weights_layer2_gradient += self.lmbd * self.weights_layer2
            self.weights_layer1_gradient += self.lmbd * self.weights_layer1

        self.weights_output -= self.eta * self.weights_output_gradient
        self.bias_output -= self.eta * self.bias_output_gradient
        self.weights_layer2 -= self.eta * self.weights_layer2_gradient
        self.bias_layer2 -= self.eta * self.bias_layer2_gradient
        self.weights_layer1 -= self.eta * self.weights_layer1_gradient
        self.bias_layer1 -= self.eta * self.bias_layer1_gradient

    def predict(self, X):
        probabilities = self.feed_forward_out(X)
        return np.argmax(probabilities, axis=1)

    def predict_probabilities(self, X):
        probabilities = self.feed_forward_out(X)
        return probabilities

    def train(self):
        data_indices = np.arange(self.n_inputs)

        for i in range(self.epochs):
            for j in tqdm(range(self.iterations)):
                chosen_datapoints = np.random.choice(
                    data_indices, size=self.batch_size, replace=False
                )

                self.X_data = self.X_data_full[chosen_datapoints]
                self.Y_data = self.Y_data_full[chosen_datapoints]

                self.n_inputs = self.X_data.shape[0]

                self.feed_forward()
                self.backpropagation()


In [5]:
from sklearn.metrics import accuracy_score

epochs = 1
batch_size = 100
n_neurons_layer1 = 100
n_neurons_layer2 = 50
n_categories = 2

eta_vals = np.logspace(-5, 0, 6)
lmbd_vals = np.logspace(-5, 0, 6)

In [6]:
DNN_numpy = np.zeros((len(eta_vals), len(lmbd_vals)), dtype=object)

for i, eta in enumerate(eta_vals):
    for j, lmbd in enumerate(lmbd_vals):
        dnn = BinaryNeuralNetwork(X_train, Y_train, eta=eta, lmbd=lmbd, epochs=1, batch_size=batch_size,
                                  n_neurons_layer1=n_neurons_layer1, n_neurons_layer2=n_neurons_layer2,
                                  n_categories=n_categories)
        dnn.train()
        
        DNN_numpy[i][j] = dnn
        
        test_predict = dnn.predict(X_test)
        
        print("Learning rate  = ", eta)
        print("Lambda = ", lmbd)
        print("Accuracy score on test set: ", accuracy_score(Y_test, test_predict))

100%|██████████| 1280/1280 [00:05<00:00, 237.96it/s]
  2%|▏         | 23/1280 [00:00<00:05, 226.92it/s]

Learning rate  =  1e-05
Lambda =  1e-05
Accuracy score on test set:  0.740875


100%|██████████| 1280/1280 [00:05<00:00, 245.51it/s]
  2%|▏         | 23/1280 [00:00<00:05, 228.36it/s]

Learning rate  =  1e-05
Lambda =  0.0001
Accuracy score on test set:  0.7395625


100%|██████████| 1280/1280 [00:05<00:00, 236.69it/s]
  2%|▏         | 23/1280 [00:00<00:05, 226.62it/s]

Learning rate  =  1e-05
Lambda =  0.001
Accuracy score on test set:  0.76428125


100%|██████████| 1280/1280 [00:05<00:00, 243.33it/s]
  2%|▏         | 25/1280 [00:00<00:05, 249.19it/s]

Learning rate  =  1e-05
Lambda =  0.01
Accuracy score on test set:  0.76409375


100%|██████████| 1280/1280 [00:05<00:00, 239.60it/s]
  2%|▏         | 24/1280 [00:00<00:05, 233.08it/s]

Learning rate  =  1e-05
Lambda =  0.1
Accuracy score on test set:  0.7284375


100%|██████████| 1280/1280 [00:05<00:00, 237.68it/s]
  2%|▏         | 20/1280 [00:00<00:06, 193.17it/s]

Learning rate  =  1e-05
Lambda =  1.0
Accuracy score on test set:  0.76640625


100%|██████████| 1280/1280 [00:05<00:00, 235.79it/s]
  2%|▏         | 26/1280 [00:00<00:04, 253.57it/s]

Learning rate  =  0.0001
Lambda =  1e-05
Accuracy score on test set:  0.90428125


100%|██████████| 1280/1280 [00:06<00:00, 196.00it/s]
  2%|▏         | 26/1280 [00:00<00:04, 253.89it/s]

Learning rate  =  0.0001
Lambda =  0.0001
Accuracy score on test set:  0.88984375


100%|██████████| 1280/1280 [00:05<00:00, 231.18it/s]
  2%|▏         | 26/1280 [00:00<00:04, 255.57it/s]

Learning rate  =  0.0001
Lambda =  0.001
Accuracy score on test set:  0.90571875


100%|██████████| 1280/1280 [00:05<00:00, 248.58it/s]
  2%|▏         | 26/1280 [00:00<00:04, 255.76it/s]

Learning rate  =  0.0001
Lambda =  0.01
Accuracy score on test set:  0.90315625


100%|██████████| 1280/1280 [00:05<00:00, 241.19it/s]
  2%|▏         | 24/1280 [00:00<00:05, 232.28it/s]

Learning rate  =  0.0001
Lambda =  0.1
Accuracy score on test set:  0.90128125


100%|██████████| 1280/1280 [00:05<00:00, 237.34it/s]
  2%|▏         | 26/1280 [00:00<00:04, 253.14it/s]

Learning rate  =  0.0001
Lambda =  1.0
Accuracy score on test set:  0.91278125


100%|██████████| 1280/1280 [00:05<00:00, 231.34it/s]
  2%|▏         | 26/1280 [00:00<00:05, 230.71it/s]

Learning rate  =  0.001
Lambda =  1e-05
Accuracy score on test set:  0.98125


100%|██████████| 1280/1280 [00:05<00:00, 236.91it/s]
  2%|▏         | 24/1280 [00:00<00:05, 237.24it/s]

Learning rate  =  0.001
Lambda =  0.0001
Accuracy score on test set:  0.97765625


100%|██████████| 1280/1280 [00:05<00:00, 224.73it/s]
  2%|▏         | 26/1280 [00:00<00:04, 252.46it/s]

Learning rate  =  0.001
Lambda =  0.001
Accuracy score on test set:  0.97790625


100%|██████████| 1280/1280 [00:05<00:00, 241.52it/s]
  2%|▏         | 25/1280 [00:00<00:05, 248.12it/s]

Learning rate  =  0.001
Lambda =  0.01
Accuracy score on test set:  0.976125


100%|██████████| 1280/1280 [00:05<00:00, 245.31it/s]
  2%|▏         | 26/1280 [00:00<00:04, 253.48it/s]

Learning rate  =  0.001
Lambda =  0.1
Accuracy score on test set:  0.98146875


100%|██████████| 1280/1280 [00:05<00:00, 245.89it/s]
  2%|▏         | 26/1280 [00:00<00:04, 253.23it/s]

Learning rate  =  0.001
Lambda =  1.0
Accuracy score on test set:  0.98978125


100%|██████████| 1280/1280 [00:06<00:00, 207.59it/s]
  2%|▏         | 22/1280 [00:00<00:05, 219.56it/s]

Learning rate  =  0.01
Lambda =  1e-05
Accuracy score on test set:  0.989625


100%|██████████| 1280/1280 [00:05<00:00, 226.13it/s]
  2%|▏         | 23/1280 [00:00<00:05, 222.48it/s]

Learning rate  =  0.01
Lambda =  0.0001
Accuracy score on test set:  0.99053125


100%|██████████| 1280/1280 [00:05<00:00, 222.47it/s]
  2%|▏         | 25/1280 [00:00<00:05, 244.37it/s]

Learning rate  =  0.01
Lambda =  0.001
Accuracy score on test set:  0.9893125


100%|██████████| 1280/1280 [00:05<00:00, 241.75it/s]
  2%|▏         | 26/1280 [00:00<00:04, 252.63it/s]

Learning rate  =  0.01
Lambda =  0.01
Accuracy score on test set:  0.99125


100%|██████████| 1280/1280 [00:05<00:00, 237.67it/s]
  2%|▏         | 26/1280 [00:00<00:05, 250.19it/s]

Learning rate  =  0.01
Lambda =  0.1
Accuracy score on test set:  0.9931875


100%|██████████| 1280/1280 [00:05<00:00, 234.57it/s]
  2%|▏         | 26/1280 [00:00<00:04, 252.62it/s]

Learning rate  =  0.01
Lambda =  1.0
Accuracy score on test set:  0.99140625


100%|██████████| 1280/1280 [00:05<00:00, 241.63it/s]
  2%|▏         | 21/1280 [00:00<00:06, 204.38it/s]

Learning rate  =  0.1
Lambda =  1e-05
Accuracy score on test set:  0.5643125


100%|██████████| 1280/1280 [00:05<00:00, 231.58it/s]
  2%|▏         | 20/1280 [00:00<00:06, 199.34it/s]

Learning rate  =  0.1
Lambda =  0.0001
Accuracy score on test set:  0.522625


100%|██████████| 1280/1280 [00:05<00:00, 237.85it/s]
  2%|▏         | 26/1280 [00:00<00:05, 249.95it/s]

Learning rate  =  0.1
Lambda =  0.001
Accuracy score on test set:  0.5644375


100%|██████████| 1280/1280 [00:05<00:00, 221.14it/s]
  2%|▏         | 20/1280 [00:00<00:06, 195.32it/s]

Learning rate  =  0.1
Lambda =  0.01
Accuracy score on test set:  0.986625


100%|██████████| 1280/1280 [00:05<00:00, 218.09it/s]
  2%|▏         | 25/1280 [00:00<00:05, 239.77it/s]

Learning rate  =  0.1
Lambda =  0.1
Accuracy score on test set:  0.56428125


100%|██████████| 1280/1280 [00:05<00:00, 250.67it/s]
  2%|▏         | 21/1280 [00:00<00:06, 202.48it/s]

Learning rate  =  0.1
Lambda =  1.0
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:06<00:00, 199.26it/s]
  2%|▏         | 22/1280 [00:00<00:05, 210.51it/s]

Learning rate  =  1.0
Lambda =  1e-05
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:07<00:00, 179.80it/s]
  2%|▏         | 23/1280 [00:00<00:05, 225.60it/s]

Learning rate  =  1.0
Lambda =  0.0001
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:06<00:00, 203.62it/s]
  1%|          | 7/1280 [00:00<00:22, 56.73it/s]

Learning rate  =  1.0
Lambda =  0.001
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:07<00:00, 178.96it/s]
  2%|▏         | 23/1280 [00:00<00:05, 228.59it/s]

Learning rate  =  1.0
Lambda =  0.01
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:05<00:00, 238.67it/s]
  1%|▏         | 19/1280 [00:00<00:07, 175.59it/s]

Learning rate  =  1.0
Lambda =  0.1
Accuracy score on test set:  0.43571875


100%|██████████| 1280/1280 [00:05<00:00, 235.41it/s]


Learning rate  =  1.0
Lambda =  1.0
Accuracy score on test set:  0.43571875


In [7]:
from sklearn.neural_network import MLPClassifier

DNN_scikit = np.zeros((len(eta_vals), len(lmbd_vals)), dtype=object)

for i, eta in enumerate(eta_vals):
    for j, lmbd in enumerate(lmbd_vals):
        dnn = MLPClassifier(hidden_layer_sizes=(n_neurons_layer1, n_neurons_layer2), activation='logistic',
                            alpha=lmbd, learning_rate_init=eta, max_iter=100)
        dnn.fit(X_train, Y_train)
        
        DNN_scikit[i][j] = dnn
        
        print("Learning rate  = ", eta)
        print("Lambda = ", lmbd)
        print("Accuracy score on test set: ", dnn.score(X_test, Y_test))



Learning rate  =  1e-05
Lambda =  1e-05
Accuracy score on test set:  0.99240625
Learning rate  =  1e-05
Lambda =  0.0001
Accuracy score on test set:  0.992125
Learning rate  =  1e-05
Lambda =  0.001
Accuracy score on test set:  0.99346875
Learning rate  =  1e-05
Lambda =  0.01
Accuracy score on test set:  0.9931875
Learning rate  =  1e-05
Lambda =  0.1
Accuracy score on test set:  0.99265625
Learning rate  =  1e-05
Lambda =  1.0
Accuracy score on test set:  0.9929375
Learning rate  =  0.0001
Lambda =  1e-05
Accuracy score on test set:  0.99384375
Learning rate  =  0.0001
Lambda =  0.0001
Accuracy score on test set:  0.99259375
Learning rate  =  0.0001
Lambda =  0.001
Accuracy score on test set:  0.9933125
Learning rate  =  0.0001
Lambda =  0.01
Accuracy score on test set:  0.99421875
Learning rate  =  0.0001
Lambda =  0.1
Accuracy score on test set:  0.99525
Learning rate  =  0.0001
Lambda =  1.0
Accuracy score on test set:  0.9934375
Learning rate  =  0.001
Lambda =  1e-05
Accuracy sc