In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit

In [None]:
def gen_1Ddata(n_spins, n_sets, J=1):
    """Generate random periodic 1D Ising chains and their energies.

    Every chain consists of `n_spins` spins drawn uniformly from {-1, +1}.
    Its energy is ``-J * sum_i s_i * s_{i-1}``; ``np.roll`` wraps the last
    spin around to the first, so the chain is closed into a ring.

    Parameters
    ----------
    n_spins : int
        Number of spins per chain.
    n_sets : int
        Number of independent chains to draw (may be 0).
    J : scalar, optional
        Coupling constant. Defaults to 1, the value that used to be
        hard-coded inside the function.

    Returns
    -------
    features : ndarray of shape (n_sets, n_spins * n_spins)
        Flattened outer product ``s_i * s_j`` of each chain with itself.
    energies : ndarray of shape (n_sets,)
        Energy of each chain.
    """
    spins = np.random.choice(np.array([-1, 1], dtype=np.int8), size=(n_sets, n_spins))

    # Sum the neighbour products first and scale afterwards: the int8 products
    # are accumulated in the default integer type, so an arbitrary J cannot
    # overflow the 8-bit spin dtype.
    energies = -J * np.sum(spins * np.roll(spins, 1, axis=1), axis=1)

    # Explicit width instead of -1 so that n_sets == 0 reshapes cleanly.
    features = np.einsum('...i,...j->...ij', spins, spins).reshape(n_sets, n_spins * n_spins)
    return features, energies

# Quick sanity check of the generator: 100 chains of 40 spins each,
# then show the shapes of the feature matrix and the energy vector.
input_data, target_data = gen_1Ddata(n_spins=40, n_sets=100)
(input_data.shape, target_data.shape)

In [None]:
from neuralnet import NeuralNet
def test_net(test_inputs, test_targets, net):
    cost = []
    
    for x,y in zip(test_inputs, test_targets):
        zs, outputs = net.feed_forward(x)
        cost.append(np.mean((outputs[-1] - y)**2))
    return np.array(cost)

def batches(inputs, targets, n_batches = 10):
    """Yield shuffled mini-batches of ``(input, target)`` pairs.

    The samples are permuted once (using NumPy's global random state) and
    then cut into `n_batches` contiguous, nearly equal-sized slices.

    Parameters
    ----------
    inputs, targets : array-like of equal length
        Must support ``len()`` and indexing with an integer array
        (e.g. NumPy arrays).
    n_batches : int, optional
        Number of batches to produce. If it exceeds the number of samples,
        some batches are empty.

    Yields
    ------
    list of tuple
        One list of ``(x, y)`` pairs per batch.

    Raises
    ------
    ValueError
        If `inputs` and `targets` differ in length. Because this is a
        generator, the error surfaces when iteration starts.
    """
    # Validate BEFORE indexing. Checking after `targets[order]` can never
    # fire: a longer `targets` is silently truncated to len(inputs) and a
    # shorter one fails with IndexError first.
    if len(inputs) != len(targets):
        raise ValueError('length of inputs and targets must be equal')

    order = np.random.permutation(len(inputs))
    inputs = inputs[order]
    targets = targets[order]

    # n_batches + 1 integer edges delimit n_batches slices covering all samples.
    edges = np.linspace(0, len(inputs), n_batches + 1, dtype=int)
    for start, stop in zip(edges[:-1], edges[1:]):
        yield list(zip(inputs[start:stop], targets[start:stop]))
   

In [None]:
# Problem setup: chain length L, sample count N, coupling J.
# n = L*L equals the flattened feature length that gen_1Ddata produces per chain.
L, N, J = 40, 1000, 1
n = L * L

from sklearn.model_selection import train_test_split

data_input, data_targets = gen_1Ddata(n_spins=L, n_sets=N)

# Hold out 33% of the samples for testing. `temp` keeps the split exactly as
# returned: train inputs, test inputs, train targets, test targets.
temp = train_test_split(data_input, data_targets, test_size=0.33)
input_train, input_test, target_train, target_test = temp

# Shapes of the four pieces of the split.
[np.shape(part) for part in temp]

Find the delta of the output layer $L$:
$$ \delta_j^L = f'(z_j^L) \frac{\partial \mathcal{C}}{\partial a_j^L} $$

Propagate the deltas backwards through each earlier layer $l = L-1, \dots, 1$:
$$ \delta_j^l = \sum_k \delta_k^{l+1} w_{kj}^{l+1} f'(z_j^l)$$

Update the weights and biases with learning rate $\eta$:
$$w_{jk}^l \leftarrow w_{jk}^l - \eta \delta_j^l a_k^{l-1} $$
$$b_{j}^l \leftarrow b_{j}^l - \eta \delta_j^l  $$



For the quadratic cost $\mathcal{C} = \frac{1}{2} \sum_j (a_j^L - t_j)^2$ we have

$$ \frac{\partial \mathcal{C}}{\partial a_j^L} = a_j^L - t_j, $$
and
$$ f'(z_j^l) = f(z_j^l) \left(1 - f(z_j^l)\right) $$
for the sigmoid activation $f(z) = 1/(1 + e^{-z})$.

In [None]:
# Network architecture: input layer followed by a single output node.
# The input width is read from the training data instead of hard-coding 1600
# (= 40 * 40), so it stays correct if the chain length L is changed.
layer_sizes = [input_train.shape[1], 1]
print(layer_sizes)

In [None]:
%load_ext line_profiler

In [None]:
from IPython.display import clear_output

# Train a network with a single 'identity' activation for up to 100 epochs.
# Each epoch makes one pass over 99 freshly shuffled mini-batches and then
# records the average test-set MSE.
i = 0
net = NeuralNet(layer_sizes, act_func=['identity'])

mse = []
eta = 1e-3  # step size handed to net.update_batch

print('epoch, MSE')
for j in range(100):
    b = batches(input_train, target_train, n_batches=99)
    for k, batch in enumerate(b):
        net.update_batch(batch, eta)

    mse.append(np.average(test_net(input_test, target_test, net)))
    print(f'{j:5}  {mse[-1]:.2f}')

    # Bail out as soon as the error or any weight array contains NaN.
    if np.isnan(mse[-1]) or any(np.isnan(w).any() for w in net.weights):
        print('NAN!!! Break! Abort mission!')
        break

$\in$

In [None]:
# Display the current weight arrays held by `net`.
net.weights

In [None]:
# Most recent entry of `mse`, i.e. the last test-set error recorded by the training loop.
mse[-1]

In [None]:
# Plot the recorded test-set MSE values; the x axis is the index in `mse` (one entry per epoch).
plt.plot(mse)

In [None]:
from nielsen_network import Network as nNetwork

# Build a network from the project-local `nielsen_network` module with the same layer
# sizes and run its `backprop` on one sample (presumably as a cross-check of the
# gradients computed by `NeuralNet` - confirm).
# NOTE(review): no earlier top-level cell in the visible notebook assigns `x` or `y`;
# they are first set by `x,y = batch[0]` further down. Under Restart & Run All this
# cell therefore raises NameError unless that cell is moved above it.
net2 = nNetwork(layer_sizes)
net2.backprop(np.atleast_2d(x),np.atleast_2d(y))
# x,y = batch[0]

In [None]:
# Shape of `x` after promotion to at least 2-D, the form passed to `net2.backprop`.
np.atleast_2d(x).shape

In [None]:
# Display the current value of `y`.
y

In [None]:
# Take only the first of 100 shuffled mini-batches (and its index, 0) for inspection.
i, batch = next(enumerate(batches(input_train, target_train, n_batches=100)))
    

In [None]:
# Unpack the first (input, target) pair of `batch` into `x` and `y`, then display both.
x,y = batch[0]
x,y

In [None]:
# Re-create `net` with a sigmoid/identity activation list and train it for 20 epochs,
# plotting the average test-set MSE after each epoch as a single marker.
# NOTE(review): `layer_sizes` as defined above is [1600, 1], i.e. one weight layer, while
# two activation names are passed here (the earlier training cell passes one). Confirm
# how NeuralNet treats the extra entry, or whether a hidden layer was intended.
net = NeuralNet(layer_sizes, act_func = ['sigmoid','identity'])

for k in range(20):
    # One pass over 100 freshly shuffled mini-batches.
    for i, batch in enumerate(batches(input_train, target_train, n_batches = 100)):
        net.update_batch(batch, eta = 1e-2)

    # x position is the epoch index k, y position the mean per-sample test error.
    plt.plot(k, np.average(test_net(input_test, target_test, net)), 'o', markersize = 10)

In [None]:
# Left panel: predicted vs. target energy for the test set. The right panel (ax2)
# is created but left empty by this cell.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[8, 4])
i = 43

# Final-layer output of the network for every test sample, with length-1 axes removed.
test_out = np.squeeze(np.array([net.feed_forward(inp)[1][-1] for inp in input_test]))
ax1.scatter(test_out, target_test)
ax1.set(xlabel='predicted', ylabel='target')

# Diagonal reference line: perfect predictions would lie on it.
ax1.plot([-12, 12], [-12, 12])
ax1.axis('equal')


#ax2.hist(test_out,  alpha = 0.5)
#ax2.hist(target_test,  alpha = 0.5);

In [None]:
# Run `net.backpropagate` on the single sample (x, y); the two returned values are
# stored as `grad_b` and `grad_w` (presumably bias and weight gradients - confirm).
grad_b, grad_w = net.backpropagate(x,y)

In [None]:
# Scratch arrays: a = 0..9 and b = a shifted down by 5.
# Note that this rebinds `b`, which the training cell used for its batch generator.
a = np.arange(10)
b = np.subtract(a, 5)
(a, b)

In [None]:
# def relu(x):
#     return np.maximum(0, x)


# def der_expit(x=None, act=None):
#     if x is None:
#         return act*(1-act)
#     elif act is None:
#         return expit(x)*(1-expit(x))
#     else:
#         raise ValueError('must include either x or act')

# def der_tanh(x = None, act = None):
#     if x is None:
#         return 1 - act**2
#     elif act is None:
#         return 1 - np.tanh(x)**2
#     else:
#         raise ValueError('must include either x or act')


# activation_choices = {'sigmoid':expit,'tanh':np.tanh}
# act_derivatives = {'sigmoid':der_expit,'tanh':der_tanh}

# activation = 'tanh'
# act_func = activation_choices[activation]
# act_deriv = act_derivatives[activation]

In [None]:
# Print the paired entries of `a` and `b`, one pair per line.
# The loop rebinds the notebook-level names `x` and `y`.
for x, y in zip(a, b):
    print(f'{x} {y}')

In [None]:
# def forward(inp):
#     outs = []
#     activations = []
#     z = inp
#     act = act_func(inp)

    
#     for i,n in enumerate(n_nodes[:-1]):
#         z = weights[i] @ np.append(act, 1)  # add a bias
#         act = act_func(z)

#         outs.append(z)
#         activations.append(act)
#     return outs, activations
    
# N_layers = 3
# n_nodes = [20, 10, 5, 1] # input, hidden, ..., hidden, output
# L = len(n_nodes)-2
# eta = 1e-2

# small = 1e-2
# weights = [small*np.random.random(size = (n,m+1)) for n,m in zip(n_nodes[1:], n_nodes[:-1])]
# biases = [small*np.random.random(size = n) for n in n_nodes[1:]]
    
# for inp, target in zip(train_inp, train_targets):
#     outs,activations = forward(inp)

#     deltas = [np.zeros(n) for n in n_nodes[1:]]
#     # print('hei0', list(map(np.shape,deltas)))
#     deltas[L] = act_deriv(outs[-1])*(activations[-1] - target)

#     for i in range(1,L+1):
#         l = L-i
#         deltas[l] = np.einsum('k,kj,j->j', deltas[l+1], weights[l+1][:,:-1],act_deriv(outs[l]))
        
#     for i in range(0,L):
#         l = L-i
#         weights[l] = weights[l] - eta * np.outer(deltas[l], np.append(activations[l-1], 1))

In [None]:
# Display NumPy's `np.c_` helper object (scratch lookup; nothing is computed here).
np.c_