$$
\begin{aligned}
    &\textbf{1: Load necessary libraries}\\
    &\textbf{2: Load Dataset}\\
    &\textbf{2.1: Read in dataset from location}\\
    &\textbf{2.2: Format dataset into tensors}
\end{aligned}
$$

In [16]:
# import libraries
import torch
from torch import nn
import matplotlib.pyplot as plt


In [17]:
# Create the XOR training data.
# Each row of x_train is one two-element input; the trailing comment on the row
# is the target value stored at the same position in y_train.
x_train = torch.tensor([[0., 0.],  # 0
                        [0., 1.],  # 1
                        [1., 0.],  # 1
                        [1., 1.]],  # 0
                       dtype=torch.float32)

# Targets, reshaped to a column vector with one row per input row
y_train = torch.tensor([0., 1., 1., 0.], dtype=torch.float32).reshape(-1, 1)

# Every input row must have exactly one target row
assert y_train.shape == (x_train.shape[0], 1)


$$
\begin{aligned}
    \textbf{------------------------- GPU STUFF BEGIN -------------------------}
\end{aligned}
$$

In [18]:
# Neural Network: fully connected network with three hidden layers and ReLU
# activations between them.
dim_input  = 2    # number of features per input row
dim_hidden = 10   # width shared by all three hidden layers
dim_output = 1    # number of values predicted per input row

# The layers are passed positionally, so their names inside the Sequential are
# the indices 0..6 (e.g. "0.weight", "6.bias").
model = nn.Sequential(nn.Linear(dim_input, dim_hidden),   # INPUT -> HL1
                      nn.ReLU(),                          # Activation function

                      nn.Linear(dim_hidden, dim_hidden),  # HL1 -> HL2
                      nn.ReLU(),                          # Activation function

                      nn.Linear(dim_hidden, dim_hidden),  # HL2 -> HL3
                      nn.ReLU(),                          # Activation function

                      nn.Linear(dim_hidden, dim_output)   # HL3 -> OUTPUT
                      )

In [19]:
# HYPER PARAMETERS & PARAMETERS

# Step size handed to the optimizer
learning_rate = 0.05
# Loss function: squared error, summed (not averaged) over all elements
loss_fn = nn.MSELoss(reduction="sum")
# Optimizer, i.e. the rule used to update the model's parameters: SGD
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Weight initializer for the model. It is meant to be passed to
# `model.apply(init_normal)`, which calls it once for every submodule.
def init_normal(module):
    """Re-initialize a linear layer in place; leave every other module alone.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01; the bias, when the layer has one, is set to zero.

    Args:
        module: the module to initialize. Only ``nn.Linear`` instances
            (including subclasses) are modified.
    """
    # NOTE(review): the training log recorded in this notebook stays at a loss
    # of ~1.0 for tens of thousands of steps; the very small std used here is a
    # likely contributor -- confirm before changing it.
    if isinstance(module, nn.Linear):
        nn.init.normal_(module.weight, mean=0, std=0.01)
        # nn.Linear(..., bias=False) stores bias as None, which zeros_ cannot handle
        if module.bias is not None:
            nn.init.zeros_(module.bias)

# Run init_normal on the model's submodules so the Linear layers receive the
# custom initialization. Because this is the last expression of the cell, its
# return value (the model) is echoed as the cell output.
model.apply(init_normal)

# Optional debugging aid (disabled): collect the submodules by name.
# named_layers = dict(model.named_modules())

# print(f"{named_layers}")



Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): ReLU()
  (6): Linear(in_features=10, out_features=1, bias=True)
)

In [20]:
# TRAINING LOOP: forward pass -> loss -> backward pass -> parameter update

MAX_STEPS = 200000  # upper bound on the number of optimization steps
LOG_EVERY = 1000    # print the loss on the last step of every block of this size
LOSS_TOL  = 1e-10   # stop early once the loss magnitude falls below this value

for t in range(MAX_STEPS):
    # Forward pass: propagate the training inputs through the model to get the predictions
    y_pred = model(x_train)
    # Loss between predictions and targets, computed with the configured loss_fn
    loss = loss_fn(y_pred, y_train)
    # Read the scalar once as a Python float; it is used for both logging and the stop test
    loss_value = loss.item()

    if t % LOG_EVERY == LOG_EVERY - 1:
        print(t, loss_value)

    # If the loss is below the tolerance we stop: the model is considered 'complete', i.e. done training.
    # This says nothing about how well the model generalizes (it could be overfit, fail on test data, etc.)
    if abs(loss_value) < LOSS_TOL:
        break

    optimizer.zero_grad()  # reset the gradients to 0 before running the backward pass
    loss.backward()        # backward pass: compute gradients of the loss w.r.t. the parameters
    optimizer.step()       # update the parameters using the gradients


999 1.0
1999 1.0
2999 1.0
3999 1.0
4999 1.0
5999 1.0
6999 1.0
7999 1.0
8999 1.0
9999 1.0
10999 1.0
11999 1.0
12999 0.9999999403953552
13999 1.0
14999 1.0
15999 1.0
16999 0.9999999403953552
17999 0.9999999403953552
18999 1.0
19999 1.0
20999 0.9999999403953552
21999 1.0
22999 1.0
23999 1.0
24999 1.0
25999 1.0
26999 1.0
27999 1.0
28999 1.0
29999 0.9999999403953552
30999 0.9999999403953552
31999 0.9999999403953552
32999 0.9999999403953552
33999 0.9999999403953552
34999 0.9999999403953552
35999 0.9999999403953552
36999 0.9999998807907104
37999 0.9999998807907104
38999 0.9999998807907104
39999 0.9999998807907104
40999 0.9999998211860657
41999 0.9999999403953552
42999 0.9999998807907104
43999 0.9999998807907104
44999 0.9999998211860657
45999 0.9999998211860657
46999 0.9999998211860657
47999 0.9999997615814209
48999 0.9999997615814209
49999 0.9999997615814209
50999 0.9999997615814209
51999 0.9999997019767761
52999 0.9999996423721313
53999 0.9999995231628418
54999 0.9999995231628418
55999 0.999

In [21]:
# TESTING on the training data

# Look at the output of the model: each training input is propagated forward
# and its prediction is printed next to the expected target.
# The forward passes run under torch.no_grad() because no gradients are needed
# here; the outputs therefore carry no autograd history and can be printed directly.

print("TEST INPUT AND OUTPUTS\n")
with torch.no_grad():
    for data, expected in zip(x_train, y_train):
        y_out = model(data)
        print(f"INPUT:           {data}")
        print(f"OUTPUT:          {y_out}")
        print(f"EXPECTED OUTPUT: {expected}")
        print("*****************")


TEST INPUT AND OUTPUTS

INPUT:           tensor([0., 0.])
OUTPUT:          tensor([7.1190e-06])
EXPECTED OUTPUT: tensor([0.])
*****************
INPUT:           tensor([0., 1.])
OUTPUT:          tensor([1.])
EXPECTED OUTPUT: tensor([1.])
*****************
INPUT:           tensor([1., 0.])
OUTPUT:          tensor([1.])
EXPECTED OUTPUT: tensor([1.])
*****************
INPUT:           tensor([1., 1.])
OUTPUT:          tensor([-7.0222e-06])
EXPECTED OUTPUT: tensor([0.])
*****************


In [22]:
# Print the current values of every trainable parameter, labelled with its name in the model
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # skip parameters that are excluded from training
    print(name, param.data)


0.weight tensor([[-0.0067,  0.0368],
        [ 0.0713, -0.0723],
        [ 0.3800,  0.1526],
        [ 0.4153, -0.4118],
        [ 0.6244, -0.6194],
        [ 0.0445, -0.0450],
        [-0.5899,  0.7333],
        [-0.0398,  0.0899],
        [ 0.1429,  0.0515],
        [ 0.0029, -0.0237]])
0.bias tensor([-2.5033e-06, -2.1187e-03, -1.7685e-01, -3.7735e-03, -6.8647e-03,
        -6.4822e-04,  2.6566e-09,  3.4920e-10, -7.2259e-02, -2.9091e-03])
2.weight tensor([[-1.9690e-02,  6.8175e-03,  1.2018e-03, -1.4364e-02,  6.3322e-04,
          4.7306e-03, -1.3504e-03, -1.6164e-04, -3.1412e-03, -1.1250e-02],
        [ 2.7424e-02,  5.5043e-02, -2.7445e-01,  3.7794e-01,  5.5608e-01,
          1.2650e-02,  6.0597e-01,  6.1696e-02, -9.6136e-02, -8.9583e-03],
        [ 1.1469e-02,  4.7591e-02, -1.7515e-01,  2.1405e-01,  3.6802e-01,
          2.0722e-02,  3.9708e-01,  3.7993e-02, -5.0554e-02,  2.3663e-03],
        [ 2.9068e-02,  5.5069e-02, -1.9417e-01,  3.0798e-01,  4.3575e-01,
          5.7007e-02,  4.4

In [25]:
#torch.save(model.state_dict(),"xor.pt")

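# The (disabled) code below is used for GPU acceleration: it selects the first
# CUDA device when one is available and falls back to the CPU otherwise.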

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# print(device)

cuda:0
