# WELCOME TO PYTORCH

In [4]:
#install these packages
#conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch

#or

#find your download link here (for mac)
#https://pytorch.org/get-started/locally/

In [2]:
import torch
import torchvision
from torchvision import transforms, datasets #these are just a bunch of datasets about Vision tasks that you can use in pytorch 
import torch.nn as nn
import torch.nn.functional as F

### PyTorch 
is an open source machine learning library based on the Torch library, used for applications such as computer vision
and natural language processing, primarily developed by Facebook's AI Research lab. It is free and open-source software 
released under the Modified BSD license.

In [3]:
# torch.tensor is the recommended factory function; the float literals keep the
# float32 dtype that the legacy torch.Tensor constructor produced.
x = torch.tensor([5.0, 3.0])
y = torch.tensor([2.0, 1.0])

# `*` multiplies element by element (it is not a dot product).
print(x * y)

tensor([10.,  3.])


### Tensor
is a generalization of vectors and matrices and is easily understood as a multidimensional array

Neural Network: computing systems vaguely inspired by the biological neural networks that constitute animal brains. An ANN is based on a collection of connected units or nodes called artificial neurons, which loosely model the neurons in a biological brain


Input ::Hidden Layer:: Output
 
 
Input (features): going to have to be numerical valued in some way, could be pixel values which are already numerical values like RGB or grayscale numbers

In [4]:
# A 2 x 5 tensor filled with zeros.
x = torch.zeros(2, 5)
print(x)


tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [5]:
x.size()  # dimensions of x as a torch.Size

torch.Size([2, 5])

In [6]:
# A 2 x 5 tensor of random values drawn uniformly from [0, 1).
y = torch.rand(2, 5)
print(y)

tensor([[0.6045, 0.5997, 0.7304, 0.6306, 0.3642],
        [0.6284, 0.5950, 0.9788, 0.5225, 0.1512]])


Say this tensor is a 2 pixel by 5 pixel image. That is quite small and useless, but for the purposes of the calculation we will leave it
as it is. We need to reshape this 2 x 5 image into a single 1 x 10 row, i.e. "flatten" it.

In [7]:
# Flatten the 2 x 5 tensor into a single row of 10 values.
y = y.view(1, 10)

In [8]:
# A bare name as the last expression makes the notebook display its value.
y

tensor([[0.6045, 0.5997, 0.7304, 0.6306, 0.3642, 0.6284, 0.5950, 0.9788, 0.5225,
         0.1512]])

Everything above is just simple math with arrays. At its core, PyTorch is really just a way to help you do math with arrays. That is 
really what neural networks are. 

## Training and Testing Data

In [9]:
# Both splits use the same preprocessing pipeline: ToTensor wrapped in a Compose.
to_tensor = transforms.Compose([transforms.ToTensor()])

# The empty root string is the location handed to MNIST for its files;
# download=True asks torchvision to fetch them.
train = datasets.MNIST('', train=True, download=True, transform=to_tensor)

test = datasets.MNIST('', train=False, download=True, transform=to_tensor)


### Batchsize
is how many samples we want to pass into the model at a time. Deep learning is best for datasets that have
tons of data. Here we have a very small amount of data. We batch because we want the model to be optimized by going through
the network one batch at a time. This helps with not overfitting the model. The bigger the batch size, the faster
each pass over your data will finish (anywhere between 8 and 64 is typical).


In [10]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 10
DataLoader = torch.utils.data.DataLoader

# Only the training split is shuffled; the test split keeps its stored order.
trainset = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
testset = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Peek at a single batch from the training loader.
data = next(iter(trainset))
print(data)

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0

Always think about how the machine will take the easiest way out. What if 60% of our data is a 3, 20% is a 2 and
10% is a 1? The machine will quickly learn to predict a 3 and then start to get stuck. You want your data
to be BALANCED so that the model doesn't put heavy weight on specific classes.

In [12]:
# Count how often each digit occurs in the training data to check class balance.
counter_dict = {digit: 0 for digit in range(10)}

for _, ys in trainset:
    # bincount tallies a whole batch of labels in one call instead of converting
    # one label tensor at a time; minlength keeps a slot for every digit 0-9.
    batch_counts = torch.bincount(ys, minlength=10).tolist()
    for digit, count in enumerate(batch_counts):
        counter_dict[digit] += count

# Total number of labels seen, used by the percentage cell below.
total = sum(counter_dict.values())

print(counter_dict)

{0: 5923, 1: 6742, 2: 5958, 3: 6131, 4: 5842, 5: 5421, 6: 5918, 7: 6265, 8: 5851, 9: 5949}


In [13]:
# Share of each digit among all counted labels, in percent.
for digit, count in counter_dict.items():
    print(f"{digit}:{count/total*100}")

0:9.871666666666666
1:11.236666666666666
2:9.93
3:10.218333333333334
4:9.736666666666666
5:9.035
6:9.863333333333333
7:10.441666666666666
8:9.751666666666667
9:9.915000000000001


In [14]:
import torch.nn as nn
import torch.nn.functional as F

torch.nn helps us get access to neural network things and various neural network types. We have only been talking about fully 
connected layers but just know there are others out there. torch.nn.functional gives us access to some handy functions that will 
help us with developing the neural network.

In [24]:
class Net(nn.Module):
    """Fully connected classifier with three ReLU hidden layers.

    The defaults reproduce the original architecture: 784 -> 64 -> 64 -> 64 -> 10.

    Args:
        in_features: Number of values in one flattened sample (default 28*28).
        hidden_features: Width of each of the three hidden layers (default 64).
        num_classes: Number of output classes (default 10).
    """

    def __init__(self, in_features=28*28, hidden_features=64, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.fc4 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return log-probabilities of shape (N, num_classes).

        Args:
            x: Input tensor whose element count is a multiple of the configured
                input size, e.g. (N, 784) or (N, 1, 28, 28) with the defaults.
        """
        # Flatten here so callers may pass image-shaped batches directly;
        # input that is already (N, in_features) passes through unchanged.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No ReLU on the last layer: log_softmax turns its raw scores into
        # log-probabilities across the class dimension.
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
   


# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [23]:
# Random stand-in for one 28 x 28 image (values drawn from a standard normal).
X = torch.randn(28, 28)


In [18]:
# Flatten to rows of 784 values; -1 lets view() infer the number of rows.
X = X.view(-1, 28 * 28)

In [19]:
# Forward pass; the bare last expression makes the notebook display the result.
output = net(X)
output

tensor([[-2.3398, -2.2586, -2.3030, -2.2283, -2.4048, -2.3955, -2.2675, -2.2570,
         -2.2576, -2.3305]], grad_fn=<LogSoftmaxBackward>)

### Loss
is a measure of how far off the model is. Even if a model predicts correctly, it doesn't mean it was 100% confident in its 
predictions. The loss is that degree of error, and we want to get as close to 100% accurate as possible. Loss is what we use to optimize our weights
so that the model can actually learn.

In [20]:
import torch.optim as optim

# Intended number of passes over the training data.
EPOCHS = 3
# Step size used by the Adam optimizer.
LEARNING_RATE = 0.001

# Net.forward already ends in log_softmax, so the matching criterion is NLLLoss
# (CrossEntropyLoss would apply log_softmax a second time).
loss_function = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

It is definitely possible to know exactly why there was loss and how to fix it for that exact situation. However, doing so would completely
overfit the model. This is why we use a learning rate to take small steps. If it takes huge steps, it is making too big of
jumps and will never really get anywhere. If it takes too small steps, then it will take forever to actually learn anything.
This is where the idea of a decaying learning rate comes in: it starts with large jumps and moves to smaller and smaller ones as training goes on.

In [22]:
# Train for EPOCHS passes over the training loader. The loop variable is a
# separate name so the EPOCHS constant is not overwritten.
for epoch in range(EPOCHS):
    for data in trainset:
        X, y = data  # X: batch of inputs, y: batch of labels
        net.zero_grad()  # clear gradients accumulated by the previous batch
        output = net(X.view(-1, 784))  # flatten every sample to 784 values
        loss = F.nll_loss(output, y)  # the network outputs log-probabilities
        loss.backward()
        optimizer.step()
    print(loss)  # loss of the last batch processed in this epoch

tensor(0.0003, grad_fn=<NllLossBackward>)
tensor(0.2171, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)


In [27]:
correct = 0
total = 0

# Gradients are not needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for data in testset:
        X, y = data
        output = net(X.view(-1, 784))
        # Compare the whole batch at once instead of looping over single samples:
        # argmax over dim=1 picks the most likely class for every row.
        predictions = torch.argmax(output, dim=1)
        correct += (predictions == y).sum().item()
        total += output.size(0)


print("Accuracy: ", round(correct/total, 3))

Accuracy:  0.963


In [None]:
# Optional: uncomment to display the first image in X (requires matplotlib).
# import matplotlib.pyplot as plt

# plt.imshow(X[0].view(28,28))
# plt.show()

In [28]:
# Predict the class of the first sample currently held in X.
first_sample = X[0].view(-1, 784)
prediction = torch.argmax(net(first_sample)[0])
print(prediction)

tensor(7)
