In [9]:
# Import libraries
import numpy as np


In [None]:
import torch

# Train a two-layer fully connected network (linear -> ReLU -> linear) on
# random data, with the forward and backward passes written out by hand.

# Set device and dtype
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float

# Seed PyTorch's global RNG before any random draw so that the data, the
# initial weights and the printed loss curve can be reproduced when the cell
# is re-run (on the same device and PyTorch build).
SEED = 0
torch.manual_seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)                # hidden pre-activation, shape (N, H)
    h_relu = h.clamp(min=0)     # ReLU: negative entries become 0
    y_pred = h_relu.mm(w2)      # prediction, shape (N, D_out)

    # Compute and print loss (sum of squared errors); .item() turns the
    # 0-dim tensor into a plain Python float
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)        # d(loss)/d(y_pred)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    # clone() so that zeroing entries below does not modify grad_h_relu
    grad_h = grad_h_relu.clone()
    # ReLU passes no gradient where its input was negative
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent (in-place on the weight tensors)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2


0 30002776.0
1 23993788.0
2 23883384.0
3 25675426.0
4 26253200.0
5 23683676.0
6 17994168.0
7 11680863.0
8 6779081.5
9 3830075.0
10 2254032.25
11 1447833.25
12 1023366.4375
13 783547.0625
14 634119.75
15 531450.0625
16 454994.0625
17 394746.5
18 345492.90625
19 304373.3125
20 269414.875
21 239448.703125
22 213600.65625
23 191180.9375
24 171613.390625
25 154460.0625
26 139384.796875
27 126112.5
28 114361.3359375
29 103940.90625
30 94657.6796875
31 86371.8203125
32 78952.796875
33 72301.2421875
34 66325.7265625
35 60940.5859375
36 56075.9609375
37 51686.99609375
38 47713.5859375
39 44103.61328125
40 40819.8515625
41 37834.84375
42 35110.484375
43 32619.232421875
44 30338.677734375
45 28246.7421875
46 26326.783203125
47 24561.75390625
48 22937.080078125
49 21438.6484375
50 20056.203125
51 18778.443359375
52 17596.85546875
53 16502.44140625
54 15488.0673828125
55 14547.310546875
56 13673.6923828125
57 12861.4365234375
58 12105.5537109375
59 11401.41796875
60 10745.0595703125
61 10132.613281

In [4]:
import numpy as np

# Plain Python lists: "+" concatenates, and "*" between two lists raises
# TypeError, which is caught and reported instead of stopping the cell.
print("Python list operations:")
a, b = [1, 2, 3], [4, 5, 6]
print("a+b:", a + b)
try:
    list_product = a * b
except TypeError:
    print("a*b has no meaning for Python lists")
else:
    print(list_product)
print()

# NumPy arrays: both "+" and "*" operate element by element.
print("numpy array operations:")
a, b = np.array([1, 2, 3]), np.array([4, 5, 6])
print("a+b:", a + b)
print("a*b:", a * b)

Python list operations:
a+b: [1, 2, 3, 4, 5, 6]
a*b has no meaning for Python lists

numpy array operations:
a+b: [5 7 9]
a*b: [ 4 10 18]


In [7]:
# print('a:')
# print(a)
# print('a.sum(axis=0):', a.sum(axis=0))
# print('a.sum(axis=1):', a.sum(axis=1))

In [8]:
# Broadcasting: the length-3 vector b is added to each row of the 2x3 matrix a.
b = np.array([10, 20, 30])
a = np.array([[1, 2, 3], [4, 5, 6]])
print("a+b:\n", a + b)

a+b:
 [[11 22 33]
 [14 25 36]]


In [29]:
import numpy as np
from numpy import ndarray

def square(x: ndarray) -> ndarray:  # the "-> ndarray" annotation is there for readability
    '''
    Return the input with every element raised to the second power.

    The computation is delegated to np.power, so the result has whatever
    type NumPy produces for the given input.
    '''
    exponent = 2
    return np.power(x, exponent)

def leaky_relu(x: ndarray, alpha: float = 0.2) -> ndarray:
    '''
    Apply "Leaky ReLU" function to each element in ndarray.

    Non-negative elements are returned unchanged; negative elements are
    multiplied by `alpha`.

    Args:
        x: Input values.
        alpha: Slope applied to negative inputs. Must lie in [0, 1].
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        The element-wise maximum of `alpha * x` and `x`.

    Raises:
        ValueError: If `alpha` is outside [0, 1]. The max-based formula
            below only equals Leaky ReLU inside that range.
    '''
    if not 0.0 <= alpha <= 1.0:
        raise ValueError("alpha must be in the range [0, 1]")
    # For 0 <= alpha <= 1: alpha*x <= x when x >= 0, and alpha*x >= x when
    # x < 0, so the maximum picks x or alpha*x exactly as Leaky ReLU requires.
    return np.maximum(alpha * x, x)


In [30]:
square(5)

np.int64(25)

In [31]:
leaky_relu(5)

np.float64(5.0)