# Introduction to Neural Networks(2)

## Building deep learning models with keras

In [9]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

### Creating a keras model

In [12]:
# 导入模块
import keras
from keras.layers import Dense
from keras.models import Sequential
import pandas as pd

# Load the hourly-wages dataset.
# NOTE(review): the recorded run of this cell stopped with FileNotFoundError for
# this path, so the location of hourly_wages.csv should be confirmed.
df_wages = pd.read_csv('./data/hourly_wages.csv')
# Number of input features: every column except the first one
n_cols = len(df_wages.columns[1:])

# Build the network from a layer list: two ReLU hidden layers (50 and 32 units)
# followed by a single output unit with no activation.
model = Sequential([
    Dense(50, activation='relu', input_shape=(n_cols,)),
    Dense(32, activation='relu'),
    Dense(1),
])

FileNotFoundError: [Errno 2] File ./data/hourly_wages.csv does not exist: './data/hourly_wages.csv'

In [None]:
model.summary()

In [None]:
df_wages.head()
n_cols
df_wages.shape
df_wages.info()

### Compiling and fitting a model
https://keras.io/optimizers/#adam

In [None]:
model.compile(optimizer='adam', loss='mean_squared_error')
print("Loss function: " + model.loss)

In [None]:
history = model.fit(df_wages.iloc[:, 1:], df_wages.iloc[:, 0], epochs=10)

In [None]:
history.epoch

In [None]:
history.history['loss']

In [None]:
history.history

In [None]:
import matplotlib.pyplot as plt

plt.plot(history.epoch, history.history['loss'])
plt.xlabel('Epochs')
plt.ylabel('Mean Squared Error')
plt.title('Training Loss')
plt.show()

In [None]:
model.predict(df_wages.iloc[0:5, 1:])

### Classification models

In [None]:
# Read the all-numeric Titanic table; the features are every column except `survived`
df_titanic = pd.read_csv('./resource/titanic_all_numeric.csv')
X_train_titanic = df_titanic.drop(columns=['survived'])
n_cols = len(X_train_titanic.columns)

In [None]:
df_titanic.head()
df_titanic.info()

In [None]:
# 模块导入
from keras.utils import to_categorical

# Turn the `survived` column into categorical (one column per class) targets
y_train_titanic = to_categorical(df_titanic.survived)

# Classifier: one ReLU hidden layer (32 units) and a 2-unit softmax output
model = Sequential([
    Dense(32, activation='relu', input_shape=(n_cols,)),
    Dense(2, activation='softmax'),
])
# Compile with SGD on categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
# Fit the model
model.fit(X_train_titanic, y_train_titanic, epochs=10)

In [None]:
y_train_titanic

In [None]:
model.history.history.keys()

In [None]:
plt.plot(model.history.epoch, model.history.history['accuracy'])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training Accuracy')
plt.show()

### Using models

**Saving, reloading and using your Model**

In [None]:
import numpy as np
from keras.models import load_model
import os

# Single source of truth for the checkpoint location (used for save and load)
model_path = './models/model_file.h5'
# Make sure the target folder exists: depending on the Keras version, saving to
# HDF5 does not create missing parent directories and fails on a fresh checkout.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

# Reload the saved model and use it for prediction
my_model = load_model(model_path)
predictions = my_model.predict(X_train_titanic)
# Second output column = predicted probability of class 1
probability_true = predictions[:, 1]

In [None]:
my_model.summary()

In [None]:
predictions
probability_true

In [None]:
pred_classes = np.argmax(predictions,axis=1)
pred_classes

In [None]:
from sklearn import metrics
accuracy = metrics.accuracy_score(y_pred=pred_classes,y_true=df_titanic.survived)
accuracy

In [None]:
# loss,accuracy
score = model.evaluate(X_train_titanic, y_train_titanic, verbose=1)
score

## Fine-tuning keras models

In [None]:
def get_new_model(input_shape):
    """Return a fresh, uncompiled Sequential classifier.

    The network stacks two 100-unit ReLU Dense layers and a 2-unit softmax
    output layer; `input_shape` is forwarded to the first layer.
    """
    return Sequential([
        Dense(100, activation='relu', input_shape=input_shape),
        Dense(100, activation='relu'),
        Dense(2, activation='softmax'),
    ])

In [None]:
# Learning rates to compare: very small, moderate, and very large
lr_to_test = [.000001, 0.01, 1]

# Loop over learning rates, training a freshly built model for each one
input_shape = (n_cols,)
for lr in lr_to_test:
    print('\n\nTesting model with learning rate: %f\n'%lr )
    model = get_new_model(input_shape)
    # `learning_rate` is the current keyword; the old `lr` alias is deprecated
    # and rejected by recent Keras releases.
    my_optimizer = keras.optimizers.SGD(learning_rate=lr)
    model.compile(optimizer=my_optimizer, loss='categorical_crossentropy')
    model.fit(X_train_titanic, y_train_titanic, epochs=10)
    print('\n' + '=' * 100)

## Model validation

In [None]:
model = get_new_model(input_shape)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_titanic, y_train_titanic, validation_split=0.3, epochs=10)

### Early Stopping

In [None]:
from keras.callbacks import EarlyStopping

# Early-stopping callback watching val_loss with a patience of 3 epochs;
# restore_best_weights=True asks Keras to roll back to the best weights seen.
early_stopping_monitor = EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True,
    min_delta=0, mode="auto", baseline=None, verbose=1,
)

model = get_new_model(input_shape)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Up to 50 epochs, 30% of the data held out for validation
model.fit(
    X_train_titanic,
    y_train_titanic,
    epochs=50,
    validation_split=0.3,
    callbacks=[early_stopping_monitor],
)

**More experimentation**

In [None]:
# Two candidate architectures to compare on validation loss:
# model_1 is the baseline from get_new_model, model_2 is a narrower, deeper stack.
model_1 = get_new_model(input_shape)
model_2 = Sequential()

model_2.add(Dense(30, activation='relu', input_shape=input_shape))
model_2.add(Dense(50, activation='relu')) 
model_2.add(Dense(30, activation='relu')) 
model_2.add(Dense(2, activation='softmax'))

model_1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit model_1
model_1_training = model_1.fit(X_train_titanic, y_train_titanic, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=0)

# Fit model_2
model_2_training = model_2.fit(X_train_titanic, y_train_titanic, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=0)

# Create the plot; each curve is labelled so the legend tells the models apart
plt.plot(model_1_training.history['val_loss'], 'r', label='model_1')
plt.plot(model_2_training.history['val_loss'], 'b', label='model_2')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()

## PyTorch
### PyTorch compared to NumPy

In [15]:
import torch
import numpy as np

In [16]:
torch.tensor([[2, 3, 5], [1, 2, 9]])

np.array([[2, 3, 5], [1, 2, 9]])

tensor([[2, 3, 5],
        [1, 2, 9]])

array([[2, 3, 5],
       [1, 2, 9]])

In [17]:
torch.rand(2, 2)

np.random.rand(2, 2)

tensor([[0.8948, 0.1103],
        [0.9110, 0.8475]])

array([[0.19089095, 0.68994118],
       [0.31879831, 0.02630596]])

In [18]:
a = torch.rand((3, 5))
a.size()

a = np.random.rand(3, 5) 
a.shape

torch.Size([3, 5])

(3, 5)

### Matrix operations
**PyTorch**

In [19]:
a = torch.rand(2, 2) 
b = torch.rand(2, 2)
a
b

tensor([[0.2021, 0.3002],
        [0.6086, 0.2630]])

tensor([[0.4071, 0.4041],
        [0.8681, 0.9787]])

In [None]:
torch.matmul(a, b)

a[0, 0] * b[0, 0] + a[0, 1] * b[1, 0]

In [None]:
a * b

a[0, 0] * b[0, 0]

**NumPy**

In [20]:
a = np.random.rand(2, 2) 
b = np.random.rand(2, 2)
a
b

array([[0.76774303, 0.12673361],
       [0.9809297 , 0.60655661]])

array([[0.96938192, 0.60063558],
       [0.06316672, 0.61934459]])

In [None]:
np.dot(a, b)

a[0, 0] * b[0, 0] + a[0, 1] * b[1, 0]

In [None]:
np.multiply(a, b)

a[0, 0] * b[0, 0]

### Zeros and Ones
**PyTorch**

In [None]:
a_torch = torch.zeros(2, 2)
a_torch

In [None]:
b_torch = torch.ones(2, 2)
b_torch

In [None]:
c_torch = torch.eye(2)
c_torch

**NumPy**

In [None]:
a_numpy = np.zeros((2, 2))
a_numpy

In [None]:
b_numpy = np.ones((2, 2))
b_numpy

In [None]:
c_numpy = np.identity(2)
c_numpy

### PyTorch to NumPy and vice versa

In [None]:
torch.from_numpy(c_numpy)

In [None]:
c_torch.numpy()

In [21]:
# 3x3 matrix filled with ones
tensor_of_ones = torch.ones(3, 3)

# 3x3 identity matrix
identity_tensor = torch.eye(3)

# Matrix multiplication of tensor_of_ones with identity_tensor
matrices_multiplied = tensor_of_ones @ identity_tensor
print(matrices_multiplied)

# Element-wise multiplication of tensor_of_ones with identity_tensor
element_multiplication = torch.mul(tensor_of_ones, identity_tensor)
print(element_multiplication)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


### Backpropagation by auto-differentiation

In [23]:
x = torch.tensor(-3., requires_grad=True)
y = torch.tensor(5., requires_grad=True)
z = torch.tensor(-2., requires_grad=True)

In [24]:
# Forward pass: f = (x + y) * z, keeping the intermediate sum in q
q = torch.add(x, y)
f = torch.mul(q, z)

# Backward pass: populates .grad on x, y and z
f.backward()

print("Gradient of z is:", str(z.grad))
print("Gradient of y is:", str(y.grad))
print("Gradient of x is:", str(x.grad))

Gradient of z is: tensor(2.)
Gradient of y is: tensor(-2.)
Gradient of x is: tensor(-2.)


In [25]:
# Fixed inputs (created without requires_grad)
a, b = torch.tensor(0.), torch.tensor(1.)

# Six scalar weights, all tracked by autograd
w1, w2, w3, w4, w5, w6 = (
    torch.tensor(value, requires_grad=True)
    for value in (0., 1., 2., 3., 1., 2.)
)

# Two intermediate values feeding one output, then a squared error against 4
c = a * w1 + b * w2
d = a * w3 + b * w4
e = c * w5 + d * w6
l = (e - 4).pow(2)

l.backward()

w5.grad, w6.grad, w1.grad, w2.grad, w3.grad, w4.grad

(tensor(6.), tensor(18.), tensor(0.), tensor(6.), tensor(0.), tensor(12.))

In [26]:
# One input row, a 2x2 hidden weight matrix and a 2x1 output weight matrix
input_data = torch.tensor([[0., 1.]])
w_h1 = torch.tensor([[0., 2.], [1., 3.]], requires_grad=True)
w_h2 = torch.tensor([[1.], [2.]], requires_grad=True)

# Forward pass through both layers
h1 = input_data @ w_h1
p = h1 @ w_h2

# Squared error against the target value 4
l = (p - 4).pow(2)

l.backward()
w_h2.grad
w_h1.grad

tensor([[ 6.],
        [18.]])

tensor([[ 0.,  0.],
        [ 6., 12.]])

### Neural Networks

In [27]:
# Random 10-element input vector
input_layer = torch.rand(10)

# Random weight matrices for a 10 -> 20 -> 20 -> 4 chain of matrix products
w1, w2, w3 = torch.rand(10, 20), torch.rand(20, 20), torch.rand(20, 4)

# Forward pass: plain matrix products, no bias and no activation
h1 = input_layer @ w1
h2 = h1 @ w2
output_layer = h2 @ w3

print(output_layer)

tensor([340.5824, 281.8968, 303.5075, 288.3683])


In [28]:
import torch.nn as nn

class Net(nn.Module):
    """Three stacked fully connected layers: 10 -> 20 -> 20 -> 4.

    No activation function is applied between the layers.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 4)

    def forward(self, x):
        """Pass `x` through fc1, fc2 and the output layer in that order."""
        return self.output(self.fc2(self.fc1(x)))

In [None]:
net = Net()
result = net(input_layer)
result

### Activation functions
#### Motivation

In [None]:
# Two stacked matrix products with no activation in between
input_layer = torch.tensor([2., 1.])
weight_1 = torch.tensor([[0.45, 0.32], [-0.12, 0.29]])
weight_2 = torch.tensor([[0.48, -0.12], [0.64, 0.91]])
hidden_layer = input_layer @ weight_1
output_layer = hidden_layer @ weight_2
print(output_layer)

#### Matrix multiplication is a linear transformation

In [None]:
# Fold both weight matrices into one before applying it to the input
input_layer = torch.tensor([2., 1.])
weight_1 = torch.tensor([[0.45, 0.32], [-0.12, 0.29]])
weight_2 = torch.tensor([[0.48, -0.12], [0.64, 0.91]])
weight = weight_1 @ weight_2
output_layer = input_layer @ weight
print(output_layer)
print(weight)

#### Activation functions

In [29]:
relu = nn.ReLU()

# 1-D example: ReLU, then sum to a scalar so backward() can be called on it later
tensor_1 = torch.tensor([2., -4.], requires_grad=True)
out = relu(tensor_1)
print(out)
out1 = torch.sum(out)

# 2-D example, handled the same way
tensor_2 = torch.tensor([[2., -4.], [1.2, 0.]], requires_grad=True)
out = relu(tensor_2)
print(out)
out2 = torch.sum(out)

tensor([2., 0.], grad_fn=<ReluBackward0>)
tensor([[2.0000, 0.0000],
        [1.2000, 0.0000]], grad_fn=<ReluBackward0>)


In [30]:
out1.backward()
out2.backward()
tensor_1.grad
tensor_2.grad

tensor([1., 0.])

tensor([[1., 0.],
        [1., 0.]])

In [31]:
input_layer = torch.tensor([[ 0.0401, -0.9005,  0.0397, -0.0876]])
weight_1 = torch.tensor([[-0.1094, -0.8285,  0.0416, -1.1222],
                         [ 0.3327, -0.0461,  1.4473, -0.8070],
                         [ 0.0681, -0.7058, -1.8017,  0.5857],
                         [ 0.8764,  0.9618, -0.4505,  0.2888]])
weight_2 = torch.tensor([[ 0.6856, -1.7650,  1.6375, -1.5759],
                         [-0.1092, -0.1620,  0.1951, -0.1169],
                         [-0.5120,  1.1997,  0.8483, -0.2476],
                         [-0.3369,  0.5617, -0.6658,  0.2221]])
weight_3 = torch.rand(4, 4)


# Hidden-layer outputs, computed layer by layer
hidden_1 = input_layer @ weight_1
hidden_2 = hidden_1 @ weight_2

# Network output from the layer-by-layer path
print(hidden_2 @ weight_3)

# Multiply the three weight matrices together first
weight_composed_1 = weight_1 @ weight_2
weight = weight_composed_1 @ weight_3

# Network output using the single composed weight matrix
print(input_layer @ weight)

tensor([[-0.9892, -1.3952,  0.0102,  0.6443]])
tensor([[-0.9892, -1.3952,  0.0102,  0.6443]])


In [32]:
# Apply ReLU after each hidden layer to give the network non-linear capacity
hidden_1_activated = relu(input_layer @ weight_1)
hidden_2_activated = relu(hidden_1_activated @ weight_2)
print(hidden_2_activated @ weight_3)

# Product of the weight matrices, with ReLU applied to each product
weight_composed_1_activated = relu(weight_1 @ weight_2)
weight = relu(weight_composed_1_activated @ weight_3)

# Network output through the composed matrix; in the recorded run it differs
# from the layer-by-layer result printed first.
print(input_layer @ weight)

tensor([[0.3763, 0.0828, 0.4502, 0.5157]])
tensor([[-1.4975, -2.0198, -1.1524, -0.6569]])


### Softmax Cross-Entropy Loss

In [None]:
# Softmax cross-entropy for one sample with three class scores; target class is index 0
criterion = nn.CrossEntropyLoss()
logits = torch.tensor([[3.2, 5.1, -1.7]])
ground_truth = torch.tensor([0])
loss = criterion(input=logits, target=ground_truth)
print(loss)

In [None]:
logits = torch.tensor([[10.2, 5.1, -1.7]]) 
loss = criterion(logits, ground_truth) 
print(loss)

In [None]:
logits = torch.tensor([[-10, 5.1, -1.7]]) 
loss = criterion(logits, ground_truth) 
print(loss)

### Preparing a dataset in PyTorch

In [34]:
import torchvision  # 数据集和预训练网络
import torch.utils.data
import torchvision.transforms as transforms

# Per-channel values handed to Normalize as (mean, std)
# (presumably the CIFAR-10 channel statistics — TODO confirm).
channel_mean = (0.4914, 0.48216, 0.44653)
channel_std = (0.24703, 0.24349, 0.26159)

# Preprocessing pipeline: convert to tensor, then normalise
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [None]:
# Store the dataset under a project-relative folder rather than a
# machine-specific absolute path, so the notebook runs on any checkout.
# Defined once and shared by the train and test splits.
data_root = './data'

# CIFAR-10 train / test splits (downloaded into data_root when missing)
trainset = torchvision.datasets.CIFAR10(root=data_root, train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root=data_root, train=False,
                                       download=True, transform=transform)
# Training batches are shuffled; test batches keep dataset order
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=0)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /Applications/GitHub/MachineLearning-tarin-kaikeba/task_5/data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

**Inspecting the dataloader**

In [None]:
testloader.dataset
trainloader.dataset

In [None]:
testloader.dataset.data.shape
trainloader.dataset.data.shape

In [None]:
testloader.batch_size
trainloader.sampler
testloader.sampler

### Training Neural networks

In [None]:
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected classifier: 3072 (32*32*3) inputs -> 500 hidden -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Apply fc1 followed by ReLU, then return the output of fc2."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
import torch.optim as optim

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

n_epochs = 10  # number of passes over the training loader
for epoch in range(n_epochs):
    for i, (inputs, labels) in enumerate(trainloader):
        # Flatten each batch to (batch, 32*32*3) for the fully connected net
        inputs = inputs.view(-1, 32 * 32 * 3)

        # Clear the gradients accumulated in the previous step
        optimizer.zero_grad()
        # Forward pass and loss computation
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # Backward pass: compute gradients
        loss.backward()
        # Parameter update
        optimizer.step()

In [None]:
# Evaluate the trained network on the test loader
correct, total = 0, 0
predictions = []
net.eval()
# Inference only: without no_grad() every `outputs` tensor stored in
# `predictions` would keep its autograd graph alive, so memory would grow with
# each batch.
with torch.no_grad():
    for i, data in enumerate(testloader, 0):
        inputs, labels = data
        # Flatten each batch to (batch, 32*32*3), matching the training input
        inputs = inputs.view(-1, 32 * 32 * 3)
        outputs = net(inputs)
        # Predicted class = index of the largest output along dim 1
        _, predicted = torch.max(outputs, 1)
        predictions.append(outputs)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))

## 作业
* 使用Keras或PyTorch搭建全连接神经网络，完成在MNIST和CIFAR-10上的训练和评测
* 优化网络结构和超参，尽量提高模型效果
* **目前我们还没有讲到在PyTorch中模型的验证方法，在后续的课程中我会给大家慢慢介绍。但是聪明的你们可以利用目前所学，自己建立一种验证机制吗？**

*到此，我们虽然已经有能力对具体的问题建模训练和预测了，但是可能效果并不会很好，因为我们后面还会给大家讲到网络的正则，以及更powerful的网络结构，在此之前，也请大家积极练习，夯实基础方法和网络原理，加油！*