In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from utils.joblib_wrapper import joblib_wrapper

@joblib_wrapper("../datasets/mnist/dataframe.pkl")
def load_mnist():
    return fetch_openml('mnist_784', cache=True, data_home='../datasets', as_frame=True, parser='auto')


mnist_dataset = load_mnist()['frame']  # type: pd.DataFrame

load_mnist发现位于../datasets/mnist/dataframe.pkl的任务结果


In [55]:
from sklearn.preprocessing import OneHotEncoder

train_dataset, test_dataset = train_test_split(mnist_dataset, train_size=60000,
                                               test_size=10000)  # type: pd.DataFrame, pd.DataFrame

def dataset2xy(dataset: pd.DataFrame):
    x_df = dataset.iloc[:, :-1]  # type: pd.DataFrame
    y_df = dataset.iloc[:, -1]  # type: pd.DataFrame

    return x_df.to_numpy(), y_df.to_numpy(dtype=int)

x_train, y_train = dataset2xy(train_dataset) # type: np.ndarray, np.ndarray
x_test, y_test = dataset2xy(test_dataset) # type: np.ndarray, np.ndarray


one_hot_encoder = OneHotEncoder()
one_hot_encoder.fit(y_train.reshape(-1, 1))
y_train = one_hot_encoder.transform(y_train.reshape(-1, 1)).toarray()
y_test = one_hot_encoder.transform(y_test.reshape(-1, 1)).toarray()

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

In [56]:
import numpy as np
from nets import TwoLayerNet

net = TwoLayerNet(784, 100, 10)
net.predict(x_train[0, :])

array([0.09976469, 0.09368956, 0.10018773, 0.09619245, 0.0990026 ,
       0.09779123, 0.10233236, 0.09895777, 0.11765148, 0.09443013])

### mini-batch 的实现

In [63]:
train_loss_list = []

# 超参数
iters_num = 40
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(0, iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask, :]
    y_batch = y_train[batch_mask]
    
    grad = network.numerical_gradient(x_batch, y_batch)
    
    # 更新参数
    for key in ('W1', 'W2', 'b1', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    
    loss = network.loss(x_batch, y_batch)
    train_loss_list.append(loss)
    print(i)
train_loss_list

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39


[14.50266205526922,
 14.515999985608888,
 14.503530236788887,
 14.490359101620399,
 14.501208971048836,
 14.499689227393231,
 14.494002763329675,
 14.519482957667229,
 14.496080197307432,
 14.495768913545453,
 14.475748080823056,
 14.49949939676777,
 14.499319112111753,
 14.509472849766336,
 14.486969953833178,
 14.486780695892529,
 14.501303979489641,
 14.51291421037389,
 14.5034201588029,
 14.500808597679608,
 14.49480674824808,
 14.503999340267438,
 14.491333289125212,
 14.496974782105559,
 14.505936443618456,
 14.503013368309208,
 14.508661683597653,
 14.50136701271151,
 14.477339881214277,
 14.495006072243791,
 14.503832015075627,
 14.51669732001049,
 14.50188971553795,
 14.490378449615694,
 14.500874525462232,
 14.502485101906023,
 14.494143956938816,
 14.499620636360946,
 14.501245702161608,
 14.490864849394585]