In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

torch.manual_seed(1)

<torch._C.Generator at 0x7fd26023e1d0>

In [2]:
### sigmoid DNN ###
class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x

model = Net(25, 50, 50, 10)

In [4]:
### tanh DNN ###
class NetTanh(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(NetTanh, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = torch.tanh(self.linear2(x))
        x = self.linear3(x)
        return x

In [8]:
### ReLu DNN ###
class NetReLu(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(NetReLu, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        x = self.linear3(x)
        return x

model = NetReLu(2, 5, 5, 1)
model.state_dict()

OrderedDict([('linear1.weight',
              tensor([[ 0.5672, -0.1062],
                      [ 0.0950, -0.7037],
                      [-0.4370, -0.6238],
                      [-0.3244, -0.3555],
                      [-0.2603,  0.1897]])),
             ('linear1.bias',
              tensor([ 0.2265, -0.2988,  0.3972, -0.5336,  0.0151])),
             ('linear2.weight',
              tensor([[ 0.2002, -0.4043, -0.0483, -0.4221,  0.0464],
                      [ 0.3649,  0.3876,  0.1515, -0.2125,  0.3433],
                      [-0.4433,  0.2778, -0.2198,  0.2249,  0.0263],
                      [-0.3574,  0.2918,  0.3817, -0.0623, -0.1894],
                      [ 0.4068,  0.3609, -0.1206, -0.2718, -0.0192]])),
             ('linear2.bias',
              tensor([ 0.3456, -0.3081, -0.3023,  0.2690, -0.3898])),
             ('linear3.weight',
              tensor([[ 0.2579, -0.2733,  0.0681, -0.1431,  0.0789]])),
             ('linear3.bias', tensor([-0.1783]))])

In [12]:
### Dropout ###
import torch.nn.functional as F

class NetReLu_add_Dropout(nn.Module):
    def __init__(self, D_in, H1, H2, D_out, p):
        super(NetReLu_add_Dropout, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
        self.drop = nn.Dropout(p=p)

    def forward(self, x):
        x = F.relu(self.drop(self.linear1(x)))
        x = F.relu(self.drop(self.linear2(x)))
        x = self.linear3(x)
        return x

model = NetReLu_add_Dropout(2, 5, 5, 1, p=0.5)
model.train()

NetReLu_add_Dropout(
  (linear1): Linear(in_features=2, out_features=5, bias=True)
  (linear2): Linear(in_features=5, out_features=5, bias=True)
  (linear3): Linear(in_features=5, out_features=1, bias=True)
  (drop): Dropout(p=0.5, inplace=False)
)

In [13]:
### Xavier initialization ###
class NetTanh_Xavier(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(NetTanh_Xavier, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        torch.nn.init.xavier_uniform_(self.linear1.weight)

        self.linear2 = nn.Linear(H1, H2)
        torch.nn.init.xavier_uniform_(self.linear2.weight)

        self.linear3 = nn.Linear(H2, D_out)
        torch.nn.init.xavier_uniform_(self.linear3.weight)

    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = torch.tanh(self.linear2(x))
        x = self.linear3(x)
        return x

model = NetTanh_Xavier(2, 5, 5, 1)
model.train()

NetTanh_Xavier(
  (linear1): Linear(in_features=2, out_features=5, bias=True)
  (linear2): Linear(in_features=5, out_features=5, bias=True)
  (linear3): Linear(in_features=5, out_features=1, bias=True)
)

In [14]:
### He initialization ###
class NetReLu_He(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(NetReLu_He, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        torch.nn.init.kaiming_uniform_(self.linear1.weight, nonlinearity="relu")
        self.linear2 = nn.Linear(H1, H2)
        torch.nn.init.kaiming_uniform_(self.linear2.weight, nonlinearity="relu")
        self.linear3 = nn.Linear(H2, D_out)
        torch.nn.init.kaiming_uniform_(self.linear3.weight, nonlinearity="relu")

    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        x = self.linear3(x)
        return x

model = NetReLu(2, 5, 5, 1)
model.state_dict()

OrderedDict([('linear1.weight',
              tensor([[-0.5201, -0.2306],
                      [ 0.0918,  0.4077],
                      [-0.3124,  0.5924],
                      [-0.5024, -0.5171],
                      [ 0.2782,  0.6645]])),
             ('linear1.bias',
              tensor([ 0.5354,  0.5100,  0.0427, -0.2513, -0.5256])),
             ('linear2.weight',
              tensor([[-0.3190,  0.1387, -0.0316, -0.0711, -0.0853],
                      [-0.0009, -0.2620, -0.1015,  0.3669, -0.2376],
                      [-0.0332, -0.3532, -0.1792,  0.1515,  0.1936],
                      [-0.3707,  0.3401,  0.1042,  0.2141, -0.2594],
                      [ 0.3073, -0.4073, -0.3445,  0.3010, -0.1397]])),
             ('linear2.bias',
              tensor([ 0.1901,  0.2177, -0.1890, -0.2742,  0.2485])),
             ('linear3.weight',
              tensor([[ 0.2984, -0.0030,  0.0701,  0.1773,  0.1449]])),
             ('linear3.bias', tensor([0.4170]))])

In [15]:
### momentum ###
class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x

model = Net(25, 50, 50, 10)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.4)

In [16]:
### batch normalization ###
class Net_BatchNorm(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net_BatchNorm, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

        self.bn1 = nn.BatchNorm1d(H1)
        self.bn2 = nn.BatchNorm1d(H2)

    def forward(self, x):
        x = F.sigmoid(self.bn1(self.linear1(x)))
        x = F.sigmoid(self.bn2(self.linear2(x)))
        x = self.linear3(x)
        return x

model = Net(25, 50, 50, 10)
model.train()


Net(
  (linear1): Linear(in_features=25, out_features=50, bias=True)
  (linear2): Linear(in_features=50, out_features=50, bias=True)
  (linear3): Linear(in_features=50, out_features=10, bias=True)
)