# PyTorch: Initialization Methods

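One thing to explore to see if I can get out of the 0.693 DEATH VALLEY: the loss plateau at ln 2 ≈ 0.693, which is the cross-entropy of a two-class model that predicts 50/50 for every sample.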

In [1]:
import audiomod
import ptmod

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt
# import seaborn as sns
# plt.style.use('seaborn')

from functools import partial
from copy import deepcopy
import pickle

%matplotlib inline

In [2]:
# Seed PyTorch's global random number generator with a fixed value.
seed_gen = torch.manual_seed(23)
# Display the seed currently in effect as the cell output.
torch.initial_seed()

23

## Setup

In [3]:
# Fetch the 'sax1203' data group through the project's audiomod helper
# (presumably read from a database, per the function name — confirm in audiomod).
sax1203_dg = audiomod.pull_datagroup_from_db('sax1203')

In [4]:
# Split the data group into two parts, used below as the train and test sets.
# NOTE(review): 'tts' presumably means train/test split; the split ratio and any
# shuffling are decided inside audiomod — confirm there.
sax_train, sax_test = audiomod.tts(sax1203_dg)

In [5]:
# Wrap each split in a ptmod.SpectroDataset, using the same scaling=0.25 for both.
# NOTE(review): what `scaling` controls is defined in ptmod — confirm there.
train_ds = ptmod.SpectroDataset(sax_train, scaling=0.25)
test_ds = ptmod.SpectroDataset(sax_test, scaling=0.25)

## Init

A collection of initialization functions. Each one takes a single module, so it can be passed to a model's `.apply()`, which calls it on every submodule.

In [6]:
def check_weights(m):
    """Print a module and, for Conv2d/Linear layers, per-slice weight statistics.

    Meant to be passed to a model's ``.apply()``. Every module is printed,
    followed by a ``---`` separator. For ``nn.Conv2d`` and ``nn.Linear`` the
    weight tensor is walked along its first dimension, and the mean, standard
    deviation and number of dimensions of each slice are printed.

    Args:
        m: the module to inspect.
    """
    print(m)
    slices = m.weight if isinstance(m, (nn.Conv2d, nn.Linear)) else None
    if slices is not None:
        print("\nNumber of weight variables:", len(slices))
        for weight_slice in slices:
            values = weight_slice.data
            print("\nMean:", values.mean())
            print("Std:", values.std())
            print("Dimensions:", values.ndimension())
    print("\n---\n")

In [7]:
def init_ones(m):
    """Fill the weights of a Linear or Conv2d layer with ones.

    Meant to be passed to a model's ``.apply()``. Every module is printed; only
    ``nn.Linear`` and ``nn.Conv2d`` have their weights overwritten (and printed
    afterwards). Biases are not touched.

    Args:
        m: the module to initialise.
    """
    print(m)
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    # A constant fill gives the same result whether done slice by slice or at once.
    nn.init.constant(m.weight, 1)
    print(m.weight)

In [8]:
def init_norm(m, mean=0, std=1):
    """Fill the weights of a Linear or Conv2d layer from a normal distribution.

    Meant to be passed to a model's ``.apply()``; use ``functools.partial`` to
    bind ``mean``/``std``. Every module is printed; only ``nn.Linear`` and
    ``nn.Conv2d`` have their weights overwritten (and printed afterwards).
    Biases are not touched.

    Args:
        m: the module to initialise.
        mean: mean of the normal distribution.
        std: standard deviation of the normal distribution.
    """
    print(m)
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    # One call per slice along the first dimension of the weight tensor.
    for weight_slice in m.weight:
        nn.init.normal(weight_slice, mean=mean, std=std)
    print(m.weight)

In [9]:
def init_uni(m, a=0, b=1):
    """Fill the weights of a Linear or Conv2d layer from a uniform distribution.

    Meant to be passed to a model's ``.apply()``; use ``functools.partial`` to
    bind ``a``/``b``. Every module is printed; only ``nn.Linear`` and
    ``nn.Conv2d`` have their weights overwritten (and printed afterwards).
    Biases are not touched.

    Args:
        m: the module to initialise.
        a: lower bound of the uniform distribution.
        b: upper bound of the uniform distribution.
    """
    print(m)
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    # One call per slice along the first dimension of the weight tensor.
    for weight_slice in m.weight:
        nn.init.uniform(weight_slice, a=a, b=b)
    print(m.weight)

In [10]:
def init_xav_norm(m, gain=nn.init.calculate_gain('relu')):
    """Xavier (Glorot) normal initialisation for the weights of Conv2d layers.

    Meant to be passed to a model's ``.apply()``; use ``functools.partial`` to
    bind ``gain``. Every module is printed; only ``nn.Conv2d`` weights are
    overwritten (and printed afterwards). ``nn.Linear`` and all other modules
    are left unchanged, and biases are not touched.

    Args:
        m: the module to initialise.
        gain: scaling factor for the standard deviation; defaults to the
            recommended gain for ReLU.

    Side effects:
        Reseeds PyTorch's global RNG to 23 on every call.
    """
    # NOTE(review): under .apply() this reseed runs once per module, so every
    # Conv2d layer starts drawing from the same random stream. Kept so that
    # existing results stay reproducible.
    torch.manual_seed(23)
    print(m)
    if isinstance(m, nn.Conv2d):
        # Initialise the whole 4-D weight in one call. PyTorch derives fan_in and
        # fan_out from the tensor's shape, so handing it one 3-D filter slice at a
        # time makes it read the wrong fans and produces a much larger std.
        nn.init.xavier_normal(m.weight, gain=gain)
        print(m.weight)

In [11]:
def init_xav_uni(m, gain=nn.init.calculate_gain('relu')):
    """Xavier (Glorot) uniform initialisation for the weights of Conv2d layers.

    Meant to be passed to a model's ``.apply()``; use ``functools.partial`` to
    bind ``gain``. Every module is printed; only ``nn.Conv2d`` weights are
    overwritten (and printed afterwards). ``nn.Linear`` and all other modules
    are left unchanged, and biases are not touched.

    Args:
        m: the module to initialise.
        gain: scaling factor for the sampling bound; defaults to the
            recommended gain for ReLU.

    Side effects:
        Reseeds PyTorch's global RNG to 23 on every call.
    """
    # NOTE(review): under .apply() this reseed runs once per module, so every
    # Conv2d layer starts drawing from the same random stream. Kept so that
    # existing results stay reproducible.
    torch.manual_seed(23)
    print(m)
    if isinstance(m, nn.Conv2d):
        # Initialise the whole 4-D weight in one call. PyTorch derives fan_in and
        # fan_out from the tensor's shape, so handing it one 3-D filter slice at a
        # time makes it read the wrong fans and produces a much wider range.
        nn.init.xavier_uniform(m.weight, gain=gain)
        print(m.weight)

In [12]:
def init_norm_auto(m):
    """Normal initialisation with std = 1 / sqrt(fan_in) for Conv2d and Linear.

    Based on ``reset_parameters()`` in ``nn.Linear`` and ``nn.Conv2d``, but
    drawing from a normal distribution. Meant to be passed to a model's
    ``.apply()``. Other modules are left unchanged, and biases are not touched.

    Args:
        m: the module to initialise.

    Side effects:
        Reseeds PyTorch's global RNG to 23 on every call, whatever the module
        type.
    """
    torch.manual_seed(23)
    if isinstance(m, nn.Conv2d):
        # fan_in of a convolution: input channels times every kernel dimension.
        fan_in = m.in_channels
        for kernel_dim in m.kernel_size:
            fan_in *= kernel_dim
    elif isinstance(m, nn.Linear):
        # fan_in of a linear layer: number of input features.
        fan_in = m.weight.size(1)
    else:
        return
    m.weight.data.normal_(mean=0, std=1. / np.sqrt(fan_in))

Try 'em out below:

In [13]:
# Layer specification handed to ptmod.CNN_cpcpff.
# NOTE(review): the meaning of each list is read off the printed model, e.g.
# 'c1' -> Conv2d(1, 10, kernel_size=(5, 5), stride=(2, 2)); confirm against ptmod.
params = dict(
    c1=[5, 2, 10],   # conv1: kernel size, stride, output channels
    p1=[2, 2],       # pool1: window size, stride
    c2=[5, 2, 20],   # conv2: kernel size, stride, output channels
    p2=[2, 2],       # pool2: window size, stride
    f1=[700, 100],   # fc1: input features, output features
    f2=[100, 2],     # fc2: input features, output features
)

In [14]:
# Build a fresh model from the layer specification in `params`.
cnn_test = ptmod.CNN_cpcpff(params)

In [15]:
# Print weight statistics for every submodule of the newly constructed model,
# before any of the init functions above have been applied.
cnn_test.apply(check_weights)

Conv2d(1, 10, kernel_size=(5, 5), stride=(2, 2))

Number of weight variables: 10

Mean: -0.015132528664544224
Std: 0.21811257034980172
Dimensions: 3

Mean: 0.05179643907118589
Std: 0.20803878548480093
Dimensions: 3

Mean: -0.018625109121203423
Std: 0.20626533429662186
Dimensions: 3

Mean: -0.03366573915642221
Std: 0.21036013568227915
Dimensions: 3

Mean: 0.05743596957996488
Std: 0.2071027104605339
Dimensions: 3

Mean: 0.03357626121491194
Std: 0.23690357343006566
Dimensions: 3

Mean: 0.01646064016968012
Std: 0.20282516725087724
Dimensions: 3

Mean: -0.023105934746563434
Std: 0.21450259003892214
Dimensions: 3

Mean: -0.02120270038023591
Std: 0.16509413848798968
Dimensions: 3

Mean: -0.05459213100373745
Std: 0.12605178332404404
Dimensions: 3

---

MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))

---

Conv2d(10, 20, kernel_size=(5, 5), stride=(2, 2))

Number of weight variables: 20

Mean: -0.00022309319907799362
Std: 0.06366714319628283
Dimensions: 3

Mean: -0.000144376871176064
St

CNN_cpcpff (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(2, 2))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(2, 2))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (700 -> 100)
  (fc2): Linear (100 -> 2)
)

In [53]:
# create partials to set params, as .apply() only allows for one arg
# init_p = partial(init_norm, mean=-0.5, std=0.5)
# init_p = partial(init_uni, a=-5, b=5)
# init_p = partial(init_xav_norm, gain=1)

In [16]:
# Overwrite the weights of every Conv2d and Linear layer in place with init_norm_auto.
cnn_test.apply(init_norm_auto)

CNN_cpcpff (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(2, 2))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(2, 2))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (700 -> 100)
  (fc2): Linear (100 -> 2)
)

In [17]:
# Train the re-initialised model and keep whatever ptmod.fit returns in `losses`.
losses = ptmod.fit(
    cnn_test,
    train_ds,  # training dataset built in the Setup section
    optim.SGD(cnn_test.parameters(), lr=0.01, weight_decay=0.01, momentum=0.8),  # optimizer
    nn.CrossEntropyLoss(),  # loss criterion
    3,  # number of epochs (the output below shows Epoch 1 to 3)
    8  # presumably the batch size — TODO confirm against ptmod.fit
)

Epoch 1
 * Avg loss: 0.682	Time: 7474.772 ms
 * Weights updated: True
Epoch 2
 * Avg loss: 0.681	Time: 7683.385 ms
 * Weights updated: True
Epoch 3
 * Avg loss: 0.646	Time: 7234.953 ms
 * Weights updated: True

Training Complete!


### Init methods that do NOT work:

* fill Conv and Linear layers with ones
* norm (0, 1)
* norm (0, 0.1)
* norm (0.1, 0.1)
* norm (0.5, 0.1)
* norm (-0.5, 0.5)
* uniform (-0.5,0.5)
* uniform (-1, 1)
* uniform (-1, 0)

### Init methods that DO work:

* Built-in (sometimes)
* `init_norm_auto()` has hope! Like the built-in initialization, but it draws from a normal distribution (std = 1/sqrt(fan_in)) instead of a uniform one.