# Weight Initialization
---

In [1]:
import torch
import torch.nn as nn

In [4]:
# Small fully connected layer (5 inputs -> 5 outputs) used as the target for
# every initialization demo in this notebook.
layer = nn.Linear(in_features=5, out_features=5)

In [6]:
# Inspect the weight Parameter of the freshly constructed layer (a 5x5 matrix).
layer.weight

Parameter containing:
tensor([[-0.2793,  0.4101, -0.0553, -0.1348,  0.1374],
        [ 0.3870, -0.0047, -0.2084,  0.1973,  0.3080],
        [ 0.4134,  0.4107, -0.3409, -0.4010,  0.4322],
        [-0.1311, -0.1307, -0.0655, -0.2347,  0.3385],
        [ 0.2919,  0.2116, -0.2574, -0.3377, -0.3073]], requires_grad=True)

#### **Note:**
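By default, PyTorch initializes the weights of `nn.Linear` with Kaiming uniform initialization (`nn.init.kaiming_uniform_` with `a=sqrt(5)`). For this layer, that means sampling from $U(-1/\sqrt{\text{fan\_in}},\ 1/\sqrt{\text{fan\_in}})$, i.e. roughly $\pm 0.447$ for `fan_in = 5`.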

In [7]:
# Inspect the bias Parameter: one entry per output feature (5 values here).
layer.bias

Parameter containing:
tensor([-0.0342,  0.3228, -0.4265,  0.3022,  0.2599], requires_grad=True)

In [9]:
# Raw weight values without autograd tracking (note: no `requires_grad=True`
# in the output). `.detach()` is preferred over the legacy `.data` attribute:
# both return a tensor sharing storage with the parameter, but in-place edits
# made through `.data` are not reported to autograd and can silently produce
# wrong gradients.
layer.weight.detach()

tensor([[-0.2793,  0.4101, -0.0553, -0.1348,  0.1374],
        [ 0.3870, -0.0047, -0.2084,  0.1973,  0.3080],
        [ 0.4134,  0.4107, -0.3409, -0.4010,  0.4322],
        [-0.1311, -0.1307, -0.0655, -0.2347,  0.3385],
        [ 0.2919,  0.2116, -0.2574, -0.3377, -0.3073]])

## Uniform Init

In [10]:
# Overwrite the weights with samples drawn from the uniform distribution U(0, 3).
# The trailing underscore marks an in-place operation; the call also returns
# the modified Parameter, which is why it is displayed below.
nn.init.uniform_(layer.weight, a=0, b=3)

Parameter containing:
tensor([[2.5533, 0.8264, 2.5776, 1.7932, 0.8558],
        [0.8001, 1.3693, 1.9604, 1.8112, 1.4717],
        [2.3437, 0.1766, 1.7707, 1.6340, 1.8732],
        [2.6028, 0.6882, 1.2955, 2.1089, 1.1100],
        [1.9580, 0.5289, 1.4817, 2.8136, 2.3397]], requires_grad=True)

## Normal Init

In [11]:
# Overwrite the weights in place with samples from a normal distribution
# with mean 0 and standard deviation 2.
nn.init.normal_(layer.weight, mean=0, std=2)

Parameter containing:
tensor([[ 0.0118, -2.8880,  1.9994, -1.5407,  3.0063],
        [ 1.5872, -1.0582,  0.4163,  2.3199, -1.8046],
        [ 0.2873,  0.2263, -0.3292, -3.1019,  2.6741],
        [-1.4416,  3.9376, -0.7510,  3.2287,  0.8234],
        [-2.1870,  0.7475,  7.1670,  1.2480,  0.3517]], requires_grad=True)

## Constant Init
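Initializing weights to a constant is not recommended: every neuron in the layer would compute the same output and receive the same gradient, so the symmetry between them is never broken. Constant initialization is therefore demonstrated here on the bias instead.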

In [12]:
# Set every bias entry to the constant value 8 (in place).
nn.init.constant_(layer.bias, 8)

Parameter containing:
tensor([8., 8., 8., 8., 8.], requires_grad=True)

In [15]:
# Reset every bias entry to 0 (in place), replacing the constant set previously.
nn.init.zeros_(layer.bias)

Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)

## Xavier Init

In [16]:
# Xavier (Glorot) normal initialization: weights are drawn in place from
# N(0, std^2) with std = gain * sqrt(2 / (fan_in + fan_out)).
# For this 5x5 layer with gain=1, std = sqrt(2 / 10) ≈ 0.447.
nn.init.xavier_normal_(layer.weight, gain=1)

Parameter containing:
tensor([[ 0.4240,  0.3501, -0.1820, -0.3103,  0.2878],
        [-0.1825,  0.2665,  0.3655, -0.1316, -0.4517],
        [ 0.0081,  0.0183, -0.0564,  0.0033,  0.1833],
        [-0.0391, -0.5506, -0.8905, -0.1031,  0.1937],
        [-0.6832, -0.0134,  0.5989,  0.1344, -0.5393]], requires_grad=True)

In [17]:
# Xavier (Glorot) uniform initialization: weights are drawn in place from
# U(-bound, bound) with bound = gain * sqrt(6 / (fan_in + fan_out)).
# For this 5x5 layer with gain=1, bound = sqrt(6 / 10) ≈ 0.775.
nn.init.xavier_uniform_(layer.weight, gain=1)

Parameter containing:
tensor([[ 0.2417, -0.6862, -0.6930, -0.4115,  0.1182],
        [-0.4950, -0.5358, -0.7297,  0.5610, -0.4174],
        [ 0.5580, -0.3569, -0.7046, -0.6895,  0.4307],
        [ 0.4834, -0.4453,  0.2645, -0.7627, -0.7484],
        [-0.5108, -0.0168,  0.5571, -0.2125, -0.4866]], requires_grad=True)