In [29]:
import torch

In [30]:
import torch.nn as nn

In [31]:
class Model(nn.Module):
    """Token embedding followed by a single-layer LSTM.

    ``forward`` takes a LongTensor of token ids shaped (batch, seq_len) and
    returns the LSTM output in sequence-first layout
    (seq_len, batch, hidden_size) together with the final hidden and cell
    states, each shaped (1, batch, hidden_size).
    """

    def __init__(self, vocab_size, embedding_size, hidden_size):
        super(Model, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size)

    def forward(self, inp):
        """Run the LSTM over ``inp`` starting from an all-zero state.

        Args:
            inp: LongTensor of token ids, shape (batch, seq_len).

        Returns:
            Tuple ``(out, h_n, c_n)`` as produced by ``nn.LSTM``.

        Raises:
            ValueError: if ``inp`` is not two-dimensional.
        """
        if inp.dim() != 2:
            raise ValueError(
                "expected inp of shape (batch, seq_len), got %s" % (tuple(inp.shape),)
            )
        # The state is rebuilt on every call, so sequences never share context.
        self.hidden, self.cell = self.initHidden(inp.shape[0])
        # nn.LSTM defaults to sequence-first input. transpose() swaps the batch
        # and time axes; view() would only reinterpret memory, which is correct
        # for a batch of one but scrambles tokens for anything larger.
        x = self.embedding(inp).transpose(0, 1)
        out, (h_n, c_n) = self.lstm(x, (self.hidden, self.cell))
        return out, h_n, c_n

    def initHidden(self, batch_size=1):
        """Return zero ``(h_0, c_0)`` tensors of shape (1, batch_size, hidden_size).

        The tensors are created with the dtype and device of the embedding
        weights so the model keeps working after ``.to(device)`` or ``.double()``.
        """
        weight = self.embedding.weight
        shape = (1, batch_size, self.hidden_size)
        h_0 = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        c_0 = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        return h_0, c_0

In [32]:
# A single sequence of four token ids, laid out as (batch=1, seq_len=4).
inp = torch.tensor([[1, 2, 3, 4]], dtype=torch.long)

In [None]:
# Show the input tensor: one sequence of four token ids, shape (1, 4).
inp

In [33]:
# Seed the RNG first: the embedding and LSTM weights are randomly initialised,
# so without a fixed seed the values shown below change on every run.
torch.manual_seed(0)
# vocab_size=5, embedding_size=10, hidden_size=10
model = Model(5, 10, 10)

In [34]:
# Forward pass: `out` holds the LSTM output at every timestep, while `h_n` and
# `c_n` are the hidden and cell states after the last timestep.
out, h_n, c_n = model(inp)

In [35]:
# Sequence-first layout: (seq_len, batch, hidden_size).
out.shape

torch.Size([4, 1, 10])

In [36]:
# Final hidden state; for this single-layer LSTM it matches the last timestep of `out`.
h_n

tensor([[[ 0.2033,  0.0575, -0.1110,  0.0744,  0.3849,  0.0932, -0.0111,
           0.0138, -0.0621, -0.0169]]], grad_fn=<StackBackward>)

In [37]:
# Hidden state at each of the four timesteps; the last entry is the same as `h_n`.
out

tensor([[[-0.0953,  0.0944, -0.1214, -0.0330, -0.2039, -0.2101,  0.0898,
          -0.0486, -0.0505,  0.1225]],

        [[-0.2406,  0.1974, -0.1025, -0.0595,  0.0978, -0.2167, -0.0358,
           0.1241,  0.0753, -0.0253]],

        [[-0.0697,  0.0398, -0.1469,  0.1155,  0.0534, -0.1221,  0.1848,
           0.1335, -0.0780,  0.1834]],

        [[ 0.2033,  0.0575, -0.1110,  0.0744,  0.3849,  0.0932, -0.0111,
           0.0138, -0.0621, -0.0169]]], grad_fn=<StackBackward>)

In [38]:
# Final cell state of the LSTM (same shape as `h_n`).
c_n

tensor([[[ 0.3813,  0.1633, -0.2301,  0.1286,  0.5635,  0.1839, -0.0198,
           0.0178, -0.1376, -0.0393]]], grad_fn=<StackBackward>)

In [39]:
class Model(nn.Module):
    """Token embedding followed by a multi-layer LSTM over batched sequences.

    ``forward`` takes a LongTensor of token ids shaped (batch, seq_len) and
    returns the LSTM output in sequence-first layout
    (seq_len, batch, hidden_size) together with the final hidden and cell
    states, each shaped (layers, batch, hidden_size).
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, layers):
        super(Model, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.layers = layers
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers=layers)

    def forward(self, inp, batch_size=None):
        """Run the LSTM over a batch of sequences starting from an all-zero state.

        Args:
            inp: LongTensor of token ids, shape (batch, seq_len).
            batch_size: number of sequences in ``inp``. Optional; when omitted
                it is read from ``inp.shape[0]``.

        Returns:
            Tuple ``(out, h_n, c_n)`` as produced by ``nn.LSTM``.
        """
        if batch_size is None:
            batch_size = inp.shape[0]
        # The state is rebuilt on every call, so batches never share context.
        self.hidden, self.cell = self.initHidden(batch_size)
        # nn.LSTM defaults to sequence-first input. The axes must be swapped with
        # transpose(); view() keeps the memory order and would interleave tokens
        # that belong to different sequences of the batch.
        x = self.embedding(inp).transpose(0, 1)
        out, (h_n, c_n) = self.lstm(x, (self.hidden, self.cell))
        return out, h_n, c_n

    def initHidden(self, batch_size):
        """Return zero ``(h_0, c_0)`` tensors of shape (layers, batch_size, hidden_size).

        The tensors are created with the dtype and device of the embedding
        weights so the model keeps working after ``.to(device)`` or ``.double()``.
        """
        weight = self.embedding.weight
        shape = (self.layers, batch_size, self.hidden_size)
        h_0 = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        c_0 = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        return h_0, c_0

In [40]:
# Three sequences of four token ids each: (batch=3, seq_len=4).
inp_2 = torch.tensor(
    [
        [0, 1, 2, 3],
        [1, 2, 3, 4],
        [0, 2, 1, 3],
    ],
    dtype=torch.long,
)

In [None]:
# (batch, seq_len) = (3, 4)
inp_2.shape

In [41]:
# Seed the RNG first: the embedding and LSTM weights are randomly initialised,
# so without a fixed seed the results change on every run.
torch.manual_seed(0)
# vocab_size=6, embedding_size=10, hidden_size=10, layers=2
model = Model(6, 10, 10, 2)

In [42]:
# Take the batch size from the data instead of hard-coding 3, so this cell keeps
# working when the number of sequences in `inp_2` changes.
out, h_n, c_n = model(inp_2, inp_2.shape[0])

In [43]:
# (seq_len, batch, hidden_size) for the batched input.
out.shape

torch.Size([4, 3, 10])

In [None]:
# One final hidden state per LSTM layer and per sequence: (layers, batch, hidden_size).
h_n.shape

In [None]:
# One final cell state per LSTM layer and per sequence: (layers, batch, hidden_size).
c_n.shape

You can handle variable-length sequences in one batch by packing them with `pack_padded_sequence` and `pad_packed_sequence` from `torch.nn.utils.rnn`.
For a walkthrough, see this article:
https://towardsdatascience.com/taming-lstms-variable-sized-mini-batches-and-why-pytorch-is-good-for-your-health-61d35642972e