In [1]:
#state_dict
import torch
import torch.nn as nn

# Define a simple model
class SimpleNN(nn.Module):
    """Two-layer fully connected network: Linear(10 -> 5), ReLU, Linear(5 -> 1)."""

    def __init__(self):
        super().__init__()
        # Attribute names determine the state_dict keys ("fc1.weight", "fc2.bias", ...).
        self.fc1 = nn.Linear(in_features=10, out_features=5)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to ``x`` and return the result."""
        hidden_pre = self.fc1(x)
        hidden_act = self.relu(hidden_pre)
        return self.fc2(hidden_act)

# Instantiate the network. No random seed is set in this cell, so the
# initial parameter values printed below will vary between runs.
model = SimpleNN()

# Print state_dict: an ordered mapping from parameter name
# (e.g. "fc1.weight", "fc2.bias") to the tensor holding its values.
print(model.state_dict())


OrderedDict({'fc1.weight': tensor([[-0.1307, -0.1788, -0.1146,  0.2967, -0.2739, -0.0423, -0.2439, -0.0746,
          0.2885,  0.3046],
        [ 0.1392, -0.1125, -0.0012,  0.2434, -0.2566, -0.0526, -0.0570,  0.0089,
         -0.0577, -0.2312],
        [ 0.1778, -0.3073,  0.1234,  0.2738,  0.0060,  0.2649,  0.0515,  0.1872,
         -0.1289, -0.0401],
        [-0.0140, -0.2645, -0.0565,  0.1050, -0.2992, -0.2440, -0.0460, -0.1565,
          0.2304,  0.2402],
        [ 0.2056,  0.2012,  0.1213,  0.1208, -0.0167,  0.3012,  0.1992, -0.0799,
         -0.0591, -0.0450]]), 'fc1.bias': tensor([ 0.1916,  0.1432,  0.0926, -0.1115, -0.1493]), 'fc2.weight': tensor([[-0.4063, -0.2634,  0.0657,  0.0599, -0.2990]]), 'fc2.bias': tensor([-0.0370])})


In [2]:
# Persist only the parameters (the state_dict), not the model object itself.
# Restoring them later means re-creating a SimpleNN and calling load_state_dict.
torch.save(model.state_dict(), "model_weights.pth")


In [6]:
#rnn model
import torch
import torch.nn as nn
import torch.optim as optim

# --------------------------
# 1. Data Preparation
# --------------------------
chars = "helo"  # vocabulary
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = {i: ch for i, ch in enumerate(chars)}

# Example word: the target is the input shifted one character to the left,
# so at every time step the network is trained to predict the next character.
seq = "hell"
target = "ello"

# Convert to indices
x = torch.tensor([char2idx[ch] for ch in seq])   # [h, e, l, l]
y = torch.tensor([char2idx[ch] for ch in target])  # [e, l, l, o]

# One-hot encode input
x_onehot = torch.nn.functional.one_hot(x, num_classes=len(chars)).float()
# Shape → (seq_len, input_size)
x_onehot = x_onehot.unsqueeze(1)  # add batch dimension → (seq_len, batch=1, input_size)

print("Input shape:", x_onehot.shape)  # (4, 1, 4)
print("Target shape:", y.shape)        # (4,)

# --------------------------
# 2. Define Model
# --------------------------
input_size = len(chars)   # 4
hidden_size = 8
rnn = nn.RNN(input_size, hidden_size, batch_first=False)
fc = nn.Linear(hidden_size, input_size)  # final layer maps to vocab size

# Group the two layers in one container so that a single state_dict covers
# everything that is trained. The resulting keys ("rnn.*", "fc.*") use the
# same layout as a module with `rnn` and `fc` attributes.
char_rnn = nn.ModuleDict({"rnn": rnn, "fc": fc})

# --------------------------
# 3. Training Loop
# --------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(list(rnn.parameters()) + list(fc.parameters()), lr=0.01)

for epoch in range(100):
    optimizer.zero_grad()

    # RNN forward
    out, hidden = rnn(x_onehot)  # out shape: (seq_len, batch, hidden_size)

    # Pass through linear layer
    out = fc(out)  # (seq_len, batch, vocab_size)

    # Drop the batch dimension so the logits are (seq_len, vocab_size),
    # matching the (seq_len,) class-index target expected by CrossEntropyLoss.
    loss = criterion(out.squeeze(1), y)

    # Backprop
    loss.backward()
    optimizer.step()

    if (epoch+1) % 20 == 0:
        pred = torch.argmax(out.squeeze(1), dim=1)
        pred_word = "".join(idx2char[idx.item()] for idx in pred)
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}, Prediction: {pred_word}")

# --------------------------
# 4. Training finished
# --------------------------
print("\n✅ Training complete!")

# --------------------------
# 5. Save a checkpoint
# --------------------------
# Everything needed to resume training exactly where it left off
# (same weights, optimizer momentum, learning rate, etc.).
#
# The weights saved here must be those of the layers that were just trained
# (`rnn` and `fc`). Saving the state_dict of any other module would produce a
# checkpoint that cannot restore this network.
torch.save({
    # Number of completed epochs (1-based), consistent with the log lines above.
    "epoch": epoch + 1,
    "model_state_dict": char_rnn.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
    # Plain float rather than a tensor that is still attached to the autograd graph.
    "loss": loss.item(),
}, "checkpoint.pth")



Input shape: torch.Size([4, 1, 4])
Target shape: torch.Size([4])
Epoch 20, Loss: 0.7545, Prediction: elll
Epoch 40, Loss: 0.2074, Prediction: ello
Epoch 60, Loss: 0.0594, Prediction: ello
Epoch 80, Loss: 0.0275, Prediction: ello
Epoch 100, Loss: 0.0175, Prediction: ello

✅ Training complete!


In [7]:
# Create the same model architecture
model = SimpleNN()

# Load state dict.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered .pth file cannot execute arbitrary code while being loaded. A
# state_dict contains nothing else, so the restricted loader is sufficient.
model.load_state_dict(torch.load("model_weights.pth", weights_only=True))

# Put model in eval mode for inference
model.eval()

# Run inference
x = torch.randn(1, 10)   # example input: a batch of one 10-feature vector
with torch.no_grad():    # no gradients are needed for a forward-only pass
    output = model(x)
print(output)


tensor([[-0.2873]])


In [9]:
#complete rnn model after applying saving, loading and checkpointing
import torch
import torch.nn as nn
import torch.optim as optim

# --------------------------
# 1. Data Preparation
# --------------------------
chars = "helo"  # vocabulary
# Two lookup tables: character -> index and index -> character.
char2idx = dict(zip(chars, range(len(chars))))
idx2char = dict(enumerate(chars))

# The target is the input word shifted left by one character.
seq = "hell"
target = "ello"

x = torch.tensor([char2idx[symbol] for symbol in seq])      # [h, e, l, l]
y = torch.tensor([char2idx[symbol] for symbol in target])   # [e, l, l, o]

# One-hot encode the input and insert a batch axis in one chain:
# (seq_len,) -> (seq_len, input_size) -> (seq_len, batch=1, input_size)
x_onehot = (
    torch.nn.functional.one_hot(x, num_classes=len(chars))
    .float()
    .unsqueeze(1)
)

print("Input shape:", x_onehot.shape)
print("Target shape:", y.shape)

# --------------------------
# 2. Define Model
# --------------------------
class CharRNN(nn.Module):
    """Single-layer RNN followed by a linear projection applied at every time step."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Inputs are laid out as (seq_len, batch, input_size) because batch_first=False.
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=False)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the RNN over ``x`` and project each step's hidden state.

        ``hidden`` is forwarded to ``nn.RNN`` unchanged; ``None`` lets the RNN
        use its default initial state. Returns ``(projected, last_hidden)``.
        """
        rnn_out, last_hidden = self.rnn(x, hidden)  # (seq_len, batch, hidden_size)
        projected = self.fc(rnn_out)                # (seq_len, batch, output_size)
        return projected, last_hidden

# One-hot inputs and output logits both span the vocabulary, so the two widths coincide.
input_size = output_size = len(chars)   # 4
hidden_size = 8

model = CharRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

# --------------------------
# 3. Training Loop + Checkpointing
# --------------------------
num_epochs = 100
best_loss = float("inf")

for epoch in range(1, num_epochs+1):
    optimizer.zero_grad()
    out, hidden = model(x_onehot)
    loss = criterion(out.squeeze(1), y)
    loss_value = loss.item()  # read the scalar once; reused for logging and checkpointing

    loss.backward()

    # Save checkpoint if the loss improves (training loss is used as a proxy
    # for a validation metric).
    #
    # The snapshot is taken BEFORE optimizer.step(): `loss_value` was measured
    # with the current weights, so saving now keeps the recorded loss and the
    # stored weights in agreement. Saving after the step would label the
    # updated weights with a loss they never produced.
    # "epoch" is therefore the epoch whose update has not been applied yet,
    # i.e. the epoch to run first when resuming.
    if loss_value < best_loss:
        best_loss = loss_value
        torch.save({
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss_value,
        }, "checkpoint.pth")
        # print(f"✅ Saved checkpoint at epoch {epoch}, loss={loss_value:.4f}")

    optimizer.step()

    if epoch % 20 == 0:
        pred = torch.argmax(out.squeeze(1), dim=1)
        pred_word = "".join(idx2char[idx.item()] for idx in pred)
        print(f"Epoch {epoch}, Loss: {loss_value:.4f}, Prediction: {pred_word}")

print("\n🎯 Training complete!")

# --------------------------
# 4. Load Checkpoint & Resume
# --------------------------
# weights_only=True restricts unpickling to tensors and plain Python
# containers/scalars, which is all this checkpoint holds. It prevents a
# tampered file from executing arbitrary code during loading.
checkpoint = torch.load("checkpoint.pth", weights_only=True)

# Rebuild model and optimizer with the same configuration used for training,
# then overwrite their freshly initialised state with the saved one.
model = CharRNN(input_size, hidden_size, output_size)
optimizer = optim.Adam(model.parameters(), lr=0.01)

model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
start_epoch = checkpoint["epoch"]
loss = checkpoint["loss"]  # plain float from here on, no longer a tensor

print(f"\n🔄 Resumed from epoch {start_epoch}, loss={loss:.4f}")

# --------------------------
# 5. Inference after restore
# --------------------------
model.eval()
with torch.no_grad():  # forward pass only; gradient tracking is not needed
    out, hidden = model(x_onehot)
    # Remove the batch axis, then take the highest-scoring character per time step.
    pred = out.squeeze(1).argmax(dim=1)
    pred_word = "".join(idx2char[index] for index in pred.tolist())
    print(f"Prediction after restore: {pred_word}")


Input shape: torch.Size([4, 1, 4])
Target shape: torch.Size([4])
Epoch 20, Loss: 0.7687, Prediction: elll
Epoch 40, Loss: 0.2739, Prediction: ello
Epoch 60, Loss: 0.0746, Prediction: ello
Epoch 80, Loss: 0.0325, Prediction: ello
Epoch 100, Loss: 0.0201, Prediction: ello

🎯 Training complete!

🔄 Resumed from epoch 100, loss=0.0201
Prediction after restore: ello
