In [9]:
import torch

# Define the model class to match the architecture used during training
class MyModel(torch.nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # Match the saved model's architecture
        self.fc1 = torch.nn.Linear(784, 128)  # Input size 784 (28x28 image flattened), output size 128
        self.fc2 = torch.nn.Linear(128, 64)   # Input size 128, output size 64
        self.fc3 = torch.nn.Linear(64, 10)    # Input size 64, output size 10 (for classification)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Instantiate the model
model = MyModel()

# Load the state_dict from the saved model
model.load_state_dict(torch.load('predefined_model.pth'))

# Set the model to evaluation mode
model.eval()

# Now the model is ready for inference


MyModel(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

In [10]:
import torch
import torch.nn as nn

# Define the model class to match the fine-tuned model's architecture
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # Define the convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)  # 1 input channel, 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1) # 32 input channels, 64 output channels
        self.fc1 = nn.Linear(3136, 128)  # Flattened size after conv layers (28x28 image becomes 3136 after flattening)
        self.fc2 = nn.Linear(128, 1)    # Output size adjusted to match the checkpoint
        # Removed fc3 since it's missing in the checkpoint

    def forward(self, x):
        # Apply convolutional layers with ReLU activation
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten the output from conv layers to feed into fully connected layers
        
        # Apply fully connected layers with ReLU activation
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # Final output layer
        return x

# Instantiate the model
model = MyModel()

# Load the state_dict from the fine-tuned model
c =model.load_state_dict(torch.load('fine_tuned_model_label_1.pth'))

# Set the model to evaluation mode
model.eval()


MyModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

In [20]:
import torch
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Adjust based on pooling
        self.fc2 = nn.Linear(128, 1)            # Change output layer for binary classification

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)               # Flatten
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)                          # Single output for binary classification
        return x

In [21]:
model_finetuned = SimpleCNN()
# model_finetuned.load_state_dict(torch.load('fine_tuned_model_label_1.pth'))
model_finetuned.load_state_dict(torch.load('fine_tuned_model_label_1.pth'), strict=False)

<All keys matched successfully>

In [17]:
model_finetuned = SimpleNN()
model_finetuned.load_state_dict(torch.load('fine_tuned_model_label_1.pth'))

model_predefined = SimpleNN()
model_predefined.load_state_dict(torch.load('mnist_model.pth'))

RuntimeError: Error(s) in loading state_dict for SimpleNN:
	Missing key(s) in state_dict: "fc3.weight", "fc3.bias". 
	Unexpected key(s) in state_dict: "conv1.weight", "conv1.bias", "conv2.weight", "conv2.bias". 
	size mismatch for fc1.weight: copying a param with shape torch.Size([128, 3136]) from checkpoint, the shape in current model is torch.Size([128, 784]).
	size mismatch for fc2.weight: copying a param with shape torch.Size([1, 128]) from checkpoint, the shape in current model is torch.Size([64, 128]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([1]) from checkpoint, the shape in current model is torch.Size([64]).

In [15]:
import torch

# Load the models
model_finetuned = SimpleNN()  # Assuming SimpleNN is defined as in your previous code
model_finetuned.load_state_dict(torch.load('fine_tuned_model_label_1.pth'))

model_predefined = SimpleNN()  # Load your predefined model similarly
model_predefined.load_state_dict(torch.load('mnist_model.pth'))

# Perform arithmetic operation (e.g., averaging)
combined_weights = {}
for key in model_finetuned.state_dict().keys():
    combined_weights[key] = (model_finetuned.state_dict()[key] + model_predefined.state_dict()[key]) / 2

# Create a new model to hold combined weights
combined_model = SimpleNN()
combined_model.load_state_dict(combined_weights)

# Evaluate the combined model
combined_model.eval()
# Assuming you have a test_loader defined
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = combined_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the combined model on the test dataset: {accuracy:.2f}%')

RuntimeError: Error(s) in loading state_dict for SimpleNN:
	Missing key(s) in state_dict: "fc3.weight", "fc3.bias". 
	Unexpected key(s) in state_dict: "conv1.weight", "conv1.bias", "conv2.weight", "conv2.bias". 
	size mismatch for fc1.weight: copying a param with shape torch.Size([128, 3136]) from checkpoint, the shape in current model is torch.Size([128, 784]).
	size mismatch for fc2.weight: copying a param with shape torch.Size([1, 128]) from checkpoint, the shape in current model is torch.Size([64, 128]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([1]) from checkpoint, the shape in current model is torch.Size([64]).