# Native TIMM Inference

In [4]:
# Path to your checkpoint file
checkpoint_path = "../model/train_EfficientNetB0_2024-12-04_18-34-06/timm/timm_image/pytorch_model.bin"
image_path = "e:\\Current_Workdir\\palm-fruit-classification\\data\\intermediate\\valid\\empty_bunch\\IMG_20220803_112710_crop_0_jpg.rf.bfef2ca25d24fefe9a8c64c68c5bb66f.jpg"

# Configuration from your JSON
config = {
    "architecture": "efficientnet_b0",  # EfficientNet B0 model
    "num_classes": 6,                   # Number of classes for your task
    "num_features": 1280,               # Features size for EfficientNet B0
    "pretrained_cfg": {
        "tag": "ra_in1k",               # Pretraining on ImageNet (RA)
        "custom_load": False,
        "input_size": [3, 224, 224],    # Input size
        "fixed_input_size": False,
        "interpolation": "bicubic",
        "crop_pct": 0.875,              # Crop percentage for training
        "crop_mode": "center",          # Center cropping
        "mean": [0.485, 0.456, 0.406],  # ImageNet mean values
        "std": [0.229, 0.224, 0.225],   # ImageNet std values
        "num_classes": 1000,            # Default for ImageNet
        "pool_size": [7, 7],            # Pooling size after convolution
        "first_conv": "conv_stem",      # First convolutional layer
        "classifier": "classifier"      # Final classifier layer
    }
}

In [28]:
import timm
import torch
from torchvision import transforms
from PIL import Image

# Instantiate the model
model = timm.create_model(
    config["architecture"],  # Model architecture (EfficientNet B0)
    pretrained=False,        # Skip loading pretrained weights from timm
    num_classes=config["num_classes"],  # Adjust final layer for 6 classes
    global_pool="avg",       # Set global pooling (default is "avg" for EfficientNet)
)

# Load the checkpoint
checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))  # Adjust map_location as needed
if "state_dict" in checkpoint:
    state_dict = checkpoint["state_dict"]  # For structured checkpoint files
else:
    state_dict = checkpoint

# Strip prefixes if necessary (e.g., 'module.' when using DataParallel)
state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}

# Load weights into the model
model.load_state_dict(state_dict, strict=True)  # Use strict=True for strict matching

# Example preprocessing pipeline
input_size = config["pretrained_cfg"]["input_size"][1:]  # (224, 224)
mean = config["pretrained_cfg"]["mean"]
std = config["pretrained_cfg"]["std"]

transform = transforms.Compose([
    transforms.Resize(input_size, interpolation=transforms.InterpolationMode.BICUBIC),  # Resize the image to 224x224
    transforms.CenterCrop(int(input_size[0] * config["pretrained_cfg"]["crop_pct"])),   # Apply center crop
    transforms.ToTensor(),                                                              # Convert image to tensor
    transforms.Normalize(mean=mean, std=std),                                            # Normalize using ImageNet mean and std
])

# Example image (replace with your own image file path)
image = Image.open(image_path).convert("RGB")
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Inference
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model(image_tensor)  # Forward pass
    predictions = torch.softmax(outputs, dim=1)  # Convert logits to probabilities

print("Predictions:", predictions)


  checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))  # Adjust map_location as needed


RuntimeError: Error(s) in loading state_dict for EfficientNet:
	Missing key(s) in state_dict: "classifier.weight", "classifier.bias". 

In [30]:
model.state_dict().keys()

odict_keys(['conv_stem.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'blocks.0.0.conv_dw.weight', 'blocks.0.0.bn1.weight', 'blocks.0.0.bn1.bias', 'blocks.0.0.bn1.running_mean', 'blocks.0.0.bn1.running_var', 'blocks.0.0.bn1.num_batches_tracked', 'blocks.0.0.se.conv_reduce.weight', 'blocks.0.0.se.conv_reduce.bias', 'blocks.0.0.se.conv_expand.weight', 'blocks.0.0.se.conv_expand.bias', 'blocks.0.0.conv_pw.weight', 'blocks.0.0.bn2.weight', 'blocks.0.0.bn2.bias', 'blocks.0.0.bn2.running_mean', 'blocks.0.0.bn2.running_var', 'blocks.0.0.bn2.num_batches_tracked', 'blocks.1.0.conv_pw.weight', 'blocks.1.0.bn1.weight', 'blocks.1.0.bn1.bias', 'blocks.1.0.bn1.running_mean', 'blocks.1.0.bn1.running_var', 'blocks.1.0.bn1.num_batches_tracked', 'blocks.1.0.conv_dw.weight', 'blocks.1.0.bn2.weight', 'blocks.1.0.bn2.bias', 'blocks.1.0.bn2.running_mean', 'blocks.1.0.bn2.running_var', 'blocks.1.0.bn2.num_batches_tracked', 'blocks.1.0.se.conv_reduce.weigh

## Compared to autogluon

In [26]:
from autogluon.multimodal import MultiModalPredictor

predictor = MultiModalPredictor.load("e:\Current_Workdir\palm-fruit-classification\\autogluon_ver\model\\train_EfficientNetB2_100_trials_2024-12-06_11-02-58")

Load pretrained checkpoint: e:\Current_Workdir\palm-fruit-classification\autogluon_ver\model\train_EfficientNetB2_100_trials_2024-12-06_11-02-58\model.ckpt


In [40]:
image = Image.open(image_path).convert("RGB")
image
image_tensor = transform(image).unsqueeze(0)
image_tensor

tensor([[[[ 1.5468,  0.3652, -1.3130,  ...,  0.0398,  0.4851,  0.0912],
          [ 1.3070,  0.9303, -1.0562,  ...,  0.1426,  0.2111, -0.2856],
          [ 1.2557,  1.1187, -1.1075,  ...,  0.1597,  0.0227,  0.0056],
          ...,
          [ 0.3823,  0.5364,  0.2624,  ...,  1.7180,  1.9578,  2.2489],
          [ 1.8379,  1.8722,  2.0092,  ...,  1.9920,  2.2489,  2.0777],
          [ 2.2489,  2.2489,  2.0263,  ...,  1.4440,  1.5810,  2.2318]],

         [[ 1.4307,  0.2227, -1.4405,  ...,  0.2227,  0.7304,  0.3277],
          [ 1.2206,  0.8354, -1.1779,  ...,  0.3102,  0.3978, -0.1099],
          [ 1.2381,  1.0980, -1.2129,  ...,  0.3277,  0.1702,  0.1527],
          ...,
          [-0.1450, -0.0049, -0.3725,  ...,  0.9055,  0.9930,  1.5182],
          [ 0.9930,  1.0455,  1.3431,  ...,  1.2381,  1.3431,  0.9405],
          [ 1.4832,  1.5357,  1.3782,  ...,  0.6429,  0.4503,  1.0105]],

         [[ 1.4200,  0.2173, -1.3687,  ...,  0.3219,  0.7751,  0.3742],
          [ 1.2457,  0.8622, -

In [48]:
proba = predictor.predict_proba({'image': [image_path]}, realtime=True)
print(proba)

[[3.4552076e-04 9.9829370e-01 1.2130731e-03 6.1208465e-07 1.4657085e-04
  6.1448026e-07]]


In [49]:
pred = predictor.predict({'image': [image_path]}, realtime=True)
pred

array(['overripe'], dtype=object)

# 2nd Try

In [57]:
import timm
import torch
import json
from torchvision import transforms
from PIL import Image

with open("../model/train_EfficientNetB2_100_trials_2024-12-06_11-02-58/timm/timm_image/config.json", 'r') as file:
    config = json.load(file)

checkpoint_filepath = "../model/train_EfficientNetB2_100_trials_2024-12-06_11-02-58/timm/timm_image/pytorch_model.bin"

# model = timm.create_model(model_name=config["architecture"],
#                           pretrained=True,
#                           num_classes=config["num_classes"],
#                           pretrained_cfg=config["pretrained_cfg"],
#                           pretrained_cfg_overlay=dict(file=checkpoint_filepath))



# Check the config.json, there are 2 num_classes

# model = timm.create_model(
#     model_name="timm/efficientnet_b2.ra_in1k",
#     num_classes=6,  # Ensure this matches the number of output classes in your model
#     checkpoint_path=None
# )

# Modify the classifier layer if necessary
model.classifier = torch.nn.Linear(in_features=model.classifier.in_features, out_features=6)

# Load the checkpoint after modifying the classifier
checkpoint = torch.load(checkpoint_filepath)
model.load_state_dict(checkpoint, strict=False)


# Example preprocessing pipeline
input_size = config["pretrained_cfg"]["input_size"][1:]  # (224, 224)
mean = config["pretrained_cfg"]["mean"]
std = config["pretrained_cfg"]["std"]

transform = transforms.Compose([
    transforms.Resize(input_size, interpolation=transforms.InterpolationMode.BICUBIC),  # Resize the image to 224x224
    transforms.CenterCrop(int(input_size[0] * config["pretrained_cfg"]["crop_pct"])),   # Apply center crop
    transforms.ToTensor(),                                                              # Convert image to tensor
    transforms.Normalize(mean=mean, std=std),                                            # Normalize using ImageNet mean and std
])

# Example image (replace with your own image file path)
image = Image.open(image_path).convert("RGB")
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Inference
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model(image_tensor)  # Forward pass
    predictions = torch.softmax(outputs, dim=1)  # Convert logits to probabilities

print("Predictions:", predictions)

Predictions: tensor([[0.1547, 0.1680, 0.1689, 0.1790, 0.1568, 0.1726]])


  checkpoint = torch.load(checkpoint_filepath)


In [14]:
print(config)

{'architecture': 'efficientnet_b2', 'num_classes': 6, 'num_features': 1408, 'pretrained_cfg': {'tag': 'ra_in1k', 'custom_load': False, 'input_size': [3, 256, 256], 'test_input_size': [3, 288, 288], 'fixed_input_size': False, 'interpolation': 'bicubic', 'crop_pct': 1.0, 'crop_mode': 'center', 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000, 'pool_size': [8, 8], 'first_conv': 'conv_stem', 'classifier': 'classifier'}}
