In [1]:
import pandas as pd
import torch.nn as nn
import pickle
import torch
from torchvision import models
from torchvision.models import detection, resnet50, ResNet50_Weights
import os
import numpy as np
import cv2
from torchvision import transforms
import pymc3 as pm
import theano.tensor as tt
from sklearn.preprocessing import LabelEncoder
import scipy

In [2]:
# Notebook-wide settings shared by the model-loading and scoring cells.
CONFIGS = {
    # Use the GPU when one is available, otherwise fall back to the CPU.
    # The model loaders pass this value as map_location and as the .to() target.
    "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
    # specify ImageNet mean and standard deviation
    "IMG_MEAN": [0.485, 0.456, 0.406],
    "IMG_STD": [0.229, 0.224, 0.225],
    # NOTE(review): the MC-dropout / confidence settings below are not read by
    # any cell visible in this notebook -- confirm they are still needed.
    "MC_DROPOUT_ENABLED": False,  # Switch to enable/disable MC Dropout for confidence score
    "NUM_DROPOUT_RUNS": 3,
    "CONFIDENCE_THRESHOLD": 0,
    # Side length (pixels) of the square images fed to each model.
    "BIG_MODEL_IMG_SIZE": 320,
    "SMALL_MODEL_IMG_SIZE": 60,
    # Mean of the normal noise used to fill score columns that the small model
    # does not produce but the big model does.
    "MEAN_PRIOR": -15,
}

# Big model

## Model loading

In [3]:
class MultiHeadResNet_BigModel(nn.Module):
    """ResNet-50 feature extractor shared by four classification heads and a box head.

    The backbone's own classifier is replaced by an identity layer, so every
    head receives the pooled backbone features directly.

    Args:
        num_classes_prdtype: output size of the product-type head.
        num_classes_weight: output size of the weight head.
        num_classes_halal: output size of the halal-status head.
        num_classes_healthy: output size of the health-status head.
    """

    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
        super().__init__()

        # Build the backbone and strip its final fully connected layer.
        backbone = models.resnet50(weights=ResNet50_Weights.DEFAULT)
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.base_model = backbone

        # One linear layer per prediction target; the box head always emits 4 values.
        self.fc_prdtype = nn.Linear(feature_dim, num_classes_prdtype)
        self.fc_weight = nn.Linear(feature_dim, num_classes_weight)
        self.fc_halal = nn.Linear(feature_dim, num_classes_halal)
        self.fc_healthy = nn.Linear(feature_dim, num_classes_healthy)
        self.fc_bbox = nn.Linear(feature_dim, 4)

    def forward(self, x):
        """Return the tuple (prdtype, weight, halal, healthy, box) of head outputs for ``x``."""
        features = self.base_model(x)
        return (
            self.fc_prdtype(features),
            self.fc_weight(features),
            self.fc_halal(features),
            self.fc_healthy(features),
            self.fc_bbox(features),
        )

    
# load label encoder 
def load_label_encoder_big_model(base_dir="NN_model/regularized"):
    """Load the four pickled label encoders that belong to the big model.

    Args:
        base_dir: directory containing ``le_prdtype.pickle``, ``le_weight.pickle``,
            ``le_halal.pickle`` and ``le_healthy.pickle``. Defaults to the
            location this notebook has always read from.

    Returns:
        Tuple ``(le_prdtype, le_weight, le_halal, le_healthy)``.

    Raises:
        FileNotFoundError: if any of the four pickle files is missing.
    """
    def _load(stem):
        # `with` closes the handle promptly instead of leaving it to the GC.
        # SECURITY: unpickling executes arbitrary code -- only load files from a trusted source.
        with open(f"{base_dir}/le_{stem}.pickle", "rb") as handle:
            return pickle.load(handle)

    return tuple(_load(stem) for stem in ("prdtype", "weight", "halal", "healthy"))

# Module-level encoders: load_model() below reads these globals to size the model heads.
# NOTE: the small-model section later rebinds the same four names.
le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_big_model()

# Load the trained MultiHeadResNet model
def load_model():
    """Build the big multi-head model, restore its trained weights and return it in eval mode.

    Head sizes come from the module-level label encoders (``le_prdtype``,
    ``le_weight``, ``le_halal``, ``le_healthy``); the target device comes from
    ``CONFIGS['DEVICE']``.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    # One output unit per class known to each label encoder.
    head_sizes = {
        "num_classes_prdtype": len(le_prdtype.classes_),
        "num_classes_weight": len(le_weight.classes_),
        "num_classes_halal": len(le_halal.classes_),
        "num_classes_healthy": len(le_healthy.classes_),
    }
    net = MultiHeadResNet_BigModel(**head_sizes)

    model_path = 'NN_model/regularized/multi_head_model.pth'
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")

    device = CONFIGS['DEVICE']
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.to(device)
    net.eval()
    return net

# Instantiate the big model once; load_model() returns it already in eval mode.
big_model = load_model()

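**Security note:** the label encoders above are loaded with `pickle`, which can execute arbitrary code when it reads an untrusted file. Only load pickles from a trusted source. See https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations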


## Scoring on main imgs

In [4]:
# Load the big model's stored per-class scores for the main images and preview them.
# NOTE(review): the values look like unnormalised head outputs (logits) -- confirm.
main_imgs_results_big_model = pd.read_csv("NN_model/regularized/main_imgs_results_big_model.csv")
main_imgs_results_big_model.head()

Unnamed: 0,Filename,CorrectTotalLabel,ProductType_AdultMilk,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_Book,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,...,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,HealthStatus_Healthy,HealthStatus_NonHealthy
0,4351699431915_.pic.jpg,BiscuitsCrackersCookies_1-99g_NonHalal_NonHealthy,-2.980538,-3.308298,-2.595321,-3.625588,7.02751,-2.422827,-3.109597,-3.762012,...,-1.151647,-2.482224,-2.107722,-1.575631,-1.180781,-1.751149,-2.099822,2.551977,-1.88407,1.546358
1,20231222_0586.jpg,Salt_500-599g_Halal_NonHealthy,-3.421535,-3.006071,-4.226384,-2.19471,-2.810256,-3.388601,-2.844199,-3.704459,...,-1.613168,5.821902,-1.478095,-1.162005,-1.946283,-2.333418,1.76083,-1.721149,-1.84259,1.323182
2,IMG_5338_jpeg.rf.aea9f974feca334ffdde3f65c92c0...,BiscuitsCrackersCookies_100-199g_Halal_NonHealthy,-4.228448,-3.889838,-3.847276,-3.357298,6.26102,-3.785935,-2.579459,-4.610152,...,-1.630664,-1.779001,-1.133155,-2.384766,-1.569428,-2.810149,1.871931,-2.075112,-2.07936,1.580235
3,IMG_5598_jpeg.rf.9c5c4f3282b0276f142eb16136e8a...,InstantNoodles_400-499g_Halal_NonHealthy,-2.723399,-2.619614,-3.779717,-2.394722,-0.482245,-2.498887,-2.311876,-3.45186,...,6.744938,-2.034772,-1.241825,-1.376141,-1.60818,-1.969442,1.224781,-1.371373,-1.73341,1.203111
4,20240123_5_1072(2).jpg,OtherBakingNeeds_500-599g_Halal_NonHealthy,-1.977953,-4.394702,-4.936197,-1.883358,-2.965535,-4.753546,-3.486364,-5.080432,...,-1.218506,7.024712,-0.215257,-1.426052,-2.467675,-2.280322,1.363083,-1.556793,-1.938033,1.801286


In [5]:
# Rename the first two columns (by position) to 'filepath' and 'label';
# every remaining column keeps its existing name.
new_columns = ['filepath', 'label'] + list(main_imgs_results_big_model.columns[2:])
main_imgs_results_big_model.columns = new_columns



In [6]:
# Sanity check: (rows, columns) of the big-model score table.
main_imgs_results_big_model.shape

(4458, 98)

## Scoring on new imgs

In [7]:
# new_imgs_df = pd.read_csv("GPT_model/chatgpt_prediction.csv")
# new_imgs_df.head()

In [8]:
# transforms_test = transforms.Compose([
#     transforms.ToPILImage(),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
# ])

In [9]:
# master_df = pd.read_csv("../../master_list.csv")
# master_df.head()

In [10]:
# new_imgs_results_big_model = []  # List to store the results

# for idx, row in new_imgs_df.iterrows():
#     image_path = "../../all_images/" + row['img_filename']
#     frame = cv2.imread(image_path)

#     # Preprocessing steps
#     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     frame = cv2.resize(frame, (CONFIGS['BIG_MODEL_IMG_SIZE'], CONFIGS['BIG_MODEL_IMG_SIZE']))
#     frame = frame.transpose((2, 0, 1))
#     frame = torch.from_numpy(frame).float()
#     frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

#     # Perform prediction
#     with torch.no_grad():
#         out1, out2, out3, out4, _ = big_model(frame)

#     # reference the correct label from master list
#     tmp_correct_label = master_df.loc[master_df['filepath'] == row['img_filename'], 'label'].iloc[0]
        
#     # Extract and store the results
#     prediction_row = [row['img_filename'], tmp_correct_label]
#     prediction_row.extend(out1.cpu().numpy().flatten())
#     prediction_row.extend(out2.cpu().numpy().flatten())
#     prediction_row.extend(out3.cpu().numpy().flatten())
#     prediction_row.extend(out4.cpu().numpy().flatten())
#     new_imgs_results_big_model.append(prediction_row)


# # Define column names for the new DataFrame
# column_names = ['filepath', 'label']
# big_model_pred_col_name_original = main_imgs_results_big_model.columns[2:].tolist()
# column_names += big_model_pred_col_name_original

# # Create the DataFrame
# new_imgs_results_big_model = pd.DataFrame(new_imgs_results_big_model, columns=column_names)
# new_imgs_results_big_model.head()

In [11]:
# new_imgs_results_big_model.shape

## All scorings from big model

In [12]:
# Merging in scores for new images is currently disabled; the original steps
# are kept below, commented out, for reference.
# main_imgs_results_big_model['img_type'] = "existing"
# new_imgs_results_big_model['img_type'] = "new"
# all_imgs_results_big_model = pd.concat([main_imgs_results_big_model, new_imgs_results_big_model], axis=0)
# all_imgs_results_big_model.reset_index(drop=True, inplace=True)
# all_imgs_results_big_model.head()
# NOTE: this binds a second name to the same DataFrame object (no copy is made),
# so in-place changes through either name affect both.
all_imgs_results_big_model = main_imgs_results_big_model

In [13]:
# Preview the last rows of the combined big-model score table.
all_imgs_results_big_model.tail()

Unnamed: 0,filepath,label,ProductType_AdultMilk,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_Book,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,...,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,HealthStatus_Healthy,HealthStatus_NonHealthy
4453,20240123_3_0492.jpg,OtherNoodles_900-999g_NonHalal_NonHealthy,-3.181636,-4.504703,-5.402472,-3.044354,-4.439531,-4.878765,-2.956285,-3.243633,...,-1.679569,-1.52478,0.87902,-2.410152,-1.714454,4.855945,-1.705368,1.654424,-2.420522,2.049949
4454,20231222_0263.jpg,OtherDriedFood_100-199g_NonHalal_NonHealthy,-3.458481,-3.426337,-3.52755,-2.412646,-1.571299,-2.891962,-2.406638,-3.927027,...,-2.120973,-0.733946,-2.134841,-1.031203,-1.441976,-1.827916,-1.997735,1.786284,-1.699952,1.44201
4455,IMG_5420_jpeg.rf.bdecaca9965246c4682627af98f1c...,BiscuitsCrackersCookies_100-199g_Halal_NonHealthy,-3.503671,-3.287668,-2.600997,-1.687767,7.061292,-3.095221,-2.792612,-3.632293,...,-1.807092,-1.729294,-1.077936,-1.476647,-1.492334,-1.390535,1.98238,-1.915323,-2.026595,1.700429
4456,IMG_5438_jpeg.rf.bc5aba94d5ad3d3ee1db710558dbf...,BiscuitsCrackersCookies_300-399g_Halal_NonHealthy,-3.856755,-2.737113,-3.314206,-0.45776,6.340006,-3.908571,-3.668011,-4.602848,...,0.522122,-2.509103,-1.624152,-1.068586,-1.120196,-1.54331,0.795455,-0.362309,-2.024193,1.555775
4457,IMG_5662-1-_jpeg.rf.d08a41798064ddc148cbee7f3e...,InstantNoodles_400-499g_Halal_NonHealthy,-3.276417,-3.028646,-3.449257,-2.73601,-2.179122,-3.175209,-2.873602,-3.705969,...,6.24001,-2.045863,-2.178113,-1.48734,-1.931845,-1.863674,1.296124,-1.84547,-2.147846,1.619118


In [14]:
# Sanity check: should match the main-image table while the merge above is disabled.
all_imgs_results_big_model.shape

(4458, 98)

In [15]:
# Persist the big-model scores. index=True also writes the row index as an
# unnamed leading column.
# NOTE(review): code that reads this file back and renames columns by position
# would then hit the index column first -- confirm readers pass index_col=0.
all_imgs_results_big_model.to_csv("NN_model/regularized/all_imgs_results_big_model.csv", index=True)

In [None]:
class MultiHeadResNet_SmallModel(nn.Module):
    """ResNet-18 feature extractor shared by four classification heads.

    The backbone's own classifier is replaced by an identity layer, so every
    head receives the pooled backbone features directly. Unlike the big model,
    there is no bounding-box head.

    Args:
        num_classes_prdtype: output size of the product-type head.
        num_classes_weight: output size of the weight head.
        num_classes_halal: output size of the halal-status head.
        num_classes_healthy: output size of the health-status head.
    """

    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the explicit weights enum
        # below selects the same ImageNet checkpoint and matches the `weights=`
        # style already used for the big model.
        self.base_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        num_ftrs = self.base_model.fc.in_features
        self.base_model.fc = nn.Identity()

        # Define custom fully connected layers for each prediction head
        self.fc_prdtype = nn.Linear(num_ftrs, num_classes_prdtype)
        self.fc_weight = nn.Linear(num_ftrs, num_classes_weight)
        self.fc_halal = nn.Linear(num_ftrs, num_classes_halal)
        self.fc_healthy = nn.Linear(num_ftrs, num_classes_healthy)

    def forward(self, x):
        """Return the tuple (prdtype, weight, halal, healthy) of head outputs for ``x``."""
        x = self.base_model(x)
        prdtype = self.fc_prdtype(x)
        weight = self.fc_weight(x)
        halal = self.fc_halal(x)
        healthy = self.fc_healthy(x)
        return prdtype, weight, halal, healthy

    
# load label encoder 
def load_label_encoder_small_model(base_dir="../small_model/output"):
    """Load the four pickled label encoders that belong to the small model.

    Args:
        base_dir: directory containing ``le_prdtype.pickle``, ``le_weight.pickle``,
            ``le_halal.pickle`` and ``le_healthy.pickle``. Defaults to the
            location this notebook has always read from.

    Returns:
        Tuple ``(le_prdtype, le_weight, le_halal, le_healthy)``.

    Raises:
        FileNotFoundError: if any of the four pickle files is missing.
    """
    def _load(stem):
        # `with` closes the handle promptly instead of leaving it to the GC.
        # SECURITY: unpickling executes arbitrary code -- only load files from a trusted source.
        with open(f"{base_dir}/le_{stem}.pickle", "rb") as handle:
            return pickle.load(handle)

    return tuple(_load(stem) for stem in ("prdtype", "weight", "halal", "healthy"))

# Rebinds the module-level encoder names to the small model's encoders.
# NOTE: from here on these globals no longer describe the big model.
le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

# Load the trained MultiHeadResNet model
def load_model():
    """Build the small multi-head model, restore its trained weights and return it in eval mode.

    NOTE: this definition shares its name with the big-model loader defined
    earlier in the notebook and replaces it once this cell runs.

    Head sizes come from the module-level label encoders (``le_prdtype``,
    ``le_weight``, ``le_halal``, ``le_healthy``); the target device comes from
    ``CONFIGS['DEVICE']``.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    # One output unit per class known to each label encoder.
    head_sizes = {
        "num_classes_prdtype": len(le_prdtype.classes_),
        "num_classes_weight": len(le_weight.classes_),
        "num_classes_halal": len(le_halal.classes_),
        "num_classes_healthy": len(le_healthy.classes_),
    }
    net = MultiHeadResNet_SmallModel(**head_sizes)

    model_path = '../small_model/output/multi_head_model.pth'
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")

    device = CONFIGS['DEVICE']
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.to(device)
    net.eval()
    return net
 
# Instantiate the small model; at this point load_model refers to the small-model loader defined just above.
small_model = load_model()

In [None]:
# Read the list of new images and overwrite their label columns with a single
# existing class.
# ADHOC: change the new imgs to existing type
new_imgs_df = (
    pd.read_csv("../small_model/new_imgs_list.csv")
    .reset_index(drop=True)
    .assign(
        label='AdultMilk_1-99g_Halal_NonHealthy',
        ProductType='AdultMilk',
        Weight='1-99g',
        HalalStatus='Halal',
        HealthStatus='NonHealthy',
    )
)

new_imgs_df.head()

In [None]:
# Keep only the small-model scores whose file appears in the new-image list.
# The non-inplace reset_index returns a fresh frame, so later column assignments
# on it do not trigger pandas' SettingWithCopyWarning (the previous in-place
# reset kept the "copy of a slice" marker set by the boolean .loc filter).
new_imgs_results_small_model = (
    pd.read_csv("../small_model/new_imgs_results_small_model.csv")
    .loc[lambda scores: scores["Filename"].isin(new_imgs_df["filepath"])]
    .reset_index(drop=True)
)
new_imgs_results_small_model.head()

In [None]:
# Sanity check: (rows, columns) after filtering to the new-image list.
new_imgs_results_small_model.shape

In [None]:
# Rename the first two columns (by position) to 'filepath' and 'label';
# every remaining column keeps its existing name.
new_columns = ['filepath', 'label'] + list(new_imgs_results_small_model.columns[2:])
new_imgs_results_small_model.columns = new_columns
new_imgs_results_small_model.head()

In [None]:
# Columns present in the big-model table but missing from the small-model table.
# They are collected in the big model's own column order: iterating a set of
# strings can produce a different order in every kernel session, which changed
# both the resulting column order and which random draws landed in which column.
new_prdtype = [
    col
    for col in all_imgs_results_big_model.columns
    if col not in new_imgs_results_small_model.columns
]

# Fill each missing column with normal noise centred on the configured prior mean
# (variance 0.1). An empty list simply skips the loop.
# NOTE(review): no random seed is set in the visible cells, so these values
# differ from run to run.
for col in new_prdtype:
    new_imgs_results_small_model[col] = np.random.normal(
        loc=CONFIGS["MEAN_PRIOR"],
        scale=np.sqrt(0.1),
        size=new_imgs_results_small_model.shape[0],
    )

new_imgs_results_small_model.head()  # Display the updated DataFrame for verification

In [None]:
# Sanity check: column count after adding the columns missing from the small model.
new_imgs_results_small_model.shape

In [None]:
# Master list of main images; the scoring loop below reads its 'filepath' and 'label' columns.
main_imgs_master_list = pd.read_csv("../master_list.csv")
main_imgs_master_list.head()

In [None]:
# Score every image in the master list with the small model and collect the raw
# head outputs into one DataFrame (one row per image).

# The only other definition of `transforms_test` in this notebook is commented
# out, so a fresh kernel would hit a NameError in the loop; define it here.
# NOTE(review): the frame reaching ToPILImage is a float tensor built from the
# resized image array -- confirm this matches the preprocessing used in training.
transforms_test = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
])

main_imgs_results_small_model = []  # List to store the results
le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

for idx, row in main_imgs_master_list.iterrows():
    image_path = "../all_images/" + row['filepath']
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with the offending path instead of an opaque error in cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Preprocessing steps: BGR -> RGB, resize to the small model's input size,
    # HWC -> CHW, then the tensor transform pipeline plus a batch dimension.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (CONFIGS['SMALL_MODEL_IMG_SIZE'], CONFIGS['SMALL_MODEL_IMG_SIZE']))
    frame = frame.transpose((2, 0, 1))
    frame = torch.from_numpy(frame).float()
    frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

    # Perform prediction
    with torch.no_grad():
        out1, out2, out3, out4 = small_model(frame)

    # Extract and store the results: identifiers first, then each head's
    # outputs flattened in head order (product type, weight, halal, healthy).
    prediction_row = [row['filepath'], row['label']]
    for head_output in (out1, out2, out3, out4):
        prediction_row.extend(head_output.cpu().numpy().flatten())
    main_imgs_results_small_model.append(prediction_row)


# Define column names for the new DataFrame, in the same head order as above
column_names = ['filepath', 'label']
column_names += ['ProductType_' + name for name in le_prdtype.classes_]
column_names += ['Weight_' + name for name in le_weight.classes_]
column_names += ['HalalStatus_' + name for name in le_halal.classes_]
column_names += ['HealthStatus_' + name for name in le_healthy.classes_]


# Create the DataFrame
main_imgs_results_small_model = pd.DataFrame(main_imgs_results_small_model, columns=column_names)
main_imgs_results_small_model.head()

In [None]:
# Sanity check: one row per master-list image, two id columns plus one column per class.
main_imgs_results_small_model.shape

In [None]:
# Columns present in the big-model table but missing from the small-model table.
# They are collected in the big model's own column order: iterating a set of
# strings can produce a different order in every kernel session, which changed
# both the resulting column order and which random draws landed in which column.
new_prdtype = [
    col
    for col in all_imgs_results_big_model.columns
    if col not in main_imgs_results_small_model.columns
]

# Fill each missing column with normal noise centred on the configured prior mean
# (variance 0.1). An empty list simply skips the loop.
# NOTE(review): no random seed is set in the visible cells, so these values
# differ from run to run.
for col in new_prdtype:
    main_imgs_results_small_model[col] = np.random.normal(
        loc=CONFIGS["MEAN_PRIOR"],
        scale=np.sqrt(0.1),
        size=main_imgs_results_small_model.shape[0],
    )

main_imgs_results_small_model.head()  # Display the updated DataFrame for verification

In [None]:
# Sanity check: column count after adding the columns missing from the small model.
main_imgs_results_small_model.shape

In [None]:
# Tag each table with its origin, then stack them into a single frame with a
# fresh 0..n-1 index.
main_imgs_results_small_model['img_type'] = "existing"
new_imgs_results_small_model['img_type'] = "new"
all_imgs_results_small_model = pd.concat(
    [main_imgs_results_small_model, new_imgs_results_small_model],
    axis=0,
    ignore_index=True,
)
all_imgs_results_small_model.head()

In [None]:
# Preview the last rows, which come from the new-image table appended by the concat above.
all_imgs_results_small_model.tail()

In [None]:
# Sanity check: row count should equal main-image rows plus new-image rows.
all_imgs_results_small_model.shape

In [None]:
# Persist the combined small-model scores. index=True also writes the row index
# as an unnamed leading column.
# NOTE(review): this writes to the current working directory, whereas the
# big-model results are written under NN_model/regularized/ -- confirm intended.
all_imgs_results_small_model.to_csv("all_imgs_results_small_model.csv", index=True)