In [37]:
import pandas as pd
import torch.nn as nn
import pickle
import torch
from torchvision import models
from torchvision.models import detection, resnet50, ResNet50_Weights
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
import cv2
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import scipy

import warnings
warnings.filterwarnings('ignore')

In [33]:
CONFIGS = {
    # determine the current device and based on that set the pin memory
    # flag
    "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
    # specify ImageNet mean and standard deviation
    "IMG_MEAN": [0.485, 0.456, 0.406],
    "IMG_STD": [0.229, 0.224, 0.225],
    "MC_DROPOUT_ENABLED": False,  # Switch to enable/disable MC Dropout for confidence score
    "NUM_DROPOUT_RUNS": 3,
    "CONFIDENCE_THRESHOLD": 0,
    "BIG_MODEL_IMG_SIZE": 320,
    "SMALL_MODEL_IMG_SIZE": 60,
    "DATA_BASE_PATH": os.path.join('../..', 'rshiny', 'www', 'all_images'),
    "MODEL_PATH": 'three_labels_full_data'
}

# Big model

## Model loading

In [3]:
class MultiHeadResNet_BigModel(nn.Module):
    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal):
        super(MultiHeadResNet_BigModel, self).__init__()
        self.base_model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
        num_ftrs = self.base_model.fc.in_features
        self.base_model.fc = nn.Identity()

        # Define custom fully connected layers for each prediction head
        self.fc_prdtype = nn.Linear(num_ftrs, num_classes_prdtype)
        self.fc_weight = nn.Linear(num_ftrs, num_classes_weight)
        self.fc_halal = nn.Linear(num_ftrs, num_classes_halal)

    def forward(self, x):
        x = self.base_model(x)
        prdtype = self.fc_prdtype(x)
        weight = self.fc_weight(x)
        halal = self.fc_halal(x)
        
        return prdtype, weight, halal

    
# load label encoder 
def load_label_encoder_big_model():
    le_prdtype = pickle.loads(open(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'le_prdtype.pickle'), "rb").read())
    le_weight = pickle.loads(open(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'le_weight.pickle'), "rb").read())
    le_halal = pickle.loads(open(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'le_halal.pickle'), "rb").read())
    
    return le_prdtype, le_weight, le_halal


le_prdtype, le_weight, le_halal = load_label_encoder_big_model()

# Load the trained MultiHeadResNet model
def load_model():
    # Verify the number of classes for each label
    num_classes_prdtype = len(le_prdtype.classes_)
    num_classes_weight = len(le_weight.classes_)
    num_classes_halal = len(le_halal.classes_)
    
    # print(num_classes_prdtype)
    # print(num_classes_healthy)

    custom_resnet_model = MultiHeadResNet_BigModel(
        num_classes_prdtype=num_classes_prdtype,
        num_classes_weight=num_classes_weight,
        num_classes_halal=num_classes_halal
    )

    model_path = os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'multi_head_model.pth')
    # print("test1")
    if os.path.exists(model_path):
        custom_resnet_model.load_state_dict(torch.load(model_path, map_location=CONFIGS['DEVICE']))
    else:
        raise FileNotFoundError(f"Model file not found: {model_path}")
    # print("test2")
    custom_resnet_model.to(CONFIGS['DEVICE'])
    custom_resnet_model.eval()
    return custom_resnet_model

big_model = load_model()

## Scoring on main imgs

In [5]:
main_imgs_results_big_model = pd.read_csv(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'main_imgs_results_big_model.csv'))
main_imgs_results_big_model.head()

Unnamed: 0,Filename,CorrectTotalLabel,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_300-399g,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal
0,20240219_2_0620.jpg,KetchupChilliSauce_300-399g_Halal,-3.770961,-3.923068,-1.542484,-1.372561,-2.255736,-5.384074,-3.338987,-6.481162,...,6.579277,-5.789494,-1.244859,-2.322777,-0.750609,-2.731198,-4.670573,-4.443047,4.061079,-4.449571
1,20240219_15_0045.jpg,PastaSauce_200-299g_NonHalal,-5.501962,-3.865963,-1.66158,-1.223719,-3.097222,-1.10716,-6.873971,-5.087734,...,-1.618527,-5.332308,-3.494469,0.316743,-0.75789,-7.198105,-5.923491,-5.879331,-5.076919,5.382441
2,IMG_3442_jpeg.rf.3f2785b9cb1ba9a272b60ead15de6...,Nuts_300-399g_Halal,-4.538339,-2.311235,-1.556526,-0.78275,-3.50159,-7.644834,-4.022914,-6.964644,...,5.829514,-4.649271,-2.861275,-2.124316,-0.680091,-3.347484,-2.997769,-5.331292,3.317497,-3.643291
3,20231222_0421.jpg,SweetsChocolatesOthers_200-299g_Halal,-2.750843,-2.189766,-3.055994,-2.458558,-1.51928,-3.514293,-5.760054,-9.432798,...,-2.668614,-4.271961,-1.52366,-3.031965,-2.284868,-1.966151,-4.765867,-4.870907,3.734846,-4.243295
4,IMG_3285_jpeg.rf.bba6f02438af17517269e6ef4add7...,BeehoonVermicelli_400-499g_Halal,-3.983482,-5.179867,9.381563,-4.558546,-2.400309,-8.005127,-6.154325,-10.330239,...,-2.955335,-6.682698,8.7925,-2.848177,-1.472577,-4.678142,-2.91611,-0.677246,3.790935,-4.050174


In [6]:
# Create a copy of the current column names to a list
new_columns = main_imgs_results_big_model.columns.tolist()

# Modify the first two elements
new_columns[0] = 'filepath'
new_columns[1] = 'label'

# Assign the modified list of column names back to the DataFrame
main_imgs_results_big_model.columns = new_columns


In [7]:
big_model_pred_col_name = [col for col in main_imgs_results_big_model.columns if col.startswith('ProductType_')]

def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    return text

big_model_pred_col_name = [remove_prefix(element, 'ProductType_') for element in big_model_pred_col_name]
# big_model_pred_col_name

In [8]:
main_imgs_results_big_model.head()

Unnamed: 0,filepath,label,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_300-399g,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal
0,20240219_2_0620.jpg,KetchupChilliSauce_300-399g_Halal,-3.770961,-3.923068,-1.542484,-1.372561,-2.255736,-5.384074,-3.338987,-6.481162,...,6.579277,-5.789494,-1.244859,-2.322777,-0.750609,-2.731198,-4.670573,-4.443047,4.061079,-4.449571
1,20240219_15_0045.jpg,PastaSauce_200-299g_NonHalal,-5.501962,-3.865963,-1.66158,-1.223719,-3.097222,-1.10716,-6.873971,-5.087734,...,-1.618527,-5.332308,-3.494469,0.316743,-0.75789,-7.198105,-5.923491,-5.879331,-5.076919,5.382441
2,IMG_3442_jpeg.rf.3f2785b9cb1ba9a272b60ead15de6...,Nuts_300-399g_Halal,-4.538339,-2.311235,-1.556526,-0.78275,-3.50159,-7.644834,-4.022914,-6.964644,...,5.829514,-4.649271,-2.861275,-2.124316,-0.680091,-3.347484,-2.997769,-5.331292,3.317497,-3.643291
3,20231222_0421.jpg,SweetsChocolatesOthers_200-299g_Halal,-2.750843,-2.189766,-3.055994,-2.458558,-1.51928,-3.514293,-5.760054,-9.432798,...,-2.668614,-4.271961,-1.52366,-3.031965,-2.284868,-1.966151,-4.765867,-4.870907,3.734846,-4.243295
4,IMG_3285_jpeg.rf.bba6f02438af17517269e6ef4add7...,BeehoonVermicelli_400-499g_Halal,-3.983482,-5.179867,9.381563,-4.558546,-2.400309,-8.005127,-6.154325,-10.330239,...,-2.955335,-6.682698,8.7925,-2.848177,-1.472577,-4.678142,-2.91611,-0.677246,3.790935,-4.050174


In [9]:
big_model_pred_col_name_original = main_imgs_results_big_model.columns[2:].tolist()

## Scoring on new imgs

In [10]:
new_imgs_df = pd.read_csv("../../small_model/new_imgs_list.csv")
# master_list_df = pd.read_csv(os.path.join('../../', 'master_list.csv'))
# new_imgs_df = master_list_df[master_list_df['tag'] == 'TestforMode5']

# new_imgs_df.reset_index(drop=True, inplace=True)
new_imgs_df.head()

Unnamed: 0,filepath,label,ProductType,Weight,HalalStatus
0,5131704785418_.pic.jpg,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal
1,5141704785419_.pic.jpg,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal
2,5151704785420_.pic.jpg,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal
3,5161704785422_.pic.jpg,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal
4,5171704785423_.pic.jpg,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal


In [11]:
transforms_test = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
])

In [12]:
new_imgs_results_big_model = []  # List to store the results
# all_prdlabel_preds = []

for idx, row in new_imgs_df.iterrows():
    image_path = "../../small_model/new_imgs/" + row['filepath']
    # image_path = os.path.join(CONFIGS["DATA_BASE_PATH"], row['filepath'])
    frame = cv2.imread(image_path)

    # Preprocessing steps
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (CONFIGS['BIG_MODEL_IMG_SIZE'], CONFIGS['BIG_MODEL_IMG_SIZE']))
    frame = frame.transpose((2, 0, 1))
    frame = torch.from_numpy(frame).float()
    frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

    # Perform prediction
    with torch.no_grad():
        out1, out2, out3 = big_model(frame)

    # Extract and store the results
    prediction_row = [row['filepath'], row['label']]
    prediction_row.extend(out1[0].cpu().numpy().flatten())
    prediction_row.extend(out2[0].cpu().numpy().flatten())
    prediction_row.extend(out3[0].cpu().numpy().flatten())
    new_imgs_results_big_model.append(prediction_row)

    # tmp_pred = out1.argmax(1)
    # tmp_pred_prdtype = le_prdtype.inverse_transform([tmp_pred])
    # all_prdlabel_preds.append(tmp_pred_prdtype[0])


# Define column names for the new DataFrame
column_names = ['filepath', 'label']
# column_names += big_model_pred_col_name
column_names += ['ProductType_' + name for name in le_prdtype.classes_]
column_names += ['Weight_' + name for name in le_weight.classes_]
column_names += ['HalalStatus_' + name for name in le_halal.classes_]

# Create the DataFrame
new_imgs_results_big_model = pd.DataFrame(new_imgs_results_big_model, columns=column_names)
new_imgs_results_big_model.head()

Unnamed: 0,filepath,label,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_300-399g,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal
0,5131704785418_.pic.jpg,Sugar_400-499g_NonHalal,-1.36979,-0.791718,-3.165598,1.8161,-4.110961,-6.527517,-5.881391,-8.841886,...,-2.455622,-6.861689,-0.257473,-0.815519,-2.296088,-1.621289,-1.089674,-4.629367,3.969802,-4.264321
1,5141704785419_.pic.jpg,Sugar_400-499g_NonHalal,-1.487978,0.040975,-1.869397,1.661476,-3.855725,-5.735545,-5.643244,-7.634928,...,-1.423421,-6.430552,-1.748791,-1.28598,-2.981704,-2.240708,-1.579705,-4.820622,2.969624,-3.082175
2,5151704785420_.pic.jpg,Sugar_400-499g_NonHalal,-1.588575,-1.503945,-4.109354,0.707418,-3.338324,-5.911074,-5.650191,-8.993574,...,-2.670991,-6.898957,0.101646,-2.029566,-2.044275,-1.319966,-2.422107,-4.33051,3.292077,-3.462087
3,5161704785422_.pic.jpg,Sugar_400-499g_NonHalal,-1.098253,-1.152297,-3.055819,0.585201,-3.410969,-4.939645,-4.621831,-7.301677,...,-1.708692,-6.079738,1.81614,-1.845805,-1.767361,-1.356576,-2.841817,-3.340134,2.563766,-2.683725
4,5171704785423_.pic.jpg,Sugar_400-499g_NonHalal,-1.366606,-0.950215,-2.605671,1.589608,-3.533085,-6.064191,-5.561712,-7.526832,...,-1.929752,-6.055828,-0.854509,-1.21118,-1.74775,-1.750307,-0.869198,-3.988011,1.994555,-2.071695


In [None]:
# print("Accuracy on new images")
# print(sum(all_prdlabel_preds == new_imgs_df['ProductType']) / len(new_imgs_df))

In [None]:
# new_imgs_results_big_model['pred_prdtype'] = new_imgs_results_big_model[big_model_pred_col_name].idxmax(axis=1)
# target_column = 'pred_prdtype'
# columns = [target_column] + [col for col in new_imgs_results_big_model.columns if col != target_column]
# new_imgs_results_big_model = new_imgs_results_big_model[columns]
# new_imgs_results_big_model.head()

In [None]:
# if len(new_prdtype)==1:
#     new_imgs_results_big_model[new_prdtype[0]] = np.random.normal(loc=CONFIGS["MEAN_PRIOR"], scale=np.sqrt(0.1), size=new_imgs_results_big_model.shape[0])  # Initialize new columns

# new_imgs_results_big_model.head()  # Display the updated DataFrame for verificatio

## All scorings from big model

In [13]:
main_imgs_results_big_model['type'] = "existing"
new_imgs_results_big_model['type'] = "new"

# new_columns = [col.replace('ProductType_', '', 1) if col.startswith('ProductType_') else col for col in main_imgs_results_big_model.columns]
# main_imgs_results_big_model.columns = new_columns

all_imgs_results_big_model = pd.concat([main_imgs_results_big_model, new_imgs_results_big_model], axis=0)
all_imgs_results_big_model.reset_index(drop=True, inplace=True)
all_imgs_results_big_model.head()

Unnamed: 0,filepath,label,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,type
0,20240219_2_0620.jpg,KetchupChilliSauce_300-399g_Halal,-3.770961,-3.923068,-1.542484,-1.372561,-2.255736,-5.384074,-3.338987,-6.481162,...,-5.789494,-1.244859,-2.322777,-0.750609,-2.731198,-4.670573,-4.443047,4.061079,-4.449571,existing
1,20240219_15_0045.jpg,PastaSauce_200-299g_NonHalal,-5.501962,-3.865963,-1.66158,-1.223719,-3.097222,-1.10716,-6.873971,-5.087734,...,-5.332308,-3.494469,0.316743,-0.75789,-7.198105,-5.923491,-5.879331,-5.076919,5.382441,existing
2,IMG_3442_jpeg.rf.3f2785b9cb1ba9a272b60ead15de6...,Nuts_300-399g_Halal,-4.538339,-2.311235,-1.556526,-0.78275,-3.50159,-7.644834,-4.022914,-6.964644,...,-4.649271,-2.861275,-2.124316,-0.680091,-3.347484,-2.997769,-5.331292,3.317497,-3.643291,existing
3,20231222_0421.jpg,SweetsChocolatesOthers_200-299g_Halal,-2.750843,-2.189766,-3.055994,-2.458558,-1.51928,-3.514293,-5.760054,-9.432798,...,-4.271961,-1.52366,-3.031965,-2.284868,-1.966151,-4.765867,-4.870907,3.734846,-4.243295,existing
4,IMG_3285_jpeg.rf.bba6f02438af17517269e6ef4add7...,BeehoonVermicelli_400-499g_Halal,-3.983482,-5.179867,9.381563,-4.558546,-2.400309,-8.005127,-6.154325,-10.330239,...,-6.682698,8.7925,-2.848177,-1.472577,-4.678142,-2.91611,-0.677246,3.790935,-4.050174,existing


In [14]:
all_imgs_results_big_model.tail()

Unnamed: 0,filepath,label,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,type
6795,5181704785427_.pic.jpg,Sugar_400-499g_NonHalal,-1.377908,0.09118,-1.632268,1.002335,-4.120261,-5.830368,-5.332642,-7.506704,...,-5.943078,-1.66575,-1.995963,-2.011255,-2.154662,-1.143308,-3.908433,1.907253,-2.247237,new
6796,5191704785428_.pic.jpg,Sugar_400-499g_NonHalal,-0.83956,1.235949,-1.47462,1.01564,-4.21243,-7.026507,-6.368678,-8.751038,...,-6.61813,-1.491454,-1.509938,-2.112398,-2.180796,-0.953252,-4.570596,4.191818,-4.356444,new
6797,5201704785430_.pic.jpg,Sugar_400-499g_NonHalal,-1.020457,0.388841,-2.546081,0.934745,-4.004208,-6.861557,-5.770454,-8.013439,...,-6.25395,-1.46156,-1.545214,-2.092097,-1.715855,-0.618844,-4.096245,2.682148,-2.979331,new
6798,5211704785432_.pic.jpg,Sugar_400-499g_NonHalal,-0.757634,-0.13523,-2.41327,1.334452,-3.792645,-6.62431,-5.927798,-8.113428,...,-6.351583,-1.136914,-0.667426,-1.602268,-1.901348,-0.287858,-3.765054,2.599057,-2.821833,new
6799,5221704785433_.pic.jpg,Sugar_400-499g_NonHalal,-0.85107,-0.486691,-2.22301,1.650545,-3.659558,-6.880557,-5.434569,-8.241541,...,-6.629922,-0.97281,-1.773252,-2.044615,-1.141246,-0.445092,-3.565644,2.81203,-3.03054,new


In [15]:
all_imgs_results_big_model.head()

Unnamed: 0,filepath,label,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelli,ProductType_BiscuitsCrackersCookies,ProductType_BreakfastCereals,ProductType_CannedBakedBeans,ProductType_CannedBeefOtherMeats,ProductType_CannedBraisedPeanuts,...,Weight_3000-3999g,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,type
0,20240219_2_0620.jpg,KetchupChilliSauce_300-399g_Halal,-3.770961,-3.923068,-1.542484,-1.372561,-2.255736,-5.384074,-3.338987,-6.481162,...,-5.789494,-1.244859,-2.322777,-0.750609,-2.731198,-4.670573,-4.443047,4.061079,-4.449571,existing
1,20240219_15_0045.jpg,PastaSauce_200-299g_NonHalal,-5.501962,-3.865963,-1.66158,-1.223719,-3.097222,-1.10716,-6.873971,-5.087734,...,-5.332308,-3.494469,0.316743,-0.75789,-7.198105,-5.923491,-5.879331,-5.076919,5.382441,existing
2,IMG_3442_jpeg.rf.3f2785b9cb1ba9a272b60ead15de6...,Nuts_300-399g_Halal,-4.538339,-2.311235,-1.556526,-0.78275,-3.50159,-7.644834,-4.022914,-6.964644,...,-4.649271,-2.861275,-2.124316,-0.680091,-3.347484,-2.997769,-5.331292,3.317497,-3.643291,existing
3,20231222_0421.jpg,SweetsChocolatesOthers_200-299g_Halal,-2.750843,-2.189766,-3.055994,-2.458558,-1.51928,-3.514293,-5.760054,-9.432798,...,-4.271961,-1.52366,-3.031965,-2.284868,-1.966151,-4.765867,-4.870907,3.734846,-4.243295,existing
4,IMG_3285_jpeg.rf.bba6f02438af17517269e6ef4add7...,BeehoonVermicelli_400-499g_Halal,-3.983482,-5.179867,9.381563,-4.558546,-2.400309,-8.005127,-6.154325,-10.330239,...,-6.682698,8.7925,-2.848177,-1.472577,-4.678142,-2.91611,-0.677246,3.790935,-4.050174,existing


In [16]:
all_imgs_results_big_model.to_csv("output/all_imgs_results_big_model.csv", index=True)

# Small model

## Model training

In [17]:
CONFIGS_SMALLMODEL = {
    "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
    # specify ImageNet mean and standard deviation
    "IMG_MEAN": [0.485, 0.456, 0.406],
    "IMG_STD": [0.229, 0.224, 0.225],
    "INIT_LR": 1e-4,
    "NUM_EPOCHS": 30,
    "BATCH_SIZE": 16,
    # specify the loss weights
    "LABELS_PRDTYPE": 1.0,
    "LABELS_WEIGHT": 1.0,
    "LABELS_HALAL": 1.0,
    "LABELS_HEALTHY": 1.0,
    "MODEL_PATH": os.path.sep.join(["output/small_model_weights", "detector.pth"]),
    "LE_PATH_PRDTYPE": os.path.sep.join(["output/small_model_weights", "le_prdtype.pickle"]),
    "LE_PATH_WEIGHT": os.path.sep.join(["output/small_model_weights", "le_weight.pickle"]),
    "LE_PATH_HALAL": os.path.sep.join(["output/small_model_weights", "le_halal.pickle"]),
    "LE_PATH_HEALTHY": os.path.sep.join(["output/small_model_weights", "le_healthy.pickle"]),
    "PIN_MEMORY": True if torch.cuda.is_available() else False,
    "DATA_BASE_PATH": os.path.join('../..', 'rshiny', 'www', 'all_images'),
    "NEW_DATA_BASE_PATH": os.path.join('../..', 'small_model', 'new_imgs'),
    "BASE_PATH": os.path.join('..'),
    "EARLY_STOPPING_PATIENCE": 5,
    'SMALL_MODEL_IMG_SIZE': 60
}


In [27]:
annotations_0 = pd.read_csv("../../master_list.csv")
annotations_0 = annotations_0.loc[annotations_0["tag"]!="test"]
annotations_0.reset_index(drop=True, inplace=True)
annotations_0['label'] = annotations_0['ProductType'].astype(str) + '_' + annotations_0['Weight'].astype(str) + '_' + annotations_0['HalalStatus'].astype(str)
annotations_0 = annotations_0.groupby('label').apply(lambda x: x.sample(n=min(len(x), 3))).reset_index(drop=True)
annotations_0.shape

(825, 12)

In [30]:
annotations_0['type'] = "old"
new_imgs_df['type'] = "new"

# Concatenate the two dataframes vertically
annotations_smallmodel = pd.concat([annotations_0, new_imgs_df], ignore_index=True)
annotations_smallmodel.reset_index(drop=True, inplace=True)
annotations_smallmodel.shape

(835, 13)

In [40]:
annotations_smallmodel.tail()

Unnamed: 0,filepath,xmin,ymin,xmax,ymax,label,ProductType,Weight,HalalStatus,HealthStatus,new_camera,tag,type
830,5181704785427_.pic.jpg,,,,,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal,,,,new
831,5191704785428_.pic.jpg,,,,,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal,,,,new
832,5201704785430_.pic.jpg,,,,,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal,,,,new
833,5211704785432_.pic.jpg,,,,,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal,,,,new
834,5221704785433_.pic.jpg,,,,,Sugar_400-499g_NonHalal,Sugar,400-499g,NonHalal,,,,new


In [42]:
# Initialize lists for processed data
data, imagePaths, filenames = [], [], []

# Process each annotation entry
for idx, row in annotations_smallmodel.iterrows():
    filepath = row["filepath"]
    # imagePath = os.path.join(CONFIGS["DATA_BASE_PATH"], filepath)
    if row['type'] == 'old':
        imagePath = os.path.join(CONFIGS_SMALLMODEL["DATA_BASE_PATH"], filepath)
    else:
        imagePath = os.path.join(CONFIGS_SMALLMODEL["NEW_DATA_BASE_PATH"], filepath)
    image = cv2.imread(imagePath)
    
    # Preprocess image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (CONFIGS_SMALLMODEL['SMALL_MODEL_IMG_SIZE'], CONFIGS_SMALLMODEL['SMALL_MODEL_IMG_SIZE']))

    # Append processed data to lists
    data.append(image)
    imagePaths.append(imagePath)
    # filenames.append(filepath.rsplit('.', 1)[0])
    filenames.append(filepath)
    # prdtypes.append(row["ProductType"])
    # if row["ProductType"] == "Coffee":
    #     break

# Convert data to NumPy arrays for machine learning processing
labels = {
    'labels_prdtype': annotations_smallmodel['ProductType'],
    'labels_weight': annotations_smallmodel['Weight'],
    'labels_halal': annotations_smallmodel['HalalStatus'],
    # 'labels_healthy': annotations['HealthStatus'],
    'labels_total': annotations_smallmodel['label']
}

data = np.array(data, dtype="float32")
for label_name, label_data in labels.items():
    labels[label_name] = np.array(label_data)

# Split the data and labels into training and testing sets
split = train_test_split(data, *labels.values(), imagePaths, filenames,
                         test_size=0.2, random_state=42, stratify=labels['labels_total'])
# split = train_test_split(data, prdtypes, imagePaths, filenames,
#                          test_size=0.2, random_state=42, stratify=prdtypes)

# Unpack the data split
(trainImages, testImages, *split_labels, trainPaths, testPaths, trainFilenames, testFilenames) = split

# Create label encoders and transform labels
le_prdtype = LabelEncoder()
le_weight = LabelEncoder()
le_halal = LabelEncoder()
# le_healthy = LabelEncoder()
le_total = LabelEncoder()

trainLabels = {}
testLabels = {}

# Fit label encoders and transform labels
trainLabels['labels_prdtype'] = le_prdtype.fit_transform(split_labels[0])
testLabels['labels_prdtype'] = le_prdtype.transform(split_labels[1])

trainLabels['labels_weight'] = le_weight.fit_transform(split_labels[2])
testLabels['labels_weight'] = le_weight.transform(split_labels[3])

trainLabels['labels_halal'] = le_halal.fit_transform(split_labels[4])
testLabels['labels_halal'] = le_halal.transform(split_labels[5])

# trainLabels['labels_healthy'] = le_healthy.fit_transform(split_labels[6])
# testLabels['labels_healthy'] = le_healthy.transform(split_labels[7])

trainLabels['labels_total'] = le_total.fit_transform(split_labels[6])
testLabels['labels_total'] = le_total.transform(split_labels[7])


# Convert NumPy arrays to PyTorch tensors
trainImages, testImages = torch.tensor(trainImages), torch.tensor(testImages)
for label_name in labels.keys():
    trainLabels[label_name] = torch.tensor(trainLabels[label_name])
    testLabels[label_name] = torch.tensor(testLabels[label_name])


ValueError: The test_size = 167 should be greater or equal to the number of classes = 275

In [None]:
le_prdtype.classes_

## Model loading

In [None]:
class MultiHeadResNet_SmallModel(nn.Module):
    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
        super(MultiHeadResNet_SmallModel, self).__init__()
        self.base_model = models.resnet18(pretrained=True)
        num_ftrs = self.base_model.fc.in_features
        self.base_model.fc = nn.Identity()

        # Define custom fully connected layers for each prediction head
        self.fc_prdtype = nn.Linear(num_ftrs, num_classes_prdtype)
        self.fc_weight = nn.Linear(num_ftrs, num_classes_weight)
        self.fc_halal = nn.Linear(num_ftrs, num_classes_halal)
        self.fc_healthy = nn.Linear(num_ftrs, num_classes_healthy)

    def forward(self, x):
        x = self.base_model(x)
        prdtype = self.fc_prdtype(x)
        weight = self.fc_weight(x)
        halal = self.fc_halal(x)
        healthy = self.fc_healthy(x)
        return prdtype, weight, halal, healthy

    
# load label encoder 
def load_label_encoder_small_model():
    le_prdtype = pickle.loads(open("../small_model/output/le_prdtype.pickle", "rb").read())
    le_weight = pickle.loads(open("../small_model/output/le_weight.pickle", "rb").read())
    le_halal = pickle.loads(open("../small_model/output/le_halal.pickle", "rb").read())
    le_healthy = pickle.loads(open("../small_model/output/le_healthy.pickle", "rb").read())
    
    return le_prdtype, le_weight, le_halal, le_healthy

le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

# Load the trained MultiHeadResNet model
def load_model():
    # Verify the number of classes for each label
    num_classes_prdtype = len(le_prdtype.classes_)
    num_classes_weight = len(le_weight.classes_)
    num_classes_halal = len(le_halal.classes_)
    num_classes_healthy = len(le_healthy.classes_)
    # print(num_classes_prdtype)
    # print(num_classes_healthy)

    custom_resnet_model = MultiHeadResNet_SmallModel(
        num_classes_prdtype=num_classes_prdtype,
        num_classes_weight=num_classes_weight,
        num_classes_halal=num_classes_halal,
        num_classes_healthy=num_classes_healthy
    )

    model_path = '../small_model/output/multi_head_model.pth'
    # print("test1")
    if os.path.exists(model_path):
        custom_resnet_model.load_state_dict(torch.load(model_path, map_location=CONFIGS['DEVICE']))
    else:
        raise FileNotFoundError(f"Model file not found: {model_path}")
    # print("test2")
    custom_resnet_model.to(CONFIGS['DEVICE'])
    custom_resnet_model.eval()
    return custom_resnet_model
 
small_model = load_model()

## Scoring on new imgs 

In [None]:
new_imgs_df = pd.read_csv("../small_model/new_imgs_list.csv")
new_imgs_df.reset_index(drop=True, inplace=True)

# ADHOC: change the new imgs to existing type
new_imgs_df['label'] = 'AdultMilk_1-99g_Halal_NonHealthy'
new_imgs_df['ProductType'] = 'AdultMilk'
new_imgs_df['Weight'] = '1-99g'
new_imgs_df['HalalStatus'] = 'Halal'
new_imgs_df['HealthStatus'] = 'NonHealthy'

new_imgs_df.head()

In [None]:
new_imgs_results_small_model = pd.read_csv("../small_model/new_imgs_results_small_model.csv")
new_imgs_results_small_model = new_imgs_results_small_model.loc[new_imgs_results_small_model.Filename.isin(new_imgs_df.filepath)]
new_imgs_results_small_model.reset_index(drop=True, inplace=True)
new_imgs_results_small_model.head()

In [None]:
new_imgs_results_small_model.shape

In [None]:
# Create a copy of the current column names to a list
new_columns = new_imgs_results_small_model.columns.tolist()

# Modify the first two elements
new_columns[0] = 'filepath'
new_columns[1] = 'label'

# Assign the modified list of column names back to the DataFrame
new_imgs_results_small_model.columns = new_columns
new_imgs_results_small_model.head()

In [None]:
# Check if any name from 'extracted_names' is not in 'df' and add it as a new column
new_prdtype = list(set(all_imgs_results_big_model.columns) - set(new_imgs_results_small_model.columns))

if len(new_prdtype)>0:
    for col in new_prdtype:
        new_imgs_results_small_model[col] = np.random.normal(loc=CONFIGS["MEAN_PRIOR"], scale=np.sqrt(0.1), size=new_imgs_results_small_model.shape[0])  # Initialize new columns

new_imgs_results_small_model.head()  # Display the updated DataFrame for verificatio

In [None]:
new_imgs_results_small_model.shape

## Scoring on main imgs

In [None]:
main_imgs_master_list = pd.read_csv("../master_list.csv")
main_imgs_master_list.head()

In [None]:
main_imgs_results_small_model = []  # List to store the results
le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

for idx, row in main_imgs_master_list.iterrows():
    image_path = "../all_images/" + row['filepath']
    frame = cv2.imread(image_path)

    # Preprocessing steps
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (CONFIGS['SMALL_MODEL_IMG_SIZE'], CONFIGS['SMALL_MODEL_IMG_SIZE']))
    frame = frame.transpose((2, 0, 1))
    frame = torch.from_numpy(frame).float()
    frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

    # Perform prediction
    with torch.no_grad():
        out1, out2, out3, out4 = small_model(frame)
    
    # Extract and store the results
    prediction_row = [row['filepath'], row['label']]
    prediction_row.extend(out1.cpu().numpy().flatten())
    prediction_row.extend(out2.cpu().numpy().flatten())
    prediction_row.extend(out3.cpu().numpy().flatten())
    prediction_row.extend(out4.cpu().numpy().flatten())
    main_imgs_results_small_model.append(prediction_row)


# Define column names for the new DataFrame
column_names = ['filepath', 'label']
column_names += ['ProductType_' + name for name in le_prdtype.classes_]
column_names += ['Weight_' + name for name in le_weight.classes_]
column_names += ['HalalStatus_' + name for name in le_halal.classes_]
column_names += ['HealthStatus_' + name for name in le_healthy.classes_]


# Create the DataFrame
main_imgs_results_small_model = pd.DataFrame(main_imgs_results_small_model, columns=column_names)
main_imgs_results_small_model.head()

In [None]:
main_imgs_results_small_model.shape

In [None]:
# Check if any name from 'extracted_names' is not in 'df' and add it as a new column
new_prdtype = list(set(all_imgs_results_big_model.columns) - set(main_imgs_results_small_model.columns))

if len(new_prdtype)>0:
    for col in new_prdtype:
        main_imgs_results_small_model[col] = np.random.normal(loc=CONFIGS["MEAN_PRIOR"], scale=np.sqrt(0.1), size=main_imgs_results_small_model.shape[0])  # Initialize new columns

main_imgs_results_small_model.head()  # Display the updated DataFrame for verificatio

In [None]:
main_imgs_results_small_model.shape

## All scorings from small model

In [None]:
main_imgs_results_small_model['img_type'] = "existing"
new_imgs_results_small_model['img_type'] = "new"
all_imgs_results_small_model = pd.concat([main_imgs_results_small_model, new_imgs_results_small_model], axis=0)
all_imgs_results_small_model.reset_index(drop=True, inplace=True)
all_imgs_results_small_model.head()

In [None]:
all_imgs_results_small_model.tail()

In [None]:
all_imgs_results_small_model.shape

In [None]:
all_imgs_results_small_model.to_csv("all_imgs_results_small_model.csv", index=True)

# Bayesian model (not used)

In [None]:
prdtype_cols = [col for col in all_imgs_results_small_model.columns if col.startswith('ProductType_')]

In [None]:
all_imgs_results_small_model_prdtype = all_imgs_results_small_model[['label']+prdtype_cols]
all_imgs_results_big_model_prdtype = all_imgs_results_big_model[['label']+prdtype_cols]
all_imgs_results_small_model_prdtype = all_imgs_results_small_model_prdtype.sort_values(by='label').reset_index(drop=True)
all_imgs_results_big_model_prdtype = all_imgs_results_big_model_prdtype.sort_values(by='label').reset_index(drop=True)

In [None]:
assert (all_imgs_results_small_model_prdtype['label'][(all_imgs_results_small_model_prdtype['label'] == all_imgs_results_big_model_prdtype['label'])]).all()

In [None]:
all_imgs_results_small_model_prdtype['label_prdtype'] = all_imgs_results_small_model_prdtype['label'].str.split('_').str[0]
all_imgs_results_big_model_prdtype['label_prdtype'] = all_imgs_results_big_model_prdtype['label'].str.split('_').str[0]

In [None]:
# Remove the prefix from column names
all_imgs_results_small_model_prdtype.columns = [col.replace("ProductType_", '') if col.startswith("ProductType_") else col for col in all_imgs_results_small_model_prdtype.columns]
all_imgs_results_big_model_prdtype.columns = [col.replace("ProductType_", '') if col.startswith("ProductType_") else col for col in all_imgs_results_big_model_prdtype.columns]

In [None]:
prdtype_label_encoder = LabelEncoder()
truelabel = prdtype_label_encoder.fit_transform(all_imgs_results_big_model_prdtype['label_prdtype'])

In [None]:
# Assuming 'category_names' is the list of unique category names in the order they appear in logitscoresA
category_names = list(all_imgs_results_small_model_prdtype['label_prdtype'].unique())
category_to_encoded = {name: prdtype_label_encoder.transform([name])[0] for name in category_names}

# Reorder columns of logitscoresA and logitscoresB to match the order of encoded labels
ordered_columns = [category_names[i] for i in prdtype_label_encoder.transform(category_names)]
logitscoresA = all_imgs_results_big_model_prdtype[ordered_columns].values
logitscoresB = all_imgs_results_small_model_prdtype[ordered_columns].values


In [None]:
all_imgs_results_big_model.head()

In [None]:
# big model accuracy - total
pred_big_model_prdtype = np.argmax(logitscoresA, axis=1)
sum(pred_big_model_prdtype == truelabel) / len(truelabel)

In [None]:
# small model accuracy - total
pred_small_model_prdtype = np.argmax(logitscoresB, axis=1)
sum(pred_small_model_prdtype == truelabel) / len(truelabel)

In [None]:
# big model accuracy - new imgs
indices = np.where(truelabel == category_to_encoded['JennyBakery'])
sum(pred_big_model_prdtype[indices] == truelabel[indices]) / len(indices[0].tolist())

In [None]:
# small model accuracy - new imgs
indices = np.where(truelabel == category_to_encoded['JennyBakery'])
sum(pred_small_model_prdtype[indices] == truelabel[indices]) / len(indices[0].tolist())

In [None]:
len(truelabel)

In [None]:
logitscoresA.shape[1]

In [None]:
ordered_columns

In [None]:
# import pymc3 as pm
# import theano.tensor as tt
# import numpy as np
# import scipy.stats

# # Sample data setup (replace with your actual data)
# # logitscoresA and logitscoresB are matrices of logit scores for each category from classifiers A and B
# # truelabel is an already existing 1D array of integers representing the true labels
# indices = [np.random.choice(100, 3, replace=False)]  # Replace with your indices for missing data

# N = len(truelabel)
# L = logitscoresA.shape[1]
# missingidx = indices[0].tolist()  # Indices of missing data

# # Initialize truelabel_with_missing with the original truelabel and set missing indices to -1
# truelabel_with_missing = np.array(truelabel, dtype=np.int)
# truelabel_with_missing[missingidx] = -1

# # Mask the missing values
# masked_truelabel = np.ma.masked_where(truelabel_with_missing == -1, truelabel_with_missing)

# with pm.Model() as model:
#     # Priors
#     muA1 = pm.Normal('muA1', mu=0, sigma=10)
#     muA0 = pm.Normal('muA0', mu=0, sigma=10)
#     sigmaA = pm.Uniform('sigmaA', lower=0.01, upper=1.0)
#     muB1 = pm.Normal('muB1', mu=0, sigma=10)
#     muB0 = pm.Normal('muB0', mu=0, sigma=10)
#     sigmaB = pm.Uniform('sigmaB', lower=0.01, upper=1.0)
#     rho = pm.Uniform('rho', lower=-1, upper=1)
    
#     # Uniform prior over labels
#     labelprob = pm.Dirichlet('labelprob', a=tt.ones(L))

#     # Likelihood
#     muA = pm.math.switch(tt.eq(tt.arange(L), masked_truelabel[:, None]), muA1, muA0)
#     muB = pm.math.switch(tt.eq(tt.arange(L), masked_truelabel[:, None]), muB1, muB0)
    
#     logitscoresA_obs = pm.Normal('logitscoresA_obs', mu=muA, sigma=sigmaA, observed=logitscoresA)
#     logitscoresB_obs = pm.Normal('logitscoresB_obs', mu=muB + rho * (logitscoresA - muA) / sigmaA, sigma=tt.sqrt((1 - rho ** 2) * sigmaB ** 2), observed=logitscoresB)
    
#     # Define the categorical distribution for the true labels
#     truelabel_obs = pm.Categorical('truelabel_obs', p=labelprob, observed=masked_truelabel)

#     # Inference
#     trace = pm.sample(2000, tune=500, cores=1)

#     # Plotting within the model context
#     # az.plot_trace(trace)
#     # plt.show()

#     # Posterior predictive checks
#     ppc = pm.sample_posterior_predictive(trace, var_names=['truelabel_obs'])

# # Process the posterior predictive checks for missing indices
# infer_labels = []
# for idx in missingidx:
#     label_samples = ppc['truelabel_obs'][:, idx]
#     inferred_label = scipy.stats.mode(label_samples).mode[0]
#     infer_labels.append(inferred_label)

# # Output the inferred labels for missing indices
# print("Inferred labels for missing indices:", infer_labels)

In [None]:
np.unique(infer_labels)

In [None]:
~np.isin(np.arange(N), missingidx)

In [None]:
np.array(truelabel, dtype=np.int)[missingidx]

In [None]:
pred_small_model_prdtype[missingidx]

In [None]:
pred_big_model_prdtype[missingidx]

In [None]:
missingidx

In [None]:
ppc['truelabel_obs'].shape