In [2]:
import torch
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import copy
import time
import pickle

In [3]:
CONFIGS = {
    "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
    # specify ImageNet mean and standard deviation
    "IMG_MEAN": [0.485, 0.456, 0.406],
    "IMG_STD": [0.229, 0.224, 0.225],
    "INIT_LR": 1e-4,
    "NUM_EPOCHS": 200,
    "BATCH_SIZE": 16,
    # specify the loss weights
    "LABELS_PRDTYPE": 1.0,
    "LABELS_WEIGHT": 1.0,
    "LABELS_HALAL": 1.0,
    "LABELS_HEALTHY": 1.0,
    "MODEL_PATH": os.path.sep.join(["output", "detector.pth"]),
    "LE_PATH_PRDTYPE": os.path.sep.join(["output", "le_prdtype.pickle"]),
    "LE_PATH_WEIGHT": os.path.sep.join(["output", "le_weight.pickle"]),
    "LE_PATH_HALAL": os.path.sep.join(["output", "le_halal.pickle"]),
    "LE_PATH_HEALTHY": os.path.sep.join(["output", "le_healthy.pickle"]),
    "PIN_MEMORY": True if torch.cuda.is_available() else False,
    "DATA_BASE_PATH": "/Users/liupeng/Documents/GitHub/object_detection_using_tensorflow/images_combined/all_images",
    "NEW_DATA_BASE_PATH": "/Users/liupeng/Documents/GitHub/object_detection_using_tensorflow/images_combined/small_model/new_imgs"
}

# create output folder
if not os.path.exists("output"):
    !mkdir -p {"output"}

In [7]:
# # ten classes from existing imgs
# annotations_1 = pd.read_csv("base_imgs_list.csv")
# annotations_1.reset_index(drop=True, inplace=True)
# annotations_1['Type'] = "old"

# all classes from existing imgs
annotations_1 = pd.read_csv("../master_list.csv")
annotations_1 = annotations_1.groupby('label', group_keys=False).apply(lambda x: x.sample(min(len(x), 4)))
annotations_1 = annotations_1[["filepath", "label",	"ProductType", "Weight", "HalalStatus",	"HealthStatus"]]
annotations_1.reset_index(drop=True, inplace=True)
annotations_1['Type'] = "old"

annotations_2 = pd.read_csv("new_imgs_list.csv")
annotations_2.reset_index(drop=True, inplace=True)
annotations_2['Type'] = "new"

# ADHOC: change the new imgs to existing type
annotations_2['label'] = 'AdultMilk_1-99g_Halal_NonHealthy'
annotations_2['ProductType'] = 'AdultMilk'
annotations_2['Weight'] = '1-99g'
annotations_2['HalalStatus'] = 'Halal'
annotations_2['HealthStatus'] = 'NonHealthy'

# Concatenate the two dataframes vertically
annotations = pd.concat([annotations_1, annotations_2], ignore_index=True)
annotations.reset_index(drop=True, inplace=True)
annotations.head()

Unnamed: 0,filepath,label,ProductType,Weight,HalalStatus,HealthStatus,Type
0,2023_10_25_11_30_29_761991.jpg,AdultMilk_1-99g_Halal_NonHealthy,AdultMilk,1-99g,Halal,NonHealthy,old
1,2023_10_25_11_23_46_148599.jpg,AdultMilk_1-99g_Halal_NonHealthy,AdultMilk,1-99g,Halal,NonHealthy,old
2,2023_10_25_11_29_17_748204.jpg,AdultMilk_1-99g_Halal_NonHealthy,AdultMilk,1-99g,Halal,NonHealthy,old
3,2023_10_25_11_29_34_492008.jpg,AdultMilk_1-99g_Halal_NonHealthy,AdultMilk,1-99g,Halal,NonHealthy,old
4,2023_10_25_11_27_5_184489.jpg,AdultMilk_1000-1999g_Halal_NonHealthy,AdultMilk,1000-1999g,Halal,NonHealthy,old


In [4]:
# Initialize lists for processed data
data, imagePaths, filenames = [], [], []

# Process each annotation entry
for idx, row in annotations.iterrows():
    filepath = row["filepath"]
    if row['Type'] == 'old':
        imagePath = os.path.join("/content", CONFIGS["DATA_BASE_PATH"], filepath)
    else:
        imagePath = os.path.join("/content", CONFIGS["NEW_DATA_BASE_PATH"], filepath)
    image = cv2.imread(imagePath)
    
    # Preprocess image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (60, 60))

    # Append processed data to lists
    data.append(image)
    imagePaths.append(imagePath)
    # filenames.append(filepath.rsplit('.', 1)[0])
    filenames.append(filepath)

# Convert data to NumPy arrays for machine learning processing
labels = {
    'labels_prdtype': annotations['ProductType'],
    'labels_weight': annotations['Weight'],
    'labels_halal': annotations['HalalStatus'],
    'labels_healthy': annotations['HealthStatus'],
    'labels_total': annotations['label']
}

data = np.array(data, dtype="float32")
for label_name, label_data in labels.items():
    labels[label_name] = np.array(label_data)

# Split the data and labels into training and testing sets
split = train_test_split(data, *labels.values(), imagePaths, filenames,
                         test_size=0.3, random_state=42, stratify=labels['labels_total'])

# Unpack the data split
(trainImages, testImages, *split_labels, trainPaths, testPaths, trainFilenames, testFilenames) = split

# Create label encoders and transform labels
le_prdtype = LabelEncoder()
le_weight = LabelEncoder()
le_halal = LabelEncoder()
le_healthy = LabelEncoder()
le_total = LabelEncoder()

trainLabels = {}
testLabels = {}

# Fit label encoders and transform labels
trainLabels['labels_prdtype'] = le_prdtype.fit_transform(split_labels[0])
testLabels['labels_prdtype'] = le_prdtype.transform(split_labels[1])

trainLabels['labels_weight'] = le_weight.fit_transform(split_labels[2])
testLabels['labels_weight'] = le_weight.transform(split_labels[3])

trainLabels['labels_halal'] = le_halal.fit_transform(split_labels[4])
testLabels['labels_halal'] = le_halal.transform(split_labels[5])

trainLabels['labels_healthy'] = le_healthy.fit_transform(split_labels[6])
testLabels['labels_healthy'] = le_healthy.transform(split_labels[7])

trainLabels['labels_total'] = le_total.fit_transform(split_labels[8])
testLabels['labels_total'] = le_total.transform(split_labels[9])


# Convert NumPy arrays to PyTorch tensors
trainImages, testImages = torch.tensor(trainImages), torch.tensor(testImages)
for label_name in labels.keys():
    trainLabels[label_name] = torch.tensor(trainLabels[label_name])
    testLabels[label_name] = torch.tensor(testLabels[label_name])


In [5]:
annotations.shape

(672, 7)

In [6]:
class CustomTensorDataset(Dataset):
    # Initialize the constructor
    def __init__(self, images, labels, filenames, transforms=None):
        self.images = images
        self.labels = labels
        self.filenames = filenames
        self.transforms = transforms

    def __getitem__(self, index):
        # Grab the image, labels, and its bounding box coordinates
        image = self.images[index]
        label_prdtype = self.labels['labels_prdtype'][index]
        label_weight = self.labels['labels_weight'][index]
        label_halal = self.labels['labels_halal'][index]
        label_healthy = self.labels['labels_healthy'][index]
        filename = self.filenames[index]

        # Transpose the image such that its channel dimension becomes the leading one
        image = image.permute(2, 0, 1)

        # Check to see if we have any image transformations to apply and if so, apply them
        if self.transforms:
            image = self.transforms(image)

        # Return a tuple of the images, labels, and bounding box coordinates
        return (image, label_prdtype, label_weight, label_halal, label_healthy, filename)

    def __len__(self):
        # Return the size of the dataset
        return len(self.images)

# Define normalization and augmentation transforms
normalization_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
])

augmentation_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(20),
    # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1)
])

# Combine augmentation and normalization for training
train_transforms = transforms.Compose([augmentation_transforms, normalization_transforms])
test_transforms = normalization_transforms

# Create PyTorch datasets
trainDS = CustomTensorDataset(trainImages, trainLabels, trainFilenames, transforms=train_transforms)
testDS = CustomTensorDataset(testImages, testLabels, testFilenames, transforms=test_transforms)

# Print dataset sizes
print("[INFO] total training samples: {}...".format(len(trainDS)))
print("[INFO] total test samples: {}...".format(len(testDS)))

# Calculate steps per epoch for training and validation set
trainSteps = len(trainDS) // CONFIGS['BATCH_SIZE']
valSteps = len(testDS) // CONFIGS['BATCH_SIZE']

# Create data loaders
trainLoader = DataLoader(trainDS, batch_size=CONFIGS['BATCH_SIZE'], shuffle=True,
                         num_workers=os.cpu_count(), pin_memory=CONFIGS['PIN_MEMORY'])
testLoader = DataLoader(testDS, batch_size=CONFIGS['BATCH_SIZE'],
                        num_workers=os.cpu_count(), pin_memory=CONFIGS['PIN_MEMORY'])


[INFO] total training samples: 470...
[INFO] total test samples: 202...


In [13]:
# Define the MultiHeadResNet model
class MultiHeadResNet(nn.Module):
    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
        super(MultiHeadResNet, self).__init__()
        self.base_model = models.resnet18(pretrained=True)
        num_ftrs = self.base_model.fc.in_features
        self.base_model.fc = nn.Identity()

        # Define custom fully connected layers for each prediction head
        self.fc_prdtype = nn.Linear(num_ftrs, num_classes_prdtype)
        self.fc_weight = nn.Linear(num_ftrs, num_classes_weight)
        self.fc_halal = nn.Linear(num_ftrs, num_classes_halal)
        self.fc_healthy = nn.Linear(num_ftrs, num_classes_healthy)

    def forward(self, x):
        x = self.base_model(x)
        prdtype = self.fc_prdtype(x)
        weight = self.fc_weight(x)
        halal = self.fc_halal(x)
        healthy = self.fc_healthy(x)
        return prdtype, weight, halal, healthy

# Function to calculate accuracy
def calculate_accuracy(outputs, labels):
    _, preds = torch.max(outputs, 1)
    corrects = torch.sum(preds == labels.data)
    return corrects.double() / labels.size(0)

# Training and Validation Loop with Early Stopping
def train_model(model, criteria, optimizer, train_loader, test_loader, device, num_epochs=25, early_stopping_patience=10):
    criterion_prdtype, criterion_weight, criterion_halal, criterion_healthy = criteria
    best_val_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())
    epochs_no_improve = 0

    history = {
        'train_loss': [],
        'train_acc_prdtype': [],
        'train_acc_weight': [],
        'train_acc_halal': [],
        'train_acc_healthy': [],
        'train_acc_overall': [],
        'val_loss': [],
        'val_acc_prdtype': [],
        'val_acc_weight': [],
        'val_acc_halal': [],
        'val_acc_healthy': [],
        'val_acc_overall': [],
    }

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects_prdtype = 0
            running_corrects_weight = 0
            running_corrects_halal = 0
            running_corrects_healthy = 0
            running_corrects_overall = 0
            total_samples = 0

            for inputs, label_prdtype, label_weight, label_halal, label_healthy, _ in train_loader if phase == 'train' else test_loader:
                inputs = inputs.to(device)
                label_prdtype = label_prdtype.to(device)
                label_weight = label_weight.to(device)
                label_halal = label_halal.to(device)
                label_healthy = label_healthy.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs_prdtype, outputs_weight, outputs_halal, outputs_healthy = model(inputs)
                    loss_prdtype = criterion_prdtype(outputs_prdtype, label_prdtype)
                    loss_weight = criterion_weight(outputs_weight, label_weight)
                    loss_halal = criterion_halal(outputs_halal, label_halal)
                    loss_healthy = criterion_healthy(outputs_healthy, label_healthy)
                    loss = loss_prdtype + loss_weight + loss_halal + loss_healthy  # Total loss

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects_prdtype += calculate_accuracy(outputs_prdtype, label_prdtype) * inputs.size(0)
                running_corrects_weight += calculate_accuracy(outputs_weight, label_weight) * inputs.size(0)
                running_corrects_halal += calculate_accuracy(outputs_halal, label_halal) * inputs.size(0)
                running_corrects_healthy += calculate_accuracy(outputs_healthy, label_healthy) * inputs.size(0)
                correct_preds_overall = ((outputs_prdtype.argmax(1) == label_prdtype) &
                                         (outputs_weight.argmax(1) == label_weight) &
                                         (outputs_halal.argmax(1) == label_halal) &
                                         (outputs_healthy.argmax(1) == label_healthy))
                running_corrects_overall += correct_preds_overall.sum().item()
                total_samples += inputs.size(0)

            epoch_loss = running_loss / total_samples
            epoch_acc_prdtype = running_corrects_prdtype / total_samples
            epoch_acc_weight = running_corrects_weight / total_samples
            epoch_acc_halal = running_corrects_halal / total_samples
            epoch_acc_healthy = running_corrects_healthy / total_samples
            epoch_acc_overall = running_corrects_overall / total_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc_overall))

            if phase == 'val':
                if epoch_loss < best_val_loss:
                    print("new loss obtained")
                    best_val_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1
                    print(f"tmp epochs_no_improve: {epochs_no_improve}")

                if epochs_no_improve >= early_stopping_patience:
                    print("Early stopping triggered at epoch: {}".format(epoch + 1))
                    model.load_state_dict(best_model_wts)
                    return model, history
            
            print(f"epochs_no_improve: {epochs_no_improve}")

    model.load_state_dict(best_model_wts)
    return model, history

# Example usage of the function
# Assuming CONFIGS, trainLoader, testLoader, etc. are already defined
num_classes_prdtype = len(np.unique(trainLabels['labels_prdtype']))
num_classes_weight = len(np.unique(trainLabels['labels_weight']))
num_classes_halal = len(np.unique(trainLabels['labels_halal']))
num_classes_healthy = len(np.unique(trainLabels['labels_healthy']))

custom_resnet_model = MultiHeadResNet(num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
custom_resnet_model = custom_resnet_model.to(device)

criterion_prdtype = nn.CrossEntropyLoss()
criterion_weight = nn.CrossEntropyLoss()
criterion_halal = nn.CrossEntropyLoss()
criterion_healthy = nn.CrossEntropyLoss()

optimizer = optim.Adam(custom_resnet_model.parameters(), lr=CONFIGS['INIT_LR'])

criteria = (criterion_prdtype, criterion_weight, criterion_halal, criterion_healthy)

# Start time
print("Model training started...")
start_time = time.time()

model_ft, history = train_model(custom_resnet_model, criteria, optimizer, trainLoader, testLoader, device, num_epochs=CONFIGS['NUM_EPOCHS'])

# End time
end_time = time.time()
print("Model training completed...")

execution_time = end_time - start_time
print(f"Time spent: {round(execution_time/60,2)} mins")

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Model training started...
Epoch 1/200
train Loss: 7.7592 Acc: 0.0000
epochs_no_improve: 0
val Loss: 7.0509 Acc: 0.0050
new loss obtained
epochs_no_improve: 0
Epoch 2/200
train Loss: 6.3348 Acc: 0.0298
epochs_no_improve: 0
val Loss: 6.3826 Acc: 0.0248
new loss obtained
epochs_no_improve: 0
Epoch 3/200
train Loss: 5.5570 Acc: 0.0894
epochs_no_improve: 0
val Loss: 5.8533 Acc: 0.0644
new loss obtained
epochs_no_improve: 0
Epoch 4/200
train Loss: 5.0103 Acc: 0.1191
epochs_no_improve: 0
val Loss: 5.5080 Acc: 0.1287
new loss obtained
epochs_no_improve: 0
Epoch 5/200
train Loss: 4.6523 Acc: 0.1660
epochs_no_improve: 0
val Loss: 5.0240 Acc: 0.1535
new loss obtained
epochs_no_improve: 0
Epoch 6/200
train Loss: 4.0893 Acc: 0.2383
epochs_no_improve: 0
val Loss: 4.7961 Acc: 0.2079
new loss obtained
epochs_no_improve: 0
Epoch 7/200
train Loss: 3.8027 Acc: 0.2681
epochs_no_improve: 0
val Loss: 4.5433 Acc: 0.2030
new loss obtained
epochs_no_improve: 0
Epoch 8/200
train Loss: 3.4287 Acc: 0.3340
epochs_

In [8]:
print(f"Time spent: {round(execution_time/60,2)} mins")

Time spent: 12.63 mins


In [14]:
torch.save(model_ft.state_dict(), 'output/multi_head_model.pth')

print("[INFO] saving label encoder...")
f = open(CONFIGS["LE_PATH_PRDTYPE"], "wb")
f.write(pickle.dumps(le_prdtype))
f.close()
f = open(CONFIGS["LE_PATH_WEIGHT"], "wb")
f.write(pickle.dumps(le_weight))
f.close()
f = open(CONFIGS["LE_PATH_HALAL"], "wb")
f.write(pickle.dumps(le_halal))
f.close()
f = open(CONFIGS["LE_PATH_HEALTHY"], "wb")
f.write(pickle.dumps(le_healthy))
f.close()

[INFO] saving label encoder...


In [15]:
def evaluate_model(model, data_loader, dataset_size, num_mc_samples=50):
    model.eval()  # Set the model to evaluation mode
    correct_counts = {'ProductType': 0, 'Weight': 0, 'HalalStatus': 0, 'HealthStatus': 0, 'Total': 0}
    total_distances = []

    with torch.no_grad():
        for (images, labels_prdtype, labels_weight, labels_halal, labels_healthy, filenames) in data_loader:
            images = images.to(CONFIGS['DEVICE'])
            labels_prdtype, labels_weight, labels_halal, labels_healthy = labels_prdtype.to(CONFIGS['DEVICE']), labels_weight.to(CONFIGS['DEVICE']), labels_halal.to(CONFIGS['DEVICE']), labels_healthy.to(CONFIGS['DEVICE'])

            # Forward pass
            out1, out2, out3, out4 = model(images)

            # Store deterministic predictions
            det_pred_prdtype = out1.argmax(1)
            det_pred_weight = out2.argmax(1)
            det_pred_halal = out3.argmax(1)
            det_pred_healthy = out4.argmax(1)

            # # Monte Carlo Dropout
            # mc_distances = []
            # model.train()  # Enable dropout
            # for i in range(num_mc_samples):
            #     mc_out1, mc_out2, mc_out3, mc_out4, _ = model(images)
            #     mc_pred_prdtype = mc_out1.argmax(1)
            #     mc_pred_weight = mc_out2.argmax(1)
            #     mc_pred_halal = mc_out3.argmax(1)
            #     mc_pred_healthy = mc_out4.argmax(1)

            #     distance = (det_pred_prdtype != mc_pred_prdtype).float() + \
            #                (det_pred_weight != mc_pred_weight).float() + \
            #                (det_pred_halal != mc_pred_halal).float() + \
            #                (det_pred_healthy != mc_pred_healthy).float()

            #     mc_distances.append(distance)

            # # Calculate average distance
            # avg_distance = torch.stack(mc_distances).mean(0)
            # total_distances.extend(avg_distance.cpu().numpy().tolist())

            # Restore to evaluation mode
            # model.eval()

            # Update correct counts for each category
            correct_counts['ProductType'] += (out1.argmax(1) == labels_prdtype).float().sum().item()
            correct_counts['Weight'] += (out2.argmax(1) == labels_weight).float().sum().item()
            correct_counts['HalalStatus'] += (out3.argmax(1) == labels_halal).float().sum().item()
            correct_counts['HealthStatus'] += (out4.argmax(1) == labels_healthy).float().sum().item()
            correct_counts['Total'] += ((out1.argmax(1) == labels_prdtype) & (out2.argmax(1) == labels_weight) & (out3.argmax(1) == labels_halal) & (out4.argmax(1) == labels_healthy)).float().sum().item()

    # Calculate accuracies
    accuracies = {key: correct_counts[key] / dataset_size for key in correct_counts}
    # avg_total_distance = sum(total_distances) / len(total_distances)

    # Plot histogram of distances
    # plt.hist(total_distances, bins=30, alpha=0.7, label='Total Distances', color='b')
    # plt.xlabel('Distance')
    # plt.ylabel('Frequency')
    # plt.title(f'Distribution of Total Distances: N={len(total_distances)}')
    # plt.show()

    # return accuracies, avg_total_distance, total_distances
    return accuracies

# Evaluate on training set
train_accuracies = evaluate_model(model_ft, trainLoader, len(trainDS))
print(f"Training Accuracies: {train_accuracies}")

# Evaluate on test set
test_accuracies= evaluate_model(model_ft, testLoader, len(testDS))
print(f"Test Accuracies: {test_accuracies}")


Training Accuracies: {'ProductType': 1.0, 'Weight': 0.9914893617021276, 'HalalStatus': 1.0, 'HealthStatus': 1.0, 'Total': 0.9914893617021276}
Test Accuracies: {'ProductType': 0.7277227722772277, 'Weight': 0.6237623762376238, 'HalalStatus': 0.7871287128712872, 'HealthStatus': 0.9158415841584159, 'Total': 0.5148514851485149}


In [16]:
def evaluate_model2(model, data_loader, le_prdtype, le_weight, le_halal, le_healthy):
    model.eval()  # Set model to evaluation mode
    results = []

    with torch.no_grad():
        for (images, labels_prdtype, labels_weight, labels_halal, labels_healthy, filenames) in data_loader:
            images = images.to(CONFIGS['DEVICE'])
            out1, out2, out3, out4 = model(images)

            for idx in range(len(filenames)):
                correct_label = f"{le_prdtype.classes_[labels_prdtype[idx]]}_{le_weight.classes_[labels_weight[idx]]}_{le_halal.classes_[labels_halal[idx]]}_{le_healthy.classes_[labels_healthy[idx]]}"
                row = [filenames[idx], correct_label]
                row.extend(out1[idx].cpu().numpy())
                row.extend(out2[idx].cpu().numpy())
                row.extend(out3[idx].cpu().numpy())
                row.extend(out4[idx].cpu().numpy())
                results.append(row)

    # Define column names
    column_names = ['Filename', 'CorrectTotalLabel']
    column_names += ['ProductType_' + name for name in le_prdtype.classes_]
    column_names += ['Weight_' + name for name in le_weight.classes_]
    column_names += ['HalalStatus_' + name for name in le_halal.classes_]
    column_names += ['HealthStatus_' + name for name in le_healthy.classes_]

    # Create DataFrame
    results_df = pd.DataFrame(results, columns=column_names)
    return results_df


# Usage of the function
train_results_df = evaluate_model2(model_ft, trainLoader, le_prdtype, le_weight, le_halal, le_healthy)
test_results_df = evaluate_model2(model_ft, testLoader, le_prdtype, le_weight, le_halal, le_healthy)

# Concatenate the training and test results
combined_results_df = pd.concat([train_results_df, test_results_df], axis=0)
combined_results_df.reset_index(drop=True, inplace=True)

# Display the combined results
print("Combined Results:")
combined_results_df.head()


Combined Results:


Unnamed: 0,Filename,CorrectTotalLabel,ProductType_AdultMilk,ProductType_BabyMilkPowder,ProductType_Babyfood,ProductType_BeehoonVermicelliMeesua,ProductType_BiscuitsCrackersCookies,ProductType_Book,ProductType_BreakfastCerealsCornflakes,ProductType_CannedPacketCreamersSweet,...,Weight_400-499g,Weight_500-599g,Weight_600-699g,Weight_700-799g,Weight_800-899g,Weight_900-999g,HalalStatus_Halal,HalalStatus_NonHalal,HealthStatus_Healthy,HealthStatus_NonHealthy
0,IMG_0722_jpeg.rf.d225be9cf3e9a21b88a9111c79c48...,OtherNoodles_400-499g_NonHalal_NonHealthy,-0.042604,-0.917904,-1.518212,-0.296711,2.354687,-2.620731,-2.82771,-3.824989,...,7.529936,-0.084116,-1.743978,-5.213832,-1.647417,-1.764002,-3.006634,2.80864,-5.266931,5.735337
1,IMG_2125_jpeg.rf.98ceaf3474e8a755edb2d0c969c93...,BiscuitsCrackersCookies_500-599g_NonHalal_NonH...,-3.947608,0.006503,0.566163,-1.790939,9.11619,-4.677173,-5.523167,-5.752591,...,0.353897,10.116268,-3.711792,-4.14294,-1.033993,-4.143282,-3.573488,3.979955,-3.407155,3.641886
2,20231222_0419.jpg,SweetsChocolatesOthers_200-299g_Halal_NonHealthy,-3.898118,-1.853065,-1.576361,-1.745042,0.757353,-1.532205,-6.475076,-2.823888,...,-0.103763,-1.004154,-3.132432,-2.784842,-1.196354,-4.022461,3.352356,-2.715928,-4.029167,5.381159
3,2023_10_25_11_49_41_382262.jpg,BabyMilkPowder_400-499g_Halal_NonHealthy,0.379034,9.634521,0.752696,-3.017066,-0.769694,-3.41203,-2.079742,-1.423823,...,6.357276,-1.178355,-3.272727,-3.53172,3.178313,-0.548229,3.688625,-2.877854,-2.247989,2.201218
4,2023_8_11_12_16_13_156049_png.rf.d1b4db49f97ab...,FlavoredMilk_1-99g_Halal_Healthy,-1.101073,-0.451816,-1.461948,0.367423,0.476725,-4.617911,-2.182668,-2.599943,...,-1.091295,-3.417022,-2.225312,-1.328926,-0.339995,-0.34411,4.364552,-3.306172,3.103263,-0.926259


In [17]:
combined_results_df.to_csv('new_imgs_results_small_model.csv', index=False)

In [12]:
import pandas as pd
import numpy as np
from collections import Counter

# Since the actual data is an image of text, we'll manually transcribe a few lines to demonstrate the process.
# In practice, the user would extract the text data using OCR (Optical Character Recognition) tools such as Tesseract.


# Convert the example data into a DataFrame
df = pd.read_csv("./results_[0.1].csv")

# Flatten the list of lists into a single list
all_indices = [i for sublist in df['Alpha Max Indices'] for i in sublist]

# Count the frequency of each number
frequency_counts = Counter(all_indices)

frequency_counts


Counter({'[': 1750,
         '3': 771,
         ',': 3500,
         ' ': 3500,
         '7': 736,
         ']': 1750,
         '6': 1142,
         '0': 650,
         '1': 740,
         '2': 840,
         '5': 360,
         '4': 11})