In [1]:
import pandas as pd
import torch.nn as nn
import pickle
import torch
from torchvision import models
from torchvision.models import detection, resnet50, ResNet50_Weights
import os
import numpy as np
import cv2
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
import scipy

In [2]:
# Shared settings read by the model-loading, preprocessing and scoring cells below.
CONFIGS = {
    # run on the GPU when torch reports CUDA as available, otherwise on the CPU
    "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
    # specify ImageNet mean and standard deviation
    "IMG_MEAN": [0.485, 0.456, 0.406],
    "IMG_STD": [0.229, 0.224, 0.225],
    "MC_DROPOUT_ENABLED": False,  # Switch to enable/disable MC Dropout for confidence score
    "NUM_DROPOUT_RUNS": 3,
    "CONFIDENCE_THRESHOLD": 0,
    # square edge length (pixels) images are resized to before being scored by the big model
    "BIG_MODEL_IMG_SIZE": 320,
    "SMALL_MODEL_IMG_SIZE": 60,
    # mean of the normal prior used to fill missing logit columns (only referenced by the commented-out small-model cells)
    "MEAN_PRIOR": -15,
    # sub-directory of ../../NN_model/model_weights holding the encoders, weights and result CSVs
    "MODEL_PATH": 'traindatawithin1'
}

# Big model

## Model loading

In [3]:
class MultiHeadResNet_BigModel(nn.Module):
    """ResNet-50 backbone shared by five independent linear heads.

    The backbone's own classifier is replaced by ``nn.Identity`` so its output
    features feed one ``nn.Linear`` per task: product type, weight, halal
    status, healthy status, and a 4-value ``fc_bbox`` head.
    """

    def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
        super().__init__()
        backbone = models.resnet50(weights=ResNet50_Weights.DEFAULT)
        feature_dim = backbone.fc.in_features  # width of the features entering the heads
        backbone.fc = nn.Identity()            # expose raw features instead of ImageNet logits
        self.base_model = backbone

        # One linear head per prediction target (attribute names are the state_dict keys)
        self.fc_prdtype = nn.Linear(feature_dim, num_classes_prdtype)
        self.fc_weight = nn.Linear(feature_dim, num_classes_weight)
        self.fc_halal = nn.Linear(feature_dim, num_classes_halal)
        self.fc_healthy = nn.Linear(feature_dim, num_classes_healthy)
        self.fc_bbox = nn.Linear(feature_dim, 4)

    def forward(self, x):
        """Return ``(prdtype, weight, halal, healthy, box)`` outputs for input ``x``."""
        features = self.base_model(x)
        return (
            self.fc_prdtype(features),
            self.fc_weight(features),
            self.fc_halal(features),
            self.fc_healthy(features),
            self.fc_bbox(features),
        )

    
# load label encoder 
def load_label_encoder_big_model(model_dir=None):
    """Load the four pickled label encoders that belong to the big model.

    Args:
        model_dir: Directory containing ``le_prdtype.pickle``,
            ``le_weight.pickle``, ``le_halal.pickle`` and ``le_healthy.pickle``.
            Defaults to ``../../NN_model/model_weights/<CONFIGS['MODEL_PATH']>``.

    Returns:
        Tuple ``(le_prdtype, le_weight, le_halal, le_healthy)``.

    Raises:
        FileNotFoundError: if one of the pickle files is missing.
    """
    if model_dir is None:
        model_dir = os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'])

    encoders = []
    for head in ('prdtype', 'weight', 'halal', 'healthy'):
        # `with` closes each file handle; the previous version left all four open.
        with open(os.path.join(model_dir, f'le_{head}.pickle'), "rb") as fh:
            # NOTE(review): unpickling executes arbitrary code - only load files you trust.
            encoders.append(pickle.load(fh))

    return tuple(encoders)

# Load the encoders once at notebook level; load_model() below reads them as globals.
le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_big_model()

# Load the trained MultiHeadResNet model
def load_model():
    """Build the multi-head ResNet, restore its saved weights and return it in eval mode.

    Head sizes are taken from the notebook-level label encoders (``le_prdtype``
    and friends); the checkpoint is ``multi_head_model.pth`` inside the
    directory selected by ``CONFIGS['MODEL_PATH']``.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    # One output unit per class known to each label encoder
    head_sizes = dict(
        num_classes_prdtype=len(le_prdtype.classes_),
        num_classes_weight=len(le_weight.classes_),
        num_classes_halal=len(le_halal.classes_),
        num_classes_healthy=len(le_healthy.classes_),
    )
    network = MultiHeadResNet_BigModel(**head_sizes)

    checkpoint_path = os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'multi_head_model.pth')
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"Model file not found: {checkpoint_path}")

    # map_location places the loaded tensors on the configured device
    state_dict = torch.load(checkpoint_path, map_location=CONFIGS['DEVICE'])
    network.load_state_dict(state_dict)

    network.to(CONFIGS['DEVICE'])
    network.eval()
    return network

# Instantiate the big model once; the scoring loop further down reuses this object.
big_model = load_model()

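Security note: the label encoders above are restored with `pickle`, which can execute arbitrary code when loading untrusted files. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations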


## Scoring on main imgs

In [4]:
# Read the big model's stored results for the main images (main_imgs_results_big_model.csv) from the model directory.
main_imgs_results_big_model = pd.read_csv(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'main_imgs_results_big_model.csv'))
main_imgs_results_big_model.head()

SyntaxError: invalid syntax (1433660164.py, line 2)

In [None]:
# Rename the first two columns to 'filepath' and 'label'; every other column keeps its name.
new_columns = list(main_imgs_results_big_model.columns)
new_columns[0], new_columns[1] = 'filepath', 'label'
main_imgs_results_big_model.columns = new_columns


In [None]:
# (rows, columns) of the stored main-image results
main_imgs_results_big_model.shape

## Scoring on unused training imgs

In [None]:
# Master image list without the rows flagged for removal (remove == 1), re-indexed from 0.
master_df = (
    pd.read_csv("../../master_list.csv")
    .loc[lambda frame: frame['remove'] != 1]
    .reset_index(drop=True)
)
master_df.head()

In [None]:
# Images listed in the master file that have no stored big-model result yet.
# Derived from master_df itself (not from a set) so the order is the master-list
# order on every kernel run; set iteration order for strings is not stable.
not_scored_yet = ~master_df["filepath"].isin(main_imgs_results_big_model["filepath"])
pending_imgs = master_df.loc[not_scored_yet, "filepath"].drop_duplicates().tolist()
len(pending_imgs)

In [None]:
# Inference-time preprocessing applied to each frame in the scoring loop below:
# tensor -> PIL image -> tensor, then per-channel normalisation with the configured mean/std.
transforms_test = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
])

# Ground-truth label per file, built once instead of scanning master_df for every image
# (first occurrence wins, matching the previous `.iloc[0]` lookup).
label_by_filepath = (
    master_df.drop_duplicates(subset='filepath', keep='first')
    .set_index('filepath')['label']
    .to_dict()
)

new_imgs_results_big_model = []  # List to store the results (one row of raw outputs per image)

for filepath in pending_imgs:
    image_path = "../../rshiny/www/all_images/" + filepath
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread reports a missing/unreadable file by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Preprocessing steps: BGR -> RGB, resize to the square model input, HWC -> CHW float tensor
    # NOTE(review): transforms_test starts with ToPILImage on a float tensor holding 0-255
    # values - confirm this matches the training pipeline before changing anything here.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (CONFIGS['BIG_MODEL_IMG_SIZE'], CONFIGS['BIG_MODEL_IMG_SIZE']))
    frame = frame.transpose((2, 0, 1))
    frame = torch.from_numpy(frame).float()
    frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

    # Perform prediction (the fifth output, the bbox head, is not stored)
    with torch.no_grad():
        out1, out2, out3, out4, _ = big_model(frame)

    # Row layout: filepath, true label, then the flattened outputs of the four heads in order
    prediction_row = [filepath, label_by_filepath[filepath]]
    for head_output in (out1, out2, out3, out4):
        prediction_row.extend(head_output.cpu().numpy().flatten())
    new_imgs_results_big_model.append(prediction_row)


# Reuse the prediction column names of the stored main-image results (everything after
# 'filepath' and 'label') so both tables share the same layout.
big_model_pred_col_name_original = main_imgs_results_big_model.columns[2:].tolist()
column_names = ['filepath', 'label'] + big_model_pred_col_name_original

# Turn the collected rows into a DataFrame
new_imgs_results_big_model = pd.DataFrame(new_imgs_results_big_model, columns=column_names)
new_imgs_results_big_model.head()

## Scoring on new imgs

In [None]:
# new_imgs_df = pd.read_csv("GPT_model/chatgpt_prediction.csv")
# new_imgs_df.head()

In [None]:
# transforms_test = transforms.Compose([
#     transforms.ToPILImage(),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=CONFIGS['IMG_MEAN'], std=CONFIGS['IMG_STD'])
# ])

In [None]:
# master_df = pd.read_csv("../../master_list.csv")
# master_df.head()

In [None]:
# new_imgs_results_big_model = []  # List to store the results

# for idx, row in new_imgs_df.iterrows():
#     image_path = "../../all_images/" + row['img_filename']
#     frame = cv2.imread(image_path)

#     # Preprocessing steps
#     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     frame = cv2.resize(frame, (CONFIGS['BIG_MODEL_IMG_SIZE'], CONFIGS['BIG_MODEL_IMG_SIZE']))
#     frame = frame.transpose((2, 0, 1))
#     frame = torch.from_numpy(frame).float()
#     frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

#     # Perform prediction
#     with torch.no_grad():
#         out1, out2, out3, out4, _ = big_model(frame)

#     # reference the correct label from master list
#     tmp_correct_label = master_df.loc[master_df['filepath'] == row['img_filename'], 'label'].iloc[0]
        
#     # Extract and store the results
#     prediction_row = [row['img_filename'], tmp_correct_label]
#     prediction_row.extend(out1.cpu().numpy().flatten())
#     prediction_row.extend(out2.cpu().numpy().flatten())
#     prediction_row.extend(out3.cpu().numpy().flatten())
#     prediction_row.extend(out4.cpu().numpy().flatten())
#     new_imgs_results_big_model.append(prediction_row)


# # Define column names for the new DataFrame
# column_names = ['filepath', 'label']
# big_model_pred_col_name_original = main_imgs_results_big_model.columns[2:].tolist()
# column_names += big_model_pred_col_name_original

# # Create the DataFrame
# new_imgs_results_big_model = pd.DataFrame(new_imgs_results_big_model, columns=column_names)
# new_imgs_results_big_model.head()

In [None]:
# new_imgs_results_big_model.shape

## All scorings from big model

In [None]:
# Stack stored and freshly scored results, tagging each row with its origin.
# `.assign` works on copies, so the two source frames are left untouched: adding the
# column in place made a re-run of the scoring cell fail, because its
# `main_imgs_results_big_model.columns[2:]` would then also pick up 'img_type'.
all_imgs_results_big_model = pd.concat(
    [
        main_imgs_results_big_model.assign(img_type="existing"),
        new_imgs_results_big_model.assign(img_type="new"),
    ],
    axis=0,
    ignore_index=True,  # fresh 0..n-1 index, same as reset_index(drop=True)
)
all_imgs_results_big_model.head()

In [None]:
# (rows, columns) of the newly scored images
new_imgs_results_big_model.shape

In [None]:
# (rows, columns) of the stored main-image results, for comparison with the combined table
main_imgs_results_big_model.shape

In [None]:
# Last rows of the combined table
all_imgs_results_big_model.tail()

In [None]:
# (rows, columns) of the combined table
all_imgs_results_big_model.shape

In [None]:
# Save the combined results into the model directory; index=True also writes the row index as the first CSV column.
all_imgs_results_big_model.to_csv(os.path.join('../../NN_model/model_weights', CONFIGS['MODEL_PATH'], 'all_imgs_results_big_model.csv'), index=True)

In [None]:
# class MultiHeadResNet_SmallModel(nn.Module):
#     def __init__(self, num_classes_prdtype, num_classes_weight, num_classes_halal, num_classes_healthy):
#         super(MultiHeadResNet_SmallModel, self).__init__()
#         self.base_model = models.resnet18(pretrained=True)
#         num_ftrs = self.base_model.fc.in_features
#         self.base_model.fc = nn.Identity()

#         # Define custom fully connected layers for each prediction head
#         self.fc_prdtype = nn.Linear(num_ftrs, num_classes_prdtype)
#         self.fc_weight = nn.Linear(num_ftrs, num_classes_weight)
#         self.fc_halal = nn.Linear(num_ftrs, num_classes_halal)
#         self.fc_healthy = nn.Linear(num_ftrs, num_classes_healthy)

#     def forward(self, x):
#         x = self.base_model(x)
#         prdtype = self.fc_prdtype(x)
#         weight = self.fc_weight(x)
#         halal = self.fc_halal(x)
#         healthy = self.fc_healthy(x)
#         return prdtype, weight, halal, healthy

    
# # load label encoder 
# def load_label_encoder_small_model():
#     le_prdtype = pickle.loads(open("../small_model/output/le_prdtype.pickle", "rb").read())
#     le_weight = pickle.loads(open("../small_model/output/le_weight.pickle", "rb").read())
#     le_halal = pickle.loads(open("../small_model/output/le_halal.pickle", "rb").read())
#     le_healthy = pickle.loads(open("../small_model/output/le_healthy.pickle", "rb").read())
    
#     return le_prdtype, le_weight, le_halal, le_healthy

# le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

# # Load the trained MultiHeadResNet model
# def load_model():
#     # Verify the number of classes for each label
#     num_classes_prdtype = len(le_prdtype.classes_)
#     num_classes_weight = len(le_weight.classes_)
#     num_classes_halal = len(le_halal.classes_)
#     num_classes_healthy = len(le_healthy.classes_)
#     # print(num_classes_prdtype)
#     # print(num_classes_healthy)

#     custom_resnet_model = MultiHeadResNet_SmallModel(
#         num_classes_prdtype=num_classes_prdtype,
#         num_classes_weight=num_classes_weight,
#         num_classes_halal=num_classes_halal,
#         num_classes_healthy=num_classes_healthy
#     )

#     model_path = '../small_model/output/multi_head_model.pth'
#     # print("test1")
#     if os.path.exists(model_path):
#         custom_resnet_model.load_state_dict(torch.load(model_path, map_location=CONFIGS['DEVICE']))
#     else:
#         raise FileNotFoundError(f"Model file not found: {model_path}")
#     # print("test2")
#     custom_resnet_model.to(CONFIGS['DEVICE'])
#     custom_resnet_model.eval()
#     return custom_resnet_model
 
# small_model = load_model()

In [None]:
# new_imgs_df = pd.read_csv("../small_model/new_imgs_list.csv")
# new_imgs_df.reset_index(drop=True, inplace=True)

# # ADHOC: change the new imgs to existing type
# new_imgs_df['label'] = 'AdultMilk_1-99g_Halal_NonHealthy'
# new_imgs_df['ProductType'] = 'AdultMilk'
# new_imgs_df['Weight'] = '1-99g'
# new_imgs_df['HalalStatus'] = 'Halal'
# new_imgs_df['HealthStatus'] = 'NonHealthy'

# new_imgs_df.head()

In [None]:
# new_imgs_results_small_model = pd.read_csv("../small_model/new_imgs_results_small_model.csv")
# new_imgs_results_small_model = new_imgs_results_small_model.loc[new_imgs_results_small_model.Filename.isin(new_imgs_df.filepath)]
# new_imgs_results_small_model.reset_index(drop=True, inplace=True)
# new_imgs_results_small_model.head()

In [None]:
# # Create a copy of the current column names to a list
# new_columns = new_imgs_results_small_model.columns.tolist()

# # Modify the first two elements
# new_columns[0] = 'filepath'
# new_columns[1] = 'label'

# # Assign the modified list of column names back to the DataFrame
# new_imgs_results_small_model.columns = new_columns
# new_imgs_results_small_model.head()

In [None]:
# # Check if any name from 'extracted_names' is not in 'df' and add it as a new column
# new_prdtype = list(set(all_imgs_results_big_model.columns) - set(new_imgs_results_small_model.columns))

# if len(new_prdtype)>0:
#     for col in new_prdtype:
#         new_imgs_results_small_model[col] = np.random.normal(loc=CONFIGS["MEAN_PRIOR"], scale=np.sqrt(0.1), size=new_imgs_results_small_model.shape[0])  # Initialize new columns

# new_imgs_results_small_model.head()  # Display the updated DataFrame for verification

In [None]:
# main_imgs_master_list = pd.read_csv("../master_list.csv")
# main_imgs_master_list.head()

In [None]:
# main_imgs_results_small_model = []  # List to store the results
# le_prdtype, le_weight, le_halal, le_healthy = load_label_encoder_small_model()

# for idx, row in main_imgs_master_list.iterrows():
#     image_path = "../all_images/" + row['filepath']
#     frame = cv2.imread(image_path)

#     # Preprocessing steps
#     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     frame = cv2.resize(frame, (CONFIGS['SMALL_MODEL_IMG_SIZE'], CONFIGS['SMALL_MODEL_IMG_SIZE']))
#     frame = frame.transpose((2, 0, 1))
#     frame = torch.from_numpy(frame).float()
#     frame = transforms_test(frame).unsqueeze(0).to(CONFIGS['DEVICE'])

#     # Perform prediction
#     with torch.no_grad():
#         out1, out2, out3, out4 = small_model(frame)
    
#     # Extract and store the results
#     prediction_row = [row['filepath'], row['label']]
#     prediction_row.extend(out1.cpu().numpy().flatten())
#     prediction_row.extend(out2.cpu().numpy().flatten())
#     prediction_row.extend(out3.cpu().numpy().flatten())
#     prediction_row.extend(out4.cpu().numpy().flatten())
#     main_imgs_results_small_model.append(prediction_row)


# # Define column names for the new DataFrame
# column_names = ['filepath', 'label']
# column_names += ['ProductType_' + name for name in le_prdtype.classes_]
# column_names += ['Weight_' + name for name in le_weight.classes_]
# column_names += ['HalalStatus_' + name for name in le_halal.classes_]
# column_names += ['HealthStatus_' + name for name in le_healthy.classes_]


# # Create the DataFrame
# main_imgs_results_small_model = pd.DataFrame(main_imgs_results_small_model, columns=column_names)
# main_imgs_results_small_model.head()

In [None]:
# # Check if any name from 'extracted_names' is not in 'df' and add it as a new column
# new_prdtype = list(set(all_imgs_results_big_model.columns) - set(main_imgs_results_small_model.columns))

# if len(new_prdtype)>0:
#     for col in new_prdtype:
#         main_imgs_results_small_model[col] = np.random.normal(loc=CONFIGS["MEAN_PRIOR"], scale=np.sqrt(0.1), size=main_imgs_results_small_model.shape[0])  # Initialize new columns

# main_imgs_results_small_model.head()  # Display the updated DataFrame for verification

In [None]:
# main_imgs_results_small_model['img_type'] = "existing"
# new_imgs_results_small_model['img_type'] = "new"
# all_imgs_results_small_model = pd.concat([main_imgs_results_small_model, new_imgs_results_small_model], axis=0)
# all_imgs_results_small_model.reset_index(drop=True, inplace=True)
# all_imgs_results_small_model.head()

In [None]:
# all_imgs_results_small_model.to_csv("all_imgs_results_small_model.csv", index=True)

# Scoring using updated Bayes model

In [None]:
# Four photos of the same product taken from different angles; used as the test case below.
test_img_paths = [
    "IMG_20230428_123708_jpg.rf.141ecd0cefaea75c0b7a5f281475dd6b.jpg",
    "IMG_20230428_123659_jpg.rf.5e1b6c4caabe48cf36003cb4184ff380.jpg",
    "IMG_20230428_123704_jpg.rf.5fc2415d06061ea102ef125a37bbc88c.jpg",
    "IMG_20230428_123703_jpg.rf.6a9c54175f59238cdc83999cdee6dad4.jpg",
]

In [None]:
from IPython.display import HTML, display

# Build one <img> tag per test image (150 px wide) and concatenate them into a single HTML snippet
html_images = ''.join(
    f'<img src="../../rshiny/www/all_images/{path}" width="150" />'
    for path in test_img_paths
)

# Render the thumbnails side by side in the cell output
display(HTML(html_images))

## Prepare average logits on product type prediction from big model

In [None]:
# (rows, columns) of the combined big-model results the test images are selected from
all_imgs_results_big_model.shape

In [None]:
# Keep only the rows belonging to the test images and renumber them from 0.
big_model_input = (
    all_imgs_results_big_model
    .loc[all_imgs_results_big_model.filepath.isin(test_img_paths)]
    .reset_index(drop=True)
)
big_model_input.head()

In [None]:
# (rows, columns) of the selection; one row per matched test image
big_model_input.shape

In [None]:
def get_max_col_name(row):
    """Return the index label of the largest value in ``row`` (via ``idxmax``)."""
    winning_label = row.idxmax()
    return winning_label

# Individual predictions: for each test image (row), the ProductType column with the largest value
big_model_input.filter(like="ProductType").apply(get_max_col_name, axis=1)

In [None]:
# Prediction using average logits: average every ProductType column over the test images,
# then take the column with the highest mean
big_model_input.filter(like="ProductType").mean().idxmax()

## Prepare predictions on product type and image quality from GPT

In [None]:
# choose the first image to call gpt
import base64
import requests
import sys
import pandas as pd
from openai import OpenAI

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a Base64 text string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')

# Prefer the OPENAI_API_KEY environment variable so the notebook also runs on machines
# without the local key file; fall back to that file otherwise.
api_key = os.environ.get("OPENAI_API_KEY", "").strip()
if not api_key:
    with open('/Users/liupeng/Desktop/Research/api.txt', 'r') as file:
        # strip(): a trailing newline from the file would otherwise end up inside the
        # Authorization header value
        api_key = file.read().strip()

# Only the first test image is sent to GPT
image_path = os.path.join("../../rshiny/www/all_images/", test_img_paths[0])
base64_image = encode_image(image_path)

headers = {
      "Content-Type": "application/json",
      "Authorization": f"Bearer {api_key}"
    }

# Chat-completions request body: a single user message made of the instruction text
# (the allowed options for each of the four labels plus six High/Medium/Low assessments,
# answer requested as JSON) followed by the Base64-encoded image as a data URL.
# NOTE(review): confirm the model name below is still served by the API.
payload = {
          "model": "gpt-4-vision-preview",
          "messages": [
            {
              "role": "user",
              "content": [
                {
                  "type": "text",
                  "text": """ 
                  For this image, can you make a prediction for the following four labels? 
                  The product type is based on the appearance of the product, 
                  the weight is to recognize how heavy the product is by identify the weight information on the appearance,
                  the halal status is to recognize if the product is halal food or not, 
                  and the healthy status is to recognize if the product is healthy if it contains a red triangle shape based on Singapore standard.
                  Note that you can only choose one from the given options in the bracket for each label, even if you are not sure. 
                  also there is no need to add an extra note to your answer.
                    product type (
                        Babyfood
                        BabyMilkPowder
                        BeehoonVermicelli
                        BiscuitsCrackersCookies
                        Book
                        BreakfastCereals
                        CannedBakedBeans
                        CannedBeefOtherMeats
                        CannedBraisedPeanuts
                        CannedChicken
                        CannedFruits
                        CannedMushrooms
                        CannedPacketCreamersSweet
                        CannedPickles
                        CannedPorkLunchronMeat
                        CannedSardinesMackerel
                        CannedSoup
                        CannedTunaDace
                        CannedVegetarianFood
                        ChocolateMaltPowder
                        ChocolateSpread
                        CoffeePowder
                        CoffeeTeaDrink
                        CookingCreamMilk
                        CookingPastePowder
                        CornChip
                        DarkSoySauce
                        DriedBeans
                        DriedFruits
                        DriedMeatSeafood
                        DriedVegetables
                        FlavoredMilkDrink
                        Flour
                        FruitJuiceDrink
                        HerbsSpices
                        InstantMeals
                        InstantNoodlesMultipack
                        InstantNoodlesSingle
                        Jam
                        Kaya
                        KetchupChilliSauce
                        LightSoySauce
                        MaternalMilkPowder
                        MilkDrink
                        MilkPowder
                        Nuts
                        Oil
                        OtherBakingNeeds
                        OtherCannedBeansPeasNuts
                        OtherCannedSeafood
                        OtherCannedVegetables
                        OtherDriedFood
                        OtherHotBeveragesPowder
                        OtherNoodles
                        OtherSauceDressing
                        OtherSpreads
                        Pasta
                        PastaSauce
                        PeanutButter
                        Potatochips
                        PotatoSticks
                        RiceBrownOthers
                        RiceWhite
                        RolledOatsInstantOatmeal
                        Salt
                        SoftDrinksOtherReadyToDrink
                        SoupStock
                        Sugar
                        SweetsChocolatesOthers
                        TeaPowderLeaves
                        WetWiper
                    ),
                    weight ('400-499g', '700-799g', '500-599g', '200-299g', '100-199g',
                       '1-99g', '300-399g', '600-699g', '800-899g', '1000-1999g',
                       '900-999g', '3000-3999g'
                    ),
                    halal status ('NonHalal', 'Halal'),
                    healthy status ('NonHealthy', 'Healthy'),
                    
                    Also, provide the following assessment
                    image reflection (High, Medium, Low)
                    image clarity (High, Medium, Low)
                    prediction confidence for the product type (High, Medium, Low)
                    prediction confidence for the weight (High, Medium, Low)
                    prediction confidence for the halal status (High, Medium, Low)
                    prediction confidence for the healthy status (High, Medium, Low)

                    format your answer in json format so that it could be easily converted to dataframe
                    """
                },
                {
                  "type": "image_url",
                  "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                  }
                }
              ]
            }
          ],
          "max_tokens": 300
        }


# Send the request exactly once (the previous version posted the same payload twice and
# threw the first answer away), with a timeout so a stalled connection cannot hang the kernel.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=120,
)
# Surface HTTP errors (bad key, quota, unknown model) here instead of as a KeyError below
response.raise_for_status()

# Extracting the message content (the text holding the predictions)
content = response.json()['choices'][0]['message']['content']

# Parsing the content to extract prediction values:
# split into lines, skip the first two lines (header), keep only "key: value" lines
lines = content.strip().split('\n')[2:]
data_parsed = [line.replace('"', '').strip() for line in lines if ':' in line]

data_dict = {}
for item in data_parsed:
    # partition on the FIRST colon only, so a value that itself contains ':' is kept whole
    key, _, value = item.partition(":")
    data_dict[key.strip()] = value.strip().strip(',')

# Converting the dictionary into a one-row DataFrame
df_from_strings = pd.DataFrame([data_dict])


In [None]:
# Show the parsed GPT answer (one row, one column per parsed key)
df_from_strings

In [None]:
# Canonical names for the ten fields requested from GPT, in the order the prompt lists them
col_names = ['product_type', 'weight', 'halal', 'health', 'image_reflection',
             'image_clarity', 'product_type_confidence', 'weight_confidence',
             'halal_confidence', 'health_confidence']
parsed_data = df_from_strings
# Leftovers of an earlier multi-image version of this cell; kept defined but unused here
normalized_dfs = []
tmp_valid_paths = []

# Work on a copy so df_from_strings keeps the raw parsed answer when this cell is re-run
df = df_from_strings.copy()
n_cols = df.shape[1]

if n_cols == len(col_names):
    # Renaming columns to the consistent names
    df.columns = col_names
elif n_cols == len(col_names) + 1:
    # One surplus trailing column (e.g. a "Note" GPT added anyway): drop it, then rename
    df = df.iloc[:, :-1]
    df.columns = col_names
elif n_cols != 0:
    # Unexpected layout: report it; the columns are left as parsed.
    # (The previous version printed undefined loop variables here and raised NameError.)
    print("--\n")
    print(f"Unexpected number of GPT fields: {n_cols} (expected {len(col_names)})")

gpt_pred_df = df

cols = gpt_pred_df.columns.tolist()
gpt_pred_df

In [None]:
# Join the four predicted attributes into one label: <product_type>_<weight>_<halal>_<health>
gpt_pred_label = (
    gpt_pred_df['product_type']
    + "_" + gpt_pred_df["weight"]
    + "_" + gpt_pred_df["halal"]
    + "_" + gpt_pred_df["health"]
)
gpt_pred_label.values[0]

In [None]:
# Full Series of combined labels (one entry per row of gpt_pred_df)
gpt_pred_label

In [None]:
# Join the four confidence ratings into one string:
# Product_<..>-Weight_<..>-Halal_<..>-Health_<..>
gpt_pred_confidence = (
    "Product_" + gpt_pred_df['product_type_confidence']
    + "-Weight_" + gpt_pred_df['weight_confidence']
    + "-Halal_" + gpt_pred_df['halal_confidence']
    + "-Health_" + gpt_pred_df['health_confidence']
)
gpt_pred_confidence.values[0]

## Call R script to perform scoring

In [None]:
import subprocess
import json
import os
import pandas as pd

# Set the R_HOME environment variable to the R home directory used by RStudio
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Resources'

# Put R's bin directory first on PATH so the Rscript call below resolves to it.
# Any earlier occurrence is removed first, so re-running this cell does not keep
# growing PATH, and a missing PATH no longer raises KeyError.
r_bin_dir = '/Library/Frameworks/R.framework/Resources/bin'
current_path = os.environ.get('PATH')
other_entries = [entry for entry in current_path.split(os.pathsep) if entry != r_bin_dir] if current_path else []
os.environ['PATH'] = os.pathsep.join([r_bin_dir] + other_entries)


In [None]:
# Serialise both inputs as JSON records and hand them to the R script as arguments.
# NOTE(review): the JSON travels on the command line, so very large inputs may exceed
# the operating system's argument-length limit.
input1 = big_model_input.to_json(orient='records')
input2 = gpt_pred_df.to_json(orient='records')

# The command (ensure correct paths)
command = ['Rscript', 'bayes_model_real_time.R', input1, input2]

# Run the command and capture the output
result = subprocess.run(command, capture_output=True, text=True)

# Fail loudly: the previous version only printed on failure, which left `output`
# undefined (or stale from an earlier run) for the next cell.
if result.returncode != 0:
    raise RuntimeError(f"Error running R script:\n{result.stderr}")

# Parse the JSON the R script wrote to stdout
try:
    output = json.loads(result.stdout)
except json.JSONDecodeError as exc:
    raise ValueError(f"Failed to parse JSON output. Raw stdout from R:\n{result.stdout}") from exc

print("Output from R:", output)


In [None]:
# First entry of the 'pred' field returned by the R script
output['pred'][0]