In [1]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.io import decode_image
from imageio.v2 import imread
import pandas as pd
import json
import requests as r
import os


class PillDataset(Dataset):
    """Map-style dataset pairing image files with labels.

    Images are read from disk with ``imread`` every time an item is
    requested; nothing is cached.

    Args:
        image_paths: Positionally indexable collection of image file paths
            (pandas Series, list, tuple, NumPy array, ...).
        labels: Positionally indexable collection of labels aligned with
            ``image_paths``. Its length defines the dataset length.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, image_paths, labels, transform=None, target_transform=None):
        self.img_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)

    @staticmethod
    def _item_at(values, idx):
        """Return the element of ``values`` at position ``idx``.

        pandas objects are read through ``.iloc`` so the lookup stays
        positional even when their index is not 0..n-1 (e.g. after boolean
        masking or a train/test split). Anything without ``.iloc`` is indexed
        directly.
        """
        positional = getattr(values, 'iloc', None)
        if positional is None:
            return values[idx]
        return positional[idx]

    def __getitem__(self, idx):
        """Load and return the ``(image, label)`` pair at position ``idx``."""
        image = imread(self._item_at(self.img_paths, idx))
        label = self._item_at(self.labels, idx)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [3]:
def zeroPadFront(x, desiredLength):
    """Return ``str(x)`` left-padded with ``'0'`` to at least ``desiredLength`` characters.

    Args:
        x: Any value; it is converted with ``str`` before padding.
        desiredLength: Minimum length (int) of the returned string.

    Returns:
        The padded string. A value whose string form is already
        ``desiredLength`` characters or longer is returned unchanged
        (it is never truncated).
    """
    # rjust (not zfill) so zeros are prepended blindly, exactly like the
    # original manual loop: '-5' -> '00-5', not '-005'.
    return str(x).rjust(desiredLength, '0')

# Load the label table; each row describes one image.
all_labels = pd.read_csv("./datasets/ePillID_data/all_labels.csv")

# Build the 'NDC' key for every row: the zero-padded (width 4) label code and
# product code joined by a dash, e.g. "0051-0123".
codes = all_labels.apply(lambda x: zeroPadFront(x['label_code_id'], 4) + '-' + zeroPadFront(x['prod_code_id'], 4), axis=1)
all_labels['NDC'] = codes

# Numeric label of a row = index of the FIRST row carrying the same NDC key.
# A lookup table built once from the de-duplicated keys gives the same values
# as scanning `codes == x` for every row, without the quadratic cost.
# NOTE(review): these labels are row indices, not contiguous 0..K-1 class ids;
# confirm that is what the downstream model/loss expects.
first_seen = codes.drop_duplicates()  # keeps the first occurrence of each key
ndc_to_label = pd.Series(first_seen.index, index=first_seen.values)
numeric_labels = all_labels['NDC'].map(ndc_to_label)

# Full on-disk path of every image, and a mask selecting the rows whose
# relative path starts with 'segmented_nih_pills_224'.
image_paths = all_labels['image_path'].map(lambda x: './datasets/ePillID_data/classification_data/' + x)
segmented_mask = all_labels['image_path'].str.startswith('segmented_nih_pills_224')

# Dataset over the masked rows only.
epillid = PillDataset(image_paths[segmented_mask], numeric_labels[segmented_mask])


In [4]:
from sklearn.model_selection import train_test_split

# Keep only the rows selected by `segmented_mask`.
segmented_image_paths = image_paths.loc[segmented_mask]
segmented_numeric_labels = numeric_labels.loc[segmented_mask]

# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    segmented_image_paths,
    segmented_numeric_labels,
    test_size=0.2,
    random_state=42,
)

# One dataset per partition.
train_data, test_data = PillDataset(x_train, y_train), PillDataset(x_test, y_test)


In [5]:
from torch.utils.data import DataLoader

# Single source of truth for the batch size used by both loaders.
BATCH_SIZE = 64

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps per-batch
# outputs reproducible from run to run.
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
from torchvision.models import resnet50, ResNet50_Weights

# ResNet-50 initialised from torchvision's default pretrained weight set.
pretrained_weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=pretrained_weights)


In [None]:
import numpy as np

# Read every training image into memory. `reset_index` turns the Series into a
# frame whose image column is then renamed from 'image_path' to 'image'.
imgs = x_train.map(imread).reset_index().rename(columns={'image_path': 'image'})

# Stack into one (n, h, w, c) array; np.stack requires all images to share
# the same shape.
im = np.stack(imgs['image'])
n, h, w, c = im.shape

# Mean over images, rows and columns leaves one value per channel.
mean = np.mean(im, axis=(0, 1, 2))

# Centre each channel and flatten the spatial dimensions. The channel count
# comes from the data (`c`) rather than a hard-coded 3, so images with a
# different number of channels no longer break the reshape.
centered_pixels = torch.tensor((im - mean).reshape(n, h * w, c))
centered_pixels