In [92]:
import glob
import tensorflow as tf
from pathlib import Path
import math

def datasetImageFolder(start_path,split=None):
    """Build one ``tf.data.Dataset`` of image file paths per split directory.

    The first directory level below ``start_path`` is used as the split name,
    and every jpg/png/jpeg file found recursively below that directory is
    assigned to that split.

    Args:
        start_path: Root directory of the image folder tree.
        split: Currently unused; kept so existing callers keep working.

    Returns:
        dict mapping each split name to a ``tf.data.Dataset`` built with
        ``from_tensor_slices`` over that split's path strings.
    """
    root = Path(start_path)

    filesBySplit = dict()
    for filePath in getFilePaths(start_path,["jpg","png","jpeg"]):
        # Group on the actual first path component instead of a substring
        # test, so a split name that also occurs in a label or file name
        # cannot pull the file into the wrong split.
        parts = Path(filePath).relative_to(root).parts
        if len(parts) < 2:
            # The file sits directly in start_path, so it has no split.
            continue
        filesBySplit.setdefault(parts[0], []).append(filePath)

    datasets = dict()
    for splitName, files in filesBySplit.items():
        datasets[splitName] = tf.data.Dataset.from_tensor_slices(files)

    return datasets

def getFilePaths(start_path,extensions= None):
    """Recursively collect the files below ``start_path``.

    Args:
        start_path: Directory that is searched recursively.
        extensions: Optional iterable of extensions without the leading dot
            (e.g. ``["jpg", "png"]``). When ``None``, every file is returned.

    Returns:
        list of path strings; directories are excluded.
    """
    root = Path(start_path)

    if extensions is None:
        # rglob requires a pattern; "*" matches every entry below root.
        patterns = ["*"]
    else:
        patterns = ["*." + extension for extension in extensions]

    paths = []
    for pattern in patterns:
        paths.extend(root.rglob(pattern))

    return [str(x) for x in paths if x.is_file()]
    
    
def getLabelFromFilePathTF(file_path):
    """Return the label of ``file_path`` using TensorFlow string ops.

    The path is split on backslashes and the piece at position
    ``len(pieces) - 2`` (the one just before the last piece) is returned.
    """
    pieces = tf.strings.split(file_path,'\\')
    labelPosition = len(pieces) - 2
    return pieces[labelPosition]

def getLabelFromFilePath(file_path):
    """Return the name of the directory that directly contains the file.

    The label is the text between the last two backslashes of ``file_path``
    (or from the start of the string when there is only one backslash).

    Args:
        file_path: Backslash-separated path string.

    Returns:
        The label string, or ``''`` when ``file_path`` contains no backslash.
    """
    lastIndex = file_path.rfind('\\')
    if lastIndex == -1:
        # No separator at all, so there is no parent directory to report.
        return ''

    # The end argument of rfind and the slice end are both exclusive, so
    # lastIndex is used as is; subtracting one would cut the label's last
    # character.
    firstIndex = file_path.rfind('\\',0,lastIndex)+1
    return file_path[firstIndex:lastIndex]

def getAllLabels(start_path,generateUndefined=True):
    """List the distinct labels of all jpg/png/jpeg files below ``start_path``.

    Args:
        start_path: Root directory that is searched recursively.
        generateUndefined: When true, ``'Undefined'`` is placed first in the
            returned list.

    Returns:
        list of label strings: the optional ``'Undefined'`` entry followed by
        the distinct labels in sorted order.
    """
    paths = getFilePaths(start_path,["jpg","png","jpeg"])

    distinctLabels = ['Undefined'] if generateUndefined else []

    # Iteration order of a set of strings can change between interpreter
    # sessions; sorting keeps each label's position (and any encoding built
    # from it) stable from run to run.
    distinctLabels.extend(sorted({getLabelFromFilePath(x) for x in paths}))

    return distinctLabels

def generateOneHotEncodeDict(labels):
    """Map every label to its one-hot row.

    ``labels[i]`` is paired with row ``i`` of ``tf.one_hot`` evaluated over
    the indices ``0 .. len(labels) - 1`` with depth ``len(labels)``.
    """
    labelCount = len(labels)
    encodedRows = tf.one_hot(list(range(labelCount)), labelCount)

    return {labels[row]: encodedRows[row] for row in range(len(encodedRows))}
    
def generateBinaryEncoding(labels):
    """Map every label to the binary representation of its list position.

    Each code is a ``tf.uint8`` tensor holding the bits of the label's index,
    most significant bit first. All codes share the same width, which is the
    bit length of ``len(labels)``.

    Args:
        labels: Sequence of labels; position ``i`` is encoded as the integer
            ``i``.

    Returns:
        dict mapping each label to its bit tensor; empty when ``labels`` is
        empty.
    """
    if len(labels) == 0:
        # Nothing to encode; the logarithm-based width would be undefined.
        return dict()

    # Same width as int(log2(n)) + 1, computed with exact integer arithmetic
    # instead of truncating a floating-point logarithm.
    width = len(labels).bit_length()

    binary_encode_dict = dict()
    for index, label in enumerate(labels):
        bits = [(index >> shift) & 1 for shift in range(width - 1, -1, -1)]
        binary_encode_dict[label] = tf.convert_to_tensor(bits,dtype=tf.uint8)

    return binary_encode_dict


    

In [111]:
ds = datasetImageFolder("Vgg\\data\\")

#print(datasetImageFolder("Vgg\\data\\"))
#print(generateBinaryEncoding(getAllLabels("Vgg\\data\\",False)))
dsTrain = ds['train']

dsTrain = dsTrain.shuffle(15000)

# Dataset.map returns a new dataset instead of modifying dsTrain, so the
# result is kept under its own name. getLabelFromFilePathTF returns a single
# string, hence a single tf.string output type.
dsTrainLabeled = dsTrain.map(lambda x  : (x, tf.py_function(getLabelFromFilePathTF,[x],tf.string)))

# Print a sample of the shuffled file paths.
for a in dsTrain.take(35):
    print(a.numpy().decode("utf-8"))

Vgg\data\train\BLACK-THROATED SPARROW\125.jpg
Vgg\data\train\CALIFORNIA GULL\079.jpg
Vgg\data\train\AFRICAN FIREFINCH\057.jpg
Vgg\data\train\BALD EAGLE\137.jpg
Vgg\data\train\AMERICAN KESTREL\048.jpg
Vgg\data\train\BANANAQUIT\049.jpg
Vgg\data\train\CAPE MAY WARBLER\089.jpg
Vgg\data\train\CURL CRESTED ARACURI\126.jpg
Vgg\data\train\ELLIOTS  PHEASANT\113.jpg
Vgg\data\train\COMMON FIRECREST\038.jpg
Vgg\data\train\EASTERN TOWEE\042.jpg
Vgg\data\train\CAPUCHINBIRD\100.jpg
Vgg\data\train\CROWNED PIGEON\081.jpg
Vgg\data\train\EASTERN TOWEE\055.jpg
Vgg\data\train\CEDAR WAXWING\021.jpg
Vgg\data\train\CEDAR WAXWING\172.jpg
Vgg\data\train\CRESTED NUTHATCH\121.jpg
Vgg\data\train\BLUE GROUSE\133.jpg
Vgg\data\train\EASTERN ROSELLA\010.jpg
Vgg\data\train\BLUE HERON\063.jpg
Vgg\data\train\EMPEROR PENGUIN\082.jpg
Vgg\data\train\DOUBLE BARRED FINCH\039.jpg
Vgg\data\train\COMMON GRACKLE\160.jpg
Vgg\data\train\ALEXANDRINE PARAKEET\050.jpg
Vgg\data\train\AFRICAN FIREFINCH\019.jpg
Vgg\data\train\GILDED FLIC

In [None]:
100001101