In [None]:
import pandas as pd
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor
from glob import iglob
import matplotlib.pyplot as plt

In [None]:
class img_to_df:
    """Load a folder-per-label image dataset into train / cross-validation DataFrames.

    ``images_path`` must contain one sub-directory per label, each holding
    ``*.png`` images, for example
    "./DevanagariHandwrittenCharacterDataset/DevanagariHandwrittenCharacterDataset/Train/character_10_yna/*.png".
    Every image becomes one flattened row vector and the name of its
    sub-directory becomes the value of the ``label`` column.
    """

    # Number of threads used to read the images of one label folder.
    MAX_WORKERS = 16

    def __init__(self, images_path, split_ratio, images_per_class=1700):
        """Remember the dataset location and how to split it.

        Parameters
        ----------
        images_path : str
            Directory that contains one sub-directory per label.
        split_ratio : tuple of float
            ``(train_fraction, cv_fraction)``.
        images_per_class : int or None, optional
            Count that both fractions are multiplied by to obtain the number
            of train / cv images taken from each label folder. The default,
            1700, is the value that used to be hard-coded (presumably the
            number of images per label in the dataset this notebook targets).
            Pass ``None`` to use the number of images actually found in each
            folder instead.
        """
        self.path = images_path
        # Only sub-directories are labels: a stray file (e.g. ".DS_Store")
        # would otherwise become a label with zero images. Sorting makes the
        # row order of the resulting frames reproducible.
        self.unique_labels = sorted(
            entry
            for entry in os.listdir(images_path)
            if os.path.isdir(os.path.join(images_path, entry))
        )
        self.train_cv_split = split_ratio
        self.images_per_class = images_per_class

    def list_of_images(self, folder):
        """Return a lazy iterator over the ``*.png`` paths of one label folder."""
        return iglob(os.path.join(self.path, folder, "*.png"))

    def read_image(self, folder_image):
        """Read one image file and return its pixels as a 1-D row vector."""
        image = plt.imread(folder_image)
        # reshape(-1) gives the same result as height*width for a 2-D image
        # and also flattens images that carry a channel axis instead of raising.
        return image.reshape(-1)

    def stacking_row_vectors(self, folder):
        """Read every image of ``folder`` in parallel and return an iterator of row vectors.

        Paths are sorted first so the order of the rows (and therefore the
        train / cv split) does not depend on the file-system listing order.
        Leaving the ``with`` block waits for all reads to finish, so the
        returned iterator yields already computed results.
        """
        image_paths = sorted(self.list_of_images(folder))
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as pool:
            row_vectors = pool.map(self.read_image, image_paths)
        return row_vectors

    @staticmethod
    def _take(iterator, count):
        """Return up to ``count`` items pulled from ``iterator``."""
        # range() is the first zip argument on purpose: zip stops as soon as
        # the range is exhausted, without consuming an extra item.
        return [item for _, item in zip(range(count), iterator)]

    @staticmethod
    def _build_frame(matrices, labels):
        """Stack the per-label matrices into one DataFrame with a ``label`` column."""
        if not matrices:
            # Nothing was collected for this split: return an empty frame
            # instead of failing inside np.concatenate.
            return pd.DataFrame({'label': labels})
        frame = pd.DataFrame(data=np.concatenate(matrices, axis=0))
        frame['label'] = labels
        return frame

    def generate_df(self):
        """Build the train and cross-validation DataFrames.

        For every label folder the first ``int(train_fraction * base)`` images
        go to the training set and the next ``int(cv_fraction * base)`` images
        to the cross-validation set, where ``base`` is ``images_per_class`` or,
        when that is ``None``, the number of images found in the folder. A
        folder with fewer images than requested simply contributes what it has.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(train_data, cv_data)``: one row per image, one column per pixel
            plus a ``label`` column holding the folder name.
        """
        train_data = list()
        cv_data = list()
        train_labels = list()
        cv_labels = list()

        for folder in self.unique_labels:
            row_vectors = self.stacking_row_vectors(folder)

            if self.images_per_class is None:
                available = list(row_vectors)
                base_count = len(available)
                row_vectors = iter(available)
            else:
                base_count = self.images_per_class

            train_rows = self._take(row_vectors, int(self.train_cv_split[0] * base_count))
            cv_rows = self._take(row_vectors, int(self.train_cv_split[1] * base_count))

            # Labels are recorded per folder, using the number of rows that
            # folder really contributed, so rows and labels always line up.
            # Empty parts are skipped because a 1-D empty array cannot be
            # concatenated with the 2-D matrices of the other folders.
            if train_rows:
                train_data.append(np.array(train_rows))
                train_labels.extend([folder] * len(train_rows))
            if cv_rows:
                cv_data.append(np.array(cv_rows))
                cv_labels.extend([folder] * len(cv_rows))

            # Exercise: replace the two appends above with something that
            # puts much less load on main memory.

        return (
            self._build_frame(train_data, train_labels),
            self._build_frame(cv_data, cv_labels),
        )

In [None]:
# Point the loader at the training folder; the tuple holds the fractions
# used for the training and the cross-validation split, in that order.
obj = img_to_df(
    images_path="./DevanagariHandwrittenCharacterDataset/DevanagariHandwrittenCharacterDataset/Train",
    split_ratio=(0.8, 0.2),
)

In [None]:
# Read every label folder and build the two DataFrames (training, cross-validation).
training_data,cv_data = obj.generate_df()

In [None]:
def image_fetch():
    """Yield, one at a time, the path of each ``*.png`` file in the ``character_10_yna`` training folder."""
    yield from iglob("./DevanagariHandwrittenCharacterDataset/DevanagariHandwrittenCharacterDataset/Train/character_10_yna/*.png")

In [None]:
# Calling a generator function only creates the generator; no path is looked up yet.
g = image_fetch()

In [None]:
# Displaying g shows the generator object itself, not the paths it will yield.
g

In [None]:
# next() runs the generator up to its next yield and returns one image path.
next(g)

In [4]:
def fetch_numbers():
    """Yield consecutive integers starting at 0, one per request, up to a very large bound."""
    yield from range(100000000000000000000000000000000000000000000000000000000000000000000000000000000)

In [5]:
# Creating the generator is instant: the huge range is never materialised as a list.
f = fetch_numbers()

In [7]:
# Each next() call resumes the generator and returns a single number.
next(f)

0

In [8]:
# The generator keeps its position between calls, so this returns the following number.
next(f)

1

In [9]:
# Another call continues from where the generator last paused.
next(f)

2