In [3]:
from pathlib import Path
from urllib.request import urlretrieve
import zipfile
#from scipy.misc import imread
from scipy.ndimage.interpolation import zoom#
import numpy as np
from matplotlib.pyplot import imread

def fetch_cropped_yaleb(data_folder, zooming=0.5, max_n_subjects=None):
    """Download (if needed) and load the CroppedYale image archive.

    The archive is cached under ``<data_folder>/cropped_yaleb``: the zip is
    downloaded only if ``yaleb.zip`` is missing there, and extracted only if
    the ``CroppedYale`` folder is missing.

    Parameters
    ----------
    data_folder: string or path-like
        root folder under which the ``cropped_yaleb`` cache folder lives
    zooming: float, optional, default is 0.5
        factor by which to resize the (non-ambient) images
    max_n_subjects: {None, int}, optional, default is None
        if not None, at most the first max_n_subjects subjects (in sorted
        folder-name order) are returned

    Returns
    -------
    dict: {
        subject_name: {'images': array of shape (n_images, height, width),
                       'ambient': image_ambient or None,
        }
    }

    Images are stored as numpy arrays. The ambient image is returned as
    read from disk (it is not resized); it is None when the subject folder
    holds no file whose name contains 'Ambient'. Subject folders without
    any non-ambient ``*.pgm`` image are skipped.
    """
    url = 'http://vision.ucsd.edu/extyaleb/CroppedYaleBZip/CroppedYale.zip'
    yaleb_path = Path(data_folder).joinpath('cropped_yaleb')
    extracted_path = yaleb_path.joinpath('CroppedYale')

    # Key the whole download/extract step on the extracted folder itself, so
    # a cache folder that only holds the zip (e.g. after an interrupted run)
    # still gets extracted instead of crashing on mkdir or being skipped.
    if not extracted_path.exists():
        yaleb_path.mkdir(parents=True, exist_ok=True)
        zip_path = yaleb_path.joinpath('yaleb.zip')

        # If zip not already downloaded, download it
        if not zip_path.exists():
            urlretrieve(url, zip_path.as_posix())

        # Context manager guarantees the archive handle is closed.
        with zipfile.ZipFile(zip_path.as_posix()) as zfile:
            zfile.extractall(path=yaleb_path.as_posix())

    # iterdir() yields entries in arbitrary order; sort so that "the first
    # max_n_subjects" is deterministic across runs and platforms.
    subject_folders = sorted(
        entry for entry in extracted_path.iterdir() if entry.is_dir()
    )

    yaleb = {}
    for folder_path in subject_folders:
        # '>=' (not '>') so that exactly max_n_subjects subjects are returned.
        if max_n_subjects is not None and len(yaleb) >= max_n_subjects:
            break

        # Reset per subject so an ambient image never leaks from the
        # previously processed folder.
        ambient = None
        images = []
        for path in sorted(folder_path.glob('*.pgm')):
            if 'Ambient' in path.name:
                ambient = imread(path.as_posix())
            else:
                # Leading axis of size 1 lets the images be stacked below.
                images.append(zoom(imread(path.as_posix()), zooming)[None, ...])

        # np.concatenate raises on an empty list; skip image-less folders.
        if not images:
            continue

        yaleb[folder_path.name] = {
            'images': np.concatenate(images),
            'ambient': ambient,
        }

    return yaleb

In [7]:
# Root location handed to fetch_cropped_yaleb as `data_folder`; the function
# creates/reuses a 'cropped_yaleb' sub-folder beneath it.
# NOTE(review): despite the '.zip' suffix this value is used as a folder
# path, not as an archive file - confirm this is the intended location.
# A raw string keeps the backslashes literal instead of relying on the
# invalid escape sequences '\D' and '\C'.
dataset_path = r'\Downloads\CroppedYale.zip'

data = fetch_cropped_yaleb(dataset_path, zooming=0.3, max_n_subjects=5)

In [8]:
# Echo the loaded dictionary so the notebook renders its repr
# (subject name -> {'images': ..., 'ambient': ...}).
data

{'yaleB01': {'images': array([[[ 80,  91,  95, ...,  77,  69,  71],
          [ 79,  90,  89, ...,  80,  68,  63],
          [ 79,  87,  96, ...,  78,  69,  56],
          ...,
          [ 20,  29,  32, ...,  42,  36,  34],
          [ 20,  20,  20, ...,  37,  23,  14],
          [ 12,  14,  18, ...,  24,  13,  11]],
  
         [[ 94, 105, 109, ...,  88,  80,  79],
          [ 87, 102, 105, ...,  91,  81,  70],
          [ 93,  96, 111, ...,  87,  79,  69],
          ...,
          [  8,  10,  17, ...,  23,  13,  14],
          [  7,  10,  11, ...,  14,  12,   8],
          [  4,  10,   8, ...,  14,   6,   8]],
  
         [[ 59,  89, 109, ...,  92,  73,  58],
          [ 75,  91, 110, ..., 100,  82,  88],
          [ 90,  99, 117, ...,  92,  78,  81],
          ...,
          [  4,   4,   5, ...,   8,   8,   7],
          [  4,   4,   3, ...,   8,   4,   6],
          [  3,   5,   4, ...,   7,   4,   7]],
  
         ...,
  
         [[  4,   4,   5, ..., 133, 141, 111],
          [ 