In [92]:
# lets first get our data imports and actual data we will work with
# NO PYTORCH 
from datasets import load_dataset

from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union
import tinygrad

In [93]:
# let's get teh dataset first, we will not use dataloaders so we may need to build one from scratch? this should be fun who knows?
name = "fashion_mnist"
dsd = load_dataset(name)
dsd

Found cached dataset fashion_mnist (/Users/diegomedina-bernal/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48)
100%|██████████| 2/2 [00:00<00:00, 63.26it/s]


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [4]:
# just split for now, we know this, and x, y so we dont keep writing the names
train, test = dsd["train"], dsd["test"]
x, y = 'image', 'label'

In [29]:
# imports for dataloader to work
from PIL import Image
try: import accimage
except ImportError: accimage = None
import numpy as np

In [77]:
from tinygrad.tensor import Tensor
from tinygrad.helpers import dtypes

In [30]:
def is_pil_image(img: Any) -> bool:
    if accimage is not None: return isinstance(img, (Image.Image, accimage.Image))
    else: return isinstance(img, Image.Image)
def is_numpy(img: Any) -> bool: return isinstance(img, np.ndarray)
def is_numpy_img(img: Any) -> bool: return is_numpy(img) and img.ndim in {2, 3}

def get_image_num_channels(img: Tensor) -> int:
    if img.ndim == 2:
        return 1
    elif img.ndim > 2:
        return img.shape[-3]

In [32]:
tst = train[0]['image']

In [62]:
type(tst)

PIL.PngImagePlugin.PngImageFile

In [89]:
tst_np = np.array(tst)
type(tst_np), tst_np.shape

(numpy.ndarray, (28, 28))

In [91]:
# get number of channels from pil image
"""
I decided to answer my own question (although I basically will sum up the comment of @cryptonome).

Well, when it comes to PIL the options as I get it are:

image.mode: returns a str containing the mode of the data read. Typical values are "RGB" and "L" for RGB and gray-scale images respectively. Modes are presented here.
im2.info: which returns a dict containing various information about the image. This is image format specific. For jpg images for example it (possibly) contains fields with keys: dpi, jfif, jfif_density, exif etc. More information about jpg images can be found here.
image.getbands(): which returns a tuple (even a 1 element one) containing all different channel present in the data. For a typical RGB image this would be ('R', 'G', 'B') and for a typical gray-scale image would be ('L',).
So, judging from the above the more concise method in my opinion would be to compare image.mode against L and RGB strings to find if an image is gray-scale or not or if the number of channels (as in this question) is the main question then a simple len(image.getbands()) would do the job.

Normally len(image.mode) will coincide with len(image.getbands()) and could be used in its place but since there is at least one mode YCbCr which contains 5 characters but only 3 channels (3x8-bit pixels, color video format) it's safer to use len(image.getbands()) I guess
"""
tst_pil = Image.fromarray(tst_np

('L',)

In [70]:
tst_np = tst_np[:, :, None] # cast
tst_np = tst_np.transpose((2, 0, 1))
tst_np.shape

(1, 28, 28)

In [72]:
Tensor(tst_np).shape

(1, 28, 28)

In [48]:
x = Tensor.eye(3, requires_grad=True)
y = Tensor([[2.0,0,-2.0]], requires_grad=True)

In [76]:
x.cast(dtypes.float16)

<Tensor <LB (3, 3) dtypes.half op:UnaryOps.CAST st:ShapeTracker(shape=(3, 3), views=[View((3, 3), (3, 1), 0, None)])> on GPU with grad None>

In [50]:
x, y

(<Tensor <LB (3, 3) dtypes.float op:MovementOps.RESHAPE st:ShapeTracker(shape=(3, 3), views=[View((3, 4), (0, 0), 0, ((0, 3), (0, 1))), View((3, 3), (3, 1), 0, None)])> on GPU with grad None>,
 <Tensor <LB (1, 3) dtypes.float op:LoadOps.FROMCPU st:ShapeTracker(shape=(1, 3), views=[View((1, 3), (0, 1), 0, None)])> on GPU with grad None>)

In [None]:
def to_tensor(pic) -> Tensor:
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to Tinygrad tensor.
    thats right a tinygrad tensor not those damn pytorch ones
    """
    if not (is_pil_image(pic) or is_numpy(pic)):
        raise TypeError(f"pic should be PIL Image or ndarray. Got {type(pic)}")

    if is_numpy(pic) and not is_numpy_img(pic):
        raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")

    # TOOD: need a way to grab the default dtype from the tensor module
    # currently there is no way for this atm, lets change it 
    default_float_dtype = dtypes.float32

    if isinstance(pic, np.ndarray):
        if pic.ndim == 2: pic = pic[:, :, None] # add extra channel
        img = Tensor(pic.transpose((2, 0, 1)), requires_grad=False)
        return img

    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return Tensor(nppic, requires_grad=False).cast(dtype=default_float_dtype)

    # handle PIL Image
    mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
    img = Tensor(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True), requires_grad=False)

    if pic.mode == "1": img = 255 * img
    img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic))
    # put it from HWC to CHW format
    img = img.permute((2, 0, 1)).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.to(dtype=default_float_dtype).div(255)
    else:
        return img

In [10]:
## Okay we will slightly cheat, it seems that tinygrad doesnt have dataloaders or datasets so lets use pytorch ones for now and maybe we build our own? maybe? 
import torchvision.transforms.functional as TF

ModuleNotFoundError: No module named 'torchvision'

In [94]:
t = [1, 2]
t.insert(0, 0)

In [96]:
t = [0] + [1, 2]

In [97]:
t

[0, 1, 2]