# Dataset Base and Meta

> Base Classes for Datasets

In [None]:
#| default_exp abc.dfds.base

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass, field

from beartype.typing import (
    Tuple, Union, List, Any, Optional,  Type
)

import numpy as np, pandas as pd

from torch.utils.data import Dataset

In [None]:
#| export
from litds.types import IterLike

from litds.abc.dfds.meta import MetaDataFrameDataset
from litds.abc.dfds.mixs import DataFrameArgsMixin, DataFrameKWArgsMixins
from litds.abc.idxs import (
    DataFrameDatasetLocIndexer,
    DataFrameDatasetILocIndexer, 
    DataFrameDatasetCLocIndexer
)

## Base DataFrame Dataset

In [None]:
#| export
@dataclass
class BaseDataFrameDataset(Dataset, DataFrameArgsMixin, DataFrameKWArgsMixins, metaclass=MetaDataFrameDataset):
    """Base torch `Dataset` that serves rows of the DataFrame held in `self.df`.

    After initialisation three indexer helpers are attached to the instance:

    - `loc`:  a `DataFrameDatasetLocIndexer` bound to this dataset
    - `iloc`: a `DataFrameDatasetILocIndexer` bound to this dataset
    - `cloc`: a `DataFrameDatasetCLocIndexer` bound to this dataset

    NOTE(review): `self.df` and the dataclass fields are not declared here;
    they are presumably supplied by `DataFrameArgsMixin` /
    `DataFrameKWArgsMixins` (and/or the metaclass) - confirm against those
    definitions.
    """

    def __post_init__(self):
        """Run the inherited post-init hook, then attach the indexers."""
        # Delegates to the next `__post_init__` in the MRO (presumably one of
        # the mixins) before the indexers are built on top of this instance.
        super().__post_init__()

        self.loc  = DataFrameDatasetLocIndexer(self)
        self.iloc = DataFrameDatasetILocIndexer(self)
        self.cloc = DataFrameDatasetCLocIndexer(self)

    def __len__(self):
        """Return the number of rows in `self.df`."""
        return len(self.df)

    def __iter__(self):
        """Yield `self[i]` for every position `i` in `range(len(self))`."""
        # Goes through `self[i]` (not `self.df` directly) so that a subclass
        # overriding `__getitem__` also changes what iteration yields.
        for i in range(len(self)):
            yield self[i]

    def check(self, attr: Any, default: Optional[Any]=None) -> Any:
        """Return the attribute named `attr`, or `default` if it is missing.

        Thin wrapper around `getattr(self, attr, default)`.
        """
        return getattr(self, attr, default)

    def __getitem__(self, idx: Union[int, slice, IterLike]) -> List:
        """Return `self.df.iloc[idx]`, i.e. positional selection on `self.df`.

        NOTE(review): the `List` return annotation is kept for interface
        stability; for a pandas DataFrame, `.iloc` yields a Series or a
        DataFrame rather than a list - confirm whether the annotation is
        enforced anywhere.
        """
        return self.df.iloc[idx]

    def getall(self):
        """Return `self[np.arange(k)]`, where `k` is the number of categories
        in `self.cloc.cindex`.

        NOTE(review): with this class's positional `__getitem__` that is the
        first `k` rows; presumably this is meant for subclasses whose
        `__getitem__` indexes by category position - confirm.
        """
        n_categories = len(self.cloc.cindex.categories)
        return self[np.arange(n_categories)]

### Example

In [None]:
#| eval: False
# Toy frame for the demos below: 10 rows of random integers in [0, 10) across
# columns x/y/z, with a deliberately non-unique index drawn from 'a', 'b', 'c'.
df = pd.DataFrame(
    data=np.random.randint(low=0, high=10, size=(10, 3)),
    index=np.random.choice('a b c'.split(), size=10),
    columns='x y z'.split(),
)
df.head()

Unnamed: 0,x,y,z
b,7,5,7
a,0,9,0
c,5,6,9
a,1,3,9
b,8,2,4


In [None]:
#| eval: False
# Minimal concrete subclass used by the demo cells; it adds nothing on top of
# `BaseDataFrameDataset`.
class DataFrameDataSetTest(BaseDataFrameDataset):
    pass

In [None]:
#| eval: False
# Wrap the example DataFrame; `df` is passed as the first positional argument.
ds = DataFrameDataSetTest(df)

In [None]:
#| eval: False
# Positional indexing: fetch the row at position 0 (shown as a Series).
ds.iloc[0]

x    7
y    5
z    7
Name: b, dtype: int64

In [None]:
#| eval: False
# Category indexing: position 0 refers to the first category of the index
# ('a' in the run shown), so every row carrying that label is returned.
ds.cloc[0]

Unnamed: 0,x,y,z
a,0,9,0
a,1,3,9


In [None]:
#| eval: False
# Row positions of every occurrence of label 'a' in the indexer's `cindex`
# (`cindex` appears to be a pandas CategoricalIndex - confirm).
ds.cloc.cindex.get_indexer_for(['a'])

array([1, 3])

In [None]:
#| eval: False
# Translate category position 0 into its label(s); the run shown gives ['a'].
ds.cloc.icat(0)

['a']

In [None]:
#| eval: False
# Label-based indexing: all rows whose index label is 'a'.
ds.loc['a']

Unnamed: 0,x,y,z
a,0,9,0
a,1,3,9


In [None]:
#| eval: False
# Positional slice: the first two rows.
ds.iloc[:2]

Unnamed: 0,x,y,z
b,7,5,7
a,0,9,0


In [None]:
#| eval: False
# The first two category labels of the index.
ds.cloc.cindex.categories[:2]

Index(['a', 'b'], dtype='object')

In [None]:
#| eval: False
# Label-based indexing with a list of labels; in the run shown the matching
# rows come back in their original DataFrame order.
ds.loc[['a', 'b']]

Unnamed: 0,x,y,z
b,7,5,7
a,0,9,0
a,1,3,9
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| eval: False
# Same label lookup, passing a pandas Index instead of a plain list.
ds.loc[pd.Index(['a', 'b'])]

Unnamed: 0,x,y,z
b,7,5,7
a,0,9,0
a,1,3,9
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| eval: False
# List-based label lookup for 'a' and 'b'; rows keep their original DataFrame
# order in the run shown.
ds.loc[['a', 'b']]

Unnamed: 0,x,y,z
b,7,5,7
a,0,9,0
a,1,3,9
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| eval: False
# Manual positional lookup on the underlying frame: `get_indexer_for` returns
# positions grouped by requested label, so the rows come back as all 'a' rows
# followed by all 'b' rows.
ds.df.iloc[ds.cloc.cindex.get_indexer_for(['a', 'b'])]

Unnamed: 0,x,y,z
a,0,9,0
a,1,3,9
b,7,5,7
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| eval: False
# Category indexing by label: rows are grouped per category ('a' rows first,
# then 'b' rows) in the run shown.
ds.cloc[pd.Index(['a', 'b'])]

Unnamed: 0,x,y,z
a,0,9,0
a,1,3,9
b,7,5,7
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| eval: False
# Category slice: the first two categories, which in the run shown yields the
# same rows as selecting 'a' and 'b' by label.
ds.cloc[:2]

Unnamed: 0,x,y,z
a,0,9,0
a,1,3,9
b,7,5,7
b,8,2,4
b,0,9,2
b,2,3,4


In [None]:
#| hide
# Export the cells marked `#| export` to the library module.
import nbdev

nbdev.nbdev_export()