In [None]:
import intake
import pandas as pd
import hvplot.pandas

#### Guess reader from existing code

In [None]:
url = "s3://mymdtemp/intake_1.csv"

In [None]:
df = pd.read_csv(url, storage_options={"anon": True}, usecols=[1, 2])

In [None]:
# `_i` is IPython's variable holding the source text of the previously
# executed cell, so run this directly after the `pd.read_csv(...)` cell above;
# the reader is guessed from that call text.
reader = intake.reader_from_call(_i)

In [None]:
reader

In [None]:
reader.kwargs

#### Or guess from the URL alone

In [None]:
# uses URL alone, but can also match on magic bytes
intake.datatypes.recommend(url)

In [None]:
data = intake.readers.datatypes.CSV(url, storage_options={"anon": True})

#### "What can read this?"

In [None]:
data.possible_outputs

In [None]:
data.possible_readers

In [None]:
# same reader as original
# reader = data.to_reader("pandas:DataFrame")
reader = intake.readers.readers.PandasCSV(data)

```python
class PandasCSV(Pandas):
    """Reader declaring support for ``datatypes.CSV`` via ``pandas:read_csv``."""

    implements = {datatypes.CSV}
    func = "pandas:read_csv"
    url_arg = "filepath_or_buffer"

    def discover(self, **kw):
        """Read with ``nrows`` forced to 10 and return the result of ``read``.

        ``skipfooter`` and ``chunksize`` are removed from the keyword
        arguments before reading (presumably because they do not combine
        with a fixed ``nrows`` read -- confirm against pandas' docs).
        """
        # Drop the options that are not wanted for the short read.
        for unwanted in ("skipfooter", "chunksize"):
            kw.pop(unwanted, None)
        kw["nrows"] = 10
        return self.read(**kw)
```

#### Reader API

In [None]:
reader.read()

In [None]:
print(reader.doc())

In [None]:
# known transforms and what they make
reader.transform

In [None]:
# but have access to full DataFrame API
dir(reader)

In [None]:
# or "pd" namespace (useful for some packages)
reader.pd

#### So let's make a catalog and a pipeline using pandas syntax

In [None]:
# Start from a new catalog and register entries by name.
cat = intake.entry.Catalog()
cat["tute"] = reader  # the PandasCSV reader created above
# Pandas-style expressions written against the reader are stored as entries too
# (assumes the CSV has columns named "a" and "b" -- confirm against the data).
cat["capitals"] = reader.a.str.capitalize()
cat["inverted"] = reader.sort_values("b", ascending=False)
cat["multi"] = cat.tute.assign(c=cat.capitals)  # <- uses multiple readers

In [None]:
reader.a.str.capitalize()

In [None]:
# what gets stored in the catalog entry?
cat.entries["multi"].kwargs

In [None]:
cat

In [None]:
cat.tute.read()

In [None]:
cat.data  # just one data item

#### To and from a catalog file, which you can put anywhere

In [None]:
cat.to_yaml_file("intake_1.yaml")

In [None]:
# a "shared" one I prepared for everyone
cat = intake.from_yaml_file("s3://mymdtemp/intake_1.yaml", anon=True)

In [None]:
# yes, you have <tab> completion
cat.tute

In [None]:
cat.inverted.read()

#### And now you can go about your work; but some convenience functions might still be useful.

In [None]:
# add arguments to make a reader you can persist
cat.inverted.ToHvPlot(explorer=True).read()

In [None]:
cat.inverted.ToMatplotlib.read()

In [None]:
# you can even have Intake guess the whole pipeline
intake.auto_pipeline(data, "PNG", avoid="Geo")

#### But pandas is not the only engine that can work on this data. We can play with the API or make more readers to persist in the catalog.

In [None]:
data

In [None]:
data.to_reader("dask").read()

In [None]:
data.to_reader("ray").read()

In [None]:
# dask-on-ray!
data.to_reader("dask").DaskToRay.read()