In [1]:
#| default_exp core

In [2]:
#| hide
#| export
from __future__ import annotations
import duckdb
from duckdb import DuckDBPyConnection, DuckDBPyRelation
from typing import List, Dict, Optional, Union, Any, Tuple, Set, Literal
from fastcore.all import store_attr, patch, L
import numpy as np
import pandas as pd
from dataclasses import field, make_dataclass
from fastcore.xtras import hl_md, dataclass_src
from functools import wraps, partial
from pathlib import Path
from IPython.display import Markdown


# Source

>  Wrapping DuckDB Python API

When you `import duckdb`, there are two main concepts that you have to handle:


A *DuckDBPyConnection* represents a connection to a DuckDB database in a file or memory

DuckDB API has no concept of a Database, only of a connection that may envolve different attached databases (or catalogs) and schemas.

A Catalog is just a holder for schemas, and schemas hold catalog entries, like tables, views, functions, types, etc..

We will change this behaviour in a bit...

A DuckDBPyRelation represents a query. It is a table-like object that will be lazy executed and can be queried further. 

Once it's executed then yes, it contains the result set.

But when further projections are made on it, that result set is not used, the projections are just done on top of the original query as a subquery

`duckdb.table('tbl').sql("select a")`


Really becomes, in essence:
`select a from (select * from tbl)`


In [3]:
db = duckdb.connect()
db.sql(f"FROM duckdb_tables()").select('table_name')

┌────────────┐
│ table_name │
│  varchar   │
├────────────┤
│   0 rows   │
└────────────┘

In [4]:
type(db.sql(f"SELECT table_name FROM duckdb_tables()"))

duckdb.duckdb.DuckDBPyRelation

In [5]:
db.close()

## Improving Connection

We want the developer to understand the connection as a database.

```python

import duckdb

conn = duckdb.connect()

```

becomes


```python

from fastduck import database
db = database()

```
By acessing the same Python API using `fastduck`, the developer shall get some niceties.


In [6]:
#| export
@wraps(duckdb.connect)
def database(*args, **kwargs):
    db = duckdb.connect(*args, **kwargs)
    return db


Let's start by simplifying the access to some information on the current catalog and schema in the connection.

In [7]:
db = database('../data/chinook.duckdb')

In [8]:
#| export
def _current(self: DuckDBPyConnection): return self.sql('select current_catalog, current_schema').fetchone()
@patch(as_prop=True)
def catalog(self: DuckDBPyConnection): return _current(self)[0]

@patch(as_prop=True)
def schema(self: DuckDBPyConnection): return _current(self)[1]

@patch(as_prop=True) # just the name part in the alias
def name(self:DuckDBPyRelation): return self.alias.split('.')[-1]

@patch
def __getitem__(self:DuckDBPyRelation, idxs) -> DuckDBPyRelation: # selecting by passing a list of column names
    return self.select(*idxs) if isinstance(idxs, Union[List, Set, Tuple]) else self.select(idxs)
@patch 
def to_recs(self:DuckDBPyRelation) -> List[Dict[str, Any]]:
    '''The relation as a list of records'''
    return self.df().to_dict(orient='records')
@patch 
def to_list(self:DuckDBPyRelation) -> List[List]:
    '''The relation as a list'''
    return [list(r.values()) if len(r.values())>1 else list(r.values())[0] for r in self.to_recs() ]
@patch 
def q(self:DuckDBPyConnection, *args, **kwargs) -> List[Dict[str, Any]]:
    '''Run a query and return the result as a list of records'''
    return self.sql(*args, **kwargs).to_recs()



In [9]:
album = db.sql("select * from Album")
album['AlbumId', 'Title'].limit(5)

┌─────────┬───────────────────────────────────────┐
│ AlbumId │                 Title                 │
│  int32  │                varchar                │
├─────────┼───────────────────────────────────────┤
│       1 │ For Those About To Rock We Salute You │
│       2 │ Balls to the Wall                     │
│       3 │ Restless and Wild                     │
│       4 │ Let There Be Rock                     │
│       5 │ Big Ones                              │
└─────────┴───────────────────────────────────────┘

In [10]:
db.table('Album')['AlbumId', 'Title'].limit(2).to_recs()

[{'AlbumId': 1, 'Title': 'For Those About To Rock We Salute You'},
 {'AlbumId': 2, 'Title': 'Balls to the Wall'}]

We also need a way to know the tables in the database.


In [11]:
#| export
@patch(as_prop=True)
def tables(self: DuckDBPyConnection, catalog:str=None) -> DuckDBPyRelation:
    '''Returns a dictionary of tables in the database'''
    q = f"from {catalog or self.catalog}.information_schema.tables"
    s = f"'{catalog or self.catalog}' as catalog, table_schema as schema, table_name as name, table_type as type, table_comment as comment"
    return self.sql(q).distinct().select(s)

@patch(as_prop=True)
def views(self: DuckDBPyConnection) -> DuckDBPyRelation:
    '''Returns a dictionary of views in the database'''
    return self.tables.filter(f"type =='VIEW' and catalog='{self.catalog}' and schema = '{self.schema}'")
@patch(as_prop=True)
def base_tables(self: DuckDBPyConnection) -> DuckDBPyRelation:
    '''Returns a dictionary of views in the database'''
    return self.tables.filter(f"type =='BASE TABLE' and catalog='{self.catalog}' and schema = '{self.schema}'")

In [12]:
db.base_tables.limit(3)

┌─────────┬─────────┬───────────────┬────────────┬─────────────┐
│ catalog │ schema  │     name      │    type    │   comment   │
│ varchar │ varchar │    varchar    │  varchar   │   varchar   │
├─────────┼─────────┼───────────────┼────────────┼─────────────┤
│ chinook │ main    │ Album         │ BASE TABLE │ Album table │
│ chinook │ main    │ Artist        │ BASE TABLE │ NULL        │
│ chinook │ main    │ PlaylistTrack │ BASE TABLE │ NULL        │
└─────────┴─────────┴───────────────┴────────────┴─────────────┘

The functions bellow add some utilities that are useful for working with tables and views in a database.

In [13]:
db.views.limit(3)

┌─────────┬─────────┬───────────┬─────────┬─────────────┐
│ catalog │ schema  │   name    │  type   │   comment   │
│ varchar │ varchar │  varchar  │ varchar │   varchar   │
├─────────┼─────────┼───────────┼─────────┼─────────────┤
│ chinook │ main    │ a_view    │ VIEW    │ Just a view │
│ chinook │ main    │ temp_view │ VIEW    │ NULL        │
└─────────┴─────────┴───────────┴─────────┴─────────────┘

In [14]:
#| export
@patch
def datamodel(self: DuckDBPyConnection, table_name:str) ->List[Dict]:
    ''' Returns the data model of a table or view. 
    The columns names, types, nullable status, default value and
    primary key status.'''
    
    return [{'name': r[1], 'type': r[2], 'nullable': not r[3], 'default': r[4], 'pk': r[5]} 
            for r in self.sql(f"PRAGMA table_info='{table_name}'").fetchall()]

In [15]:
db.datamodel('Artist')


[{'name': 'ArtistId',
  'type': 'INTEGER',
  'nullable': False,
  'default': None,
  'pk': True},
 {'name': 'Name',
  'type': 'VARCHAR',
  'nullable': True,
  'default': None,
  'pk': False}]

In [16]:
#| export
def convertTypes(s:str)->type:
    ''' Convert DuckDB types to Python and Numpy types'''
    d = {
        # Built-in types
        'BOOLEAN': bool,
        'BLOB': bytearray,  # For bytes, bytearray can be used in Python
        'DOUBLE': float,
        'BIGINT': int,
        'VARCHAR': str,
        'VARCHAR[]': str,
    
        # NumPy DTypes
        'FLOAT': np.float32,
        'DOUBLE': float,
        'SMALLINT': np.int16,
        'INTEGER': np.int32,
        'TINYINT': np.int8,
        'USMALLINT': np.uint16,
        'UINTEGER': np.uint32,
        'UBIGINT': np.uint64,
        'UTINYINT': np.uint8,
        'TIMESTAMP': np.timedelta64
    }
    if s in d: return d[s]
    if s[:7]=='DECIMAL': return float
    raise ValueError(f'Unknown type {s}')


import re, keyword
def clean(s):
    s = re.sub(r'\W|^(?=\d)', '_', s)
    return s + '_' if keyword.iskeyword(s) else s

@patch
def dataclass(self: DuckDBPyConnection, 
              table_name:str, # table or view name
              pref='', # prefix to add to the field names
              suf='', # suffix to add to the field names
              cls_name:str = None # defaults to table_name
              ) -> type:
    '''Creates a `dataclass` type from a table or view in the database.'''
    cls_name = cls_name or table_name
    fields = self.datamodel(table_name)
    fields = [(clean(pref+f['name']+suf), convertTypes(f['type']) if not f['nullable'] else convertTypes(f['type'])|None , field(default=f['default'])) for f in fields]
    return make_dataclass(table_name, fields)

In [17]:
artist_dc = db.dataclass('Artist')
# src = dataclass_src(artist_dc)
# hl_md(src, 'python') # fix error in nbdev_prepare

In [18]:
acdc = db.sql(f"select * from artist where artist.Name like 'AC/%'").df().to_dict(orient='records')
acdc

[{'ArtistId': 1, 'Name': 'AC/DC'}]

In [19]:
acdc_object = artist_dc(**acdc[0])
acdc_object

Artist(ArtistId=1, Name='AC/DC')

## Relation utilities

Once we know that a certain `DuckDBPyRelation` is a table (or view), we can also make it keep some valuable props.

In [20]:
#| export
_saved = {}

@patch
def _set(self:DuckDBPyRelation, k, v):
    global _saved
    # use hash to avoid clashes
    _saved[str(hash(self))+'_'+k] = v

@patch
def _get(self:DuckDBPyRelation, key):
    global _saved
    k = str(hash(self))+'_'+key
    return _saved[k] if k in _saved else None

def custom_dir(c, add): return sorted(dir(type(c)) + list(c.__dict__.keys()) if hasattr(c, '__dict__') else [] + add)

def create_patch_property(name):
    @patch(as_prop=True)
    def prop(self: DuckDBPyRelation):
        return self._get(name)
    return prop

props = ['cls', 'rel', 'model', 'meta']
for p in props: setattr(DuckDBPyRelation, p, create_patch_property(p))

@patch
def __dir__(self:DuckDBPyRelation) -> List[str]: return custom_dir(DuckDBPyRelation, props)
    
def create_prop(c, name, f): setattr(c, name, property(f))
@patch(as_prop=True)
def cls(self:DuckDBPyRelation): return self._get('cls')

@patch(as_prop=True)
def model(self:DuckDBPyRelation): return self._get('model')

@patch(as_prop=True)
def meta(self:DuckDBPyRelation): return self._get('meta')

@patch(as_prop=True)
def rel(self:DuckDBPyRelation): return self._get('rel')


@patch
def table(self:DuckDBPyConnection, name:str, schema:str= None, catalog:str=None) -> DuckDBPyRelation:
    if isinstance(name, Union[List, Set, Tuple]): return [self.table(n) for n in name]
    if not isinstance(name,str): raise AttributeError
    r = self.tables.filter(f"name == '{name}' and schema == '{schema or self.schema}' and catalog =='{catalog or self.catalog}'")
    catalog, schema, name, type, comment = r.fetchone()
    tbl = self.sql(f"from {catalog}.{schema}.{name}")
    tbl = tbl.set_alias(f"{catalog}.{schema}.{name}")
    tbl._set('cls', self.dataclass(name))
    tbl._set('model', self.datamodel(name))
    meta = {'base': self, 'catalog': catalog, 'schema': schema, 'name': name, 'type': type, 'comment': comment, 'shape': tbl.shape}
    tbl._set('meta', meta)
    tbl._set('rel', tbl)
    return tbl



In [21]:
# custom_dir(DuckDBPyRelation, props)
dir(db.table('Album'))

['__abstractmethods__',
 '__annotations__',
 '__base__',
 '__bases__',
 '__basicsize__',
 '__call__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dictoffset__',
 '__dir__',
 '__dir__',
 '__doc__',
 '__doc__',
 '__eq__',
 '__flags__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init__',
 '__init_subclass__',
 '__instancecheck__',
 '__itemsize__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__module__',
 '__mro__',
 '__name__',
 '__ne__',
 '__new__',
 '__or__',
 '__prepare__',
 '__pybind11_module_local_v5_clang_libcpp_cxxabi1002__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr__',
 '__ror__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__str__',
 '__subclasscheck__',
 '__subclasses__',
 '__subclasshook__',
 '__text_signature__',
 '__type_params__',
 '__weakrefoffset__',
 '_get',
 '_orig___dir__',
 '_orig___getitem__',
 '_set',
 'aggregate',
 

In [22]:
#| export
@patch
def _select(self:DuckDBPyRelation, k) -> DuckDBPyRelation:
    return self.select(k) if isinstance(k, str) else self.select(*k)

@patch(as_prop=True)
def c(self:DuckDBPyRelation): 
    '''Column autocomplete'''
    return _Getter(self, 'column', self.columns, self._select)

### Autocomplete

We want an easy access to table information.

like db`.tables` should work similar to the `.table` cli command.

In [23]:
#| export
def noop(*args, **kwargs): return None
def identity(x): return x


class _Getter: 
    """ A Getter utility check https://github.com/AnswerDotAI/fastlite """
    def __init__(self, db:DuckDBPyConnection, type:str='', dir:List=[], get=noop): store_attr()    
    def __dir__(self): return self.dir
    def __str__(self): return ", ".join(dir(self))
    def __repr__(self): return f"{str(self.db).split(' ')[-1]} {self.type.title()}s: {str(self)}"
    def __contains__(self, s:str): return s in dir(self)
    def __getitem__(self, k): return self.get(k)
    def __getattr__(self, k):
        if k[0]!='_': return self.get(k)
        else: raise AttributeError 


@patch
def use(self:DuckDBPyConnection, catalog_schema:str=None, catalog:str=None, schema=None):
    catalog, schema = catalog_schema.split('_')
    self.sql(f"use {catalog}.{schema}")
    print("Using ", self)
    return self

@patch(as_prop=True) # tables
def s(self:DuckDBPyConnection): 
    '''Autocomplete functonality for schemas'''
    return _Getter(self, 'schema', self.tables.project(f"catalog || '_' || schema").distinct().to_list(), self.use)
@patch(as_prop=True) # tables
def t(self:DuckDBPyConnection): 
    '''Autocomplete functonality for tables'''
    return _Getter(self,'table', self.base_tables.select('name').to_list(), self.table)
@patch(as_prop=True) # views
def v(self:DuckDBPyConnection): 
    '''Autocomplete functonality for views'''
    return _Getter(self, 'view', self.views.select('name').to_list(), self.table)
@patch(as_prop=True) # functions
def fns(self:DuckDBPyConnection): raise NotImplementedError
# def fns(self:DuckDBPyConnection): return _Getter(self, f"SELECT function_name FROM duckdb_functions() WHERE schema_name = '{self.schema}' and internal = False")

@patch(as_prop=True) # secrets
def shh(self:DuckDBPyConnection): raise NotImplementedError
# def shh(self:DuckDBPyConnection): return _Getter(self, f"SELECT name FROM duckdb_secrets()")

@patch
def __repr__(self:DuckDBPyConnection): return f'{self.__class__.__name__} ({self.catalog}_{self.schema})'


::: 
 
![Autocomplete in Jupyter](images/autocomplete.png){.lightbox}

:::

In [24]:

a = db.t.Album.c['AlbumId', 'Title'].limit(4)
a

┌─────────┬───────────────────────────────────────┐
│ AlbumId │                 Title                 │
│  int32  │                varchar                │
├─────────┼───────────────────────────────────────┤
│       1 │ For Those About To Rock We Salute You │
│       2 │ Balls to the Wall                     │
│       3 │ Restless and Wild                     │
│       4 │ Let There Be Rock                     │
└─────────┴───────────────────────────────────────┘

Let's also improve the representation of `Relations`.

In [25]:

@patch
def __str__(self:DuckDBPyRelation): return f'{self.alias}'

@patch
def __repr__(self:DuckDBPyRelation): 
    return f"<{self.__class__.__name__} {self.meta['type'] if self.meta else ''} **{self.alias}** ({self.shape[0]} rows, {self.shape[1]} cols)>\n\n"
@patch
def _repr_markdown_(self: DuckDBPyRelation): 
    markdown =  f"{self.__repr__()}\n\n"
    if self.meta and self.meta['comment']: markdown += f"> {self.meta['comment']}\n\n"
    df = self.df()
    if self.shape[0] > 5: 
        head = df.head(3)
        tail = df.tail(2)
        ellipsis = pd.DataFrame([["..."] * df.shape[1]], columns=df.columns)
        df = pd.concat([head, ellipsis, tail])
    markdown += df.to_markdown(index=False)
    return markdown



In [26]:
db.tables.show()

┌─────────┬─────────┬───────────────┬────────────┬─────────────┐
│ catalog │ schema  │     name      │    type    │   comment   │
│ varchar │ varchar │    varchar    │  varchar   │   varchar   │
├─────────┼─────────┼───────────────┼────────────┼─────────────┤
│ chinook │ main    │ Customer      │ BASE TABLE │ NULL        │
│ chinook │ main    │ Genre         │ BASE TABLE │ NULL        │
│ chinook │ main    │ tst           │ BASE TABLE │ NULL        │
│ chinook │ main    │ Invoice       │ BASE TABLE │ NULL        │
│ chinook │ main    │ temp_view     │ VIEW       │ NULL        │
│ chinook │ main    │ Album         │ BASE TABLE │ Album table │
│ chinook │ main    │ Artist        │ BASE TABLE │ NULL        │
│ chinook │ main    │ PlaylistTrack │ BASE TABLE │ NULL        │
│ chinook │ main    │ Employee      │ BASE TABLE │ NULL        │
│ chinook │ main    │ InvoiceLine   │ BASE TABLE │ NULL        │
│ chinook │ main    │ todos         │ BASE TABLE │ NULL        │
│ chinook │ main    │ Med

In [27]:
db.t.Employee

<DuckDBPyRelation BASE TABLE **chinook.main.Employee** (8 rows, 15 cols)>



| EmployeeId   | LastName   | FirstName   | Title               | ReportsTo   | BirthDate           | HireDate            | Address                     | City       | State   | Country   | PostalCode   | Phone             | Fax               | Email                  |
|:-------------|:-----------|:------------|:--------------------|:------------|:--------------------|:--------------------|:----------------------------|:-----------|:--------|:----------|:-------------|:------------------|:------------------|:-----------------------|
| 1            | Adams      | Andrew      | General Manager     | nan         | 1962-02-18 00:00:00 | 2002-08-14 00:00:00 | 11120 Jasper Ave NW         | Edmonton   | AB      | Canada    | T5K 2N1      | +1 (780) 428-9482 | +1 (780) 428-3457 | andrew@chinookcorp.com |
| 2            | Edwards    | Nancy       | Sales Manager       | 1.0         | 1958-12-08 00:00:00 | 2002-05-01 00:00:00 | 825 8 Ave SW                | Calgary    | AB      | Canada    | T2P 2T3      | +1 (403) 262-3443 | +1 (403) 262-3322 | nancy@chinookcorp.com  |
| 3            | Peacock    | Jane        | Sales Support Agent | 2.0         | 1973-08-29 00:00:00 | 2002-04-01 00:00:00 | 1111 6 Ave SW               | Calgary    | AB      | Canada    | T2P 5M5      | +1 (403) 262-3443 | +1 (403) 262-6712 | jane@chinookcorp.com   |
| ...          | ...        | ...         | ...                 | ...         | ...                 | ...                 | ...                         | ...        | ...     | ...       | ...          | ...               | ...               | ...                    |
| 7            | King       | Robert      | IT Staff            | 6.0         | 1970-05-29 00:00:00 | 2004-01-02 00:00:00 | 590 Columbia Boulevard West | Lethbridge | AB      | Canada    | T1K 5N8      | +1 (403) 456-9986 | +1 (403) 456-8485 | robert@chinookcorp.com |
| 8            | Callahan   | Laura       | IT Staff            | 6.0         | 1968-01-09 00:00:00 | 2004-03-04 00:00:00 | 923 7 ST NW                 | Lethbridge | AB      | Canada    | T1H 1Y8      | +1 (403) 467-3351 | +1 (403) 467-8772 | laura@chinookcorp.com  |

In [28]:
db.sql('select * from Album')

<DuckDBPyRelation  **unnamed_relation_db012eee38ea7ee6** (347 rows, 3 cols)>



| AlbumId   | Title                                              | ArtistId   |
|:----------|:---------------------------------------------------|:-----------|
| 1         | For Those About To Rock We Salute You              | 1          |
| 2         | Balls to the Wall                                  | 2          |
| 3         | Restless and Wild                                  | 2          |
| ...       | ...                                                | ...        |
| 346       | Mozart: Chamber Music                              | 274        |
| 347       | Koyaanisqatsi (Soundtrack from the Motion Picture) | 275        |

In [29]:
db.t.Genre

<DuckDBPyRelation BASE TABLE **chinook.main.Genre** (25 rows, 2 cols)>



| GenreId   | Name      |
|:----------|:----------|
| 1         | Rock      |
| 2         | Jazz      |
| 3         | Metal     |
| ...       | ...       |
| 24        | Classical |
| 25        | Opera     |

In [30]:
a

<DuckDBPyRelation  **chinook.main.Album** (4 rows, 2 cols)>



|   AlbumId | Title                                 |
|----------:|:--------------------------------------|
|         1 | For Those About To Rock We Salute You |
|         2 | Balls to the Wall                     |
|         3 | Restless and Wild                     |
|         4 | Let There Be Rock                     |

#### Replacement Scans

You may be asking yourself why I am patching `DuckDBPyRelation` and `DuckDBPyConnection` instead of subclassing them.
The problem is that these classes do not allow subclassing.  They do not implement `__init__`.

We could have create our own classes like `Database` and `Table` and just wrap DuckDBPy objects. But then we would loose a very nice feature of the PyRelation objects.....


*replacement scans*.


In [31]:
a = db.t.Album

In [32]:
db.sql("select * from a")

<DuckDBPyRelation  **unnamed_relation_ac1817debb6ad7c4** (347 rows, 3 cols)>



| AlbumId   | Title                                              | ArtistId   |
|:----------|:---------------------------------------------------|:-----------|
| 1         | For Those About To Rock We Salute You              | 1          |
| 2         | Balls to the Wall                                  | 2          |
| 3         | Restless and Wild                                  | 2          |
| ...       | ...                                                | ...        |
| 346       | Mozart: Chamber Music                              | 274        |
| 347       | Koyaanisqatsi (Soundtrack from the Motion Picture) | 275        |

I did not had to use a f-string and pass the variable.  DuckDBPy objects (as well as Pandas and Polars Dataframes, Arrow tables, and Datasets) are replaced in the query automagically.

In [33]:
# db.sql(f"select * from {a}")

In [34]:
str(a)

'chinook.main.Album'

In [35]:
db.tables

<DuckDBPyRelation  **unnamed_relation_4c3ce5d87b52df26** (16 rows, 5 cols)>



| catalog   | schema   | name        | type       | comment   |
|:----------|:---------|:------------|:-----------|:----------|
| chinook   | main     | Customer    | BASE TABLE |           |
| chinook   | main     | Genre       | BASE TABLE |           |
| chinook   | main     | fd_Customer | BASE TABLE |           |
| ...       | ...      | ...         | ...        | ...       |
| chinook   | main     | Invoice     | BASE TABLE |           |
| chinook   | main     | temp_view   | VIEW       |           |

## Database management

#### Drop

In [36]:

@patch
def __contains__(self:DuckDBPyConnection, name:str):
    return name in self.tables.select('name').to_list()

@patch
def drop(self:DuckDBPyConnection, table_name:str):
    '''Drop a table or view'''
    if table_name not in self: raise ValueError(f"Table {table_name} does not exist")
    self.sql(f"DROP TABLE {table_name}")

#### Create Table or View

In [37]:
    
@patch
def _create(self: DuckDBPyConnection, 
            type: str, fileglob: str, 
            table_name: Optional[str] = None, 
            filetype: Optional[Literal['csv', 'xlsx', 'json', 'parquet']] = None, 
            replace: bool = False, 
            *args, **kwargs):
  
    filepath = Path(fileglob)
    name = table_name or filepath.stem
    if name in self and not replace: raise ValueError(f"Table {name} already exists")
    if name in self: self.drop(name)
    
    filetype = filetype or filepath.suffix[1:]
    if filetype == 'xlsx':
        self.install_extension('spatial')
        self.load_extension('spatial') # for excel import/export
        fn = 'st_read'
        options = ', '.join(f"{key}={repr(value)}" for key, value in kwargs.items())
        self.sql(f"CREATE {type} {name} AS SELECT * FROM {fn}('{str(filepath)}' {options})")
    else:
        (getattr(self, f'read_{filetype}')(fileglob, *args, **kwargs)).to_table(name)


@patch
def create_table(self: DuckDBPyConnection, 
                 fileglob: str, # file path or glob
                 table_name: Optional[str] = None, # table name
                 filetype: Optional[Literal['csv', 'xlsx', 'json', 'parquet', 'sqlite']] = None, # file type
                 replace: bool = False, # replace existing table
                 *args, **kwargs 
                 ):
    '''Create a table from a file'''
    return self._create('TABLE', fileglob, table_name, filetype, replace, *args, **kwargs)

@patch
def create_view(self: DuckDBPyConnection, 
                 fileglob: str, # file path or glob
                 view_name: Optional[str] = None, # view name
                 filetype: Optional[Literal['csv', 'xlsx', 'json', 'parquet', 'sqlite']] = None, # file type
                 replace: bool = False,  # replace existing view
                 *args, **kwargs
                 ):
    '''Create a view from a file'''
    return self._create('VIEW', fileglob, view_name, filetype, replace, *args, **kwargs)
    

In [38]:
db.create_table('../data/username.latin1.csv', 'latin', replace=True)
db.tables

<DuckDBPyRelation  **unnamed_relation_7266e9deb9b85538** (17 rows, 5 cols)>



| catalog   | schema   | name      | type       | comment   |
|:----------|:---------|:----------|:-----------|:----------|
| chinook   | main     | latin     | BASE TABLE |           |
| chinook   | main     | MediaType | BASE TABLE |           |
| chinook   | main     | Track     | BASE TABLE |           |
| ...       | ...      | ...       | ...        | ...       |
| chinook   | main     | Genre     | BASE TABLE |           |
| chinook   | main     | todos     | BASE TABLE |           |

In [39]:
db.create_view('../data/username.latin1.csv', 'latin1view', replace=True)

In [40]:
db.tables.filter(f"name like '%latin%'")
#  db.sql("select distinct database_name, schema_name, table_name, column_name from duckdb_columns()")
# db.sql("attach database '../data/chinook.sqlite' as sqlite")

<DuckDBPyRelation  **unnamed_relation_eb73ae285d2447a7** (2 rows, 5 cols)>



| catalog   | schema   | name       | type       | comment   |
|:----------|:---------|:-----------|:-----------|:----------|
| chinook   | main     | latin      | BASE TABLE |           |
| chinook   | main     | latin1view | BASE TABLE |           |

In [41]:
db.drop('latin1view')
db.drop('latin')

In [42]:
# from fastcore.test import test_fail
# test_fail(db.drop, 'banana') # fix error in nbdev_prepare

In [43]:
db.create_table('https://jsonplaceholder.typicode.com/todos/', 'todos', filetype='json', replace=True)

In [44]:
db.t.todos.limit(10)

<DuckDBPyRelation  **chinook.main.todos** (10 rows, 4 cols)>



| userId   | id   | title                                        | completed   |
|:---------|:-----|:---------------------------------------------|:------------|
| 1        | 1    | delectus aut autem                           | False       |
| 1        | 2    | quis ut nam facilis et officia qui           | False       |
| 1        | 3    | fugiat veniam minus                          | False       |
| ...      | ...  | ...                                          | ...         |
| 1        | 9    | molestiae perspiciatis ipsa                  | False       |
| 1        | 10   | illo est ratione doloremque quia maiores aut | True        |

In [45]:
# db.create_table('https://huggingface.co/datasets/ibm/duorc/resolve/refs%2Fconvert%2Fparquet/ParaphraseRC/test/0000.parquet', 'hf_movies')

In [46]:
# db.t.hf_movies.limit(2)

In [47]:
# db.drop('hf_movies')


In [48]:

db.create_table('../data/example.xlsx')

In [49]:
sheet = db.t.example
sheet

<DuckDBPyRelation BASE TABLE **chinook.main.example** (5 rows, 3 cols)>



|   COLUMN1 |   COLUMN WITH SPACE | Cólumn name   |
|----------:|--------------------:|:--------------|
|         1 |                 1.3 | a             |
|         2 |                 1.3 | bn            |
|         3 |                 1.3 | v             |
|         4 |                 1.3 | sgf           |
|         5 |                 1.3 | asd           |

In [50]:

db.drop('example')

#### Attach, Detach and Use

In [51]:
class RemoteSqliteError(Exception):
    pass
class InvalidPathError(Exception):
    pass

In [52]:
@patch
def attach(self: DuckDBPyConnection, path, read_only:bool = False, type:Literal['duckdb' | 'sqlite']='duckdb', catalog_name:str=None) -> None:
    type = 'sqlite' if Path(path).suffix =='.sqlite' else type
    if path.startswith(('s3://', 'gcp://', 'https://')):
        self.install_extension('httpfs')
        self.load_extension('httpfs')
        read_only = True
        if type =='sqlite': raise RemoteSqliteError('Cannot attach to a remote sqlite database.')
    elif not Path(path).exists(): raise InvalidPathError(f"Couldn't find {path}")
    self.install_extension('sqlite')
    self.load_extension('sqlite')
    o = "(TYPE sqlite, " if type=='sqlite' else "("
    o += f"READ_ONLY  {read_only})"
    q = f" '{path}' {f" AS {catalog_name}" if catalog_name else ""} {o}"
    print (f"Attaching {q}")
    self.sql(f"ATTACH {q}")

In [53]:
@patch
def detach(self: DuckDBPyConnection, catalog_name) -> None:
    self.sql(f"DETACH {catalog_name}")

In [54]:
@patch
def use(self:DuckDBPyConnection, catalog_schema:str=None, catalog:str=None, schema=None) -> None:
    if not catalog_schema and not catalog and not schema: return self
    catalog, schema = catalog_schema.split('_') if catalog_schema else (catalog, schema)
    catalog = catalog or self.catalog
    schema = schema or self.schema
    self.sql(f"use {catalog}.{schema}")
    print("Using ", self)


In [55]:
db.attach('s3://duckdb-blobs/databases/stations.duckdb')

Attaching  's3://duckdb-blobs/databases/stations.duckdb'  (READ_ONLY  True)


In [56]:
db.s.chinook_main

Using  DuckDBPyConnection (chinook_main)


## export - 

In [57]:
#| hide
import nbdev; nbdev.nbdev_export()