In [1]:
from operator import itemgetter
from types import SimpleNamespace
import pandas as pd
import opossom.column as col

In [2]:
# Penguins CSV from the TidyTuesday repository (2020-07-28 folder, `master` branch).
url = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv"
# convert_dtypes() moves columns to pandas' nullable extension dtypes; make_schema()
# further down dispatches on those dtype classes (StringDtype, Int64Dtype, Float64Dtype).
penguins = pd.read_csv(url).convert_dtypes()

In [3]:
# Preview the first rows to see the available columns and spot missing values.
penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
3,Adelie,Torgersen,,,,,,2007
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007


In [4]:
class Schema(SimpleNamespace):
    """Namespace of column objects, one attribute per column name.

    Attributes passed to the constructor are stored as-is. Looking up a public
    name that is not set yet creates ``col.Column(name)``, stores it on the
    instance and returns it, so columns that are added to a table later can be
    referenced without rebuilding the schema.
    """

    def __getattr__(self, name):
        """Create, cache and return a column for an unknown public attribute.

        Python only calls this hook after normal attribute lookup has failed,
        so attributes that already exist never reach this method.

        Args:
            name: The attribute (column) name being looked up.

        Returns:
            The newly created ``col.Column(name)``; the very same object is
            stored on the instance, so later lookups return it directly.

        Raises:
            AttributeError: If ``name`` starts with an underscore.
        """
        # Protocol probes such as `__deepcopy__` (copy module) or `_repr_html_`
        # (IPython display) are plain attribute lookups. Fabricating a column
        # for them would add junk entries to the schema and make it look as if
        # it implemented those protocols, so private names are never created.
        if name.startswith("_"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        # Build the column once so the stored and the returned object are identical.
        column = col.Column(name)
        setattr(self, name, column)
        return column


def make_schema(table: pd.DataFrame):
    """Build a ``Schema`` holding one column object per column of ``table``.

    The column object is picked from the column's dtype:

    * ``pd.StringDtype`` -> ``col.str(name)``
    * ``pd.Int64Dtype`` / ``pd.Float64Dtype`` -> ``col.num(name)``
    * anything else -> ``col.Column(name)``

    Args:
        table: The DataFrame whose columns should be exposed.

    Returns:
        A ``Schema`` whose attributes are named after the columns of ``table``.
    """

    def column_for(label, kind):
        # Dispatch on the dtype class; unrecognised dtypes get a generic column.
        if isinstance(kind, pd.StringDtype):
            return col.str(label)
        if isinstance(kind, (pd.Int64Dtype, pd.Float64Dtype)):
            return col.num(label)
        return col.Column(label)

    columns = {
        label: column_for(label, kind)
        for label, kind in table.dtypes.items()
    }
    return Schema(**columns)

In [5]:
# Schema for the penguins table; `t` is the handle used to reference its columns below.
t = make_schema(penguins)

In [6]:
# Public attributes of the schema, i.e. the column names it currently knows about.
[name for name in dir(t) if not name.startswith("_")]

['bill_depth_mm',
 'bill_length_mm',
 'body_mass_g',
 'flipper_length_mm',
 'island',
 'sex',
 'species',
 'year']

In [7]:
# (rows, columns) of the unfiltered table, for comparison with the filtered result below.
penguins.shape

(344, 8)

In [8]:
(
    penguins
    # Millimetres to centimetres is a factor of 10 (dividing by 100 would give decimetres).
    .assign(bill_depth_cm=t.bill_depth_mm / 10)  # Column name is offered as autocomplete
    # 1.85 cm == 18.5 mm, so the same rows are kept as with the old 0.185 threshold.
    .loc[t.bill_depth_cm > 1.85]  # Uses a column that was not previously in schema
    .shape
)

(97, 9)

In [9]:
# Accessing `t.bill_depth_cm` above stored that column on the schema,
# so it now shows up in the listing and is offered as autocomplete.
[name for name in dir(t) if not name.startswith("_")]

['bill_depth_cm',
 'bill_depth_mm',
 'bill_length_mm',
 'body_mass_g',
 'flipper_length_mm',
 'island',
 'sex',
 'species',
 'year']

In the future, it might be nice to add set-like methods (for example, union via `|`) to the `Schema` object.
That way, the column names of several tables could either be collected in a single schema or kept in separate ones, as desired.

```python
t = make_schema(penguins)
i = make_schema(islands)
a = t | i                  # Has columns from both tables
a |= make_schema(species)  # Has columns from all three tables
```