In [1]:
from chalk.client import ChalkClient

# Notebook Development In Chalk

Chalk allows you to run queries, make datasets, and write new features directly from a Jupyter Notebook. In this notebook, we'll show all the features available:

1. Creating a new branch based on your main deployment and loading features defined in your main deployment into your notebook
2. Running online queries
3. Running offline queries
4. Defining new features:
    - Defining Alias Expressions
    - Defining New Feature Classes
    - Defining New Features on Existing Feature Classes
    - Defining SQL Resolvers
    - Defining New Python Resolvers

## 1. Creating a Branch & Loading Features

In [2]:
# Create the Chalk client and get (or create) the "notebook-demo" branch.
client = ChalkClient()
client.get_or_create_branch(branch_name="notebook-demo")
# Load the features defined in the deployment into this notebook.
# NOTE(review): later cells reference `User`, presumably made available by this call -- confirm.
client.load_features()

Combobox(value='', continuous_update=False, ensure_option=True, layout=Layout(width='auto'), options=('CreditR…

Output()

## 2. Running Online Queries

In [None]:
# Online query for the user with id 1: request the whole `User` feature class
# plus the past-due percentage nested under `User.credit_report`.
past_due_feature = User.credit_report.percent_past_due

result = client.query(
    input={User.id: 1},
    output=[User, past_due_feature],
    # NOTE(review): a "0s" staleness presumably forces a fresh computation
    # instead of accepting a cached value -- confirm against the Chalk docs.
    staleness={past_due_feature: "0s"},
)
result

In [6]:
# Online query for the user with id 1 that requests only `User.transactions`.
# NOTE(review): the staleness override targets a feature that is not in the
# requested output; it looks carried over from the previous cell -- confirm
# whether it is still needed.
staleness_overrides = {User.credit_report.percent_past_due: "0s"}

result = client.query(
    input={User.id: 1},
    output=[User.transactions],
    staleness=staleness_overrides,
)

# Extract the transactions feature value from the response and show it as a pandas DataFrame.
transactions_value = result.get_feature_value(User.transactions)
transactions_value.to_pandas()

Unnamed: 0,transaction.id,transaction.amount,transaction.memo,transaction.clean_memo,transaction.user_id,transaction.name_memo_sim,transaction.at,transaction.completion,transaction.category,transaction.is_nsf,transaction.is_ach
0,172,150.00,ACH TRANSFER 5432109876,ACH TRANSFER,1,0.111111,2024-09-01 08:10:29+00:00,"{""category"": ""transfer"", ""is_nsf"": false, ""cle...",transfer,False,True
1,173,-7.45,Starbucks 56789,Starbucks,1,0.117647,2024-09-01 10:35:45+00:00,"{""category"": ""Food & Drink"", ""is_nsf"": false, ...",Food & Drink,False,False
2,174,-79.56,SQ - Target 07/01,Target,1,0.142857,2024-09-01 13:45:33+00:00,"{""category"": ""shopping"", ""is_nsf"": false, ""cle...",shopping,False,False
3,169,-45.78,SQ - Chipotle 9876,Chipotle,1,0.285714,2024-09-02 07:35:16+00:00,"{""category"": ""Food"", ""is_nsf"": false, ""clean_m...",Food,False,False
4,170,-12.89,Venmo Payment to Amy / Groceries,Venmo Payment to Amy / Groceries,1,0.350000,2024-09-02 09:25:44+00:00,"{""category"": ""food"", ""is_nsf"": false, ""clean_m...",food,False,False
...,...,...,...,...,...,...,...,...,...,...,...
153,2,-12.45,Starbucks 57892,Starbucks,1,0.117647,2024-10-15 08:15:22+00:00,"{""category"": ""Food and Drink"", ""is_nsf"": false...",Food and Drink,False,False
154,3,-64.50,SQ - WALMART SUPERCENTER #345,WALMART SUPERCENTER #345,1,0.125000,2024-10-15 09:34:01+00:00,"{""category"": ""Shopping"", ""is_nsf"": false, ""cle...",Shopping,False,False
155,4,-32.78,Uber ride 08/15,Uber ride,1,0.214286,2024-10-15 11:48:16+00:00,"{""category"": ""Transportation"", ""is_nsf"": false...",Transportation,False,False
156,5,150.00,ACH TRANSFER 1234567890,ACH TRANSFER,1,0.111111,2024-10-15 14:22:45+00:00,"{""category"": ""ACH Transfer"", ""is_nsf"": false, ...",ACH Transfer,False,True


## 3. Running Offline Queries

Chalk offline queries return `Dataset`s, which are lazy references to Parquet files in cloud storage. These datasets can be pulled onto your local machine by calling `ds.to_pandas()` or `ds.to_polars()`.


In [10]:
from chalk.client import Dataset, ResourceRequests

# Offline query: returns a lazy `Dataset` reference rather than in-memory values.
# The result is saved under `dataset_name` so it can be looked up again later.
dataset: Dataset = client.offline_query(
    output=[
        User.name,
        User.denylisted,
        User.name_email_match_score,
        User.domain_name,
        User.email_username,
        User.total_spend,
        User.credit_report.percent_past_due,
    ],
    # NOTE(review): presumably asks Chalk to recompute this feature instead of
    # reading previously stored values -- confirm against the Chalk docs.
    recompute_features=[User.credit_report.percent_past_due],

    # Offline queries support horizontal and vertical scaling: they can be sharded
    # and configured to run on their own separate pods with a specified amount of
    # memory and CPU. Uncomment the keyword arguments below to try it.
    # (They must be passed by keyword: a bare `ResourceRequests(...)` placed after
    # keyword arguments is a SyntaxError.)
    # run_asynchronously=True,
    # resources=ResourceRequests(cpu=4, memory="16Gi"),
    # num_shards=1,

    max_samples=30,
    dataset_name="user_features",
)
dataset

SyntaxError: invalid syntax. Perhaps you forgot a comma? (405617245.py, line 15)

In [11]:
# Pull the offline query's Dataset into a pandas DataFrame and display it.
df = dataset.to_pandas()
df

Unnamed: 0,user.name,user.denylisted,user.name_email_match_score,user.domain_name,user.email_username,user.total_spend,user.credit_report.id,user.id,user.credit_report.percent_past_due
0,Nicole Mann,False,75.0,nasa.gov,nicoleam,,123,1,0.0
1,Emily Carter,False,75.0,gmail.com,emily_carter21,,123,4,0.0


### Reading an Existing Dataset

`client.offline_query` takes a `dataset_name` parameter. If this is supplied, the dataset can be read in a future session or by another user through its name. It will also be searchable in the Chalk dashboard by that name.

In [None]:
# Fetch the dataset by the name that was passed as `dataset_name` to `client.offline_query`.
dataset = client.get_dataset(dataset_name="user_features")

## 4. Creating New Features