In [None]:
from chalk.client import ChalkClient

# Shared Chalk API client used by every query cell in this notebook.
# No arguments are passed, so connection settings are presumably resolved
# from the environment / local Chalk configuration — TODO confirm.
client: ChalkClient = ChalkClient()

In [None]:
from chalk.client.models import BulkOnlineQueryResponse, BulkOnlineQueryResult
from src.marketplace.models import UserItem

# Bulk online query: evaluate one user against a batch of candidate items.
# The two input columns are presumably paired row-by-row, so they must have
# the same length; deriving the user column from the item column keeps them
# in sync if the item range is ever changed.
item_ids = list(range(80, 90))
user_ids = [50] * len(item_ids)

bulk_query: BulkOnlineQueryResponse = client.query_bulk(
    input={
        UserItem.user_id: user_ids,
        UserItem.item_id: item_ids,
    },
    output=[
        UserItem.item.title,
        # Joint (user x item) features
        UserItem.price_diff,
        UserItem.price_diff_ratio,
        UserItem.affordability_cap,
        UserItem.price_fit,
        # Item pricing
        UserItem.item.times_purchased,
        UserItem.item.most_recent_price,
        UserItem.item.average_price,
        # Item ratings
        UserItem.item.average_rating,
        UserItem.item.total_reviews,
        # User details
        UserItem.user.id,
        UserItem.user.created_at,
        # User interactions (no features requested here)
        # User purchases (no features requested here)
        # Reviews
        UserItem.user.review_count,
        UserItem.user.average_rating_given,
    ],
    query_name="user-item",
    # branch="elvis",  # left disabled; would pass a `branch` argument to the query
)

# Only the first result is used. Index it directly instead of unpacking with
# `*_`: that form rebinds the global `_`, which other cells import from
# `chalk.features` and which IPython uses for the last cell output.
query_result: BulkOnlineQueryResult = bulk_query.results[0]
query_result.to_pandas()

In [None]:
from src.marketplace.models import User
from chalk.features import _
import chalk.functions as F

# Feature defined on the fly: it exists only in the context of this notebook session.
User.full_name: str = _.first_name + " " + _.last_name

# Ad-hoc expression requested alongside regular features: Jaccard similarity
# between the lower-cased full name and the lower-cased email, returned under
# the alias "user.name_email_sim".
name_email_sim = F.jaccard_similarity(
    a=F.lower(_.full_name),
    b=F.lower(_.email),
).alias("user.name_email_sim")

# Online query for a single user, mixing the session-defined feature,
# a stored feature, and the aliased expression above.
result = client.query(
    input={User.id: 1},
    output=[User.full_name, User.username, name_email_sim],
)
result

In [None]:
from datetime import datetime, timedelta

from chalk.client.response import Dataset
# Import User from the same module the other cells use, so every cell
# refers to the feature class via one consistent path.
from src.marketplace.models import User

# Reference dates for the offline query below.
last_year = datetime(2023, 12, 31)  # not referenced in this cell; kept in case later cells rely on it
christmas = datetime(2024, 12, 25)
new_year = datetime(2025, 1, 1)
one_week_ago = datetime.now() - timedelta(days=7)  # relative to run time, so results vary between runs

# The same user is queried once per timestamp. Building the id list from
# len(input_times) keeps the two lists the same length when a date is
# added or removed (they are presumably matched index-by-index — see the
# Chalk offline_query docs).
USER_ID = 896
input_times = [christmas, new_year, one_week_ago]

results: Dataset = client.offline_query(
    input={
        User.id: [USER_ID] * len(input_times),
    },
    recompute_features=True,
    # recompute_features=[User.review_count, User.average_rating_given], # can re-compute just a subset of features
    input_times=input_times,
    output=[
        User.review_count,
        User.average_rating_given,
        User.username,
    ],
)
results.to_pandas()

In [None]:
# chalk for data scientists
from chalk.features import _

# Load the features from prod directly into this notebook session.
client.load_features()

# Session-defined feature on User: product inquiries per order placed.
# NOTE(review): behaviour when total_orders_placed is 0 is not visible here — confirm.
inquiries_per_order = _.total_product_inquiry_count / _.total_orders_placed
User.new_on_demand_feature = inquiries_per_order

In [None]:
# chalk for data scientists
from chalk.integrations import GlueCatalog

# Settings for the AWS Glue catalog integration.
# NOTE(review): the catalog id and role ARN look like placeholder values —
# replace them with real ones before running.
glue_settings = dict(
    name="aws_glue_catalog",
    aws_region="us-west-2",
    catalog_id="123",
    aws_role_arn="arn:aws:iam::123456789012:role/YourCatalogueAccessRole",
)
catalog = GlueCatalog(**glue_settings)

# `results` is the Dataset returned by the offline query in an earlier cell;
# write it to the given table through the Glue catalog.
results.write_to(destination="database.table_name", catalog=catalog)
