In [15]:
from datetime import datetime

import polars as pl

In [2]:
# Customer table: one row per customer_id (1, 2, 3).
df_customers = pl.DataFrame(
    dict(
        customer_id=[1, 2, 3],
        name=['Amira', 'Beetlegeuse', 'Charybdis'],
    )
)
df_customers

customer_id,name
i64,str
1,"""Amira"""
2,"""Beetlegeuse"""
3,"""Charybdis"""


In [3]:
# Order table: customer 1 has one order, customer 2 has two,
# and customer 3 has none.
df_orders = pl.DataFrame(
    dict(
        order_id=list('abc'),
        customer_id=[1, 2, 2],
        amount=[100, 200, 300],
    )
)

In [4]:
# Inner join: only customers with at least one matching order survive,
# and a customer with several orders appears once per order.
df_customers.join(
    df_orders,
    how='inner',
    on='customer_id',
)

customer_id,name,order_id,amount
i64,str,str,i64
1,"""Amira""","""a""",100
2,"""Beetlegeuse""","""b""",200
2,"""Beetlegeuse""","""c""",300


In [5]:
# Left join: every customer is kept; customers without an order get
# nulls in the order columns.
df_customers.join(
    df_orders,
    how='left',
    on='customer_id',
)

customer_id,name,order_id,amount
i64,str,str,i64
1,"""Amira""","""a""",100
2,"""Beetlegeuse""","""b""",200
2,"""Beetlegeuse""","""c""",300
3,"""Charybdis""",,


In [6]:
# Outer join: keeps unmatched rows from both sides. Every order in
# df_orders references an existing customer, so for this data the
# result coincides with the left join.
# NOTE(review): recent Polars releases appear to deprecate how='outer'
# in favour of how='full' -- confirm against the installed version.
df_customers.join(
    df_orders,
    how='outer',
    on='customer_id',
)

customer_id,name,order_id,amount
i64,str,str,i64
1,"""Amira""","""a""",100
2,"""Beetlegeuse""","""b""",200
2,"""Beetlegeuse""","""c""",300
3,"""Charybdis""",,


In [7]:
# Single-column frame of colours, used for the cross-join example.
df_colors = pl.DataFrame(
    dict(color=['red', 'green', 'blue'])
)
df_colors

color
str
"""red"""
"""green"""
"""blue"""


In [8]:
# Single-column frame of sizes, used for the cross-join example.
df_sizes = pl.DataFrame(
    dict(size=['S', 'M', 'L'])
)
df_sizes

size
str
"""S"""
"""M"""
"""L"""


In [9]:
# Cross join: no key is given; every colour is paired with every size
# (3 x 3 = 9 rows).
df_colors.join(
    df_sizes,
    how='cross',
)

color,size
str,str
"""red""","""S"""
"""red""","""M"""
"""red""","""L"""
"""green""","""S"""
"""green""","""M"""
"""green""","""L"""
"""blue""","""S"""
"""blue""","""M"""
"""blue""","""L"""


In [10]:
# Cars keyed by id; only car 'c' has entries in the repairs frame.
cars = pl.DataFrame(
    dict(
        id=['a', 'b', 'c'],
        make=['ford', 'toyota', 'bmw'],
    )
)
cars

id,make
str,str
"""a""","""ford"""
"""b""","""toyota"""
"""c""","""bmw"""


In [11]:
# Repairs: two records, both for car 'c'.
repairs = pl.DataFrame(
    dict(
        id=['c'] * 2,
        cost=[100, 200],
    )
)
repairs

id,cost
str,i64
"""c""",100
"""c""",200


In [12]:
# Inner join: the matching car is repeated once per repair and the
# repair columns are appended.
cars.join(
    repairs,
    how='inner',
    on='id',
)

id,make,cost
str,str,i64
"""c""","""bmw""",100
"""c""","""bmw""",200


In [13]:
# Semi join: keeps cars that have a repair, one row per car, with no
# columns from `repairs` added.
cars.join(
    repairs,
    how='semi',
    on='id',
)

id,make
str,str
"""c""","""bmw"""


In [14]:
# Anti join: keeps only the cars that have no repair at all.
cars.join(
    repairs,
    how='anti',
    on='id',
)

id,make
str,str
"""a""","""ford"""
"""b""","""toyota"""


In [16]:
# Trades on 2020-01-01, listed in ascending time order
# (09:01, 09:01, 09:03, 09:06).
trades = pl.DataFrame(
    dict(
        time=[datetime(2020, 1, 1, 9, minute, 0) for minute in (1, 1, 3, 6)],
        stock=['AAPL', 'BYND', 'BYND', 'C'],
        trade=[101, 299, 301, 500],
    )
)
trades

time,stock,trade
datetime[μs],str,i64
2020-01-01 09:01:00,"""AAPL""",101
2020-01-01 09:01:00,"""BYND""",299
2020-01-01 09:03:00,"""BYND""",301
2020-01-01 09:06:00,"""C""",500


In [17]:
# Quotes on 2020-01-01, listed in ascending time order
# (09:00, 09:02, 09:04, 09:06).
# NOTE(review): the last ticker 'APPL' looks like a typo for 'AAPL';
# as written it matches no stock in `trades` -- confirm intent.
quotes = pl.DataFrame(
    dict(
        time=[datetime(2020, 1, 1, 9, minute, 0) for minute in (0, 2, 4, 6)],
        stock=['AAPL', 'BYND', 'C', 'APPL'],
        quote=[100, 300, 501, 102],
    )
)
quotes

time,stock,quote
datetime[μs],str,i64
2020-01-01 09:00:00,"""AAPL""",100
2020-01-01 09:02:00,"""BYND""",300
2020-01-01 09:04:00,"""C""",501
2020-01-01 09:06:00,"""APPL""",102


In [18]:
# As-of join per stock: each trade picks up the latest quote for the
# same stock at or before the trade time; a trade with no earlier
# quote gets a null.
trades.join_asof(
    quotes,
    by='stock',
    on='time',
)

time,stock,trade,quote
datetime[μs],str,i64,i64
2020-01-01 09:01:00,"""AAPL""",101,100
2020-01-01 09:01:00,"""BYND""",299,
2020-01-01 09:03:00,"""BYND""",301,300
2020-01-01 09:06:00,"""C""",500,501


In [19]:
# Same as-of join, but a quote is only accepted when it is at most one
# minute older than the trade; older quotes are treated as missing.
trades.join_asof(
    quotes,
    by='stock',
    on='time',
    tolerance='1m',
)

time,stock,trade,quote
datetime[μs],str,i64,i64
2020-01-01 09:01:00,"""AAPL""",101,100
2020-01-01 09:01:00,"""BYND""",299,
2020-01-01 09:03:00,"""BYND""",301,300
2020-01-01 09:06:00,"""C""",500,
