Polars Dataframes Init

In [1]:
import polars as pl

df = pl.DataFrame(
    {
    'Title': ['Python Crash Course', 'Hands-On Machine Learning',
    'Data Science for Business', 'Learning SQL',
    'JavaScript: The Good Parts', 'Clean Code'],
    'UnitsSold': [250, 180, 320, 150, 200, 280],
    'Publisher': ["OReilly", "OReilly", "OReilly", "OReilly",
    "OReilly", "OReilly"],
    }
)

df

Title,UnitsSold,Publisher
str,i64,str
"""Python Crash C…",250,"""OReilly"""
"""Hands-On Machi…",180,"""OReilly"""
"""Data Science f…",320,"""OReilly"""
"""Learning SQL""",150,"""OReilly"""
"""JavaScript: Th…",200,"""OReilly"""
"""Clean Code""",280,"""OReilly"""


Queries using built in functions in Polars

In [5]:
# Sort
top_selling_books = df.sort(by="UnitsSold", descending=True)

top_books_data = top_selling_books.select(["Title", "UnitsSold"]).limit(5).to_pandas()
print("Top Selling OReilly Books")
print(top_books_data)


Top Selling O'Reilly Books
                        Title  UnitsSold
0   Data Science for Business        320
1                  Clean Code        280
2         Python Crash Course        250
3  JavaScript: The Good Parts        200
4   Hands-On Machine Learning        180


Connect with duckdb with polars

In [6]:
import duckdb

con = duckdb.connect()

# Register to duckdb the polars db
con.register('books', df)

result = con.execute("SELECT Title, UnitsSold FROM books WHERE Publisher = 'OReilly'").fetchall()

# Convert the result to Polars DF
result_df = pl.DataFrame(result)
# result_df.select(["Title", "UnitsSold"])      
print(result_df)              

shape: (6, 2)
┌────────────────────────────┬──────────┐
│ column_0                   ┆ column_1 │
│ ---                        ┆ ---      │
│ str                        ┆ i64      │
╞════════════════════════════╪══════════╡
│ Python Crash Course        ┆ 250      │
│ Hands-On Machine Learning  ┆ 180      │
│ Data Science for Business  ┆ 320      │
│ Learning SQL               ┆ 150      │
│ JavaScript: The Good Parts ┆ 200      │
│ Clean Code                 ┆ 280      │
└────────────────────────────┴──────────┘


Creating SQL Context for data type checking

In [17]:
sql = pl.SQLContext()
sql.register('df', df)

result_df = sql.execute(
    """
        select * 
        from df
        where Title = 'Python for Data Analysis'
    """
).collect()

result_df

Title,UnitsSold,Publisher
str,i64,str
