Polars Dataframes Init

In [2]:
import polars as pl

df = pl.DataFrame(
    {
    'Title': ['Python Crash Course', 'Hands-On Machine Learning',
    'Data Science for Business', 'Learning SQL',
    'JavaScript: The Good Parts', 'Clean Code'],
    'UnitsSold': [250, 180, 320, 150, 200, 280],
    'Publisher': ["OReilly", "OReilly", "OReilly", "OReilly",
    "OReilly", "OReilly"],
    }
)

df

Title,UnitsSold,Publisher
str,i64,str
"""Python Crash C…",250,"""O'Reilly"""
"""Hands-On Machi…",180,"""O'Reilly"""
"""Data Science f…",320,"""O'Reilly"""
"""Learning SQL""",150,"""O'Reilly"""
"""JavaScript: Th…",200,"""O'Reilly"""
"""Clean Code""",280,"""O'Reilly"""


Queries using built in functions in Polars

In [5]:
# Sort
top_selling_books = df.sort(by="UnitsSold", descending=True)

top_books_data = top_selling_books.select(["Title", "UnitsSold"]).limit(5).to_pandas()
print("Top Selling OReilly Books")
print(top_books_data)


Top Selling O'Reilly Books
                        Title  UnitsSold
0   Data Science for Business        320
1                  Clean Code        280
2         Python Crash Course        250
3  JavaScript: The Good Parts        200
4   Hands-On Machine Learning        180


Connect with duckdb with polars

In [15]:
import duckdb

con = duckdb.connect()

# Register to duckdb the polars db
con.register('books', df)

result = con.execute("SELECT Title, UnitsSold FROM books WHERE Publisher = 'OReilly'")

# Convert the result to Polars DF
result_df = pl.DataFrame(result)
result_df.select(["Title", "UnitsSold"])      
print(result_df)              

TypeError: DataFrame constructor called with unsupported type 'DuckDBPyConnection' for the `data` parameter

Creating SQL Context for data type checking

In [17]:
sql = pl.SQLContext()
sql.register('df', df)

result_df = sql.execute(
    """
        select * 
        from df
        where Title = 'Python for Data Analysis'
    """
).collect()

result_df

Title,UnitsSold,Publisher
str,i64,str
