In [240]:
import random
from datetime import datetime, timedelta

# Build a synthetic sales dataset as a list of row dictionaries.
#
# All random values come from one dedicated generator seeded with SEED, so
# re-running this cell draws the same offsets, categories and numbers every
# time. The calendar dates still move with the current day because they are
# anchored to datetime.now().

# Seed for the pseudo-random generator; change it to obtain a different dataset
SEED = 42
rng = random.Random(SEED)

# Define the number of rows
num_rows = 1000

# Generate dates within the last 365 days (random offset of 0..365 days, both ends included)
start_date = datetime.now() - timedelta(days=365)
date_data = [
    (start_date + timedelta(days=rng.randint(0, 365))).strftime('%Y-%m-%d')
    for _ in range(num_rows)
]

# Generate categorical data for 'Category' and 'Status' columns
categories = ["A", "B", "C"]
status_types = ["Active", "Inactive", "Pending"]
category_data = [rng.choice(categories) for _ in range(num_rows)]
status_data = [rng.choice(status_types) for _ in range(num_rows)]

# Generate numerical data for 'Sales', 'Profit', 'Quantity' and 'Discount' columns
sales_data = [round(rng.uniform(1000, 5000), 2) for _ in range(num_rows)]
profit_data = [round(rng.uniform(100, 500), 2) for _ in range(num_rows)]
quantity_data = [rng.randint(1, 50) for _ in range(num_rows)]
discount_data = [round(rng.uniform(0, 0.5), 2) for _ in range(num_rows)]  # float between 0 and 0.5

# Combine the per-column lists into one dictionary per row for easy transformation
dummy_data = [
    {
        "Date": date,
        "Category": category,
        "Status": status,
        "Sales": sales,
        "Profit": profit,
        "Quantity": quantity,
        "Discount": discount,
    }
    for date, category, status, sales, profit, quantity, discount in zip(
        date_data,
        category_data,
        status_data,
        sales_data,
        profit_data,
        quantity_data,
        discount_data,
    )
]

In [241]:
import polars as pl

In [242]:
# Materialise the generated rows as an eager DataFrame. The rows already live
# in memory, so wrapping them in a LazyFrame only to collect it straight away
# added nothing.
data = pl.DataFrame(dummy_data)

# Write the frame as zstd-compressed Parquet under ./data/sales.parquet,
# partitioned by the 'Category' and 'Status' columns.
data.write_parquet('./data/sales.parquet', compression="zstd", partition_by=['Category', 'Status'])

In [243]:
# Lazily scan the Parquet dataset, keep only the rows whose Category is 'B'
# and whose Status is 'Active', then collect the result for display.
(
    pl.scan_parquet('./data/sales.parquet')
    .filter(
        (pl.col('Category') == 'B')
        & (pl.col('Status') == 'Active')
    )
    .collect()
)

Date,Category,Status,Sales,Profit,Quantity,Discount
str,str,str,f64,f64,i64,f64
"""2024-05-21""","""B""","""Active""",1882.69,403.03,45,0.19
"""2024-04-06""","""B""","""Active""",3352.01,497.75,33,0.24
"""2024-10-09""","""B""","""Active""",4625.65,215.15,25,0.41
"""2024-11-04""","""B""","""Active""",4492.44,448.11,8,0.22
"""2024-08-25""","""B""","""Active""",4365.51,243.02,45,0.02
…,…,…,…,…,…,…
"""2023-12-28""","""B""","""Active""",2659.93,271.01,17,0.29
"""2023-12-26""","""B""","""Active""",4128.87,465.73,19,0.27
"""2024-10-29""","""B""","""Active""",2161.59,121.32,11,0.44
"""2024-01-27""","""B""","""Active""",4664.01,211.96,19,0.37


In [244]:
# Eagerly read the whole Parquet dataset back and display it
pl.read_parquet(source='./data/sales.parquet')

Date,Category,Status,Sales,Profit,Quantity,Discount
str,str,str,f64,f64,i64,f64
"""2024-05-09""","""A""","""Active""",1930.22,363.22,30,0.33
"""2024-01-14""","""A""","""Active""",2353.09,424.96,39,0.01
"""2024-10-30""","""A""","""Active""",4081.34,438.57,7,0.19
"""2024-08-12""","""A""","""Active""",4779.1,492.22,13,0.43
"""2024-10-12""","""A""","""Active""",2606.59,336.64,17,0.32
…,…,…,…,…,…,…
"""2024-07-15""","""C""","""Pending""",4112.51,467.37,20,0.46
"""2024-09-28""","""C""","""Pending""",3147.07,486.16,41,0.31
"""2023-12-26""","""C""","""Pending""",4187.74,224.68,2,0.08
"""2024-05-24""","""C""","""Pending""",2998.61,328.43,15,0.22


In [245]:
# Lazily scan the Parquet dataset so it can be queried through SQL
sqll = pl.scan_parquet('./data/sales.parquet')

# Create the SQL context with the lazy frame registered as table 'df'
sql_context = pl.SQLContext(frames={"df": sqll})

# Run the query against table 'df' (no filter is applied, only a 10-row limit)
filtered_data = sql_context.execute('''
    SELECT * FROM df limit 10
''')

# Collect the query result and print it
print(filtered_data.collect(streaming=True))

shape: (10, 7)
┌────────────┬──────────┬──────────┬──────────┬────────┬─────────┬────────┐
│ Date       ┆ Quantity ┆ Discount ┆ Category ┆ Status ┆ Sales   ┆ Profit │
│ ---        ┆ ---      ┆ ---      ┆ ---      ┆ ---    ┆ ---     ┆ ---    │
│ str        ┆ i64      ┆ f64      ┆ str      ┆ str    ┆ f64     ┆ f64    │
╞════════════╪══════════╪══════════╪══════════╪════════╪═════════╪════════╡
│ 2024-05-09 ┆ 30       ┆ 0.33     ┆ A        ┆ Active ┆ 1930.22 ┆ 363.22 │
│ 2024-01-14 ┆ 39       ┆ 0.01     ┆ A        ┆ Active ┆ 2353.09 ┆ 424.96 │
│ 2024-10-30 ┆ 7        ┆ 0.19     ┆ A        ┆ Active ┆ 4081.34 ┆ 438.57 │
│ 2024-08-12 ┆ 13       ┆ 0.43     ┆ A        ┆ Active ┆ 4779.1  ┆ 492.22 │
│ 2024-10-12 ┆ 17       ┆ 0.32     ┆ A        ┆ Active ┆ 2606.59 ┆ 336.64 │
│ 2023-12-06 ┆ 17       ┆ 0.41     ┆ A        ┆ Active ┆ 1292.88 ┆ 122.67 │
│ 2024-08-14 ┆ 25       ┆ 0.01     ┆ A        ┆ Active ┆ 3380.64 ┆ 185.05 │
│ 2024-07-20 ┆ 8        ┆ 0.09     ┆ A        ┆ Active ┆ 3119.44 ┆ 428.99

In [246]:
import requests


# SQL statement sent to the local API's /get endpoint
quer = 'SELECT id FROM public.main'

# Fetch from database. The statement goes through `params` rather than being
# pasted into the URL, so requests URL-encodes it and characters such as
# '&', '#', '+' or '=' inside the SQL cannot corrupt the query string.
response = requests.get("http://localhost:8000/get", params={"queries": quer})

# Decode the JSON body; as the last expression it is displayed as the cell output
response.json()

{'status': 'success',
 'timestamp': '2024-11-11T13:23:11.386662',
 'data': [{'id': 1}, {'id': 2}, {'id': 3}],
 'total_records': 3}

In [247]:
# Call the /pqt endpoint without a 'queries' parameter and print the decoded JSON body.
# This will use the default 'SELECT * FROM df'
pqt = requests.get(url="http://localhost:8000/pqt")
print(pqt.json())


{'status': 'success', 'timestamp': '2024-11-11T13:23:13.449232', 'data': [{'Date': '2024-05-09', 'Category': 'A', 'Status': 'Active', 'Sales': 1930.22, 'Profit': 363.22, 'Quantity': 30, 'Discount': 0.33}, {'Date': '2024-01-14', 'Category': 'A', 'Status': 'Active', 'Sales': 2353.09, 'Profit': 424.96, 'Quantity': 39, 'Discount': 0.01}, {'Date': '2024-10-30', 'Category': 'A', 'Status': 'Active', 'Sales': 4081.34, 'Profit': 438.57, 'Quantity': 7, 'Discount': 0.19}, {'Date': '2024-08-12', 'Category': 'A', 'Status': 'Active', 'Sales': 4779.1, 'Profit': 492.22, 'Quantity': 13, 'Discount': 0.43}, {'Date': '2024-10-12', 'Category': 'A', 'Status': 'Active', 'Sales': 2606.59, 'Profit': 336.64, 'Quantity': 17, 'Discount': 0.32}, {'Date': '2023-12-06', 'Category': 'A', 'Status': 'Active', 'Sales': 1292.88, 'Profit': 122.67, 'Quantity': 17, 'Discount': 0.41}, {'Date': '2024-08-14', 'Category': 'A', 'Status': 'Active', 'Sales': 3380.64, 'Profit': 185.05, 'Quantity': 25, 'Discount': 0.01}, {'Date': '2

In [248]:
# Request /pqt again with no 'queries' parameter.
# This will use the default 'SELECT * FROM df'
pqt = requests.get(url="http://localhost:8000/pqt")

# Keep the decoded JSON payload in `dats`, then print it
dats = pqt.json()
print(dats)

{'status': 'success', 'timestamp': '2024-11-11T13:23:15.494436', 'data': [{'Date': '2024-05-09', 'Category': 'A', 'Status': 'Active', 'Sales': 1930.22, 'Profit': 363.22, 'Quantity': 30, 'Discount': 0.33}, {'Date': '2024-01-14', 'Category': 'A', 'Status': 'Active', 'Sales': 2353.09, 'Profit': 424.96, 'Quantity': 39, 'Discount': 0.01}, {'Date': '2024-10-30', 'Category': 'A', 'Status': 'Active', 'Sales': 4081.34, 'Profit': 438.57, 'Quantity': 7, 'Discount': 0.19}, {'Date': '2024-08-12', 'Category': 'A', 'Status': 'Active', 'Sales': 4779.1, 'Profit': 492.22, 'Quantity': 13, 'Discount': 0.43}, {'Date': '2024-10-12', 'Category': 'A', 'Status': 'Active', 'Sales': 2606.59, 'Profit': 336.64, 'Quantity': 17, 'Discount': 0.32}, {'Date': '2023-12-06', 'Category': 'A', 'Status': 'Active', 'Sales': 1292.88, 'Profit': 122.67, 'Quantity': 17, 'Discount': 0.41}, {'Date': '2024-08-14', 'Category': 'A', 'Status': 'Active', 'Sales': 3380.64, 'Profit': 185.05, 'Quantity': 25, 'Discount': 0.01}, {'Date': '2

In [249]:
# Build a DataFrame from the row dictionaries stored under the payload's 'data' key
pl.DataFrame(data=dats['data'])

Date,Category,Status,Sales,Profit,Quantity,Discount
str,str,str,f64,f64,i64,f64
"""2024-05-09""","""A""","""Active""",1930.22,363.22,30,0.33
"""2024-01-14""","""A""","""Active""",2353.09,424.96,39,0.01
"""2024-10-30""","""A""","""Active""",4081.34,438.57,7,0.19
"""2024-08-12""","""A""","""Active""",4779.1,492.22,13,0.43
"""2024-10-12""","""A""","""Active""",2606.59,336.64,17,0.32
…,…,…,…,…,…,…
"""2024-07-15""","""C""","""Pending""",4112.51,467.37,20,0.46
"""2024-09-28""","""C""","""Pending""",3147.07,486.16,41,0.31
"""2023-12-26""","""C""","""Pending""",4187.74,224.68,2,0.08
"""2024-05-24""","""C""","""Pending""",2998.61,328.43,15,0.22


In [250]:
# Send an explicit SQL statement to the /pqt endpoint as the 'queries' parameter
quer1 = "select * from df"
pqt = requests.get("http://localhost:8000/pqt", params={"queries": quer1})

print("Response text:", pqt.text)  # Check the response content

# Compare only the media type: the Content-Type header may carry parameters
# (e.g. "application/json; charset=utf-8") or be missing altogether, and an
# exact string comparison would then misreport a valid JSON response.
content_type = pqt.headers.get("Content-Type", "")
if content_type.split(";")[0].strip().lower() == "application/json":
    print("Response JSON:", pqt.json())
else:
    print("Not a JSON response")


Response text: {"status":"success","timestamp":"2024-11-11T13:23:17.594326","data":[{"Date":"2024-05-09","Category":"A","Status":"Active","Sales":1930.22,"Profit":363.22,"Quantity":30,"Discount":0.33},{"Date":"2024-01-14","Category":"A","Status":"Active","Sales":2353.09,"Profit":424.96,"Quantity":39,"Discount":0.01},{"Date":"2024-10-30","Category":"A","Status":"Active","Sales":4081.34,"Profit":438.57,"Quantity":7,"Discount":0.19},{"Date":"2024-08-12","Category":"A","Status":"Active","Sales":4779.1,"Profit":492.22,"Quantity":13,"Discount":0.43},{"Date":"2024-10-12","Category":"A","Status":"Active","Sales":2606.59,"Profit":336.64,"Quantity":17,"Discount":0.32},{"Date":"2023-12-06","Category":"A","Status":"Active","Sales":1292.88,"Profit":122.67,"Quantity":17,"Discount":0.41},{"Date":"2024-08-14","Category":"A","Status":"Active","Sales":3380.64,"Profit":185.05,"Quantity":25,"Discount":0.01},{"Date":"2024-07-20","Category":"A","Status":"Active","Sales":3119.44,"Profit":428.99,"Quantity":8,